Index: source/data/translit/ro_ro_FONIPA.txt |
diff --git a/source/data/translit/ro_ro_FONIPA.txt b/source/data/translit/ro_ro_FONIPA.txt |
index 0f1258c1629b105a442469dc5783a96ccaedac6e..f96589640f3b10eb62a009c1c56ffb43a5834f21 100644 |
--- a/source/data/translit/ro_ro_FONIPA.txt |
+++ b/source/data/translit/ro_ro_FONIPA.txt |
@@ -1,15 +1,23 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
# File: ro_ro_FONIPA.txt |
-# Generated from CLDR |
+# Generated from CLDR |
+# |
+ |
+# Romanian orthography to phonemic transcription. |
+# https://en.wikipedia.org/wiki/Romanian_phonology |
# |
+# TODO: Currently this transform does not palatalize consonants. |
+$VowelEI = [e i î]; # the vowel letters e, i, î |
+$VowelAOU = [a â ă o u]; # the vowel letters a, â, ă, o, u |
+$Vowel = [$VowelEI $VowelAOU]; # union of the two sets above |
+$Boundary = [^[:L:][:M:][:N:]]; # any character that is not a letter, mark, or number |
:: NFC () ; |
:: Lower () ; |
+# Special cases. |
eoai → eo\u032Faj ; # eg. leoaică → /leo\u032Fajkə/, not /le\u032Fo\u032Faikə/ |
+# Triphthongs. |
eai → e\u032Faj ; |
eau → e\u032Faw ; |
eoa → e\u032Fo\u032Fa ; |
@@ -18,7 +26,7 @@ ia\-i → jaj ; |
iau → jaw ; |
iei → jej ; |
ieu → jew ; |
-[\uffff] { eu → jew ; |
+$Boundary {eu} → jew ; # "eu" immediately after a $Boundary character becomes /jew/ |
ioa → jo\u032Fa ; |
ioi → joj ; |
i\-oi → joj ; |
@@ -27,10 +35,11 @@ oai → o\u032Faj ; |
uai → waj ; |
uau → waw ; |
uăi → wəj ; |
+# Diphthongs. |
ai → aj ; |
âi → ɨj ; |
ăi → əj ; |
-au } r → au ; |
+au} r → au ; # "au" followed by r stays /au/; the next rule maps every other "au" to /aw/ |
au → aw ; |
âu → ɨw ; |
ău → əw ; |
@@ -45,7 +54,7 @@ i\-a → ja ; |
ie → je ; |
ii → ij ; |
io → jo ; |
-iu } [aâăeiîou$] → iw ; |
+iu} [$Vowel $Boundary] → iw ; # "iu" before a vowel letter or a $Boundary character becomes /iw/; the next rule maps every other "iu" to /ju/ |
iu → ju ; |
oa → o\u032Fa ; |
oi → oj ; |
@@ -61,13 +70,13 @@ a → a ; |
ă → ə ; |
b → b ; |
ch → k ; |
-c } [ei] → t \u0361 ʃ ; |
+{c} [ei] → t\u0361ʃ ; # c followed by e or i becomes t + U+0361 + ʃ; "ch" was already consumed by the rule above |
c → k ; |
d → d ; |
e → e ; |
f → f ; |
gh → ɡ ; |
-g } [ei] → d \u0361 ʒ ; |
+{g} [ei] → d\u0361ʒ ; # g followed by e or i becomes d + U+0361 + ʒ; "gh" was already consumed by the rule above |
g → ɡ ; |
h → h ; |
i → i ; |
@@ -76,14 +85,42 @@ j → ʒ ; |
k → k ; |
l → l ; |
m → m ; |
+ng → ŋ ; |
n → n ; |
o → o ; |
p → p ; |
+q → k ; |
r → r ; |
s → s ; |
ş → ʃ ; |
+ș → ʃ ; |
t → t ; |
-ţ → t \u0361 s ; |
+ţ → t\u0361s ; |
+ț → t\u0361s ; |
u → u ; |
v → v ; |
+x → ks ; |
+y → i ; |
z → z ; |
+[:P:]+ → ' '; # each run of punctuation characters becomes a single space |
+# Romanian does not have any gemination. |
+# https://en.wikipedia.org/wiki/Gemination#Latin_and_Romance_languages |
+::null; # pass boundary: the degemination rules below run on the IPA output of the rules above |
+pp+ → p; |
+bb+ → b; |
+tt+ → t; |
+dd+ → d; |
+kk+ → k; |
+ɡɡ+ → ɡ; |
+ff+ → f; |
+vv+ → v; |
+hh+ → h; |
+ss+ → s; |
+zz+ → z; |
+ʃʃ+ → ʃ; |
+ʒʒ+ → ʒ; |
+rr+ → r; |
+ll+ → l; |
+jj+ → j; |
+ww+ → w; |
+ |