| Index: source/data/translit/Latin_ASCII.txt
|
| diff --git a/source/data/translit/Latin_ASCII.txt b/source/data/translit/Latin_ASCII.txt
|
| index 38f870886a626d06fa2fcd3cb3649a1aed932618..c111e80a7348300bafb0383e877eb3d8ed588e61 100644
|
| --- a/source/data/translit/Latin_ASCII.txt
|
| +++ b/source/data/translit/Latin_ASCII.txt
|
| @@ -1,16 +1,29 @@
|
| -# ***************************************************************************
|
| -# *
|
| -# * Copyright (C) 2004-2015, International Business Machines
|
| -# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
|
| -# *
|
| -# ***************************************************************************
|
| +# © 2016 and later: Unicode, Inc. and others.
|
| +# License & terms of use: http://www.unicode.org/copyright.html#License
|
| +#
|
| # File: Latin_ASCII.txt
|
| -# Generated from CLDR
|
| +# Generated from CLDR
|
| +#
|
| +
|
| +# This handles only Latin, Common, Inherited, and IDEOGRAPHIC NUMBER ZERO (Han).
|
| #
|
| :: [[:Latin:][:Common:][:Inherited:][〇]] ;
|
| +#
|
| +# We do not apply NFKD, because it would also convert things like superscripts
|
| +# and subscripts, which we want to leave unchanged. Instead, the individual
|
| +# transforms below include an appropriate subset of the NFKD ones.
|
| +# Here we remove accents from Latin characters. We then recompose to permit rules
|
| +# such as mapping NOT EQUAL TO to an ASCII equivalent (e.g. "!=") if we choose to.
|
| +#
|
| :: NFD() ;
|
| [:Latin:] { [:Mn:]+ → ; # maps to nothing; remove all Mn following Latin letter
|
| :: NFC() ;
|
| +#
|
| +# Some of the following mappings (noted) are from CLDR ‹character-fallback› data.
|
| +# (Note, here "‹character-fallback›" uses U+2039/U+203A to avoid XML issues)
|
| +#
|
| +# Latin letters and IPA
|
| +#
|
| Æ → AE ; # 00C6;LATIN CAPITAL LETTER AE (from ‹character-fallback›)
|
| Ð → D ; # 00D0;LATIN CAPITAL LETTER ETH
|
| Ø → O ; # 00D8;LATIN CAPITAL LETTER O WITH STROKE
|
| @@ -222,6 +235,7 @@
|
| ỽ → v ; # 1EFD;LATIN SMALL LETTER MIDDLE-WELSH V
|
| Ỿ → Y ; # 1EFE;LATIN CAPITAL LETTER Y WITH LOOP
|
| ỿ → y ; # 1EFF;LATIN SMALL LETTER Y WITH LOOP
|
| +# Presentation forms
|
| ﬀ → ff ; # FB00;LATIN SMALL LIGATURE FF (compat)
|
| ﬁ → fi ; # FB01;LATIN SMALL LIGATURE FI (compat)
|
| ﬂ → fl ; # FB02;LATIN SMALL LIGATURE FL (compat)
|
| @@ -229,6 +243,7 @@
|
| ﬄ → ffl ; # FB04;LATIN SMALL LIGATURE FFL (compat)
|
| ﬅ → st ; # FB05;LATIN SMALL LIGATURE LONG S T (compat)
|
| ﬆ → st ; # FB06;LATIN SMALL LIGATURE ST (compat)
|
| +# Fullwidth
|
| Ａ → A ; # FF21;FULLWIDTH LATIN CAPITAL LETTER A (compat)
|
| Ｂ → B ; # FF22;FULLWIDTH LATIN CAPITAL LETTER B (compat)
|
| Ｃ → C ; # FF23;FULLWIDTH LATIN CAPITAL LETTER C (compat)
|
| @@ -281,6 +296,9 @@
|
| ｘ → x ; # FF58;FULLWIDTH LATIN SMALL LETTER X (compat)
|
| ｙ → y ; # FF59;FULLWIDTH LATIN SMALL LETTER Y (compat)
|
| ｚ → z ; # FF5A;FULLWIDTH LATIN SMALL LETTER Z (compat)
|
| +#
|
| +# Currency and letterlike
|
| +#
|
| © → '(C)' ; # 00A9;COPYRIGHT SIGN (from ‹character-fallback›)
|
| ® → '(R)' ; # 00AE;REGISTERED SIGN (from ‹character-fallback›)
|
| ₠ → CE ; # 20A0;EURO-CURRENCY SIGN (from ‹character-fallback›)
|
| @@ -329,6 +347,9 @@
|
| ⅇ → e ; # 2147;DOUBLE-STRUCK ITALIC SMALL E (compat)
|
| ⅈ → i ; # 2148;DOUBLE-STRUCK ITALIC SMALL I (compat)
|
| ⅉ → j ; # 2149;DOUBLE-STRUCK ITALIC SMALL J (compat)
|
| +#
|
| +# Squared Latin
|
| +#
|
| ㍱ → hPa ; # 3371;SQUARE HPA (compat)
|
| ㍲ → da ; # 3372;SQUARE DA (compat)
|
| ㍳ → AU ; # 3373;SQUARE AU (compat)
|
| @@ -410,6 +431,9 @@
|
| ㏝ → Wb ; # 33DD;SQUARE WB (compat)
|
| ㏞ → 'V/m' ; # 33DE;SQUARE V OVER M (compat) (from ‹character-fallback›)
|
| ㏟ → 'A/m' ; # 33DF;SQUARE A OVER M (compat) (from ‹character-fallback›)
|
| +#
|
| +# Enclosed Latin
|
| +#
|
| ⒜ → '(a)' ; # 249C;PARENTHESIZED LATIN SMALL LETTER A (compat)
|
| ⒝ → '(b)' ; # 249D;PARENTHESIZED LATIN SMALL LETTER B (compat)
|
| ⒞ → '(c)' ; # 249E;PARENTHESIZED LATIN SMALL LETTER C (compat)
|
| @@ -436,6 +460,9 @@
|
| ⒳ → '(x)' ; # 24B3;PARENTHESIZED LATIN SMALL LETTER X (compat)
|
| ⒴ → '(y)' ; # 24B4;PARENTHESIZED LATIN SMALL LETTER Y (compat)
|
| ⒵ → '(z)' ; # 24B5;PARENTHESIZED LATIN SMALL LETTER Z (compat)
|
| +#
|
| +# Roman numerals
|
| +#
|
| Ⅰ → I ; # 2160;ROMAN NUMERAL ONE (compat)
|
| Ⅱ → II ; # 2161;ROMAN NUMERAL TWO (compat)
|
| Ⅲ → III ; # 2162;ROMAN NUMERAL THREE (compat)
|
| @@ -468,6 +495,9 @@
|
| ⅽ → c ; # 217D;SMALL ROMAN NUMERAL ONE HUNDRED (compat)
|
| ⅾ → d ; # 217E;SMALL ROMAN NUMERAL FIVE HUNDRED (compat)
|
| ⅿ → m ; # 217F;SMALL ROMAN NUMERAL ONE THOUSAND (compat)
|
| +#
|
| +# Fractions
|
| +#
|
| ¼ → ' 1/4' ; # 00BC;VULGAR FRACTION ONE QUARTER (from ‹character-fallback›)
|
| ½ → ' 1/2' ; # 00BD;VULGAR FRACTION ONE HALF (from ‹character-fallback›)
|
| ¾ → ' 3/4' ; # 00BE;VULGAR FRACTION THREE QUARTERS (from ‹character-fallback›)
|
| @@ -484,6 +514,9 @@
|
| ⅝ → ' 5/8' ; # 215D;VULGAR FRACTION FIVE EIGHTHS (from ‹character-fallback›)
|
| ⅞ → ' 7/8' ; # 215E;VULGAR FRACTION SEVEN EIGHTHS (from ‹character-fallback›)
|
| ⅟ → ' 1/' ; # 215F;FRACTION NUMERATOR ONE (from ‹character-fallback›)
|
| +#
|
| +# Enclosed numeric
|
| +#
|
| ⑴ → '(1)' ; # 2474;PARENTHESIZED DIGIT ONE (compat)
|
| ⑵ → '(2)' ; # 2475;PARENTHESIZED DIGIT TWO (compat)
|
| ⑶ → '(3)' ; # 2476;PARENTHESIZED DIGIT THREE (compat)
|
| @@ -524,6 +557,9 @@
|
| ⒙ → '18.' ; # 2499;NUMBER EIGHTEEN FULL STOP (compat)
|
| ⒚ → '19.' ; # 249A;NUMBER NINETEEN FULL STOP (compat)
|
| ⒛ → '20.' ; # 249B;NUMBER TWENTY FULL STOP (compat)
|
| +#
|
| +# Other numeric (ideographic and fullwidth)
|
| +#
|
| 〇 → 0 ; # 3007;IDEOGRAPHIC NUMBER ZERO
|
| ０ → 0 ; # FF10;FULLWIDTH DIGIT ZERO (compat)
|
| １ → 1 ; # FF11;FULLWIDTH DIGIT ONE (compat)
|
| @@ -535,6 +571,9 @@
|
| ７ → 7 ; # FF17;FULLWIDTH DIGIT SEVEN (compat)
|
| ８ → 8 ; # FF18;FULLWIDTH DIGIT EIGHT (compat)
|
| ９ → 9 ; # FF19;FULLWIDTH DIGIT NINE (compat)
|
| +#
|
| +# Spaces
|
| +#
|
| \u00A0 → ' ' ; # 00A0;NO-BREAK SPACE
|
| \u2002 → ' ' ; # 2002;EN SPACE (compat)
|
| \u2003 → ' ' ; # 2003;EM SPACE (compat)
|
| @@ -547,6 +586,16 @@
|
| \u200A → ' ' ; # 200A;HAIR SPACE (compat)
|
| \u205F → ' ' ; # 205F;MEDIUM MATHEMATICAL SPACE (compat)
|
| \u3000 → ' ' ; # 3000;IDEOGRAPHIC SPACE (from ‹character-fallback›)
|
| +#
|
| +# Quotes, apostrophes
|
| +#
|
| +ʹ → \' ; # 02B9;MODIFIER LETTER PRIME
|
| +ʺ → \" ; # 02BA;MODIFIER LETTER DOUBLE PRIME
|
| +ʻ → \' ; # 02BB;MODIFIER LETTER TURNED COMMA
|
| +ʼ → \' ; # 02BC;MODIFIER LETTER APOSTROPHE
|
| +ʽ → \' ; # 02BD;MODIFIER LETTER REVERSED COMMA
|
| +ˈ → \' ; # 02C8;MODIFIER LETTER VERTICAL LINE
|
| +ˋ → '`' ; # 02CB;MODIFIER LETTER GRAVE ACCENT
|
| ‘ → \' ; # 2018;LEFT SINGLE QUOTATION MARK (from ‹character-fallback›)
|
| ’ → \' ; # 2019;RIGHT SINGLE QUOTATION MARK (from ‹character-fallback›)
|
| ‚ → ',' ; # 201A;SINGLE LOW-9 QUOTATION MARK (from ‹character-fallback›)
|
| @@ -565,6 +614,9 @@
|
| » → '>>' ; # 00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (from ‹character-fallback›)
|
| ‹ → '<' ; # 2039;SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
| › → '>' ; # 203A;SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
| +#
|
| +# Dashes, hyphens...
|
| +#
|
| \u00AD → '-' ; # 00AD;SOFT HYPHEN (from ‹character-fallback›)
|
| ‐ → '-' ; # 2010;HYPHEN (from ‹character-fallback›)
|
| ‑ → '-' ; # 2011;NON-BREAKING HYPHEN (from ‹character-fallback›)
|
| @@ -577,6 +629,15 @@
|
| ﹘ → '-' ; # FE58;SMALL EM DASH (compat)
|
| ﹣ → '-' ; # FE63;SMALL HYPHEN-MINUS (compat)
|
| － → '-' ; # FF0D;FULLWIDTH HYPHEN-MINUS (compat)
|
| +#
|
| +# Other misc punctuation and symbols
|
| +#
|
| +˂ → '<' ; # 02C2;MODIFIER LETTER LEFT ARROWHEAD
|
| +˃ → '>' ; # 02C3;MODIFIER LETTER RIGHT ARROWHEAD
|
| +˄ → '^' ; # 02C4;MODIFIER LETTER UP ARROWHEAD
|
| +ˆ → '^' ; # 02C6;MODIFIER LETTER CIRCUMFLEX ACCENT
|
| +ː → ':' ; # 02D0;MODIFIER LETTER TRIANGULAR COLON
|
| +˜ → '~' ; # 02DC;SMALL TILDE
|
| ‖ → '||' ; # 2016;DOUBLE VERTICAL LINE
|
| ․ → '.' ; # 2024;ONE DOT LEADER (compat)
|
| ‥ → '..' ; # 2025;TWO DOT LEADER (compat)
|
| @@ -589,6 +650,7 @@
|
| ⁈ → '?!' ; # 2048;QUESTION EXCLAMATION MARK (compat)
|
| ⁉ → '!?' ; # 2049;EXCLAMATION QUESTION MARK (compat)
|
| ⁎ → '*' ; # 204E;LOW ASTERISK
|
| +# CJK
|
| 、 → ',' ; # 3001;IDEOGRAPHIC COMMA
|
| 。 → '.' ; # 3002;IDEOGRAPHIC FULL STOP
|
| 〈 → '<' ; # 3008;LEFT ANGLE BRACKET
|
| @@ -601,6 +663,7 @@
|
| 〙 → ']' ; # 3019;RIGHT WHITE TORTOISE SHELL BRACKET
|
| 〚 → '[' ; # 301A;LEFT WHITE SQUARE BRACKET
|
| 〛 → ']' ; # 301B;RIGHT WHITE SQUARE BRACKET
|
| +# Vertical and small forms
|
| ︐ → ',' ; # FE10;PRESENTATION FORM FOR VERTICAL COMMA (compat)
|
| ︑ → ',' ; # FE11;PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA (compat)
|
| ︒ → '.' ; # FE12;PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP (compat)
|
| @@ -646,6 +709,7 @@
|
| ﹩ → '$' ; # FE69;SMALL DOLLAR SIGN (compat)
|
| ﹪ → '%' ; # FE6A;SMALL PERCENT SIGN (compat)
|
| ﹫ → '@' ; # FE6B;SMALL COMMERCIAL AT (compat)
|
| +# Fullwidth and halfwidth
|
| ！ → '!' ; # FF01;FULLWIDTH EXCLAMATION MARK (compat)
|
| ＃ → '#' ; # FF03;FULLWIDTH NUMBER SIGN (compat)
|
| ＄ → '$' ; # FF04;FULLWIDTH DOLLAR SIGN (compat)
|
| @@ -679,8 +743,13 @@
|
| ｠ → '))' ; # FF60;FULLWIDTH RIGHT WHITE PARENTHESIS (compat)(from ‹character-fallback›)
|
| ｡ → '.' ; # FF61;HALFWIDTH IDEOGRAPHIC FULL STOP (compat)
|
| ､ → ',' ; # FF64;HALFWIDTH IDEOGRAPHIC COMMA (compat)
|
| +#
|
| +# Other math operators (non-ASCII-range)
|
| +#
|
| × → '*' ; # 00D7;MULTIPLICATION SIGN
|
| ÷ → '/' ; # 00F7;DIVISION SIGN
|
| +˖ → '+' ; # 02D6;MODIFIER LETTER PLUS SIGN
|
| +˗ → '-' ; # 02D7;MODIFIER LETTER MINUS SIGN
|
| − → '-' ; # 2212;MINUS SIGN (from ‹character-fallback›)
|
| ∕ → '/' ; # 2215;DIVISION SLASH (from ‹character-fallback›)
|
| ∖ → '\' ; # 2216;SET MINUS (from ‹character-fallback›)
|
| @@ -693,3 +762,4 @@
|
| ⩴ → '::=' ; # 2A74;DOUBLE COLON EQUAL (compat)
|
| ⩵ → '==' ; # 2A75;TWO CONSECUTIVE EQUALS SIGNS (compat)
|
| ⩶ → '===' ; # 2A76;THREE CONSECUTIVE EQUALS SIGNS (compat)
|
| +
|
|
|