import type { MockStoreNode, MockStoreNodeWithPath } from "./types";

/**
 * Recursively walks a mock-store file tree and returns a copy of it in which
 * every node carries a `path` property. The input nodes are not mutated; each
 * node is shallow-copied via object spread.
 *
 * Path format:
 * - `/{prefix}/{basePath}/{name}` when `prefix` is non-empty
 * - `/{basePath}/{name}` when `prefix` is empty
 *
 * Directory paths end with a trailing `/`; all other node paths do not.
 * Empty segments (an empty `basePath` or an empty node name) are dropped, so
 * they never produce a doubled `/` in the result.
 *
 * @param {MockStoreNode[]} nodes - The file nodes without paths
 * @param {string} prefix - Leading path segment (e.g. a version). If empty, the path starts at `basePath`
 * @param {string} [basePath] - Path segment(s) placed before each node name (defaults to "ucd")
 * @returns {MockStoreNodeWithPath[]} New nodes, in the same order and shape, with `path` added
 */
export function addPathsToFileNodes(
  nodes: MockStoreNode[],
  prefix: string,
  basePath: string = "ucd",
): MockStoreNodeWithPath[] {
  return nodes.map((node) => {
    // Path of this node relative to the prefix; falsy segments are filtered
    // out before joining so an empty basePath does not yield "//name".
    const pathSegments = [basePath, node.name].filter(Boolean).join("/");
    const fullPath = prefix
      ? `/${prefix}/${pathSegments}`
      : `/${pathSegments}`;

    if (node.type === "directory") {
      // NOTE(review): the generic arguments were missing in the reviewed text
      // (`Extract;`); reconstructed from the `type === "directory"` check
      // above - confirm against the union declared in ./types.
      const dirNode = node as Extract<MockStoreNode, { type: "directory" }>;
      return {
        ...dirNode,
        path: `${fullPath}/`,
        // Children are rooted at this directory: its relative path becomes
        // their basePath, while the prefix is passed through unchanged.
        children: addPathsToFileNodes(
          dirNode.children,
          prefix,
          pathSegments,
        ),
      };
    }

    // Every member of the union that is not a directory is treated as a leaf.
    const fileNode = node as Exclude<MockStoreNode, { type: "directory" }>;
    return {
      ...fileNode,
      path: fullPath,
    };
  });
}
+ # + # Script Section Table(s) + # + # Arabic 9.2 9-3, 9-4, 9-5, 9-7, 9-8, 9-9, 9-10, 9-11, 9-13 + # Syriac 9.3 9-15, 9-16, 9-17, 9-18, 9-19 + # Mandaic 9.5 9-22, 9-23 + # Manichaean 10.5 10-4, 10-5, 10-6, 10-7 + # Psalter Pahlavi 10.6 -- + # Chorasmian 10.8 -- + # Mongolian 13.5 -- + # Phags-pa 14.4 14-7 + # Sogdian 14.10 -- + # Old Uyghur 14.11 -- + # Hanifi Rohingya 16.14 -- + # N'Ko 19.4 19-5 + # Adlam 19.9 -- + # + # Each line contains four fields, separated by a semicolon. + # + # Field 0: the code point of a character, in hexadecimal form. + # + # Field 1: gives a short schematic name for that character. + # The schematic name is descriptive of the shape, based as + # consistently as possible on a name for the skeleton and + # then the diacritic marks applied to the skeleton, if any. + # Note that this schematic name is considered a comment, + # and does not constitute a formal property value. + # + # Field 2: defines the joining type (property name: Joining_Type) + # R Right_Joining + # L Left_Joining + # D Dual_Joining + # C Join_Causing + # U Non_Joining + # T Transparent + # + # See Section 9.2, Arabic for more information on these joining types. + # Note that for cursive joining scripts which are typically rendered + # top-to-bottom, rather than right-to-left, Joining_Type=L conventionally + # refers to bottom joining, and Joining_Type=R conventionally refers + # to top joining. See Section 14.4, Phags-pa for more information on the + # interpretation of joining types in vertical layout. + # + # Field 3: defines the joining group (property name: Joining_Group) + # + # The values of the joining group are based schematically on character + # names. Where a schematic character name consists of two or more parts + # separated by spaces, the formal Joining_Group property value, as specified in + # PropertyValueAliases.txt, consists of the same name parts joined by + # underscores. 
Hence, the entry: + # + # 0629; TEH MARBUTA; R; TEH MARBUTA + # + # corresponds to [Joining_Group = Teh_Marbuta]. + # + # Note: The property value now designated [Joining_Group = Teh_Marbuta_Goal] + # used to apply to both of the following characters + # in earlier versions of the standard: + # + # U+06C2 ARABIC LETTER HEH GOAL WITH HAMZA ABOVE + # U+06C3 ARABIC LETTER TEH MARBUTA GOAL + # + # However, it currently applies only to U+06C3, and *not* to U+06C2. + # To avoid destabilizing existing Joining_Group property aliases, the + # prior Joining_Group value for U+06C3 (Hamza_On_Heh_Goal) has been + # retained as a property value alias, despite the fact that it + # no longer applies to its namesake character, U+06C2. + # See PropertyValueAliases.txt. + # + # When other cursive scripts are added to the Unicode Standard in the + # future, the joining group value of all its letters will default to + # jg=No_Joining_Group in this data file. Other, more specific + # joining group values will be defined only if an explicit proposal + # to define those values exactly has been approved by the UTC. This + # is the convention exemplified by the N'Ko, Mandaic, Mongolian, + # Phags-pa, Psalter Pahlavi, Sogdian, Old Uyghur, Chorasmian, and Adlam scripts. + # Only the Arabic, Manichaean, and Syriac scripts currently have + # explicit joining group values defined for all characters, including + # those which have only a single character in a particular Joining_Group + # class. Hanifi Rohingya has explicit Joining_Group values assigned only for + # the few characters which share a particular Joining_Group class, but + # assigns jg=No_Joining_Group to all the singletons. + # + # Note: Code points that are not explicitly listed in this file are + # either of Joining_Type T or U: + # + # - Those that are not explicitly listed and that are of General_Category Mn, Me, or Cf + # are Joining_Type=T. + # - All others not explicitly listed are Joining_Type=U. 
+ # + # For an explicit listing of all characters of Joining_Type=T, see + # the derived property file DerivedJoiningType.txt. + # For an implementation that needs to parse for the values of + # Joining_Type, it is recommended to use DerivedJoiningType.txt + # instead of ArabicShaping.txt, to avoid the separate required step of + # calculating the set for Joining_Type=T based on General_Category values. + # + # ############################################################# + + # Unicode; Schematic Name; Joining Type; Joining Group + + # Arabic Characters + + 0600; ARABIC NUMBER SIGN; U; No_Joining_Group + 0601; ARABIC SIGN SANAH; U; No_Joining_Group + 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group + 0603; ARABIC SIGN SAFHA; U; No_Joining_Group + 0604; ARABIC SIGN SAMVAT; U; No_Joining_Group + 0605; ARABIC NUMBER MARK ABOVE; U; No_Joining_Group + 0608; ARABIC RAY; U; No_Joining_Group + 060B; AFGHANI SIGN; U; No_Joining_Group + 0620; KASHMIRI YEH; D; KASHMIRI YEH + 0621; HAMZA; U; No_Joining_Group + 0622; ALEF WITH MADDA ABOVE; R; ALEF + 0623; ALEF WITH HAMZA ABOVE; R; ALEF + 0624; WAW WITH HAMZA ABOVE; R; WAW + 0625; ALEF WITH HAMZA BELOW; R; ALEF + 0626; DOTLESS YEH WITH HAMZA ABOVE; D; YEH + 0627; ALEF; R; ALEF + 0628; BEH; D; BEH + 0629; TEH MARBUTA; R; TEH MARBUTA + 062A; DOTLESS BEH WITH 2 DOTS ABOVE; D; BEH + 062B; DOTLESS BEH WITH 3 DOTS ABOVE; D; BEH + 062C; HAH WITH DOT BELOW; D; HAH + 062D; HAH; D; HAH + 062E; HAH WITH DOT ABOVE; D; HAH + 062F; DAL; R; DAL + 0630; DAL WITH DOT ABOVE; R; DAL + 0631; REH; R; REH + 0632; REH WITH DOT ABOVE; R; REH + 0633; SEEN; D; SEEN + 0634; SEEN WITH 3 DOTS ABOVE; D; SEEN + 0635; SAD; D; SAD + 0636; SAD WITH DOT ABOVE; D; SAD + 0637; TAH; D; TAH + 0638; TAH WITH DOT ABOVE; D; TAH + 0639; AIN; D; AIN + 063A; AIN WITH DOT ABOVE; D; AIN + 063B; KEHEH WITH 2 DOTS ABOVE; D; GAF + 063C; KEHEH WITH 3 DOTS BELOW; D; GAF + 063D; FARSI YEH WITH INVERTED V ABOVE; D; FARSI YEH + 063E; FARSI YEH WITH 2 DOTS ABOVE; D; FARSI YEH + 
063F; FARSI YEH WITH 3 DOTS ABOVE; D; FARSI YEH + 0640; TATWEEL; C; No_Joining_Group + 0641; FEH; D; FEH + 0642; QAF; D; QAF + 0643; KAF; D; KAF + 0644; LAM; D; LAM + 0645; MEEM; D; MEEM + 0646; NOON; D; NOON + 0647; HEH; D; HEH + 0648; WAW; R; WAW + 0649; DOTLESS YEH; D; YEH + 064A; YEH; D; YEH + 066E; DOTLESS BEH; D; BEH + 066F; DOTLESS QAF; D; QAF + 0671; ALEF WITH WASLA ABOVE; R; ALEF + 0672; ALEF WITH WAVY HAMZA ABOVE; R; ALEF + 0673; ALEF WITH WAVY HAMZA BELOW; R; ALEF + 0674; HIGH HAMZA; U; No_Joining_Group + 0675; HIGH HAMZA ALEF; R; ALEF + 0676; HIGH HAMZA WAW; R; WAW + 0677; HIGH HAMZA WAW WITH COMMA ABOVE; R; WAW + 0678; HIGH HAMZA DOTLESS YEH; D; YEH + 0679; DOTLESS BEH WITH TAH ABOVE; D; BEH + 067A; DOTLESS BEH WITH VERTICAL 2 DOTS ABOVE; D; BEH + 067B; DOTLESS BEH WITH VERTICAL 2 DOTS BELOW; D; BEH + 067C; DOTLESS BEH WITH ATTACHED RING BELOW AND 2 DOTS ABOVE; D; BEH + 067D; DOTLESS BEH WITH INVERTED 3 DOTS ABOVE; D; BEH + 067E; DOTLESS BEH WITH 3 DOTS BELOW; D; BEH + 067F; DOTLESS BEH WITH 4 DOTS ABOVE; D; BEH + 0680; DOTLESS BEH WITH 4 DOTS BELOW; D; BEH + 0681; HAH WITH HAMZA ABOVE; D; HAH + 0682; HAH WITH VERTICAL 2 DOTS ABOVE; D; HAH + 0683; HAH WITH 2 DOTS BELOW; D; HAH + 0684; HAH WITH VERTICAL 2 DOTS BELOW; D; HAH + 0685; HAH WITH 3 DOTS ABOVE; D; HAH + 0686; HAH WITH 3 DOTS BELOW; D; HAH + 0687; HAH WITH 4 DOTS BELOW; D; HAH + 0688; DAL WITH TAH ABOVE; R; DAL + 0689; DAL WITH ATTACHED RING BELOW; R; DAL + 068A; DAL WITH DOT BELOW; R; DAL + 068B; DAL WITH DOT BELOW AND TAH ABOVE; R; DAL + 068C; DAL WITH 2 DOTS ABOVE; R; DAL + 068D; DAL WITH 2 DOTS BELOW; R; DAL + 068E; DAL WITH 3 DOTS ABOVE; R; DAL + 068F; DAL WITH INVERTED 3 DOTS ABOVE; R; DAL + 0690; DAL WITH 4 DOTS ABOVE; R; DAL + 0691; REH WITH TAH ABOVE; R; REH + 0692; REH WITH V ABOVE; R; REH + 0693; REH WITH ATTACHED RING BELOW; R; REH + 0694; REH WITH DOT BELOW; R; REH + 0695; REH WITH V BELOW; R; REH + 0696; REH WITH DOT BELOW AND DOT WITHIN; R; REH + 0697; REH WITH 2 DOTS ABOVE; R; 
REH + 0698; REH WITH 3 DOTS ABOVE; R; REH + 0699; REH WITH 4 DOTS ABOVE; R; REH + 069A; SEEN WITH DOT BELOW AND DOT ABOVE; D; SEEN + 069B; SEEN WITH 3 DOTS BELOW; D; SEEN + 069C; SEEN WITH 3 DOTS BELOW AND 3 DOTS ABOVE; D; SEEN + 069D; SAD WITH 2 DOTS BELOW; D; SAD + 069E; SAD WITH 3 DOTS ABOVE; D; SAD + 069F; TAH WITH 3 DOTS ABOVE; D; TAH + 06A0; AIN WITH 3 DOTS ABOVE; D; AIN + 06A1; DOTLESS FEH; D; FEH + 06A2; DOTLESS FEH WITH DOT BELOW; D; FEH + 06A3; FEH WITH DOT BELOW; D; FEH + 06A4; DOTLESS FEH WITH 3 DOTS ABOVE; D; FEH + 06A5; DOTLESS FEH WITH 3 DOTS BELOW; D; FEH + 06A6; DOTLESS FEH WITH 4 DOTS ABOVE; D; FEH + 06A7; DOTLESS QAF WITH DOT ABOVE; D; QAF + 06A8; DOTLESS QAF WITH 3 DOTS ABOVE; D; QAF + 06A9; KEHEH; D; GAF + 06AA; SWASH KAF; D; SWASH KAF + 06AB; KEHEH WITH ATTACHED RING BELOW; D; GAF + 06AC; KAF WITH DOT ABOVE; D; KAF + 06AD; KAF WITH 3 DOTS ABOVE; D; KAF + 06AE; KAF WITH 3 DOTS BELOW; D; KAF + 06AF; GAF; D; GAF + 06B0; GAF WITH ATTACHED RING BELOW; D; GAF + 06B1; GAF WITH 2 DOTS ABOVE; D; GAF + 06B2; GAF WITH 2 DOTS BELOW; D; GAF + 06B3; GAF WITH VERTICAL 2 DOTS BELOW; D; GAF + 06B4; GAF WITH 3 DOTS ABOVE; D; GAF + 06B5; LAM WITH V ABOVE; D; LAM + 06B6; LAM WITH DOT ABOVE; D; LAM + 06B7; LAM WITH 3 DOTS ABOVE; D; LAM + 06B8; LAM WITH 3 DOTS BELOW; D; LAM + 06B9; NOON WITH DOT BELOW; D; NOON + 06BA; DOTLESS NOON; D; NOON + 06BB; DOTLESS NOON WITH TAH ABOVE; D; NOON + 06BC; NOON WITH ATTACHED RING BELOW; D; NOON + 06BD; NYA; D; NYA + 06BE; KNOTTED HEH; D; KNOTTED HEH + 06BF; HAH WITH 3 DOTS BELOW AND DOT ABOVE; D; HAH + 06C0; DOTLESS TEH MARBUTA WITH HAMZA ABOVE; R; TEH MARBUTA + 06C1; HEH GOAL; D; HEH GOAL + 06C2; HEH GOAL WITH HAMZA ABOVE; D; HEH GOAL + 06C3; TEH MARBUTA GOAL; R; TEH MARBUTA GOAL + 06C4; WAW WITH ATTACHED RING WITHIN; R; WAW + 06C5; WAW WITH LOOP; R; WAW + 06C6; WAW WITH V ABOVE; R; WAW + 06C7; WAW WITH COMMA ABOVE; R; WAW + 06C8; WAW WITH ALEF ABOVE; R; WAW + 06C9; WAW WITH INVERTED V ABOVE; R; WAW + 06CA; WAW WITH 2 DOTS 
ABOVE; R; WAW + 06CB; WAW WITH 3 DOTS ABOVE; R; WAW + 06CC; FARSI YEH; D; FARSI YEH + 06CD; YEH WITH TAIL; R; YEH WITH TAIL + 06CE; FARSI YEH WITH V ABOVE; D; FARSI YEH + 06CF; WAW WITH DOT ABOVE; R; WAW + 06D0; DOTLESS YEH WITH VERTICAL 2 DOTS BELOW; D; YEH + 06D1; DOTLESS YEH WITH 3 DOTS BELOW; D; YEH + 06D2; YEH BARREE; R; YEH BARREE + 06D3; YEH BARREE WITH HAMZA ABOVE; R; YEH BARREE + 06D5; DOTLESS TEH MARBUTA; R; TEH MARBUTA + 06DD; ARABIC END OF AYAH; U; No_Joining_Group + 06EE; DAL WITH INVERTED V ABOVE; R; DAL + 06EF; REH WITH INVERTED V ABOVE; R; REH + 06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN + 06FB; SAD WITH DOT BELOW AND DOT ABOVE; D; SAD + 06FC; AIN WITH DOT BELOW AND DOT ABOVE; D; AIN + 06FF; KNOTTED HEH WITH INVERTED V ABOVE; D; KNOTTED HEH + + # Syriac Characters + + 070F; SYRIAC ABBREVIATION MARK; T; No_Joining_Group + 0710; ALAPH; R; ALAPH + 0712; BETH; D; BETH + 0713; GAMAL; D; GAMAL + 0714; GAMAL GARSHUNI; D; GAMAL + 0715; DALATH; R; DALATH RISH + 0716; DOTLESS DALATH RISH; R; DALATH RISH + 0717; HE; R; HE + 0718; WAW; R; SYRIAC WAW + 0719; ZAIN; R; ZAIN + 071A; HETH; D; HETH + 071B; TETH; D; TETH + 071C; TETH GARSHUNI; D; TETH + 071D; YUDH; D; YUDH + 071E; YUDH HE; R; YUDH HE + 071F; KAPH; D; KAPH + 0720; LAMADH; D; LAMADH + 0721; MIM; D; MIM + 0722; NUN; D; NUN + 0723; SEMKATH; D; SEMKATH + 0724; FINAL SEMKATH; D; FINAL SEMKATH + 0725; E; D; E + 0726; PE; D; PE + 0727; REVERSED PE; D; REVERSED PE + 0728; SADHE; R; SADHE + 0729; QAPH; D; QAPH + 072A; RISH; R; DALATH RISH + 072B; SHIN; D; SHIN + 072C; TAW; R; TAW + 072D; PERSIAN BHETH; D; BETH + 072E; PERSIAN GHAMAL; D; GAMAL + 072F; PERSIAN DHALATH; R; DALATH RISH + 074D; SOGDIAN ZHAIN; R; ZHAIN + 074E; SOGDIAN KHAPH; D; KHAPH + 074F; SOGDIAN FE; D; FE + + # Arabic Supplement Characters + + 0750; DOTLESS BEH WITH HORIZONTAL 3 DOTS BELOW; D; BEH + 0751; BEH WITH 3 DOTS ABOVE; D; BEH + 0752; DOTLESS BEH WITH INVERTED 3 DOTS BELOW; D; BEH + 0753; DOTLESS BEH WITH INVERTED 3 DOTS BELOW 
AND 2 DOTS ABOVE; D; BEH + 0754; DOTLESS BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH + 0755; DOTLESS BEH WITH INVERTED V BELOW; D; BEH + 0756; DOTLESS BEH WITH V ABOVE; D; BEH + 0757; HAH WITH 2 DOTS ABOVE; D; HAH + 0758; HAH WITH INVERTED 3 DOTS BELOW; D; HAH + 0759; DAL WITH VERTICAL 2 DOTS BELOW AND TAH ABOVE; R; DAL + 075A; DAL WITH INVERTED V BELOW; R; DAL + 075B; REH WITH BAR; R; REH + 075C; SEEN WITH 4 DOTS ABOVE; D; SEEN + 075D; AIN WITH 2 DOTS ABOVE; D; AIN + 075E; AIN WITH INVERTED 3 DOTS ABOVE; D; AIN + 075F; AIN WITH VERTICAL 2 DOTS ABOVE; D; AIN + 0760; DOTLESS FEH WITH 2 DOTS BELOW; D; FEH + 0761; DOTLESS FEH WITH INVERTED 3 DOTS BELOW; D; FEH + 0762; KEHEH WITH DOT ABOVE; D; GAF + 0763; KEHEH WITH 3 DOTS ABOVE; D; GAF + 0764; KEHEH WITH INVERTED 3 DOTS BELOW; D; GAF + 0765; MEEM WITH DOT ABOVE; D; MEEM + 0766; MEEM WITH DOT BELOW; D; MEEM + 0767; NOON WITH 2 DOTS BELOW; D; NOON + 0768; NOON WITH TAH ABOVE; D; NOON + 0769; NOON WITH V ABOVE; D; NOON + 076A; LAM WITH BAR; D; LAM + 076B; REH WITH VERTICAL 2 DOTS ABOVE; R; REH + 076C; REH WITH HAMZA ABOVE; R; REH + 076D; SEEN WITH VERTICAL 2 DOTS ABOVE; D; SEEN + 076E; HAH WITH TAH BELOW; D; HAH + 076F; HAH WITH TAH AND 2 DOTS BELOW; D; HAH + 0770; SEEN WITH 2 DOTS AND TAH ABOVE; D; SEEN + 0771; REH WITH 2 DOTS AND TAH ABOVE; R; REH + 0772; HAH WITH TAH ABOVE; D; HAH + 0773; ALEF WITH DIGIT TWO ABOVE; R; ALEF + 0774; ALEF WITH DIGIT THREE ABOVE; R; ALEF + 0775; FARSI YEH WITH DIGIT TWO ABOVE; D; FARSI YEH + 0776; FARSI YEH WITH DIGIT THREE ABOVE; D; FARSI YEH + 0777; DOTLESS YEH WITH DIGIT FOUR BELOW; D; YEH + 0778; WAW WITH DIGIT TWO ABOVE; R; WAW + 0779; WAW WITH DIGIT THREE ABOVE; R; WAW + 077A; BURUSHASKI YEH BARREE WITH DIGIT TWO ABOVE; D; BURUSHASKI YEH BARREE + 077B; BURUSHASKI YEH BARREE WITH DIGIT THREE ABOVE; D; BURUSHASKI YEH BARREE + 077C; HAH WITH DIGIT FOUR BELOW; D; HAH + 077D; SEEN WITH DIGIT FOUR ABOVE; D; SEEN + 077E; SEEN WITH INVERTED V ABOVE; D; SEEN + 077F; KAF WITH 2 DOTS ABOVE; D; 
KAF + + # N'Ko Characters + + 07CA; NKO A; D; No_Joining_Group + 07CB; NKO EE; D; No_Joining_Group + 07CC; NKO I; D; No_Joining_Group + 07CD; NKO E; D; No_Joining_Group + 07CE; NKO U; D; No_Joining_Group + 07CF; NKO OO; D; No_Joining_Group + 07D0; NKO O; D; No_Joining_Group + 07D1; NKO DAGBASINNA; D; No_Joining_Group + 07D2; NKO N; D; No_Joining_Group + 07D3; NKO BA; D; No_Joining_Group + 07D4; NKO PA; D; No_Joining_Group + 07D5; NKO TA; D; No_Joining_Group + 07D6; NKO JA; D; No_Joining_Group + 07D7; NKO CHA; D; No_Joining_Group + 07D8; NKO DA; D; No_Joining_Group + 07D9; NKO RA; D; No_Joining_Group + 07DA; NKO RRA; D; No_Joining_Group + 07DB; NKO SA; D; No_Joining_Group + 07DC; NKO GBA; D; No_Joining_Group + 07DD; NKO FA; D; No_Joining_Group + 07DE; NKO KA; D; No_Joining_Group + 07DF; NKO LA; D; No_Joining_Group + 07E0; NKO NA WOLOSO; D; No_Joining_Group + 07E1; NKO MA; D; No_Joining_Group + 07E2; NKO NYA; D; No_Joining_Group + 07E3; NKO NA; D; No_Joining_Group + 07E4; NKO HA; D; No_Joining_Group + 07E5; NKO WA; D; No_Joining_Group + 07E6; NKO YA; D; No_Joining_Group + 07E7; NKO NYA WOLOSO; D; No_Joining_Group + 07E8; NKO JONA JA; D; No_Joining_Group + 07E9; NKO JONA CHA; D; No_Joining_Group + 07EA; NKO JONA RA; D; No_Joining_Group + 07FA; NKO LAJANYALAN; C; No_Joining_Group + + # Mandaic Characters + + 0840; MANDAIC HALQA; R; No_Joining_Group + 0841; MANDAIC AB; D; No_Joining_Group + 0842; MANDAIC AG; D; No_Joining_Group + 0843; MANDAIC AD; D; No_Joining_Group + 0844; MANDAIC AH; D; No_Joining_Group + 0845; MANDAIC USHENNA; D; No_Joining_Group + 0846; MANDAIC AZ; R; No_Joining_Group + 0847; MANDAIC IT; R; No_Joining_Group + 0848; MANDAIC ATT; D; No_Joining_Group + 0849; MANDAIC AKSA; R; No_Joining_Group + 084A; MANDAIC AK; D; No_Joining_Group + 084B; MANDAIC AL; D; No_Joining_Group + 084C; MANDAIC AM; D; No_Joining_Group + 084D; MANDAIC AN; D; No_Joining_Group + 084E; MANDAIC AS; D; No_Joining_Group + 084F; MANDAIC IN; D; No_Joining_Group + 0850; MANDAIC AP; D; 
No_Joining_Group + 0851; MANDAIC ASZ; D; No_Joining_Group + 0852; MANDAIC AQ; D; No_Joining_Group + 0853; MANDAIC AR; D; No_Joining_Group + 0854; MANDAIC ASH; R; No_Joining_Group + 0855; MANDAIC AT; D; No_Joining_Group + 0856; MANDAIC DUSHENNA; R; No_Joining_Group + 0857; MANDAIC KAD; R; No_Joining_Group + 0858; MANDAIC AIN; R; No_Joining_Group + + # Syriac Supplement Characters + + 0860; MALAYALAM NGA; D; MALAYALAM NGA + 0861; MALAYALAM JA; U; MALAYALAM JA + 0862; MALAYALAM NYA; D; MALAYALAM NYA + 0863; MALAYALAM TTA; D; MALAYALAM TTA + 0864; MALAYALAM NNA; D; MALAYALAM NNA + 0865; MALAYALAM NNNA; D; MALAYALAM NNNA + 0866; MALAYALAM BHA; U; MALAYALAM BHA + 0867; MALAYALAM RA; R; MALAYALAM RA + 0868; MALAYALAM LLA; D; MALAYALAM LLA + 0869; MALAYALAM LLLA; R; MALAYALAM LLLA + 086A; MALAYALAM SSA; R; MALAYALAM SSA + + # Arabic Extended-B Characters + + 0870; ALEF WITH ATTACHED FATHA; R; ALEF + 0871; ALEF WITH ATTACHED TOP RIGHT FATHA; R; ALEF + 0872; ALEF WITH RIGHT MIDDLE STROKE; R; ALEF + 0873; ALEF WITH LEFT MIDDLE STROKE; R; ALEF + 0874; ALEF WITH ATTACHED KASRA; R; ALEF + 0875; ALEF WITH ATTACHED BOTTOM RIGHT KASRA; R; ALEF + 0876; ALEF WITH ATTACHED ROUND DOT ABOVE; R; ALEF + 0877; ALEF WITH ATTACHED RIGHT ROUND DOT; R; ALEF + 0878; ALEF WITH ATTACHED LEFT ROUND DOT; R; ALEF + 0879; ALEF WITH ATTACHED ROUND DOT BELOW; R; ALEF + 087A; ALEF WITH DOT ABOVE; R; ALEF + 087B; ALEF WITH ATTACHED TOP RIGHT FATHA AND DOT ABOVE; R; ALEF + 087C; ALEF WITH RIGHT MIDDLE STROKE AND DOT ABOVE; R; ALEF + 087D; ALEF WITH ATTACHED BOTTOM RIGHT KASRA AND DOT ABOVE; R; ALEF + 087E; ALEF WITH ATTACHED TOP RIGHT FATHA AND LEFT RING; R; ALEF + 087F; ALEF WITH RIGHT MIDDLE STROKE AND LEFT RING; R; ALEF + 0880; ALEF WITH ATTACHED BOTTOM RIGHT KASRA AND LEFT RING; R; ALEF + 0881; ALEF WITH ATTACHED RIGHT HAMZA; R; ALEF + 0882; ALEF WITH ATTACHED LEFT HAMZA; R; ALEF + 0883; TATWEEL WITH OVERSTRUCK HAMZA; C; No_Joining_Group + 0884; TATWEEL WITH OVERSTRUCK WAW; C; No_Joining_Group + 0885; 
TATWEEL WITH TWO DOTS BELOW; C; No_Joining_Group + 0886; THIN YEH; D; THIN YEH + 0887; ARABIC BASELINE ROUND DOT; U; No_Joining_Group + 0888; ARABIC RAISED ROUND DOT; U; No_Joining_Group + 0889; DOTLESS NOON WITH INVERTED V ABOVE; D; NOON + 088A; HAH WITH INVERTED V BELOW; D; HAH + 088B; TAH WITH DOT BELOW; D; TAH + 088C; TAH WITH 3 DOTS BELOW; D; TAH + 088D; KEHEH WITH VERTICAL 2 DOTS BELOW; D; GAF + 088E; VERTICAL TAIL; R; VERTICAL TAIL + 088F; DOTLESS NOON WITH SEPARATE RING ABOVE; D; NOON + 0890; ARABIC POUND MARK ABOVE; U; No_Joining_Group + 0891; ARABIC PIASTRE MARK ABOVE; U; No_Joining_Group + + # Arabic Extended-A Characters + + 08A0; DOTLESS BEH WITH V BELOW; D; BEH + 08A1; BEH WITH HAMZA ABOVE; D; BEH + 08A2; HAH WITH DOT BELOW AND 2 DOTS ABOVE; D; HAH + 08A3; TAH WITH 2 DOTS ABOVE; D; TAH + 08A4; DOTLESS FEH WITH DOT BELOW AND 3 DOTS ABOVE; D; FEH + 08A5; QAF WITH DOT BELOW; D; QAF + 08A6; LAM WITH DOUBLE BAR; D; LAM + 08A7; MEEM WITH 3 DOTS ABOVE; D; MEEM + 08A8; YEH WITH HAMZA ABOVE; D; YEH + 08A9; YEH WITH DOT ABOVE; D; YEH + 08AA; REH WITH LOOP; R; REH + 08AB; WAW WITH DOT WITHIN; R; WAW + 08AC; ROHINGYA YEH; R; ROHINGYA YEH + 08AD; LOW ALEF; U; No_Joining_Group + 08AE; DAL WITH 3 DOTS BELOW; R; DAL + 08AF; SAD WITH 3 DOTS BELOW; D; SAD + 08B0; KEHEH WITH STROKE BELOW; D; GAF + 08B1; STRAIGHT WAW; R; STRAIGHT WAW + 08B2; REH WITH DOT AND INVERTED V ABOVE; R; REH + 08B3; AIN WITH 3 DOTS BELOW; D; AIN + 08B4; KAF WITH DOT BELOW; D; KAF + 08B5; DOTLESS QAF WITH DOT BELOW; D; QAF + 08B6; BEH WITH MEEM ABOVE; D; BEH + 08B7; DOTLESS BEH WITH 3 DOTS BELOW AND MEEM ABOVE; D; BEH + 08B8; DOTLESS BEH WITH TEH ABOVE; D; BEH + 08B9; REH WITH NOON ABOVE; R; REH + 08BA; YEH WITH NOON ABOVE; D; YEH + 08BB; AFRICAN FEH; D; AFRICAN FEH + 08BC; AFRICAN QAF; D; AFRICAN QAF + 08BD; AFRICAN NOON; D; AFRICAN NOON + 08BE; DOTLESS BEH WITH 3 DOTS BELOW AND V ABOVE; D; BEH + 08BF; DOTLESS BEH WITH 2 DOTS AND V ABOVE; D; BEH + 08C0; DOTLESS BEH WITH TAH AND V ABOVE; D; BEH + 
08C1; HAH WITH 3 DOTS BELOW AND V ABOVE; D; HAH + 08C2; KEHEH WITH V ABOVE; D; GAF + 08C3; AIN WITH DIAMOND 4 DOTS ABOVE; D; AIN + 08C4; AFRICAN QAF WITH 3 DOTS ABOVE; D; AFRICAN QAF + 08C5; HAH WITH DOT BELOW AND 3 DOTS ABOVE; D; HAH + 08C6; HAH WITH DIAMOND 4 DOTS BELOW; D; HAH + 08C7; LAM WITH TAH ABOVE; D; LAM + 08C8; KEHEH WITH ELONGATED HAMZA ABOVE; D; GAF + 08E2; ARABIC DISPUTED END OF AYAH; U; No_Joining_Group + + # Mongolian Characters + + 1806; MONGOLIAN TODO SOFT HYPHEN; U; No_Joining_Group + 1807; MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER; D; No_Joining_Group + 180A; MONGOLIAN NIRUGU; C; No_Joining_Group + 180E; MONGOLIAN VOWEL SEPARATOR; U; No_Joining_Group + 1820; MONGOLIAN A; D; No_Joining_Group + 1821; MONGOLIAN E; D; No_Joining_Group + 1822; MONGOLIAN I; D; No_Joining_Group + 1823; MONGOLIAN O; D; No_Joining_Group + 1824; MONGOLIAN U; D; No_Joining_Group + 1825; MONGOLIAN OE; D; No_Joining_Group + 1826; MONGOLIAN UE; D; No_Joining_Group + 1827; MONGOLIAN EE; D; No_Joining_Group + 1828; MONGOLIAN NA; D; No_Joining_Group + 1829; MONGOLIAN ANG; D; No_Joining_Group + 182A; MONGOLIAN BA; D; No_Joining_Group + 182B; MONGOLIAN PA; D; No_Joining_Group + 182C; MONGOLIAN QA; D; No_Joining_Group + 182D; MONGOLIAN GA; D; No_Joining_Group + 182E; MONGOLIAN MA; D; No_Joining_Group + 182F; MONGOLIAN LA; D; No_Joining_Group + 1830; MONGOLIAN SA; D; No_Joining_Group + 1831; MONGOLIAN SHA; D; No_Joining_Group + 1832; MONGOLIAN TA; D; No_Joining_Group + 1833; MONGOLIAN DA; D; No_Joining_Group + 1834; MONGOLIAN CHA; D; No_Joining_Group + 1835; MONGOLIAN JA; D; No_Joining_Group + 1836; MONGOLIAN YA; D; No_Joining_Group + 1837; MONGOLIAN RA; D; No_Joining_Group + 1838; MONGOLIAN WA; D; No_Joining_Group + 1839; MONGOLIAN FA; D; No_Joining_Group + 183A; MONGOLIAN KA; D; No_Joining_Group + 183B; MONGOLIAN KHA; D; No_Joining_Group + 183C; MONGOLIAN TSA; D; No_Joining_Group + 183D; MONGOLIAN ZA; D; No_Joining_Group + 183E; MONGOLIAN HAA; D; No_Joining_Group + 183F; MONGOLIAN 
ZRA; D; No_Joining_Group + 1840; MONGOLIAN LHA; D; No_Joining_Group + 1841; MONGOLIAN ZHI; D; No_Joining_Group + 1842; MONGOLIAN CHI; D; No_Joining_Group + 1843; MONGOLIAN TODO LONG VOWEL SIGN; D; No_Joining_Group + 1844; MONGOLIAN TODO E; D; No_Joining_Group + 1845; MONGOLIAN TODO I; D; No_Joining_Group + 1846; MONGOLIAN TODO O; D; No_Joining_Group + 1847; MONGOLIAN TODO U; D; No_Joining_Group + 1848; MONGOLIAN TODO OE; D; No_Joining_Group + 1849; MONGOLIAN TODO UE; D; No_Joining_Group + 184A; MONGOLIAN TODO ANG; D; No_Joining_Group + 184B; MONGOLIAN TODO BA; D; No_Joining_Group + 184C; MONGOLIAN TODO PA; D; No_Joining_Group + 184D; MONGOLIAN TODO QA; D; No_Joining_Group + 184E; MONGOLIAN TODO GA; D; No_Joining_Group + 184F; MONGOLIAN TODO MA; D; No_Joining_Group + 1850; MONGOLIAN TODO TA; D; No_Joining_Group + 1851; MONGOLIAN TODO DA; D; No_Joining_Group + 1852; MONGOLIAN TODO CHA; D; No_Joining_Group + 1853; MONGOLIAN TODO JA; D; No_Joining_Group + 1854; MONGOLIAN TODO TSA; D; No_Joining_Group + 1855; MONGOLIAN TODO YA; D; No_Joining_Group + 1856; MONGOLIAN TODO WA; D; No_Joining_Group + 1857; MONGOLIAN TODO KA; D; No_Joining_Group + 1858; MONGOLIAN TODO GAA; D; No_Joining_Group + 1859; MONGOLIAN TODO HAA; D; No_Joining_Group + 185A; MONGOLIAN TODO JIA; D; No_Joining_Group + 185B; MONGOLIAN TODO NIA; D; No_Joining_Group + 185C; MONGOLIAN TODO DZA; D; No_Joining_Group + 185D; MONGOLIAN SIBE E; D; No_Joining_Group + 185E; MONGOLIAN SIBE I; D; No_Joining_Group + 185F; MONGOLIAN SIBE IY; D; No_Joining_Group + 1860; MONGOLIAN SIBE UE; D; No_Joining_Group + 1861; MONGOLIAN SIBE U; D; No_Joining_Group + 1862; MONGOLIAN SIBE ANG; D; No_Joining_Group + 1863; MONGOLIAN SIBE KA; D; No_Joining_Group + 1864; MONGOLIAN SIBE GA; D; No_Joining_Group + 1865; MONGOLIAN SIBE HA; D; No_Joining_Group + 1866; MONGOLIAN SIBE PA; D; No_Joining_Group + 1867; MONGOLIAN SIBE SHA; D; No_Joining_Group + 1868; MONGOLIAN SIBE TA; D; No_Joining_Group + 1869; MONGOLIAN SIBE DA; D; 
No_Joining_Group + 186A; MONGOLIAN SIBE JA; D; No_Joining_Group + 186B; MONGOLIAN SIBE FA; D; No_Joining_Group + 186C; MONGOLIAN SIBE GAA; D; No_Joining_Group + 186D; MONGOLIAN SIBE HAA; D; No_Joining_Group + 186E; MONGOLIAN SIBE TSA; D; No_Joining_Group + 186F; MONGOLIAN SIBE ZA; D; No_Joining_Group + 1870; MONGOLIAN SIBE RAA; D; No_Joining_Group + 1871; MONGOLIAN SIBE CHA; D; No_Joining_Group + 1872; MONGOLIAN SIBE ZHA; D; No_Joining_Group + 1873; MONGOLIAN MANCHU I; D; No_Joining_Group + 1874; MONGOLIAN MANCHU KA; D; No_Joining_Group + 1875; MONGOLIAN MANCHU RA; D; No_Joining_Group + 1876; MONGOLIAN MANCHU FA; D; No_Joining_Group + 1877; MONGOLIAN MANCHU ZHA; D; No_Joining_Group + 1878; MONGOLIAN CHA WITH 2 DOTS; D; No_Joining_Group + 1880; MONGOLIAN ALI GALI ANUSVARA ONE; U; No_Joining_Group + 1881; MONGOLIAN ALI GALI VISARGA ONE; U; No_Joining_Group + 1882; MONGOLIAN ALI GALI DAMARU; U; No_Joining_Group + 1883; MONGOLIAN ALI GALI UBADAMA; U; No_Joining_Group + 1884; MONGOLIAN ALI GALI INVERTED UBADAMA; U; No_Joining_Group + 1885; MONGOLIAN ALI GALI BALUDA; T; No_Joining_Group + 1886; MONGOLIAN ALI GALI THREE BALUDA; T; No_Joining_Group + 1887; MONGOLIAN ALI GALI A; D; No_Joining_Group + 1888; MONGOLIAN ALI GALI I; D; No_Joining_Group + 1889; MONGOLIAN ALI GALI KA; D; No_Joining_Group + 188A; MONGOLIAN ALI GALI NGA; D; No_Joining_Group + 188B; MONGOLIAN ALI GALI CA; D; No_Joining_Group + 188C; MONGOLIAN ALI GALI TTA; D; No_Joining_Group + 188D; MONGOLIAN ALI GALI TTHA; D; No_Joining_Group + 188E; MONGOLIAN ALI GALI DDA; D; No_Joining_Group + 188F; MONGOLIAN ALI GALI NNA; D; No_Joining_Group + 1890; MONGOLIAN ALI GALI TA; D; No_Joining_Group + 1891; MONGOLIAN ALI GALI DA; D; No_Joining_Group + 1892; MONGOLIAN ALI GALI PA; D; No_Joining_Group + 1893; MONGOLIAN ALI GALI PHA; D; No_Joining_Group + 1894; MONGOLIAN ALI GALI SSA; D; No_Joining_Group + 1895; MONGOLIAN ALI GALI ZHA; D; No_Joining_Group + 1896; MONGOLIAN ALI GALI ZA; D; No_Joining_Group + 1897; MONGOLIAN 
ALI GALI AH; D; No_Joining_Group + 1898; MONGOLIAN TODO ALI GALI TA; D; No_Joining_Group + 1899; MONGOLIAN TODO ALI GALI ZHA; D; No_Joining_Group + 189A; MONGOLIAN MANCHU ALI GALI GHA; D; No_Joining_Group + 189B; MONGOLIAN MANCHU ALI GALI NGA; D; No_Joining_Group + 189C; MONGOLIAN MANCHU ALI GALI CA; D; No_Joining_Group + 189D; MONGOLIAN MANCHU ALI GALI JHA; D; No_Joining_Group + 189E; MONGOLIAN MANCHU ALI GALI TTA; D; No_Joining_Group + 189F; MONGOLIAN MANCHU ALI GALI DDHA; D; No_Joining_Group + 18A0; MONGOLIAN MANCHU ALI GALI TA; D; No_Joining_Group + 18A1; MONGOLIAN MANCHU ALI GALI DHA; D; No_Joining_Group + 18A2; MONGOLIAN MANCHU ALI GALI SSA; D; No_Joining_Group + 18A3; MONGOLIAN MANCHU ALI GALI CYA; D; No_Joining_Group + 18A4; MONGOLIAN MANCHU ALI GALI ZHA; D; No_Joining_Group + 18A5; MONGOLIAN MANCHU ALI GALI ZA; D; No_Joining_Group + 18A6; MONGOLIAN ALI GALI HALF U; D; No_Joining_Group + 18A7; MONGOLIAN ALI GALI HALF YA; D; No_Joining_Group + 18A8; MONGOLIAN MANCHU ALI GALI BHA; D; No_Joining_Group + 18AA; MONGOLIAN MANCHU ALI GALI LHA; D; No_Joining_Group + + # Other + + 200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group + 200D; ZERO WIDTH JOINER; C; No_Joining_Group + 202F; NARROW NO-BREAK SPACE; U; No_Joining_Group + 2066; LEFT-TO-RIGHT ISOLATE; U; No_Joining_Group + 2067; RIGHT-TO-LEFT ISOLATE; U; No_Joining_Group + 2068; FIRST STRONG ISOLATE; U; No_Joining_Group + 2069; POP DIRECTIONAL ISOLATE; U; No_Joining_Group + + # Phags-Pa Characters + + A840; PHAGS-PA KA; D; No_Joining_Group + A841; PHAGS-PA KHA; D; No_Joining_Group + A842; PHAGS-PA GA; D; No_Joining_Group + A843; PHAGS-PA NGA; D; No_Joining_Group + A844; PHAGS-PA CA; D; No_Joining_Group + A845; PHAGS-PA CHA; D; No_Joining_Group + A846; PHAGS-PA JA; D; No_Joining_Group + A847; PHAGS-PA NYA; D; No_Joining_Group + A848; PHAGS-PA TA; D; No_Joining_Group + A849; PHAGS-PA THA; D; No_Joining_Group + A84A; PHAGS-PA DA; D; No_Joining_Group + A84B; PHAGS-PA NA; D; No_Joining_Group + A84C; PHAGS-PA PA; D; 
No_Joining_Group + A84D; PHAGS-PA PHA; D; No_Joining_Group + A84E; PHAGS-PA BA; D; No_Joining_Group + A84F; PHAGS-PA MA; D; No_Joining_Group + A850; PHAGS-PA TSA; D; No_Joining_Group + A851; PHAGS-PA TSHA; D; No_Joining_Group + A852; PHAGS-PA DZA; D; No_Joining_Group + A853; PHAGS-PA WA; D; No_Joining_Group + A854; PHAGS-PA ZHA; D; No_Joining_Group + A855; PHAGS-PA ZA; D; No_Joining_Group + A856; PHAGS-PA SMALL A; D; No_Joining_Group + A857; PHAGS-PA YA; D; No_Joining_Group + A858; PHAGS-PA RA; D; No_Joining_Group + A859; PHAGS-PA LA; D; No_Joining_Group + A85A; PHAGS-PA SHA; D; No_Joining_Group + A85B; PHAGS-PA SA; D; No_Joining_Group + A85C; PHAGS-PA HA; D; No_Joining_Group + A85D; PHAGS-PA A; D; No_Joining_Group + A85E; PHAGS-PA I; D; No_Joining_Group + A85F; PHAGS-PA U; D; No_Joining_Group + A860; PHAGS-PA E; D; No_Joining_Group + A861; PHAGS-PA O; D; No_Joining_Group + A862; PHAGS-PA QA; D; No_Joining_Group + A863; PHAGS-PA XA; D; No_Joining_Group + A864; PHAGS-PA FA; D; No_Joining_Group + A865; PHAGS-PA GGA; D; No_Joining_Group + A866; PHAGS-PA EE; D; No_Joining_Group + A867; PHAGS-PA SUBJOINED WA; D; No_Joining_Group + A868; PHAGS-PA SUBJOINED YA; D; No_Joining_Group + A869; PHAGS-PA TTA; D; No_Joining_Group + A86A; PHAGS-PA TTHA; D; No_Joining_Group + A86B; PHAGS-PA DDA; D; No_Joining_Group + A86C; PHAGS-PA NNA; D; No_Joining_Group + A86D; PHAGS-PA ALTERNATE YA; D; No_Joining_Group + A86E; PHAGS-PA VOICELESS SHA; D; No_Joining_Group + A86F; PHAGS-PA VOICED HA; D; No_Joining_Group + A870; PHAGS-PA ASPIRATED FA; D; No_Joining_Group + A871; PHAGS-PA SUBJOINED RA; D; No_Joining_Group + A872; PHAGS-PA SUPERFIXED RA; L; No_Joining_Group + A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group + + # Manichaean Characters + + 10AC0; MANICHAEAN ALEPH; D; MANICHAEAN ALEPH + 10AC1; MANICHAEAN BETH; D; MANICHAEAN BETH + 10AC2; MANICHAEAN BETH WITH 2 DOTS ABOVE; D; MANICHAEAN BETH + 10AC3; MANICHAEAN GIMEL; D; MANICHAEAN GIMEL + 10AC4; MANICHAEAN GIMEL WITH ATTACHED RING 
BELOW; D; MANICHAEAN GIMEL + 10AC5; MANICHAEAN DALETH; R; MANICHAEAN DALETH + 10AC6; MANICHAEAN HE; U; No_Joining_Group + 10AC7; MANICHAEAN WAW; R; MANICHAEAN WAW + 10AC8; MANICHAEAN UD; U; No_Joining_Group + 10AC9; MANICHAEAN ZAYIN; R; MANICHAEAN ZAYIN + 10ACA; MANICHAEAN ZAYIN WITH 2 DOTS ABOVE; R; MANICHAEAN ZAYIN + 10ACB; MANICHAEAN JAYIN; U; No_Joining_Group + 10ACC; MANICHAEAN JAYIN WITH 2 DOTS ABOVE; U; No_Joining_Group + 10ACD; MANICHAEAN HETH; L; MANICHAEAN HETH + 10ACE; MANICHAEAN TETH; R; MANICHAEAN TETH + 10ACF; MANICHAEAN YODH; R; MANICHAEAN YODH + 10AD0; MANICHAEAN KAPH; R; MANICHAEAN KAPH + 10AD1; MANICHAEAN KAPH WITH DOT ABOVE; R; MANICHAEAN KAPH + 10AD2; MANICHAEAN KAPH WITH 2 DOTS ABOVE; R; MANICHAEAN KAPH + 10AD3; MANICHAEAN LAMEDH; D; MANICHAEAN LAMEDH + 10AD4; MANICHAEAN DHAMEDH; D; MANICHAEAN DHAMEDH + 10AD5; MANICHAEAN THAMEDH; D; MANICHAEAN THAMEDH + 10AD6; MANICHAEAN MEM; D; MANICHAEAN MEM + 10AD7; MANICHAEAN NUN; L; MANICHAEAN NUN + 10AD8; MANICHAEAN SAMEKH; D; MANICHAEAN SAMEKH + 10AD9; MANICHAEAN AYIN; D; MANICHAEAN AYIN + 10ADA; MANICHAEAN AYIN WITH 2 DOTS ABOVE; D; MANICHAEAN AYIN + 10ADB; MANICHAEAN PE; D; MANICHAEAN PE + 10ADC; MANICHAEAN PE WITH DOT ABOVE; D; MANICHAEAN PE + 10ADD; MANICHAEAN SADHE; R; MANICHAEAN SADHE + 10ADE; MANICHAEAN QOPH; D; MANICHAEAN QOPH + 10ADF; MANICHAEAN QOPH WITH DOT ABOVE; D; MANICHAEAN QOPH + 10AE0; MANICHAEAN QOPH WITH 2 DOTS ABOVE; D; MANICHAEAN QOPH + 10AE1; MANICHAEAN RESH; R; MANICHAEAN RESH + 10AE2; MANICHAEAN SHIN; U; No_Joining_Group + 10AE3; MANICHAEAN SHIN WITH 2 DOTS ABOVE; U; No_Joining_Group + 10AE4; MANICHAEAN TAW; R; MANICHAEAN TAW + 10AEB; MANICHAEAN ONE; D; MANICHAEAN ONE + 10AEC; MANICHAEAN FIVE; D; MANICHAEAN FIVE + 10AED; MANICHAEAN TEN; D; MANICHAEAN TEN + 10AEE; MANICHAEAN TWENTY; D; MANICHAEAN TWENTY + 10AEF; MANICHAEAN HUNDRED; R; MANICHAEAN HUNDRED + + # Psalter Pahlavi Characters + + 10B80; PSALTER PAHLAVI ALEPH; D; No_Joining_Group + 10B81; PSALTER PAHLAVI BETH; R; 
No_Joining_Group + 10B82; PSALTER PAHLAVI GIMEL; D; No_Joining_Group + 10B83; PSALTER PAHLAVI DALETH; R; No_Joining_Group + 10B84; PSALTER PAHLAVI HE; R; No_Joining_Group + 10B85; PSALTER PAHLAVI WAW-AYIN-RESH; R; No_Joining_Group + 10B86; PSALTER PAHLAVI ZAYIN; D; No_Joining_Group + 10B87; PSALTER PAHLAVI HETH; D; No_Joining_Group + 10B88; PSALTER PAHLAVI YODH; D; No_Joining_Group + 10B89; PSALTER PAHLAVI KAPH; R; No_Joining_Group + 10B8A; PSALTER PAHLAVI LAMEDH; D; No_Joining_Group + 10B8B; PSALTER PAHLAVI MEM-QOPH; D; No_Joining_Group + 10B8C; PSALTER PAHLAVI NUN; R; No_Joining_Group + 10B8D; PSALTER PAHLAVI SAMEKH; D; No_Joining_Group + 10B8E; PSALTER PAHLAVI PE; R; No_Joining_Group + 10B8F; PSALTER PAHLAVI SADHE; R; No_Joining_Group + 10B90; PSALTER PAHLAVI SHIN; D; No_Joining_Group + 10B91; PSALTER PAHLAVI TAW; R; No_Joining_Group + 10BA9; PSALTER PAHLAVI ONE; R; No_Joining_Group + 10BAA; PSALTER PAHLAVI TWO; R; No_Joining_Group + 10BAB; PSALTER PAHLAVI THREE; R; No_Joining_Group + 10BAC; PSALTER PAHLAVI FOUR; R; No_Joining_Group + 10BAD; PSALTER PAHLAVI TEN; D; No_Joining_Group + 10BAE; PSALTER PAHLAVI TWENTY; D; No_Joining_Group + 10BAF; PSALTER PAHLAVI HUNDRED; U; No_Joining_Group + + # Hanifi Rohingya Characters + + 10D00; HANIFI ROHINGYA A; L; No_Joining_Group + 10D01; HANIFI ROHINGYA BA; D; No_Joining_Group + 10D02; HANIFI ROHINGYA PA; D; HANIFI ROHINGYA PA + 10D03; HANIFI ROHINGYA TA; D; No_Joining_Group + 10D04; HANIFI ROHINGYA TTA; D; No_Joining_Group + 10D05; HANIFI ROHINGYA JA; D; No_Joining_Group + 10D06; HANIFI ROHINGYA CA; D; No_Joining_Group + 10D07; HANIFI ROHINGYA HA; D; No_Joining_Group + 10D08; HANIFI ROHINGYA KHA; D; No_Joining_Group + 10D09; HANIFI ROHINGYA PA WITH DOT ABOVE; D; HANIFI ROHINGYA PA + 10D0A; HANIFI ROHINGYA DA; D; No_Joining_Group + 10D0B; HANIFI ROHINGYA DDA; D; No_Joining_Group + 10D0C; HANIFI ROHINGYA RA; D; No_Joining_Group + 10D0D; HANIFI ROHINGYA RRA; D; No_Joining_Group + 10D0E; HANIFI ROHINGYA ZA; D; 
No_Joining_Group + 10D0F; HANIFI ROHINGYA SA; D; No_Joining_Group + 10D10; HANIFI ROHINGYA SHA; D; No_Joining_Group + 10D11; HANIFI ROHINGYA KA; D; No_Joining_Group + 10D12; HANIFI ROHINGYA GA; D; No_Joining_Group + 10D13; HANIFI ROHINGYA LA; D; No_Joining_Group + 10D14; HANIFI ROHINGYA MA; D; No_Joining_Group + 10D15; HANIFI ROHINGYA NA; D; No_Joining_Group + 10D16; HANIFI ROHINGYA WA; D; No_Joining_Group + 10D17; HANIFI ROHINGYA KINNA WA; D; No_Joining_Group + 10D18; HANIFI ROHINGYA YA; D; No_Joining_Group + 10D19; HANIFI ROHINGYA KINNA YA; D; HANIFI ROHINGYA KINNA YA + 10D1A; HANIFI ROHINGYA NGA; D; No_Joining_Group + 10D1B; HANIFI ROHINGYA NYA; D; No_Joining_Group + 10D1C; HANIFI ROHINGYA PA WITH 3 DOTS ABOVE; D; HANIFI ROHINGYA PA + 10D1D; HANIFI ROHINGYA VOWEL A; D; No_Joining_Group + 10D1E; HANIFI ROHINGYA DOTLESS KINNA YA WITH LEFT-FACING HOOK BELOW; D; HANIFI ROHINGYA KINNA YA + 10D1F; HANIFI ROHINGYA VOWEL U; D; No_Joining_Group + 10D20; HANIFI ROHINGYA DOTLESS KINNA YA WITH RIGHT-FACING HOOK BELOW; D; HANIFI ROHINGYA KINNA YA + 10D21; HANIFI ROHINGYA VOWEL O; D; No_Joining_Group + 10D22; HANIFI ROHINGYA SAKIN; R; No_Joining_Group + 10D23; HANIFI ROHINGYA DOTLESS KINNA YA WITH DOT ABOVE; D; HANIFI ROHINGYA KINNA YA + + # Arabic Extended-C Characters + + 10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL + 10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH + 10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF + 10EC6; THIN NOON; D; THIN NOON + 10EC7; DOTLESS YEH WITH 4 DOTS BELOW; D; YEH + + # Sogdian Characters + + 10F30; SOGDIAN ALEPH; D; No_Joining_Group + 10F31; SOGDIAN BETH; D; No_Joining_Group + 10F32; SOGDIAN GIMEL; D; No_Joining_Group + 10F33; SOGDIAN HE; R; No_Joining_Group + 10F34; SOGDIAN WAW; D; No_Joining_Group + 10F35; SOGDIAN ZAYIN; D; No_Joining_Group + 10F36; SOGDIAN HETH; D; No_Joining_Group + 10F37; SOGDIAN YODH; D; No_Joining_Group + 10F38; SOGDIAN KAPH; D; No_Joining_Group + 10F39; SOGDIAN LAMEDH; D; No_Joining_Group + 10F3A; SOGDIAN MEM; D; 
No_Joining_Group + 10F3B; SOGDIAN NUN; D; No_Joining_Group + 10F3C; SOGDIAN SAMEKH; D; No_Joining_Group + 10F3D; SOGDIAN AYIN; D; No_Joining_Group + 10F3E; SOGDIAN PE; D; No_Joining_Group + 10F3F; SOGDIAN SADHE; D; No_Joining_Group + 10F40; SOGDIAN RESH-AYIN; D; No_Joining_Group + 10F41; SOGDIAN SHIN; D; No_Joining_Group + 10F42; SOGDIAN TAW; D; No_Joining_Group + 10F43; SOGDIAN FETH; D; No_Joining_Group + 10F44; SOGDIAN LESH; D; No_Joining_Group + 10F45; SOGDIAN INDEPENDENT SHIN; U; No_Joining_Group + 10F51; SOGDIAN ONE; D; No_Joining_Group + 10F52; SOGDIAN TEN; D; No_Joining_Group + 10F53; SOGDIAN TWENTY; D; No_Joining_Group + 10F54; SOGDIAN ONE HUNDRED; R; No_Joining_Group + + # Old Uyghur Characters + + 10F70; OLD UYGHUR ALEPH; D; No_Joining_Group + 10F71; OLD UYGHUR BETH; D; No_Joining_Group + 10F72; OLD UYGHUR GIMEL-HETH; D; No_Joining_Group + 10F73; OLD UYGHUR WAW; D; No_Joining_Group + 10F74; OLD UYGHUR ZAYIN; R; No_Joining_Group + 10F75; OLD UYGHUR FINAL HETH; R; No_Joining_Group + 10F76; OLD UYGHUR YODH; D; No_Joining_Group + 10F77; OLD UYGHUR KAPH; D; No_Joining_Group + 10F78; OLD UYGHUR LAMEDH; D; No_Joining_Group + 10F79; OLD UYGHUR MEM; D; No_Joining_Group + 10F7A; OLD UYGHUR NUN; D; No_Joining_Group + 10F7B; OLD UYGHUR SAMEKH; D; No_Joining_Group + 10F7C; OLD UYGHUR PE; D; No_Joining_Group + 10F7D; OLD UYGHUR SADHE; D; No_Joining_Group + 10F7E; OLD UYGHUR RESH; D; No_Joining_Group + 10F7F; OLD UYGHUR SHIN; D; No_Joining_Group + 10F80; OLD UYGHUR TAW; D; No_Joining_Group + 10F81; OLD UYGHUR LESH; D; No_Joining_Group + + # Chorasmian Characters + + 10FB0; CHORASMIAN ALEPH; D; No_Joining_Group + 10FB1; CHORASMIAN SMALL ALEPH; U; No_Joining_Group + 10FB2; CHORASMIAN BETH; D; No_Joining_Group + 10FB3; CHORASMIAN GIMEL; D; No_Joining_Group + 10FB4; CHORASMIAN DALETH; R; No_Joining_Group + 10FB5; CHORASMIAN HE; R; No_Joining_Group + 10FB6; CHORASMIAN WAW; R; No_Joining_Group + 10FB7; CHORASMIAN CURLED WAW; U; No_Joining_Group + 10FB8; CHORASMIAN ZAYIN; D; 
No_Joining_Group + 10FB9; CHORASMIAN HETH; R; No_Joining_Group + 10FBA; CHORASMIAN YODH; R; No_Joining_Group + 10FBB; CHORASMIAN KAPH; D; No_Joining_Group + 10FBC; CHORASMIAN LAMEDH; D; No_Joining_Group + 10FBD; CHORASMIAN MEM; R; No_Joining_Group + 10FBE; CHORASMIAN NUN; D; No_Joining_Group + 10FBF; CHORASMIAN SAMEKH; D; No_Joining_Group + 10FC0; CHORASMIAN AYIN; U; No_Joining_Group + 10FC1; CHORASMIAN PE; D; No_Joining_Group + 10FC2; CHORASMIAN RESH; R; No_Joining_Group + 10FC3; CHORASMIAN SHIN; R; No_Joining_Group + 10FC4; CHORASMIAN TAW; D; No_Joining_Group + 10FC5; CHORASMIAN ONE; U; No_Joining_Group + 10FC6; CHORASMIAN TWO; U; No_Joining_Group + 10FC7; CHORASMIAN THREE; U; No_Joining_Group + 10FC8; CHORASMIAN FOUR; U; No_Joining_Group + 10FC9; CHORASMIAN TEN; R; No_Joining_Group + 10FCA; CHORASMIAN TWENTY; D; No_Joining_Group + 10FCB; CHORASMIAN ONE HUNDRED; L; No_Joining_Group + + # Kaithi Number Signs + # These are prepended concatenation marks, comparable + # to the number signs in the Arabic script. + # Listed here for consistency in property values. 
+ + 110BD; KAITHI NUMBER SIGN; U; No_Joining_Group + 110CD; KAITHI NUMBER SIGN ABOVE; U; No_Joining_Group + + # Adlam Characters + + 1E900; ADLAM CAPITAL ALIF; D; No_Joining_Group + 1E901; ADLAM CAPITAL DAALI; D; No_Joining_Group + 1E902; ADLAM CAPITAL LAAM; D; No_Joining_Group + 1E903; ADLAM CAPITAL MIIM; D; No_Joining_Group + 1E904; ADLAM CAPITAL BA; D; No_Joining_Group + 1E905; ADLAM CAPITAL SINNYIIYHE; D; No_Joining_Group + 1E906; ADLAM CAPITAL PE; D; No_Joining_Group + 1E907; ADLAM CAPITAL BHE; D; No_Joining_Group + 1E908; ADLAM CAPITAL RA; D; No_Joining_Group + 1E909; ADLAM CAPITAL E; D; No_Joining_Group + 1E90A; ADLAM CAPITAL FA; D; No_Joining_Group + 1E90B; ADLAM CAPITAL I; D; No_Joining_Group + 1E90C; ADLAM CAPITAL O; D; No_Joining_Group + 1E90D; ADLAM CAPITAL DHA; D; No_Joining_Group + 1E90E; ADLAM CAPITAL YHE; D; No_Joining_Group + 1E90F; ADLAM CAPITAL WAW; D; No_Joining_Group + 1E910; ADLAM CAPITAL NUN; D; No_Joining_Group + 1E911; ADLAM CAPITAL KAF; D; No_Joining_Group + 1E912; ADLAM CAPITAL YA; D; No_Joining_Group + 1E913; ADLAM CAPITAL U; D; No_Joining_Group + 1E914; ADLAM CAPITAL JIIM; D; No_Joining_Group + 1E915; ADLAM CAPITAL CHI; D; No_Joining_Group + 1E916; ADLAM CAPITAL HA; D; No_Joining_Group + 1E917; ADLAM CAPITAL QAAF; D; No_Joining_Group + 1E918; ADLAM CAPITAL GA; D; No_Joining_Group + 1E919; ADLAM CAPITAL NYA; D; No_Joining_Group + 1E91A; ADLAM CAPITAL TU; D; No_Joining_Group + 1E91B; ADLAM CAPITAL NHA; D; No_Joining_Group + 1E91C; ADLAM CAPITAL VA; D; No_Joining_Group + 1E91D; ADLAM CAPITAL KHA; D; No_Joining_Group + 1E91E; ADLAM CAPITAL GBE; D; No_Joining_Group + 1E91F; ADLAM CAPITAL ZAL; D; No_Joining_Group + 1E920; ADLAM CAPITAL KPO; D; No_Joining_Group + 1E921; ADLAM CAPITAL SHA; D; No_Joining_Group + 1E922; ADLAM SMALL ALIF; D; No_Joining_Group + 1E923; ADLAM SMALL DAALI; D; No_Joining_Group + 1E924; ADLAM SMALL LAAM; D; No_Joining_Group + 1E925; ADLAM SMALL MIIM; D; No_Joining_Group + 1E926; ADLAM SMALL BA; D; No_Joining_Group + 
1E927; ADLAM SMALL SINNYIIYHE; D; No_Joining_Group + 1E928; ADLAM SMALL PE; D; No_Joining_Group + 1E929; ADLAM SMALL BHE; D; No_Joining_Group + 1E92A; ADLAM SMALL RA; D; No_Joining_Group + 1E92B; ADLAM SMALL E; D; No_Joining_Group + 1E92C; ADLAM SMALL FA; D; No_Joining_Group + 1E92D; ADLAM SMALL I; D; No_Joining_Group + 1E92E; ADLAM SMALL O; D; No_Joining_Group + 1E92F; ADLAM SMALL DHA; D; No_Joining_Group + 1E930; ADLAM SMALL YHE; D; No_Joining_Group + 1E931; ADLAM SMALL WAW; D; No_Joining_Group + 1E932; ADLAM SMALL NUN; D; No_Joining_Group + 1E933; ADLAM SMALL KAF; D; No_Joining_Group + 1E934; ADLAM SMALL YA; D; No_Joining_Group + 1E935; ADLAM SMALL U; D; No_Joining_Group + 1E936; ADLAM SMALL JIIM; D; No_Joining_Group + 1E937; ADLAM SMALL CHI; D; No_Joining_Group + 1E938; ADLAM SMALL HA; D; No_Joining_Group + 1E939; ADLAM SMALL QAAF; D; No_Joining_Group + 1E93A; ADLAM SMALL GA; D; No_Joining_Group + 1E93B; ADLAM SMALL NYA; D; No_Joining_Group + 1E93C; ADLAM SMALL TU; D; No_Joining_Group + 1E93D; ADLAM SMALL NHA; D; No_Joining_Group + 1E93E; ADLAM SMALL VA; D; No_Joining_Group + 1E93F; ADLAM SMALL KHA; D; No_Joining_Group + 1E940; ADLAM SMALL GBE; D; No_Joining_Group + 1E941; ADLAM SMALL ZAL; D; No_Joining_Group + 1E942; ADLAM SMALL KPO; D; No_Joining_Group + 1E943; ADLAM SMALL SHA; D; No_Joining_Group + 1E94B; ADLAM NASALIZATION MARK; T; No_Joining_Group + + # EOF +`.trim(); diff --git a/packages/test-utils/src/mock-store/default-files/bidi-brackets.ts b/packages/test-utils/src/mock-store/default-files/bidi-brackets.ts new file mode 100644 index 000000000..b19c7f2ca --- /dev/null +++ b/packages/test-utils/src/mock-store/default-files/bidi-brackets.ts @@ -0,0 +1,197 @@ +import { dedent } from "@luxass/utils"; + +export const defaultBidiBracketsFileContent = dedent` + # BidiBrackets-17.0.0.txt + # Date: 2025-08-01 + # © 2025 Unicode®, Inc. + # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
+ # For terms of use and license, see https://www.unicode.org/terms_of_use.html + # + # Unicode Character Database + # For documentation, see https://www.unicode.org/reports/tr44/ + # + # Bidi_Paired_Bracket and Bidi_Paired_Bracket_Type Properties + # + # This file is a normative contributory data file in the Unicode + # Character Database. + # + # Bidi_Paired_Bracket is a normative property + # which establishes a mapping between characters that are treated as + # bracket pairs by the Unicode Bidirectional Algorithm. + # + # Bidi_Paired_Bracket_Type is a normative property + # which classifies characters into opening and closing paired brackets + # for the purposes of the Unicode Bidirectional Algorithm. + # + # This file lists the set of code points with Bidi_Paired_Bracket_Type + # property values Open and Close. The set is derived from the character + # properties General_Category (gc), Bidi_Class (bc), Bidi_Mirrored (Bidi_M), + # and Bidi_Mirroring_Glyph (bmg), as follows: two characters, A and B, + # form a bracket pair if A has gc=Ps and B has gc=Pe, both have bc=ON and + # Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket (bpb) maps A to B and + # vice versa, and their Bidi_Paired_Bracket_Type (bpt) property values are + # Open (o) and Close (c), respectively. + # + # The brackets with ticks U+298D LEFT SQUARE BRACKET WITH TICK IN TOP CORNER + # through U+2990 RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER are paired the + # same way their glyphs form mirror pairs, according to their bmg property + # values. They are not paired on the basis of a diagonal or antidiagonal + # matching of the corner ticks inferred from code point order. + # + # For legacy reasons, the characters U+FD3E ORNATE LEFT PARENTHESIS and + # U+FD3F ORNATE RIGHT PARENTHESIS do not mirror in bidirectional display + # and therefore do not form a bracket pair. 
+ # + # The Unicode property value stability policy guarantees that characters + # which have bpt=o or bpt=c also have bc=ON and Bidi_M=Y. As a result, an + # implementation can optimize the lookup of the Bidi_Paired_Bracket_Type + # property values Open and Close by restricting the processing to characters + # with bc=ON. + # + # The format of the file is three fields separated by a semicolon. + # Field 0: Unicode code point value, represented as a hexadecimal value + # Field 1: Bidi_Paired_Bracket property value, a code point value or + # Field 2: Bidi_Paired_Bracket_Type property value, one of the following: + # o Open + # c Close + # n None + # The names of the characters in field 0 are given in comments at the end + # of each line. + # + # For information on bidirectional paired brackets, see UAX #9: Unicode + # Bidirectional Algorithm, at https://www.unicode.org/reports/tr9/ + # + # This file was originally created by Andrew Glass and Laurentiu Iancu + # for Unicode 6.3. + + 0028; 0029; o # LEFT PARENTHESIS + 0029; 0028; c # RIGHT PARENTHESIS + 005B; 005D; o # LEFT SQUARE BRACKET + 005D; 005B; c # RIGHT SQUARE BRACKET + 007B; 007D; o # LEFT CURLY BRACKET + 007D; 007B; c # RIGHT CURLY BRACKET + 0F3A; 0F3B; o # TIBETAN MARK GUG RTAGS GYON + 0F3B; 0F3A; c # TIBETAN MARK GUG RTAGS GYAS + 0F3C; 0F3D; o # TIBETAN MARK ANG KHANG GYON + 0F3D; 0F3C; c # TIBETAN MARK ANG KHANG GYAS + 169B; 169C; o # OGHAM FEATHER MARK + 169C; 169B; c # OGHAM REVERSED FEATHER MARK + 2045; 2046; o # LEFT SQUARE BRACKET WITH QUILL + 2046; 2045; c # RIGHT SQUARE BRACKET WITH QUILL + 207D; 207E; o # SUPERSCRIPT LEFT PARENTHESIS + 207E; 207D; c # SUPERSCRIPT RIGHT PARENTHESIS + 208D; 208E; o # SUBSCRIPT LEFT PARENTHESIS + 208E; 208D; c # SUBSCRIPT RIGHT PARENTHESIS + 2308; 2309; o # LEFT CEILING + 2309; 2308; c # RIGHT CEILING + 230A; 230B; o # LEFT FLOOR + 230B; 230A; c # RIGHT FLOOR + 2329; 232A; o # LEFT-POINTING ANGLE BRACKET + 232A; 2329; c # RIGHT-POINTING ANGLE BRACKET + 2768; 2769; o 
# MEDIUM LEFT PARENTHESIS ORNAMENT + 2769; 2768; c # MEDIUM RIGHT PARENTHESIS ORNAMENT + 276A; 276B; o # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT + 276B; 276A; c # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT + 276C; 276D; o # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT + 276D; 276C; c # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT + 276E; 276F; o # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT + 276F; 276E; c # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT + 2770; 2771; o # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT + 2771; 2770; c # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT + 2772; 2773; o # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT + 2773; 2772; c # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT + 2774; 2775; o # MEDIUM LEFT CURLY BRACKET ORNAMENT + 2775; 2774; c # MEDIUM RIGHT CURLY BRACKET ORNAMENT + 27C5; 27C6; o # LEFT S-SHAPED BAG DELIMITER + 27C6; 27C5; c # RIGHT S-SHAPED BAG DELIMITER + 27E6; 27E7; o # MATHEMATICAL LEFT WHITE SQUARE BRACKET + 27E7; 27E6; c # MATHEMATICAL RIGHT WHITE SQUARE BRACKET + 27E8; 27E9; o # MATHEMATICAL LEFT ANGLE BRACKET + 27E9; 27E8; c # MATHEMATICAL RIGHT ANGLE BRACKET + 27EA; 27EB; o # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET + 27EB; 27EA; c # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET + 27EC; 27ED; o # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET + 27ED; 27EC; c # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET + 27EE; 27EF; o # MATHEMATICAL LEFT FLATTENED PARENTHESIS + 27EF; 27EE; c # MATHEMATICAL RIGHT FLATTENED PARENTHESIS + 2983; 2984; o # LEFT WHITE CURLY BRACKET + 2984; 2983; c # RIGHT WHITE CURLY BRACKET + 2985; 2986; o # LEFT WHITE PARENTHESIS + 2986; 2985; c # RIGHT WHITE PARENTHESIS + 2987; 2988; o # Z NOTATION LEFT IMAGE BRACKET + 2988; 2987; c # Z NOTATION RIGHT IMAGE BRACKET + 2989; 298A; o # Z NOTATION LEFT BINDING BRACKET + 298A; 2989; c # Z NOTATION RIGHT BINDING BRACKET + 298B; 298C; o # LEFT SQUARE BRACKET WITH UNDERBAR + 298C; 298B; c # RIGHT SQUARE BRACKET WITH UNDERBAR + 298D; 2990; o # LEFT SQUARE 
BRACKET WITH TICK IN TOP CORNER + 298E; 298F; c # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER + 298F; 298E; o # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER + 2990; 298D; c # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER + 2991; 2992; o # LEFT ANGLE BRACKET WITH DOT + 2992; 2991; c # RIGHT ANGLE BRACKET WITH DOT + 2993; 2994; o # LEFT ARC LESS-THAN BRACKET + 2994; 2993; c # RIGHT ARC GREATER-THAN BRACKET + 2995; 2996; o # DOUBLE LEFT ARC GREATER-THAN BRACKET + 2996; 2995; c # DOUBLE RIGHT ARC LESS-THAN BRACKET + 2997; 2998; o # LEFT BLACK TORTOISE SHELL BRACKET + 2998; 2997; c # RIGHT BLACK TORTOISE SHELL BRACKET + 29D8; 29D9; o # LEFT WIGGLY FENCE + 29D9; 29D8; c # RIGHT WIGGLY FENCE + 29DA; 29DB; o # LEFT DOUBLE WIGGLY FENCE + 29DB; 29DA; c # RIGHT DOUBLE WIGGLY FENCE + 29FC; 29FD; o # LEFT-POINTING CURVED ANGLE BRACKET + 29FD; 29FC; c # RIGHT-POINTING CURVED ANGLE BRACKET + 2E22; 2E23; o # TOP LEFT HALF BRACKET + 2E23; 2E22; c # TOP RIGHT HALF BRACKET + 2E24; 2E25; o # BOTTOM LEFT HALF BRACKET + 2E25; 2E24; c # BOTTOM RIGHT HALF BRACKET + 2E26; 2E27; o # LEFT SIDEWAYS U BRACKET + 2E27; 2E26; c # RIGHT SIDEWAYS U BRACKET + 2E28; 2E29; o # LEFT DOUBLE PARENTHESIS + 2E29; 2E28; c # RIGHT DOUBLE PARENTHESIS + 2E55; 2E56; o # LEFT SQUARE BRACKET WITH STROKE + 2E56; 2E55; c # RIGHT SQUARE BRACKET WITH STROKE + 2E57; 2E58; o # LEFT SQUARE BRACKET WITH DOUBLE STROKE + 2E58; 2E57; c # RIGHT SQUARE BRACKET WITH DOUBLE STROKE + 2E59; 2E5A; o # TOP HALF LEFT PARENTHESIS + 2E5A; 2E59; c # TOP HALF RIGHT PARENTHESIS + 2E5B; 2E5C; o # BOTTOM HALF LEFT PARENTHESIS + 2E5C; 2E5B; c # BOTTOM HALF RIGHT PARENTHESIS + 3008; 3009; o # LEFT ANGLE BRACKET + 3009; 3008; c # RIGHT ANGLE BRACKET + 300A; 300B; o # LEFT DOUBLE ANGLE BRACKET + 300B; 300A; c # RIGHT DOUBLE ANGLE BRACKET + 300C; 300D; o # LEFT CORNER BRACKET + 300D; 300C; c # RIGHT CORNER BRACKET + 300E; 300F; o # LEFT WHITE CORNER BRACKET + 300F; 300E; c # RIGHT WHITE CORNER BRACKET + 3010; 3011; o # LEFT BLACK 
LENTICULAR BRACKET + 3011; 3010; c # RIGHT BLACK LENTICULAR BRACKET + 3014; 3015; o # LEFT TORTOISE SHELL BRACKET + 3015; 3014; c # RIGHT TORTOISE SHELL BRACKET + 3016; 3017; o # LEFT WHITE LENTICULAR BRACKET + 3017; 3016; c # RIGHT WHITE LENTICULAR BRACKET + 3018; 3019; o # LEFT WHITE TORTOISE SHELL BRACKET + 3019; 3018; c # RIGHT WHITE TORTOISE SHELL BRACKET + 301A; 301B; o # LEFT WHITE SQUARE BRACKET + 301B; 301A; c # RIGHT WHITE SQUARE BRACKET + FE59; FE5A; o # SMALL LEFT PARENTHESIS + FE5A; FE59; c # SMALL RIGHT PARENTHESIS + FE5B; FE5C; o # SMALL LEFT CURLY BRACKET + FE5C; FE5B; c # SMALL RIGHT CURLY BRACKET + FE5D; FE5E; o # SMALL LEFT TORTOISE SHELL BRACKET + FE5E; FE5D; c # SMALL RIGHT TORTOISE SHELL BRACKET + FF08; FF09; o # FULLWIDTH LEFT PARENTHESIS + FF09; FF08; c # FULLWIDTH RIGHT PARENTHESIS + FF3B; FF3D; o # FULLWIDTH LEFT SQUARE BRACKET + FF3D; FF3B; c # FULLWIDTH RIGHT SQUARE BRACKET + FF5B; FF5D; o # FULLWIDTH LEFT CURLY BRACKET + FF5D; FF5B; c # FULLWIDTH RIGHT CURLY BRACKET + FF5F; FF60; o # FULLWIDTH LEFT WHITE PARENTHESIS + FF60; FF5F; c # FULLWIDTH RIGHT WHITE PARENTHESIS + FF62; FF63; o # HALFWIDTH LEFT CORNER BRACKET + FF63; FF62; c # HALFWIDTH RIGHT CORNER BRACKET + + # EOF +`.trim(); diff --git a/packages/test-utils/src/mock-store/default-files/derived-bidi-class.ts b/packages/test-utils/src/mock-store/default-files/derived-bidi-class.ts new file mode 100644 index 000000000..a7102948d --- /dev/null +++ b/packages/test-utils/src/mock-store/default-files/derived-bidi-class.ts @@ -0,0 +1,2618 @@ +import { dedent } from "@luxass/utils"; + +export const defaultDerivedBidClassFileContent = dedent` + # DerivedBidiClass-17.0.0.txt + # Date: 2025-07-24, 00:12:44 GMT + # © 2025 Unicode®, Inc. + # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
+ # For terms of use and license, see https://www.unicode.org/terms_of_use.html + # + # Unicode Character Database + # For documentation, see https://www.unicode.org/reports/tr44/ + + # ================================================ + + # Bidi Class (listing UnicodeData.txt, field 4: see UAX #44: https://www.unicode.org/reports/tr44/) + # Unlike other properties, unassigned code points in blocks + # reserved for right-to-left scripts are given either values R or AL, + # and unassigned code points in the Currency Symbols block are given the value ET. + # For details see the @missing lines below. + # + # The unassigned code points that default to BN have one of the following properties: + # Default_Ignorable_Code_Point + # Noncharacter_Code_Point + # + # For all other cases: + + # All code points not explicitly listed for Bidi_Class + # have the value Left_To_Right (L). + + # @missing: 0000..10FFFF; Left_To_Right + + # 0590..05FF Hebrew + # @missing: 0590..05FF; Right_To_Left + + # 0600..06FF Arabic + # 0700..074F Syriac + # 0750..077F Arabic_Supplement + # 0780..07BF Thaana + # @missing: 0600..07BF; Arabic_Letter + + # 07C0..07FF NKo + # 0800..083F Samaritan + # 0840..085F Mandaic + # @missing: 07C0..085F; Right_To_Left + + # 0860..086F Syriac_Supplement + # 0870..089F Arabic_Extended_B + # 08A0..08FF Arabic_Extended_A + # @missing: 0860..08FF; Arabic_Letter + + # 20A0..20CF Currency_Symbols + # @missing: 20A0..20CF; European_Terminator + + # FB00..FB4F Alphabetic_Presentation_Forms (partial) + # @missing: FB1D..FB4F; Right_To_Left + + # FB50..FDFF Arabic_Presentation_Forms_A (partial) + # @missing: FB50..FDCF; Arabic_Letter + + # FB50..FDFF Arabic_Presentation_Forms_A (partial) + # @missing: FDF0..FDFF; Arabic_Letter + + # FE70..FEFF Arabic_Presentation_Forms_B + # @missing: FE70..FEFF; Arabic_Letter + + # 10800..1083F Cypriot_Syllabary + # 10840..1085F Imperial_Aramaic + # 10860..1087F Palmyrene + # 10880..108AF Nabataean + # 108E0..108FF Hatran + # 10900..1091F 
Phoenician + # 10920..1093F Lydian + # 10940..1095F Sidetic + # 10980..1099F Meroitic_Hieroglyphs + # 109A0..109FF Meroitic_Cursive + # 10A00..10A5F Kharoshthi + # 10A60..10A7F Old_South_Arabian + # 10A80..10A9F Old_North_Arabian + # 10AC0..10AFF Manichaean + # 10B00..10B3F Avestan + # 10B40..10B5F Inscriptional_Parthian + # 10B60..10B7F Inscriptional_Pahlavi + # 10B80..10BAF Psalter_Pahlavi + # 10C00..10C4F Old_Turkic + # 10C80..10CFF Old_Hungarian + # @missing: 10800..10CFF; Right_To_Left + + # 10D00..10D3F Hanifi_Rohingya + # @missing: 10D00..10D3F; Arabic_Letter + + # 10D40..10D8F Garay + # 10E60..10E7F Rumi_Numeral_Symbols + # 10E80..10EBF Yezidi + # @missing: 10D40..10EBF; Right_To_Left + + # 10EC0..10EFF Arabic_Extended_C + # @missing: 10EC0..10EFF; Arabic_Letter + + # 10F00..10F2F Old_Sogdian + # @missing: 10F00..10F2F; Right_To_Left + + # 10F30..10F6F Sogdian + # @missing: 10F30..10F6F; Arabic_Letter + + # 10F70..10FAF Old_Uyghur + # 10FB0..10FDF Chorasmian + # 10FE0..10FFF Elymaic + # @missing: 10F70..10FFF; Right_To_Left + + # 1E800..1E8DF Mende_Kikakui + # 1E900..1E95F Adlam + # @missing: 1E800..1EC6F; Right_To_Left + + # 1EC70..1ECBF Indic_Siyaq_Numbers + # @missing: 1EC70..1ECBF; Arabic_Letter + + # @missing: 1ECC0..1ECFF; Right_To_Left + + # 1ED00..1ED4F Ottoman_Siyaq_Numbers + # @missing: 1ED00..1ED4F; Arabic_Letter + + # @missing: 1ED50..1EDFF; Right_To_Left + + # 1EE00..1EEFF Arabic_Mathematical_Alphabetic_Symbols + # @missing: 1EE00..1EEFF; Arabic_Letter + + # @missing: 1EF00..1EFFF; Right_To_Left + + # ================================================ + + # Bidi_Class=Left_To_Right + + 0041..005A ; L # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z + 0061..007A ; L # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z + 00AA ; L # Lo FEMININE ORDINAL INDICATOR + 00B5 ; L # L& MICRO SIGN + 00BA ; L # Lo MASCULINE ORDINAL INDICATOR + 00C0..00D6 ; L # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS + 
00D8..00F6 ; L # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS + 00F8..01BA ; L # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL + 01BB ; L # Lo LATIN LETTER TWO WITH STROKE + 01BC..01BF ; L # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN + 01C0..01C3 ; L # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK + 01C4..0293 ; L # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL + 0294..0295 ; L # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE + 0296..02AF ; L # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL + 02B0..02B8 ; L # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y + 02BB..02C1 ; L # Lm [7] MODIFIER LETTER TURNED COMMA..MODIFIER LETTER REVERSED GLOTTAL STOP + 02D0..02D1 ; L # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON + 02E0..02E4 ; L # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP + 02EE ; L # Lm MODIFIER LETTER DOUBLE APOSTROPHE + 0370..0373 ; L # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI + 0376..0377 ; L # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA + 037A ; L # Lm GREEK YPOGEGRAMMENI + 037B..037D ; L # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL + 037F ; L # L& GREEK CAPITAL LETTER YOT + 0386 ; L # L& GREEK CAPITAL LETTER ALPHA WITH TONOS + 0388..038A ; L # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS + 038C ; L # L& GREEK CAPITAL LETTER OMICRON WITH TONOS + 038E..03A1 ; L # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO + 03A3..03F5 ; L # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL + 03F7..0481 ; L # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA + 0482 ; L # So CYRILLIC 
THOUSANDS SIGN + 048A..052F ; L # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER + 0531..0556 ; L # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH + 0559 ; L # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING + 055A..055F ; L # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK + 0560..0588 ; L # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE + 0589 ; L # Po ARMENIAN FULL STOP + 0903 ; L # Mc DEVANAGARI SIGN VISARGA + 0904..0939 ; L # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA + 093B ; L # Mc DEVANAGARI VOWEL SIGN OOE + 093D ; L # Lo DEVANAGARI SIGN AVAGRAHA + 093E..0940 ; L # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II + 0949..094C ; L # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU + 094E..094F ; L # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW + 0950 ; L # Lo DEVANAGARI OM + 0958..0961 ; L # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL + 0964..0965 ; L # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA + 0966..096F ; L # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE + 0970 ; L # Po DEVANAGARI ABBREVIATION SIGN + 0971 ; L # Lm DEVANAGARI SIGN HIGH SPACING DOT + 0972..0980 ; L # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI + 0982..0983 ; L # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA + 0985..098C ; L # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L + 098F..0990 ; L # Lo [2] BENGALI LETTER E..BENGALI LETTER AI + 0993..09A8 ; L # Lo [22] BENGALI LETTER O..BENGALI LETTER NA + 09AA..09B0 ; L # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA + 09B2 ; L # Lo BENGALI LETTER LA + 09B6..09B9 ; L # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA + 09BD ; L # Lo BENGALI SIGN AVAGRAHA + 09BE..09C0 ; L # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II + 09C7..09C8 ; L # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI + 09CB..09CC ; L # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL 
SIGN AU + 09CE ; L # Lo BENGALI LETTER KHANDA TA + 09D7 ; L # Mc BENGALI AU LENGTH MARK + 09DC..09DD ; L # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA + 09DF..09E1 ; L # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL + 09E6..09EF ; L # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE + 09F0..09F1 ; L # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL + 09F4..09F9 ; L # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN + 09FA ; L # So BENGALI ISSHAR + 09FC ; L # Lo BENGALI LETTER VEDIC ANUSVARA + 09FD ; L # Po BENGALI ABBREVIATION SIGN + 0A03 ; L # Mc GURMUKHI SIGN VISARGA + 0A05..0A0A ; L # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU + 0A0F..0A10 ; L # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI + 0A13..0A28 ; L # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA + 0A2A..0A30 ; L # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA + 0A32..0A33 ; L # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA + 0A35..0A36 ; L # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA + 0A38..0A39 ; L # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA + 0A3E..0A40 ; L # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II + 0A59..0A5C ; L # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA + 0A5E ; L # Lo GURMUKHI LETTER FA + 0A66..0A6F ; L # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE + 0A72..0A74 ; L # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR + 0A76 ; L # Po GURMUKHI ABBREVIATION SIGN + 0A83 ; L # Mc GUJARATI SIGN VISARGA + 0A85..0A8D ; L # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E + 0A8F..0A91 ; L # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O + 0A93..0AA8 ; L # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA + 0AAA..0AB0 ; L # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA + 0AB2..0AB3 ; L # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA + 0AB5..0AB9 ; L # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA + 0ABD ; L # Lo GUJARATI SIGN AVAGRAHA + 0ABE..0AC0 ; L # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II + 
0AC9 ; L # Mc GUJARATI VOWEL SIGN CANDRA O + 0ACB..0ACC ; L # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU + 0AD0 ; L # Lo GUJARATI OM + 0AE0..0AE1 ; L # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL + 0AE6..0AEF ; L # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE + 0AF0 ; L # Po GUJARATI ABBREVIATION SIGN + 0AF9 ; L # Lo GUJARATI LETTER ZHA + 0B02..0B03 ; L # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA + 0B05..0B0C ; L # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L + 0B0F..0B10 ; L # Lo [2] ORIYA LETTER E..ORIYA LETTER AI + 0B13..0B28 ; L # Lo [22] ORIYA LETTER O..ORIYA LETTER NA + 0B2A..0B30 ; L # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA + 0B32..0B33 ; L # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA + 0B35..0B39 ; L # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA + 0B3D ; L # Lo ORIYA SIGN AVAGRAHA + 0B3E ; L # Mc ORIYA VOWEL SIGN AA + 0B40 ; L # Mc ORIYA VOWEL SIGN II + 0B47..0B48 ; L # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI + 0B4B..0B4C ; L # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU + 0B57 ; L # Mc ORIYA AU LENGTH MARK + 0B5C..0B5D ; L # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA + 0B5F..0B61 ; L # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL + 0B66..0B6F ; L # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE + 0B70 ; L # So ORIYA ISSHAR + 0B71 ; L # Lo ORIYA LETTER WA + 0B72..0B77 ; L # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS + 0B83 ; L # Lo TAMIL SIGN VISARGA + 0B85..0B8A ; L # Lo [6] TAMIL LETTER A..TAMIL LETTER UU + 0B8E..0B90 ; L # Lo [3] TAMIL LETTER E..TAMIL LETTER AI + 0B92..0B95 ; L # Lo [4] TAMIL LETTER O..TAMIL LETTER KA + 0B99..0B9A ; L # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA + 0B9C ; L # Lo TAMIL LETTER JA + 0B9E..0B9F ; L # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA + 0BA3..0BA4 ; L # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA + 0BA8..0BAA ; L # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA + 0BAE..0BB9 ; L # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA + 0BBE..0BBF ; L # Mc [2] TAMIL VOWEL SIGN 
AA..TAMIL VOWEL SIGN I + 0BC1..0BC2 ; L # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU + 0BC6..0BC8 ; L # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI + 0BCA..0BCC ; L # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU + 0BD0 ; L # Lo TAMIL OM + 0BD7 ; L # Mc TAMIL AU LENGTH MARK + 0BE6..0BEF ; L # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE + 0BF0..0BF2 ; L # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND + 0C01..0C03 ; L # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA + 0C05..0C0C ; L # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L + 0C0E..0C10 ; L # Lo [3] TELUGU LETTER E..TELUGU LETTER AI + 0C12..0C28 ; L # Lo [23] TELUGU LETTER O..TELUGU LETTER NA + 0C2A..0C39 ; L # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA + 0C3D ; L # Lo TELUGU SIGN AVAGRAHA + 0C41..0C44 ; L # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR + 0C58..0C5A ; L # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA + 0C5C..0C5D ; L # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU + 0C60..0C61 ; L # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL + 0C66..0C6F ; L # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE + 0C77 ; L # Po TELUGU SIGN SIDDHAM + 0C7F ; L # So TELUGU SIGN TUUMU + 0C80 ; L # Lo KANNADA SIGN SPACING CANDRABINDU + 0C82..0C83 ; L # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA + 0C84 ; L # Po KANNADA SIGN SIDDHAM + 0C85..0C8C ; L # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L + 0C8E..0C90 ; L # Lo [3] KANNADA LETTER E..KANNADA LETTER AI + 0C92..0CA8 ; L # Lo [23] KANNADA LETTER O..KANNADA LETTER NA + 0CAA..0CB3 ; L # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA + 0CB5..0CB9 ; L # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA + 0CBD ; L # Lo KANNADA SIGN AVAGRAHA + 0CBE ; L # Mc KANNADA VOWEL SIGN AA + 0CBF ; L # Mn KANNADA VOWEL SIGN I + 0CC0..0CC4 ; L # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR + 0CC6 ; L # Mn KANNADA VOWEL SIGN E + 0CC7..0CC8 ; L # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI + 0CCA..0CCB ; L # Mc 
[2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO + 0CD5..0CD6 ; L # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK + 0CDC..0CDE ; L # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA + 0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL + 0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE + 0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA + 0CF3 ; L # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT + 0D02..0D03 ; L # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA + 0D04..0D0C ; L # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L + 0D0E..0D10 ; L # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI + 0D12..0D3A ; L # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA + 0D3D ; L # Lo MALAYALAM SIGN AVAGRAHA + 0D3E..0D40 ; L # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II + 0D46..0D48 ; L # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI + 0D4A..0D4C ; L # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU + 0D4E ; L # Lo MALAYALAM LETTER DOT REPH + 0D4F ; L # So MALAYALAM SIGN PARA + 0D54..0D56 ; L # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL + 0D57 ; L # Mc MALAYALAM AU LENGTH MARK + 0D58..0D5E ; L # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH + 0D5F..0D61 ; L # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL + 0D66..0D6F ; L # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE + 0D70..0D78 ; L # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS + 0D79 ; L # So MALAYALAM DATE MARK + 0D7A..0D7F ; L # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K + 0D82..0D83 ; L # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA + 0D85..0D96 ; L # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA + 0D9A..0DB1 ; L # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA + 0DB3..0DBB ; L # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER 
RAYANNA + 0DBD ; L # Lo SINHALA LETTER DANTAJA LAYANNA + 0DC0..0DC6 ; L # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA + 0DCF..0DD1 ; L # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA + 0DD8..0DDF ; L # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA + 0DE6..0DEF ; L # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE + 0DF2..0DF3 ; L # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA + 0DF4 ; L # Po SINHALA PUNCTUATION KUNDDALIYA + 0E01..0E30 ; L # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A + 0E32..0E33 ; L # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM + 0E40..0E45 ; L # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO + 0E46 ; L # Lm THAI CHARACTER MAIYAMOK + 0E4F ; L # Po THAI CHARACTER FONGMAN + 0E50..0E59 ; L # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE + 0E5A..0E5B ; L # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT + 0E81..0E82 ; L # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG + 0E84 ; L # Lo LAO LETTER KHO TAM + 0E86..0E8A ; L # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM + 0E8C..0EA3 ; L # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING + 0EA5 ; L # Lo LAO LETTER LO LOOT + 0EA7..0EB0 ; L # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A + 0EB2..0EB3 ; L # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM + 0EBD ; L # Lo LAO SEMIVOWEL SIGN NYO + 0EC0..0EC4 ; L # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI + 0EC6 ; L # Lm LAO KO LA + 0ED0..0ED9 ; L # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE + 0EDC..0EDF ; L # Lo [4] LAO HO NO..LAO LETTER KHMU NYO + 0F00 ; L # Lo TIBETAN SYLLABLE OM + 0F01..0F03 ; L # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA + 0F04..0F12 ; L # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD + 0F13 ; L # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN + 0F14 ; L # Po TIBETAN MARK GTER TSHEG + 0F15..0F17 ; L # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN 
ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS + 0F1A..0F1F ; L # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG + 0F20..0F29 ; L # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE + 0F2A..0F33 ; L # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO + 0F34 ; L # So TIBETAN MARK BSDUS RTAGS + 0F36 ; L # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN + 0F38 ; L # So TIBETAN MARK CHE MGO + 0F3E..0F3F ; L # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES + 0F40..0F47 ; L # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA + 0F49..0F6C ; L # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA + 0F7F ; L # Mc TIBETAN SIGN RNAM BCAD + 0F85 ; L # Po TIBETAN MARK PALUTA + 0F88..0F8C ; L # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN + 0FBE..0FC5 ; L # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE + 0FC7..0FCC ; L # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL + 0FCE..0FCF ; L # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM + 0FD0..0FD4 ; L # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA + 0FD5..0FD8 ; L # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS + 0FD9..0FDA ; L # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS + 1000..102A ; L # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU + 102B..102C ; L # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA + 1031 ; L # Mc MYANMAR VOWEL SIGN E + 1038 ; L # Mc MYANMAR SIGN VISARGA + 103B..103C ; L # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA + 103F ; L # Lo MYANMAR LETTER GREAT SA + 1040..1049 ; L # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE + 104A..104F ; L # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE + 1050..1055 ; L # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL + 1056..1057 ; L # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR + 105A..105D ; L # Lo [4] MYANMAR LETTER 
MON NGA..MYANMAR LETTER MON BBE + 1061 ; L # Lo MYANMAR LETTER SGAW KAREN SHA + 1062..1064 ; L # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO + 1065..1066 ; L # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA + 1067..106D ; L # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 + 106E..1070 ; L # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA + 1075..1081 ; L # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA + 1083..1084 ; L # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E + 1087..108C ; L # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 + 108E ; L # Lo MYANMAR LETTER RUMAI PALAUNG FA + 108F ; L # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 + 1090..1099 ; L # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE + 109A..109C ; L # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A + 109E..109F ; L # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION + 10A0..10C5 ; L # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE + 10C7 ; L # L& GEORGIAN CAPITAL LETTER YN + 10CD ; L # L& GEORGIAN CAPITAL LETTER AEN + 10D0..10FA ; L # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN + 10FB ; L # Po GEORGIAN PARAGRAPH SEPARATOR + 10FC ; L # Lm MODIFIER LETTER GEORGIAN NAR + 10FD..10FF ; L # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN + 1100..1248 ; L # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA + 124A..124D ; L # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE + 1250..1256 ; L # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO + 1258 ; L # Lo ETHIOPIC SYLLABLE QHWA + 125A..125D ; L # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE + 1260..1288 ; L # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA + 128A..128D ; L # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE + 1290..12B0 ; L # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE 
KWA + 12B2..12B5 ; L # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE + 12B8..12BE ; L # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO + 12C0 ; L # Lo ETHIOPIC SYLLABLE KXWA + 12C2..12C5 ; L # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE + 12C8..12D6 ; L # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O + 12D8..1310 ; L # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA + 1312..1315 ; L # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE + 1318..135A ; L # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA + 1360..1368 ; L # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR + 1369..137C ; L # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND + 1380..138F ; L # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE + 13A0..13F5 ; L # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV + 13F8..13FD ; L # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV + 1401..166C ; L # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA + 166D ; L # So CANADIAN SYLLABICS CHI SIGN + 166E ; L # Po CANADIAN SYLLABICS FULL STOP + 166F..167F ; L # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W + 1681..169A ; L # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH + 16A0..16EA ; L # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X + 16EB..16ED ; L # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION + 16EE..16F0 ; L # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL + 16F1..16F8 ; L # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC + 1700..1711 ; L # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA + 1715 ; L # Mc TAGALOG SIGN PAMUDPOD + 171F..1731 ; L # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA + 1734 ; L # Mc HANUNOO SIGN PAMUDPOD + 1735..1736 ; L # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION + 1740..1751 ; L # Lo [18] BUHID LETTER A..BUHID LETTER HA + 1760..176C ; L # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA + 176E..1770 ; L # Lo [3] TAGBANWA 
LETTER LA..TAGBANWA LETTER SA + 1780..17B3 ; L # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU + 17B6 ; L # Mc KHMER VOWEL SIGN AA + 17BE..17C5 ; L # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU + 17C7..17C8 ; L # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU + 17D4..17D6 ; L # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH + 17D7 ; L # Lm KHMER SIGN LEK TOO + 17D8..17DA ; L # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT + 17DC ; L # Lo KHMER SIGN AVAKRAHASANYA + 17E0..17E9 ; L # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE + 1810..1819 ; L # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE + 1820..1842 ; L # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI + 1843 ; L # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN + 1844..1878 ; L # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS + 1880..1884 ; L # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA + 1887..18A8 ; L # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA + 18AA ; L # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA + 18B0..18F5 ; L # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S + 1900..191E ; L # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA + 1923..1926 ; L # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU + 1929..192B ; L # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA + 1930..1931 ; L # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA + 1933..1938 ; L # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA + 1946..194F ; L # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE + 1950..196D ; L # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI + 1970..1974 ; L # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 + 1980..19AB ; L # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA + 19B0..19C9 ; L # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 + 19D0..19D9 ; L # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE + 19DA ; L # No NEW TAI LUE 
THAM DIGIT ONE + 1A00..1A16 ; L # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA + 1A19..1A1A ; L # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O + 1A1E..1A1F ; L # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION + 1A20..1A54 ; L # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA + 1A55 ; L # Mc TAI THAM CONSONANT SIGN MEDIAL RA + 1A57 ; L # Mc TAI THAM CONSONANT SIGN LA TANG LAI + 1A61 ; L # Mc TAI THAM VOWEL SIGN A + 1A63..1A64 ; L # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA + 1A6D..1A72 ; L # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI + 1A80..1A89 ; L # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE + 1A90..1A99 ; L # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE + 1AA0..1AA6 ; L # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA + 1AA7 ; L # Lm TAI THAM SIGN MAI YAMOK + 1AA8..1AAD ; L # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG + 1B04 ; L # Mc BALINESE SIGN BISAH + 1B05..1B33 ; L # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA + 1B35 ; L # Mc BALINESE VOWEL SIGN TEDUNG + 1B3B ; L # Mc BALINESE VOWEL SIGN RA REPA TEDUNG + 1B3D..1B41 ; L # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG + 1B43..1B44 ; L # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG + 1B45..1B4C ; L # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA + 1B4E..1B4F ; L # Po [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN + 1B50..1B59 ; L # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE + 1B5A..1B60 ; L # Po [7] BALINESE PANTI..BALINESE PAMENENG + 1B61..1B6A ; L # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE + 1B74..1B7C ; L # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING + 1B7D..1B7F ; L # Po [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK + 1B82 ; L # Mc SUNDANESE SIGN PANGWISAD + 1B83..1BA0 ; L # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER 
HA + 1BA1 ; L # Mc SUNDANESE CONSONANT SIGN PAMINGKAL + 1BA6..1BA7 ; L # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG + 1BAA ; L # Mc SUNDANESE SIGN PAMAAEH + 1BAE..1BAF ; L # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA + 1BB0..1BB9 ; L # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE + 1BBA..1BE5 ; L # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U + 1BE7 ; L # Mc BATAK VOWEL SIGN E + 1BEA..1BEC ; L # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O + 1BEE ; L # Mc BATAK VOWEL SIGN U + 1BF2..1BF3 ; L # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN + 1BFC..1BFF ; L # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT + 1C00..1C23 ; L # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A + 1C24..1C2B ; L # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU + 1C34..1C35 ; L # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG + 1C3B..1C3F ; L # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK + 1C40..1C49 ; L # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE + 1C4D..1C4F ; L # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA + 1C50..1C59 ; L # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE + 1C5A..1C77 ; L # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH + 1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD + 1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD + 1C80..1C8A ; L # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE + 1C90..1CBA ; L # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN + 1CBD..1CBF ; L # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN + 1CC0..1CC7 ; L # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA + 1CD3 ; L # Po VEDIC SIGN NIHSHVASA + 1CE1 ; L # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA + 1CE9..1CEC ; L # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL + 1CEE..1CF3 ; L # Lo [6] VEDIC 
SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA + 1CF5..1CF6 ; L # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA + 1CF7 ; L # Mc VEDIC SIGN ATIKRAMA + 1CFA ; L # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA + 1D00..1D2B ; L # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL + 1D2C..1D6A ; L # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI + 1D6B..1D77 ; L # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G + 1D78 ; L # Lm MODIFIER LETTER CYRILLIC EN + 1D79..1D9A ; L # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK + 1D9B..1DBF ; L # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA + 1E00..1F15 ; L # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA + 1F18..1F1D ; L # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA + 1F20..1F45 ; L # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA + 1F48..1F4D ; L # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA + 1F50..1F57 ; L # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI + 1F59 ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA + 1F5B ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA + 1F5D ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA + 1F5F..1F7D ; L # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA + 1F80..1FB4 ; L # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI + 1FB6..1FBC ; L # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + 1FBE ; L # L& GREEK PROSGEGRAMMENI + 1FC2..1FC4 ; L # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA 
AND YPOGEGRAMMENI + 1FC6..1FCC ; L # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + 1FD0..1FD3 ; L # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA + 1FD6..1FDB ; L # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA + 1FE0..1FEC ; L # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA + 1FF2..1FF4 ; L # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + 1FF6..1FFC ; L # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + 200E ; L # Cf LEFT-TO-RIGHT MARK + 2071 ; L # Lm SUPERSCRIPT LATIN SMALL LETTER I + 207F ; L # Lm SUPERSCRIPT LATIN SMALL LETTER N + 2090..209C ; L # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T + 2102 ; L # L& DOUBLE-STRUCK CAPITAL C + 2107 ; L # L& EULER CONSTANT + 210A..2113 ; L # L& [10] SCRIPT SMALL G..SCRIPT SMALL L + 2115 ; L # L& DOUBLE-STRUCK CAPITAL N + 2119..211D ; L # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R + 2124 ; L # L& DOUBLE-STRUCK CAPITAL Z + 2126 ; L # L& OHM SIGN + 2128 ; L # L& BLACK-LETTER CAPITAL Z + 212A..212D ; L # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C + 212F..2134 ; L # L& [6] SCRIPT SMALL E..SCRIPT SMALL O + 2135..2138 ; L # Lo [4] ALEF SYMBOL..DALET SYMBOL + 2139 ; L # L& INFORMATION SOURCE + 213C..213F ; L # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI + 2145..2149 ; L # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J + 214E ; L # L& TURNED SMALL F + 214F ; L # So SYMBOL FOR SAMARITAN SOURCE + 2160..2182 ; L # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND + 2183..2184 ; L # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C + 2185..2188 ; L # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND + 2336..237A ; L # So [69] APL 
FUNCTIONAL SYMBOL I-BEAM..APL FUNCTIONAL SYMBOL ALPHA + 2395 ; L # So APL FUNCTIONAL SYMBOL QUAD + 249C..24E9 ; L # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z + 26AC ; L # So MEDIUM SMALL WHITE CIRCLE + 2800..28FF ; L # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 + 2C00..2C7B ; L # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E + 2C7C..2C7D ; L # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V + 2C7E..2CE4 ; L # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI + 2CEB..2CEE ; L # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA + 2CF2..2CF3 ; L # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI + 2D00..2D25 ; L # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE + 2D27 ; L # L& GEORGIAN SMALL LETTER YN + 2D2D ; L # L& GEORGIAN SMALL LETTER AEN + 2D30..2D67 ; L # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO + 2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK + 2D70 ; L # Po TIFINAGH SEPARATOR MARK + 2D80..2D96 ; L # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE + 2DA0..2DA6 ; L # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO + 2DA8..2DAE ; L # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO + 2DB0..2DB6 ; L # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO + 2DB8..2DBE ; L # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO + 2DC0..2DC6 ; L # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO + 2DC8..2DCE ; L # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO + 2DD0..2DD6 ; L # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO + 2DD8..2DDE ; L # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO + 3005 ; L # Lm IDEOGRAPHIC ITERATION MARK + 3006 ; L # Lo IDEOGRAPHIC CLOSING MARK + 3007 ; L # Nl IDEOGRAPHIC NUMBER ZERO + 3021..3029 ; L # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE + 302E..302F ; L # Mc [2] HANGUL SINGLE DOT TONE 
MARK..HANGUL DOUBLE DOT TONE MARK + 3031..3035 ; L # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF + 3038..303A ; L # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY + 303B ; L # Lm VERTICAL IDEOGRAPHIC ITERATION MARK + 303C ; L # Lo MASU MARK + 3041..3096 ; L # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE + 309D..309E ; L # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK + 309F ; L # Lo HIRAGANA DIGRAPH YORI + 30A1..30FA ; L # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO + 30FC..30FE ; L # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK + 30FF ; L # Lo KATAKANA DIGRAPH KOTO + 3105..312F ; L # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN + 3131..318E ; L # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE + 3190..3191 ; L # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK + 3192..3195 ; L # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK + 3196..319F ; L # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK + 31A0..31BF ; L # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH + 31F0..31FF ; L # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO + 3200..321C ; L # So [29] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED HANGUL CIEUC U + 3220..3229 ; L # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN + 322A..3247 ; L # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO + 3248..324F ; L # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE + 3260..327B ; L # So [28] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL HIEUH A + 327F ; L # So KOREAN STANDARD SYMBOL + 3280..3289 ; L # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN + 328A..32B0 ; L # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT + 32C0..32CB ; L # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER + 32D0..3376 ; L # So [167] CIRCLED 
KATAKANA A..SQUARE PC + 337B..33DD ; L # So [99] SQUARE ERA NAME HEISEI..SQUARE WB + 33E0..33FE ; L # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE + 3400..4DBF ; L # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF + 4E00..A014 ; L # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E + A015 ; L # Lm YI SYLLABLE WU + A016..A48C ; L # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR + A4D0..A4F7 ; L # Lo [40] LISU LETTER BA..LISU LETTER OE + A4F8..A4FD ; L # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU + A4FE..A4FF ; L # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP + A500..A60B ; L # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG + A60C ; L # Lm VAI SYLLABLE LENGTHENER + A610..A61F ; L # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG + A620..A629 ; L # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE + A62A..A62B ; L # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO + A640..A66D ; L # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O + A66E ; L # Lo CYRILLIC LETTER MULTIOCULAR O + A680..A69B ; L # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O + A69C..A69D ; L # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN + A6A0..A6E5 ; L # Lo [70] BAMUM LETTER A..BAMUM LETTER KI + A6E6..A6EF ; L # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM + A6F2..A6F7 ; L # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK + A722..A76F ; L # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON + A770 ; L # Lm MODIFIER LETTER US + A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T + A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN + A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT + A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT + A790..A7DC ; L # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER 
LAMBDA WITH STROKE + A7F1..A7F4 ; L # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q + A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H + A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I + A7F8..A7F9 ; L # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE + A7FA ; L # L& LATIN LETTER SMALL CAPITAL TURNED M + A7FB..A801 ; L # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I + A803..A805 ; L # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O + A807..A80A ; L # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO + A80C..A822 ; L # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO + A823..A824 ; L # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I + A827 ; L # Mc SYLOTI NAGRI VOWEL SIGN OO + A830..A835 ; L # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS + A836..A837 ; L # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK + A840..A873 ; L # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU + A880..A881 ; L # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA + A882..A8B3 ; L # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA + A8B4..A8C3 ; L # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU + A8CE..A8CF ; L # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA + A8D0..A8D9 ; L # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE + A8F2..A8F7 ; L # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA + A8F8..A8FA ; L # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET + A8FB ; L # Lo DEVANAGARI HEADSTROKE + A8FC ; L # Po DEVANAGARI SIGN SIDDHAM + A8FD..A8FE ; L # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY + A900..A909 ; L # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE + A90A..A925 ; L # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO + A92E..A92F ; L # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA + A930..A946 ; L # Lo [23] REJANG 
LETTER KA..REJANG LETTER A + A952..A953 ; L # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA + A95F ; L # Po REJANG SECTION MARK + A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH + A983 ; L # Mc JAVANESE SIGN WIGNYAN + A984..A9B2 ; L # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA + A9B4..A9B5 ; L # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG + A9BA..A9BB ; L # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE + A9BE..A9C0 ; L # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON + A9C1..A9CD ; L # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH + A9CF ; L # Lm JAVANESE PANGRANGKEP + A9D0..A9D9 ; L # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE + A9DE..A9DF ; L # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN + A9E0..A9E4 ; L # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA + A9E6 ; L # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION + A9E7..A9EF ; L # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA + A9F0..A9F9 ; L # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE + A9FA..A9FE ; L # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA + AA00..AA28 ; L # Lo [41] CHAM LETTER A..CHAM LETTER HA + AA2F..AA30 ; L # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI + AA33..AA34 ; L # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA + AA40..AA42 ; L # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG + AA44..AA4B ; L # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS + AA4D ; L # Mc CHAM CONSONANT SIGN FINAL H + AA50..AA59 ; L # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE + AA5C..AA5F ; L # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA + AA60..AA6F ; L # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA + AA70 ; L # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION + AA71..AA76 ; L # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM + AA77..AA79 ; L # So [3] MYANMAR SYMBOL AITON 
EXCLAMATION..MYANMAR SYMBOL AITON TWO + AA7A ; L # Lo MYANMAR LETTER AITON RA + AA7B ; L # Mc MYANMAR SIGN PAO KAREN TONE + AA7D ; L # Mc MYANMAR SIGN TAI LAING TONE-5 + AA7E..AAAF ; L # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O + AAB1 ; L # Lo TAI VIET VOWEL AA + AAB5..AAB6 ; L # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O + AAB9..AABD ; L # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN + AAC0 ; L # Lo TAI VIET TONE MAI NUENG + AAC2 ; L # Lo TAI VIET TONE MAI SONG + AADB..AADC ; L # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG + AADD ; L # Lm TAI VIET SYMBOL SAM + AADE..AADF ; L # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI + AAE0..AAEA ; L # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA + AAEB ; L # Mc MEETEI MAYEK VOWEL SIGN II + AAEE..AAEF ; L # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU + AAF0..AAF1 ; L # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM + AAF2 ; L # Lo MEETEI MAYEK ANJI + AAF3..AAF4 ; L # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK + AAF5 ; L # Mc MEETEI MAYEK VOWEL SIGN VISARGA + AB01..AB06 ; L # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO + AB09..AB0E ; L # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO + AB11..AB16 ; L # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO + AB20..AB26 ; L # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO + AB28..AB2E ; L # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO + AB30..AB5A ; L # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG + AB5B ; L # Sk MODIFIER BREVE WITH INVERTED BREVE + AB5C..AB5F ; L # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK + AB60..AB68 ; L # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE + AB69 ; L # Lm MODIFIER LETTER SMALL TURNED W + AB70..ABBF ; L # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA + ABC0..ABE2 ; L # Lo [35] MEETEI MAYEK 
LETTER KOK..MEETEI MAYEK LETTER I LONSUM + ABE3..ABE4 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP + ABE6..ABE7 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP + ABE9..ABEA ; L # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG + ABEB ; L # Po MEETEI MAYEK CHEIKHEI + ABEC ; L # Mc MEETEI MAYEK LUM IYEK + ABF0..ABF9 ; L # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE + AC00..D7A3 ; L # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH + D7B0..D7C6 ; L # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E + D7CB..D7FB ; L # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH + E000..F8FF ; L # Co [6400] .. + F900..FA6D ; L # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D + FA70..FAD9 ; L # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 + FB00..FB06 ; L # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST + FB13..FB17 ; L # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH + FF21..FF3A ; L # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z + FF41..FF5A ; L # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z + FF66..FF6F ; L # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU + FF70 ; L # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + FF71..FF9D ; L # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N + FF9E..FF9F ; L # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK + FFA0..FFBE ; L # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH + FFC2..FFC7 ; L # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E + FFCA..FFCF ; L # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE + FFD2..FFD7 ; L # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU + FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH 
HANGUL LETTER I + 10000..1000B ; L # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE + 1000D..10026 ; L # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO + 10028..1003A ; L # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO + 1003C..1003D ; L # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE + 1003F..1004D ; L # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO + 10050..1005D ; L # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 + 10080..100FA ; L # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 + 10100 ; L # Po AEGEAN WORD SEPARATOR LINE + 10102 ; L # Po AEGEAN CHECK MARK + 10107..10133 ; L # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND + 10137..1013F ; L # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT + 1018D..1018E ; L # So [2] GREEK INDICTION SIGN..NOMISMA SIGN + 101D0..101FC ; L # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND + 10280..1029C ; L # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X + 102A0..102D0 ; L # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 + 10300..1031F ; L # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS + 10320..10323 ; L # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY + 1032D..10340 ; L # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA + 10341 ; L # Nl GOTHIC LETTER NINETY + 10342..10349 ; L # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL + 1034A ; L # Nl GOTHIC LETTER NINE HUNDRED + 10350..10375 ; L # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA + 10380..1039D ; L # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU + 1039F ; L # Po UGARITIC WORD DIVIDER + 103A0..103C3 ; L # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA + 103C8..103CF ; L # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH + 103D0 ; L # Po OLD PERSIAN WORD DIVIDER + 103D1..103D5 ; L # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED + 10400..1044F ; L # L& [80] DESERET CAPITAL LETTER LONG I..DESERET 
SMALL LETTER EW + 10450..1049D ; L # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO + 104A0..104A9 ; L # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE + 104B0..104D3 ; L # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA + 104D8..104FB ; L # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA + 10500..10527 ; L # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE + 10530..10563 ; L # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW + 1056F ; L # Po CAUCASIAN ALBANIAN CITATION MARK + 10570..1057A ; L # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA + 1057C..1058A ; L # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE + 1058C..10592 ; L # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE + 10594..10595 ; L # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE + 10597..105A1 ; L # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA + 105A3..105B1 ; L # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE + 105B3..105B9 ; L # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE + 105BB..105BC ; L # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE + 105C0..105F3 ; L # Lo [52] TODHRI LETTER A..TODHRI LETTER OO + 10600..10736 ; L # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 + 10740..10755 ; L # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE + 10760..10767 ; L # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 + 10780..10785 ; L # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK + 10787..107B0 ; L # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK + 107B2..107BA ; L # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL + 11000 ; L # Mc BRAHMI SIGN CANDRABINDU + 11002 ; L # Mc BRAHMI SIGN VISARGA + 11003..11037 ; L # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA + 11047..1104D ; L # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS + 11066..1106F ; L # Nd [10] BRAHMI DIGIT 
ZERO..BRAHMI DIGIT NINE + 11071..11072 ; L # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O + 11075 ; L # Lo BRAHMI LETTER OLD TAMIL LLA + 11082 ; L # Mc KAITHI SIGN VISARGA + 11083..110AF ; L # Lo [45] KAITHI LETTER A..KAITHI LETTER HA + 110B0..110B2 ; L # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II + 110B7..110B8 ; L # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU + 110BB..110BC ; L # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN + 110BD ; L # Cf KAITHI NUMBER SIGN + 110BE..110C1 ; L # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA + 110CD ; L # Cf KAITHI NUMBER SIGN ABOVE + 110D0..110E8 ; L # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE + 110F0..110F9 ; L # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE + 11103..11126 ; L # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA + 1112C ; L # Mc CHAKMA VOWEL SIGN E + 11136..1113F ; L # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE + 11140..11143 ; L # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK + 11144 ; L # Lo CHAKMA LETTER LHAA + 11145..11146 ; L # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI + 11147 ; L # Lo CHAKMA LETTER VAA + 11150..11172 ; L # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA + 11174..11175 ; L # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK + 11176 ; L # Lo MAHAJANI LIGATURE SHRI + 11182 ; L # Mc SHARADA SIGN VISARGA + 11183..111B2 ; L # Lo [48] SHARADA LETTER A..SHARADA LETTER HA + 111B3..111B5 ; L # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II + 111BF..111C0 ; L # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA + 111C1..111C4 ; L # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM + 111C5..111C8 ; L # Po [4] SHARADA DANDA..SHARADA SEPARATOR + 111CD ; L # Po SHARADA SUTRA MARK + 111CE ; L # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E + 111D0..111D9 ; L # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE + 111DA ; L # Lo SHARADA EKAM + 111DB ; L # Po SHARADA SIGN SIDDHAM + 111DC ; L # Lo SHARADA HEADSTROKE + 111DD..111DF ; L 
# Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 + 111E1..111F4 ; L # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND + 11200..11211 ; L # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA + 11213..1122B ; L # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA + 1122C..1122E ; L # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II + 11232..11233 ; L # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU + 11235 ; L # Mc KHOJKI SIGN VIRAMA + 11238..1123D ; L # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN + 1123F..11240 ; L # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I + 11280..11286 ; L # Lo [7] MULTANI LETTER A..MULTANI LETTER GA + 11288 ; L # Lo MULTANI LETTER GHA + 1128A..1128D ; L # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA + 1128F..1129D ; L # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA + 1129F..112A8 ; L # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA + 112A9 ; L # Po MULTANI SECTION MARK + 112B0..112DE ; L # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA + 112E0..112E2 ; L # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II + 112F0..112F9 ; L # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE + 11302..11303 ; L # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA + 11305..1130C ; L # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L + 1130F..11310 ; L # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI + 11313..11328 ; L # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA + 1132A..11330 ; L # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA + 11332..11333 ; L # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA + 11335..11339 ; L # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA + 1133D ; L # Lo GRANTHA SIGN AVAGRAHA + 1133E..1133F ; L # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I + 11341..11344 ; L # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR + 11347..11348 ; L # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI + 1134B..1134D ; L # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA + 11350 ; L # Lo GRANTHA OM + 11357 ; L # Mc 
GRANTHA AU LENGTH MARK + 1135D..11361 ; L # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL + 11362..11363 ; L # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL + 11380..11389 ; L # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL + 1138B ; L # Lo TULU-TIGALARI LETTER EE + 1138E ; L # Lo TULU-TIGALARI LETTER AI + 11390..113B5 ; L # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA + 113B7 ; L # Lo TULU-TIGALARI SIGN AVAGRAHA + 113B8..113BA ; L # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II + 113C2 ; L # Mc TULU-TIGALARI VOWEL SIGN EE + 113C5 ; L # Mc TULU-TIGALARI VOWEL SIGN AI + 113C7..113CA ; L # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA + 113CC..113CD ; L # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA + 113CF ; L # Mc TULU-TIGALARI SIGN LOOPED VIRAMA + 113D1 ; L # Lo TULU-TIGALARI REPHA + 113D3 ; L # Lo TULU-TIGALARI SIGN PLUTA + 113D4..113D5 ; L # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA + 113D7..113D8 ; L # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA + 11400..11434 ; L # Lo [53] NEWA LETTER A..NEWA LETTER HA + 11435..11437 ; L # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II + 11440..11441 ; L # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU + 11445 ; L # Mc NEWA SIGN VISARGA + 11447..1144A ; L # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI + 1144B..1144F ; L # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN + 11450..11459 ; L # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE + 1145A..1145B ; L # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK + 1145D ; L # Po NEWA INSERTION SIGN + 1145F..11461 ; L # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA + 11480..114AF ; L # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA + 114B0..114B2 ; L # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II + 114B9 ; L # Mc TIRHUTA VOWEL SIGN E + 114BB..114BE ; L # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU + 114C1 ; L # Mc TIRHUTA SIGN 
VISARGA + 114C4..114C5 ; L # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG + 114C6 ; L # Po TIRHUTA ABBREVIATION SIGN + 114C7 ; L # Lo TIRHUTA OM + 114D0..114D9 ; L # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE + 11580..115AE ; L # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA + 115AF..115B1 ; L # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II + 115B8..115BB ; L # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU + 115BE ; L # Mc SIDDHAM SIGN VISARGA + 115C1..115D7 ; L # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES + 115D8..115DB ; L # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U + 11600..1162F ; L # Lo [48] MODI LETTER A..MODI LETTER LLA + 11630..11632 ; L # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II + 1163B..1163C ; L # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU + 1163E ; L # Mc MODI SIGN VISARGA + 11641..11643 ; L # Po [3] MODI DANDA..MODI ABBREVIATION SIGN + 11644 ; L # Lo MODI SIGN HUVA + 11650..11659 ; L # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE + 11680..116AA ; L # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA + 116AC ; L # Mc TAKRI SIGN VISARGA + 116AE..116AF ; L # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II + 116B6 ; L # Mc TAKRI SIGN VIRAMA + 116B8 ; L # Lo TAKRI LETTER ARCHAIC KHA + 116B9 ; L # Po TAKRI ABBREVIATION SIGN + 116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE + 116D0..116E3 ; L # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE + 11700..1171A ; L # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA + 1171E ; L # Mc AHOM CONSONANT SIGN MEDIAL RA + 11720..11721 ; L # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA + 11726 ; L # Mc AHOM VOWEL SIGN E + 11730..11739 ; L # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE + 1173A..1173B ; L # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY + 1173C..1173E ; L # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI + 1173F ; L # So AHOM SYMBOL VI + 11740..11746 ; L # Lo [7] AHOM LETTER CA..AHOM LETTER LLA + 
11800..1182B ; L # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA + 1182C..1182E ; L # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II + 11838 ; L # Mc DOGRA SIGN VISARGA + 1183B ; L # Po DOGRA ABBREVIATION SIGN + 118A0..118DF ; L # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO + 118E0..118E9 ; L # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE + 118EA..118F2 ; L # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY + 118FF..11906 ; L # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E + 11909 ; L # Lo DIVES AKURU LETTER O + 1190C..11913 ; L # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA + 11915..11916 ; L # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA + 11918..1192F ; L # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA + 11930..11935 ; L # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E + 11937..11938 ; L # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O + 1193D ; L # Mc DIVES AKURU SIGN HALANTA + 1193F ; L # Lo DIVES AKURU PREFIXED NASAL SIGN + 11940 ; L # Mc DIVES AKURU MEDIAL YA + 11941 ; L # Lo DIVES AKURU INITIAL RA + 11942 ; L # Mc DIVES AKURU MEDIAL RA + 11944..11946 ; L # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK + 11950..11959 ; L # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE + 119A0..119A7 ; L # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR + 119AA..119D0 ; L # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA + 119D1..119D3 ; L # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II + 119DC..119DF ; L # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA + 119E1 ; L # Lo NANDINAGARI SIGN AVAGRAHA + 119E2 ; L # Po NANDINAGARI SIGN SIDDHAM + 119E3 ; L # Lo NANDINAGARI HEADSTROKE + 119E4 ; L # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E + 11A00 ; L # Lo ZANABAZAR SQUARE LETTER A + 11A07..11A08 ; L # Mn [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU + 11A0B..11A32 ; L # Lo [40] ZANABAZAR SQUARE LETTER 
KA..ZANABAZAR SQUARE LETTER KSSA + 11A39 ; L # Mc ZANABAZAR SQUARE SIGN VISARGA + 11A3A ; L # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA + 11A3F..11A46 ; L # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK + 11A50 ; L # Lo SOYOMBO LETTER A + 11A57..11A58 ; L # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU + 11A5C..11A89 ; L # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA + 11A97 ; L # Mc SOYOMBO SIGN VISARGA + 11A9A..11A9C ; L # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD + 11A9D ; L # Lo SOYOMBO MARK PLUTA + 11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 + 11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL + 11B00..11B09 ; L # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU + 11B61 ; L # Mc SHARADA VOWEL SIGN OOE + 11B65 ; L # Mc SHARADA VOWEL SIGN SHORT O + 11B67 ; L # Mc SHARADA VOWEL SIGN CANDRA O + 11BC0..11BE0 ; L # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO + 11BE1 ; L # Po SUNUWAR SIGN PVO + 11BF0..11BF9 ; L # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE + 11C00..11C08 ; L # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L + 11C0A..11C2E ; L # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA + 11C2F ; L # Mc BHAIKSUKI VOWEL SIGN AA + 11C3E ; L # Mc BHAIKSUKI SIGN VISARGA + 11C3F ; L # Mn BHAIKSUKI SIGN VIRAMA + 11C40 ; L # Lo BHAIKSUKI SIGN AVAGRAHA + 11C41..11C45 ; L # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 + 11C50..11C59 ; L # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE + 11C5A..11C6C ; L # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK + 11C70..11C71 ; L # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD + 11C72..11C8F ; L # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A + 11CA9 ; L # Mc MARCHEN SUBJOINED LETTER YA + 11CB1 ; L # Mc MARCHEN VOWEL SIGN I + 11CB4 ; L # Mc MARCHEN VOWEL SIGN O + 11D00..11D06 ; L # Lo [7] MASARAM GONDI LETTER A..MASARAM 
GONDI LETTER E + 11D08..11D09 ; L # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O + 11D0B..11D30 ; L # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA + 11D46 ; L # Lo MASARAM GONDI REPHA + 11D50..11D59 ; L # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE + 11D60..11D65 ; L # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU + 11D67..11D68 ; L # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI + 11D6A..11D89 ; L # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA + 11D8A..11D8E ; L # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU + 11D93..11D94 ; L # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU + 11D96 ; L # Mc GUNJALA GONDI SIGN VISARGA + 11D98 ; L # Lo GUNJALA GONDI OM + 11DA0..11DA9 ; L # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE + 11DB0..11DD8 ; L # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH + 11DD9 ; L # Lm TOLONG SIKI SIGN SELA + 11DDA..11DDB ; L # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA + 11DE0..11DE9 ; L # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE + 11EE0..11EF2 ; L # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA + 11EF5..11EF6 ; L # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O + 11EF7..11EF8 ; L # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION + 11F02 ; L # Lo KAWI SIGN REPHA + 11F03 ; L # Mc KAWI SIGN VISARGA + 11F04..11F10 ; L # Lo [13] KAWI LETTER A..KAWI LETTER O + 11F12..11F33 ; L # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA + 11F34..11F35 ; L # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA + 11F3E..11F3F ; L # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI + 11F41 ; L # Mc KAWI SIGN KILLER + 11F43..11F4F ; L # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL + 11F50..11F59 ; L # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE + 11FB0 ; L # Lo LISU LETTER YHA + 11FC0..11FD4 ; L # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH + 11FFF ; L # Po TAMIL PUNCTUATION END 
OF TEXT + 12000..12399 ; L # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U + 12400..1246E ; L # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM + 12470..12474 ; L # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON + 12480..12543 ; L # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU + 12F90..12FF0 ; L # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 + 12FF1..12FF2 ; L # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 + 13000..1342F ; L # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D + 13430..1343F ; L # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE + 13441..13446 ; L # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN + 13460..143FA ; L # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA + 14400..14646 ; L # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 + 16100..1611D ; L # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA + 1612A..1612C ; L # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA + 16130..16139 ; L # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE + 16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ + 16A40..16A5E ; L # Lo [31] MRO LETTER TA..MRO LETTER TEK + 16A60..16A69 ; L # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE + 16A6E..16A6F ; L # Po [2] MRO DANDA..MRO DOUBLE DANDA + 16A70..16ABE ; L # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA + 16AC0..16AC9 ; L # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE + 16AD0..16AED ; L # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I + 16AF5 ; L # Po BASSA VAH FULL STOP + 16B00..16B2F ; L # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU + 16B37..16B3B ; L # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM + 16B3C..16B3F ; L # So [4] PAHAWH HMONG SIGN XYEEM 
NTXIV..PAHAWH HMONG SIGN XYEEM FAIB + 16B40..16B43 ; L # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM + 16B44 ; L # Po PAHAWH HMONG SIGN XAUS + 16B45 ; L # So PAHAWH HMONG SIGN CIM TSOV ROG + 16B50..16B59 ; L # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE + 16B5B..16B61 ; L # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS + 16B63..16B77 ; L # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS + 16B7D..16B8F ; L # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ + 16D40..16D42 ; L # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA + 16D43..16D6A ; L # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU + 16D6B..16D6C ; L # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT + 16D6D..16D6F ; L # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA + 16D70..16D79 ; L # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE + 16E40..16E7F ; L # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y + 16E80..16E96 ; L # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM + 16E97..16E9A ; L # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH + 16EA0..16EB8 ; L # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY + 16EBB..16ED3 ; L # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY + 16F00..16F4A ; L # Lo [75] MIAO LETTER PA..MIAO LETTER RTE + 16F50 ; L # Lo MIAO LETTER NASALIZATION + 16F51..16F87 ; L # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI + 16F93..16F9F ; L # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 + 16FE0..16FE1 ; L # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK + 16FE3 ; L # Lm OLD CHINESE ITERATION MARK + 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY + 16FF2..16FF3 ; L # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER + 16FF4..16FF6 ; L # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS + 17000..18CD5 ; L # Lo 
[7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 + 18CFF..18D1E ; L # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E + 18D80..18DF2 ; L # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 + 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 + 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 + 1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 + 1B000..1B122 ; L # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU + 1B132 ; L # Lo HIRAGANA LETTER SMALL KO + 1B150..1B152 ; L # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO + 1B155 ; L # Lo KATAKANA LETTER SMALL KO + 1B164..1B167 ; L # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N + 1B170..1B2FB ; L # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB + 1BC00..1BC6A ; L # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M + 1BC70..1BC7C ; L # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK + 1BC80..1BC88 ; L # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL + 1BC90..1BC99 ; L # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW + 1BC9C ; L # So DUPLOYAN SIGN O WITH CROSS + 1BC9F ; L # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP + 1CCD6..1CCEF ; L # So [26] OUTLINED LATIN CAPITAL LETTER A..OUTLINED LATIN CAPITAL LETTER Z + 1CF50..1CFC3 ; L # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK + 1D000..1D0F5 ; L # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO + 1D100..1D126 ; L # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 + 1D129..1D164 ; L # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE + 1D165..1D166 ; L # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM + 1D16A..1D16C ; L # So [3] MUSICAL SYMBOL FINGERED 
TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 + 1D16D..1D172 ; L # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 + 1D183..1D184 ; L # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN + 1D18C..1D1A9 ; L # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH + 1D1AE..1D1E8 ; L # So [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN + 1D2C0..1D2D3 ; L # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN + 1D2E0..1D2F3 ; L # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN + 1D360..1D378 ; L # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE + 1D400..1D454 ; L # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G + 1D456..1D49C ; L # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A + 1D49E..1D49F ; L # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D + 1D4A2 ; L # L& MATHEMATICAL SCRIPT CAPITAL G + 1D4A5..1D4A6 ; L # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K + 1D4A9..1D4AC ; L # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q + 1D4AE..1D4B9 ; L # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D + 1D4BB ; L # L& MATHEMATICAL SCRIPT SMALL F + 1D4BD..1D4C3 ; L # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N + 1D4C5..1D505 ; L # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B + 1D507..1D50A ; L # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G + 1D50D..1D514 ; L # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q + 1D516..1D51C ; L # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y + 1D51E..1D539 ; L # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B + 1D53B..1D53E ; L # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G + 1D540..1D544 ; L # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK 
CAPITAL M + 1D546 ; L # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O + 1D54A..1D550 ; L # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y + 1D552..1D6A5 ; L # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J + 1D6A8..1D6C0 ; L # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA + 1D6C2..1D6DA ; L # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA + 1D6DC..1D6FA ; L # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA + 1D6FC..1D714 ; L # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA + 1D716..1D734 ; L # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA + 1D736..1D74E ; L # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA + 1D750..1D76E ; L # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA + 1D770..1D788 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA + 1D78A..1D7A8 ; L # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA + 1D7AA..1D7C2 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA + 1D7C4..1D7CB ; L # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA + 1D800..1D9FF ; L # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD + 1DA37..1DA3A ; L # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE + 1DA6D..1DA74 ; L # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING + 1DA76..1DA83 ; L # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH + 1DA85..1DA86 ; L # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS + 1DA87..1DA8B ; L # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS + 1DF00..1DF09 ; L # L& [10] LATIN SMALL LETTER 
FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK + 1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK + 1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL + 1DF25..1DF2A ; L # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK + 1E030..1E06D ; L # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE + 1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W + 1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER + 1E140..1E149 ; L # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE + 1E14E ; L # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ + 1E14F ; L # So NYIAKENG PUACHUE HMONG CIRCLED CA + 1E290..1E2AD ; L # Lo [30] TOTO LETTER PA..TOTO LETTER A + 1E2C0..1E2EB ; L # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH + 1E2F0..1E2F9 ; L # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE + 1E4D0..1E4EA ; L # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL + 1E4EB ; L # Lm NAG MUNDARI SIGN OJOD + 1E4F0..1E4F9 ; L # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE + 1E5D0..1E5ED ; L # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG + 1E5F0 ; L # Lo OL ONAL SIGN HODDOND + 1E5F1..1E5FA ; L # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE + 1E5FF ; L # Po OL ONAL ABBREVIATION SIGN + 1E6C0..1E6DE ; L # Lo [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO + 1E6E0..1E6E2 ; L # Lo [3] TAI YO LETTER AA..TAI YO LETTER UE + 1E6E4..1E6E5 ; L # Lo [2] TAI YO LETTER U..TAI YO LETTER AE + 1E6E7..1E6ED ; L # Lo [7] TAI YO LETTER O..TAI YO LETTER AUE + 1E6F0..1E6F4 ; L # Lo [5] TAI YO LETTER AN..TAI YO LETTER AP + 1E6FE ; L # Lo TAI YO SYMBOL MUEANG + 1E6FF ; L # Lm TAI YO XAM LAI + 1E7E0..1E7E6 ; L # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO + 1E7E8..1E7EB ; L # Lo [4] ETHIOPIC SYLLABLE 
GURAGE HHWA..ETHIOPIC SYLLABLE HHWE + 1E7ED..1E7EE ; L # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE + 1E7F0..1E7FE ; L # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE + 1F110..1F12E ; L # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ + 1F130..1F169 ; L # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z + 1F170..1F1AC ; L # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD + 1F1E6..1F202 ; L # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA + 1F210..1F23B ; L # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D + 1F240..1F248 ; L # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 + 1F250..1F251 ; L # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT + 20000..2A6DF ; L # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF + 2A700..2B81D ; L # Lo [4382] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B81D + 2B820..2CEAD ; L # Lo [5774] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEAD + 2CEB0..2EBE0 ; L # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 + 2EBF0..2EE5D ; L # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D + 2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D + 30000..3134A ; L # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A + 31350..33479 ; L # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 + F0000..FFFFD ; L # Co [65534] .. + 100000..10FFFD; L # Co [65534] .. + + # The above property value applies to 810615 code points not listed here. 
+ # Total code points: 1095407 + + # ================================================ + + # Bidi_Class=Right_To_Left + + 05BE ; R # Pd HEBREW PUNCTUATION MAQAF + 05C0 ; R # Po HEBREW PUNCTUATION PASEQ + 05C3 ; R # Po HEBREW PUNCTUATION SOF PASUQ + 05C6 ; R # Po HEBREW PUNCTUATION NUN HAFUKHA + 05D0..05EA ; R # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV + 05EF..05F2 ; R # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD + 05F3..05F4 ; R # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM + 07C0..07C9 ; R # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE + 07CA..07EA ; R # Lo [33] NKO LETTER A..NKO LETTER JONA RA + 07F4..07F5 ; R # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE + 07FA ; R # Lm NKO LAJANYALAN + 07FE..07FF ; R # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN + 0800..0815 ; R # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF + 081A ; R # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT + 0824 ; R # Lm SAMARITAN MODIFIER LETTER SHORT A + 0828 ; R # Lm SAMARITAN MODIFIER LETTER I + 0830..083E ; R # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU + 0840..0858 ; R # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN + 085E ; R # Po MANDAIC PUNCTUATION + 200F ; R # Cf RIGHT-TO-LEFT MARK + FB1D ; R # Lo HEBREW LETTER YOD WITH HIRIQ + FB1F..FB28 ; R # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV + FB2A..FB36 ; R # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH + FB38..FB3C ; R # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH + FB3E ; R # Lo HEBREW LETTER MEM WITH DAGESH + FB40..FB41 ; R # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH + FB43..FB44 ; R # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH + FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED + 10800..10805 ; R # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA + 10808 ; R # Lo CYPRIOT SYLLABLE JO + 
1080A..10835 ; R # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO + 10837..10838 ; R # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE + 1083C ; R # Lo CYPRIOT SYLLABLE ZA + 1083F..10855 ; R # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW + 10857 ; R # Po IMPERIAL ARAMAIC SECTION SIGN + 10858..1085F ; R # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND + 10860..10876 ; R # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW + 10877..10878 ; R # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON + 10879..1087F ; R # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY + 10880..1089E ; R # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW + 108A7..108AF ; R # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED + 108E0..108F2 ; R # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH + 108F4..108F5 ; R # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW + 108FB..108FF ; R # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED + 10900..10915 ; R # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU + 10916..1091B ; R # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE + 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C + 1093F ; R # Po LYDIAN TRIANGULAR MARK + 10940..10959 ; R # Lo [26] SIDETIC LETTER N01..SIDETIC LETTER N26 + 10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA + 109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF + 109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN + 109C0..109CF ; R # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY + 109D2..109FF ; R # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS + 10A00 ; R # Lo KHAROSHTHI LETTER A + 10A10..10A13 ; R # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA + 10A15..10A17 ; R # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA + 10A19..10A35 ; R # 
Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA + 10A40..10A48 ; R # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF + 10A50..10A58 ; R # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES + 10A60..10A7C ; R # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH + 10A7D..10A7E ; R # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY + 10A7F ; R # Po OLD SOUTH ARABIAN NUMERIC INDICATOR + 10A80..10A9C ; R # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH + 10A9D..10A9F ; R # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY + 10AC0..10AC7 ; R # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW + 10AC8 ; R # So MANICHAEAN SIGN UD + 10AC9..10AE4 ; R # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW + 10AEB..10AEF ; R # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED + 10AF0..10AF6 ; R # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER + 10B00..10B35 ; R # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE + 10B40..10B55 ; R # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW + 10B58..10B5F ; R # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND + 10B60..10B72 ; R # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW + 10B78..10B7F ; R # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND + 10B80..10B91 ; R # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW + 10B99..10B9C ; R # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT + 10BA9..10BAF ; R # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED + 10C00..10C48 ; R # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH + 10C80..10CB2 ; R # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US + 10CC0..10CF2 ; R # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US + 
10CFA..10CFF ; R # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND + 10D4A..10D4D ; R # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE + 10D4E ; R # Lm GARAY VOWEL LENGTH MARK + 10D4F ; R # Lo GARAY SUKUN + 10D50..10D65 ; R # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA + 10D6F ; R # Lm GARAY REDUPLICATION MARK + 10D70..10D85 ; R # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA + 10D8E..10D8F ; R # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN + 10E80..10EA9 ; R # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET + 10EAD ; R # Pd YEZIDI HYPHENATION MARK + 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE + 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL + 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF + 10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH + 10F70..10F81 ; R # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH + 10F86..10F89 ; R # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS + 10FB0..10FC4 ; R # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW + 10FC5..10FCB ; R # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED + 10FE0..10FF6 ; R # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH + 1E800..1E8C4 ; R # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON + 1E8C7..1E8CF ; R # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE + 1E900..1E943 ; R # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA + 1E94B ; R # Lm ADLAM NASALIZATION MARK + 1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE + 1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK + + # The above property value applies to 2061 code points not listed here. 
+ # Total code points: 3631 + + # ================================================ + + # Bidi_Class=European_Number + + 0030..0039 ; EN # Nd [10] DIGIT ZERO..DIGIT NINE + 00B2..00B3 ; EN # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE + 00B9 ; EN # No SUPERSCRIPT ONE + 06F0..06F9 ; EN # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE + 2070 ; EN # No SUPERSCRIPT ZERO + 2074..2079 ; EN # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE + 2080..2089 ; EN # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE + 2488..249B ; EN # No [20] DIGIT ONE FULL STOP..NUMBER TWENTY FULL STOP + FF10..FF19 ; EN # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE + 102E1..102FB ; EN # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED + 1CCF0..1CCF9 ; EN # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE + 1D7CE..1D7FF ; EN # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE + 1F100..1F10A ; EN # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA + 1FBF0..1FBF9 ; EN # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE + + # Total code points: 178 + + # ================================================ + + # Bidi_Class=European_Separator + + 002B ; ES # Sm PLUS SIGN + 002D ; ES # Pd HYPHEN-MINUS + 207A..207B ; ES # Sm [2] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT MINUS + 208A..208B ; ES # Sm [2] SUBSCRIPT PLUS SIGN..SUBSCRIPT MINUS + 2212 ; ES # Sm MINUS SIGN + FB29 ; ES # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN + FE62 ; ES # Sm SMALL PLUS SIGN + FE63 ; ES # Pd SMALL HYPHEN-MINUS + FF0B ; ES # Sm FULLWIDTH PLUS SIGN + FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS + + # Total code points: 12 + + # ================================================ + + # Bidi_Class=European_Terminator + + 0023 ; ET # Po NUMBER SIGN + 0024 ; ET # Sc DOLLAR SIGN + 0025 ; ET # Po PERCENT SIGN + 00A2..00A5 ; ET # Sc [4] CENT SIGN..YEN SIGN + 00B0 ; ET # So DEGREE SIGN + 00B1 ; ET # Sm PLUS-MINUS SIGN + 058F ; ET # Sc ARMENIAN DRAM SIGN + 0609..060A ; ET # Po [2] ARABIC-INDIC PER MILLE 
SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN + 066A ; ET # Po ARABIC PERCENT SIGN + 09F2..09F3 ; ET # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN + 09FB ; ET # Sc BENGALI GANDA MARK + 0AF1 ; ET # Sc GUJARATI RUPEE SIGN + 0BF9 ; ET # Sc TAMIL RUPEE SIGN + 0E3F ; ET # Sc THAI CURRENCY SYMBOL BAHT + 17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL + 2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME + 20A0..20C1 ; ET # Sc [34] EURO-CURRENCY SIGN..SAUDI RIYAL SIGN + 212E ; ET # So ESTIMATED SYMBOL + 2213 ; ET # Sm MINUS-OR-PLUS SIGN + A838 ; ET # Sc NORTH INDIC RUPEE MARK + A839 ; ET # So NORTH INDIC QUANTITY MARK + FE5F ; ET # Po SMALL NUMBER SIGN + FE69 ; ET # Sc SMALL DOLLAR SIGN + FE6A ; ET # Po SMALL PERCENT SIGN + FF03 ; ET # Po FULLWIDTH NUMBER SIGN + FF04 ; ET # Sc FULLWIDTH DOLLAR SIGN + FF05 ; ET # Po FULLWIDTH PERCENT SIGN + FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN + FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN + 11FDD..11FE0 ; ET # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN + 1E2FF ; ET # Sc WANCHO NGUN SIGN + + # The above property value applies to 14 code points not listed here. 
+ # Total code points: 92 + + # ================================================ + + # Bidi_Class=Arabic_Number + + 0600..0605 ; AN # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE + 0660..0669 ; AN # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE + 066B..066C ; AN # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR + 06DD ; AN # Cf ARABIC END OF AYAH + 0890..0891 ; AN # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE + 08E2 ; AN # Cf ARABIC DISPUTED END OF AYAH + 10D30..10D39 ; AN # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE + 10D40..10D49 ; AN # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE + 10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS + + # Total code points: 73 + + # ================================================ + + # Bidi_Class=Common_Separator + + 002C ; CS # Po COMMA + 002E..002F ; CS # Po [2] FULL STOP..SOLIDUS + 003A ; CS # Po COLON + 00A0 ; CS # Zs NO-BREAK SPACE + 060C ; CS # Po ARABIC COMMA + 202F ; CS # Zs NARROW NO-BREAK SPACE + 2044 ; CS # Sm FRACTION SLASH + FE50 ; CS # Po SMALL COMMA + FE52 ; CS # Po SMALL FULL STOP + FE55 ; CS # Po SMALL COLON + FF0C ; CS # Po FULLWIDTH COMMA + FF0E..FF0F ; CS # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS + FF1A ; CS # Po FULLWIDTH COLON + + # Total code points: 15 + + # ================================================ + + # Bidi_Class=Paragraph_Separator + + 000A ; B # Cc + 000D ; B # Cc + 001C..001E ; B # Cc [3] .. 
+ 0085 ; B # Cc + 2029 ; B # Zp PARAGRAPH SEPARATOR + + # Total code points: 7 + + # ================================================ + + # Bidi_Class=Segment_Separator + + 0009 ; S # Cc + 000B ; S # Cc + 001F ; S # Cc + + # Total code points: 3 + + # ================================================ + + # Bidi_Class=White_Space + + 000C ; WS # Cc + 0020 ; WS # Zs SPACE + 1680 ; WS # Zs OGHAM SPACE MARK + 2000..200A ; WS # Zs [11] EN QUAD..HAIR SPACE + 2028 ; WS # Zl LINE SEPARATOR + 205F ; WS # Zs MEDIUM MATHEMATICAL SPACE + 3000 ; WS # Zs IDEOGRAPHIC SPACE + + # Total code points: 17 + + # ================================================ + + # Bidi_Class=Other_Neutral + + 0021..0022 ; ON # Po [2] EXCLAMATION MARK..QUOTATION MARK + 0026..0027 ; ON # Po [2] AMPERSAND..APOSTROPHE + 0028 ; ON # Ps LEFT PARENTHESIS + 0029 ; ON # Pe RIGHT PARENTHESIS + 002A ; ON # Po ASTERISK + 003B ; ON # Po SEMICOLON + 003C..003E ; ON # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN + 003F..0040 ; ON # Po [2] QUESTION MARK..COMMERCIAL AT + 005B ; ON # Ps LEFT SQUARE BRACKET + 005C ; ON # Po REVERSE SOLIDUS + 005D ; ON # Pe RIGHT SQUARE BRACKET + 005E ; ON # Sk CIRCUMFLEX ACCENT + 005F ; ON # Pc LOW LINE + 0060 ; ON # Sk GRAVE ACCENT + 007B ; ON # Ps LEFT CURLY BRACKET + 007C ; ON # Sm VERTICAL LINE + 007D ; ON # Pe RIGHT CURLY BRACKET + 007E ; ON # Sm TILDE + 00A1 ; ON # Po INVERTED EXCLAMATION MARK + 00A6 ; ON # So BROKEN BAR + 00A7 ; ON # Po SECTION SIGN + 00A8 ; ON # Sk DIAERESIS + 00A9 ; ON # So COPYRIGHT SIGN + 00AB ; ON # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 00AC ; ON # Sm NOT SIGN + 00AE ; ON # So REGISTERED SIGN + 00AF ; ON # Sk MACRON + 00B4 ; ON # Sk ACUTE ACCENT + 00B6..00B7 ; ON # Po [2] PILCROW SIGN..MIDDLE DOT + 00B8 ; ON # Sk CEDILLA + 00BB ; ON # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 00BC..00BE ; ON # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS + 00BF ; ON # Po INVERTED QUESTION MARK + 00D7 ; ON # Sm MULTIPLICATION SIGN + 00F7 ; ON 
# Sm DIVISION SIGN + 02B9..02BA ; ON # Lm [2] MODIFIER LETTER PRIME..MODIFIER LETTER DOUBLE PRIME + 02C2..02C5 ; ON # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD + 02C6..02CF ; ON # Lm [10] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER LOW ACUTE ACCENT + 02D2..02DF ; ON # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT + 02E5..02EB ; ON # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK + 02EC ; ON # Lm MODIFIER LETTER VOICING + 02ED ; ON # Sk MODIFIER LETTER UNASPIRATED + 02EF..02FF ; ON # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW + 0374 ; ON # Lm GREEK NUMERAL SIGN + 0375 ; ON # Sk GREEK LOWER NUMERAL SIGN + 037E ; ON # Po GREEK QUESTION MARK + 0384..0385 ; ON # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS + 0387 ; ON # Po GREEK ANO TELEIA + 03F6 ; ON # Sm GREEK REVERSED LUNATE EPSILON SYMBOL + 058A ; ON # Pd ARMENIAN HYPHEN + 058D..058E ; ON # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN + 0606..0607 ; ON # Sm [2] ARABIC-INDIC CUBE ROOT..ARABIC-INDIC FOURTH ROOT + 060E..060F ; ON # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA + 06DE ; ON # So ARABIC START OF RUB EL HIZB + 06E9 ; ON # So ARABIC PLACE OF SAJDAH + 07F6 ; ON # So NKO SYMBOL OO DENNEN + 07F7..07F9 ; ON # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK + 0BF3..0BF8 ; ON # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN + 0BFA ; ON # So TAMIL NUMBER SIGN + 0C78..0C7E ; ON # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR + 0F3A ; ON # Ps TIBETAN MARK GUG RTAGS GYON + 0F3B ; ON # Pe TIBETAN MARK GUG RTAGS GYAS + 0F3C ; ON # Ps TIBETAN MARK ANG KHANG GYON + 0F3D ; ON # Pe TIBETAN MARK ANG KHANG GYAS + 1390..1399 ; ON # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT + 1400 ; ON # Pd CANADIAN SYLLABICS HYPHEN + 169B ; ON # Ps OGHAM FEATHER MARK + 169C ; ON # Pe OGHAM 
REVERSED FEATHER MARK + 17F0..17F9 ; ON # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON + 1800..1805 ; ON # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS + 1806 ; ON # Pd MONGOLIAN TODO SOFT HYPHEN + 1807..180A ; ON # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU + 1940 ; ON # So LIMBU SIGN LOO + 1944..1945 ; ON # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK + 19DE..19FF ; ON # So [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC + 1FBD ; ON # Sk GREEK KORONIS + 1FBF..1FC1 ; ON # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI + 1FCD..1FCF ; ON # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI + 1FDD..1FDF ; ON # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI + 1FED..1FEF ; ON # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA + 1FFD..1FFE ; ON # Sk [2] GREEK OXIA..GREEK DASIA + 2010..2015 ; ON # Pd [6] HYPHEN..HORIZONTAL BAR + 2016..2017 ; ON # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE + 2018 ; ON # Pi LEFT SINGLE QUOTATION MARK + 2019 ; ON # Pf RIGHT SINGLE QUOTATION MARK + 201A ; ON # Ps SINGLE LOW-9 QUOTATION MARK + 201B..201C ; ON # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK + 201D ; ON # Pf RIGHT DOUBLE QUOTATION MARK + 201E ; ON # Ps DOUBLE LOW-9 QUOTATION MARK + 201F ; ON # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK + 2020..2027 ; ON # Po [8] DAGGER..HYPHENATION POINT + 2035..2038 ; ON # Po [4] REVERSED PRIME..CARET + 2039 ; ON # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 203A ; ON # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 203B..203E ; ON # Po [4] REFERENCE MARK..OVERLINE + 203F..2040 ; ON # Pc [2] UNDERTIE..CHARACTER TIE + 2041..2043 ; ON # Po [3] CARET INSERTION POINT..HYPHEN BULLET + 2045 ; ON # Ps LEFT SQUARE BRACKET WITH QUILL + 2046 ; ON # Pe RIGHT SQUARE BRACKET WITH QUILL + 2047..2051 ; ON # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY + 2052 ; ON # Sm COMMERCIAL MINUS SIGN + 2053 ; ON # Po SWUNG DASH + 2054 ; ON # Pc 
INVERTED UNDERTIE + 2055..205E ; ON # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS + 207C ; ON # Sm SUPERSCRIPT EQUALS SIGN + 207D ; ON # Ps SUPERSCRIPT LEFT PARENTHESIS + 207E ; ON # Pe SUPERSCRIPT RIGHT PARENTHESIS + 208C ; ON # Sm SUBSCRIPT EQUALS SIGN + 208D ; ON # Ps SUBSCRIPT LEFT PARENTHESIS + 208E ; ON # Pe SUBSCRIPT RIGHT PARENTHESIS + 2100..2101 ; ON # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT + 2103..2106 ; ON # So [4] DEGREE CELSIUS..CADA UNA + 2108..2109 ; ON # So [2] SCRUPLE..DEGREE FAHRENHEIT + 2114 ; ON # So L B BAR SYMBOL + 2116..2117 ; ON # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT + 2118 ; ON # Sm SCRIPT CAPITAL P + 211E..2123 ; ON # So [6] PRESCRIPTION TAKE..VERSICLE + 2125 ; ON # So OUNCE SIGN + 2127 ; ON # So INVERTED OHM SIGN + 2129 ; ON # So TURNED GREEK SMALL LETTER IOTA + 213A..213B ; ON # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN + 2140..2144 ; ON # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y + 214A ; ON # So PROPERTY LINE + 214B ; ON # Sm TURNED AMPERSAND + 214C..214D ; ON # So [2] PER SIGN..AKTIESELSKAB + 2150..215F ; ON # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE + 2189 ; ON # No VULGAR FRACTION ZERO THIRDS + 218A..218B ; ON # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE + 2190..2194 ; ON # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW + 2195..2199 ; ON # So [5] UP DOWN ARROW..SOUTH WEST ARROW + 219A..219B ; ON # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE + 219C..219F ; ON # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW + 21A0 ; ON # Sm RIGHTWARDS TWO HEADED ARROW + 21A1..21A2 ; ON # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL + 21A3 ; ON # Sm RIGHTWARDS ARROW WITH TAIL + 21A4..21A5 ; ON # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR + 21A6 ; ON # Sm RIGHTWARDS ARROW FROM BAR + 21A7..21AD ; ON # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW + 21AE ; ON # Sm LEFT RIGHT ARROW WITH STROKE + 21AF..21CD ; ON # So [31] DOWNWARDS 
ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE + 21CE..21CF ; ON # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE + 21D0..21D1 ; ON # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW + 21D2 ; ON # Sm RIGHTWARDS DOUBLE ARROW + 21D3 ; ON # So DOWNWARDS DOUBLE ARROW + 21D4 ; ON # Sm LEFT RIGHT DOUBLE ARROW + 21D5..21F3 ; ON # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW + 21F4..2211 ; ON # Sm [30] RIGHT ARROW WITH SMALL CIRCLE..N-ARY SUMMATION + 2214..22FF ; ON # Sm [236] DOT PLUS..Z NOTATION BAG MEMBERSHIP + 2300..2307 ; ON # So [8] DIAMETER SIGN..WAVY LINE + 2308 ; ON # Ps LEFT CEILING + 2309 ; ON # Pe RIGHT CEILING + 230A ; ON # Ps LEFT FLOOR + 230B ; ON # Pe RIGHT FLOOR + 230C..231F ; ON # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER + 2320..2321 ; ON # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL + 2322..2328 ; ON # So [7] FROWN..KEYBOARD + 2329 ; ON # Ps LEFT-POINTING ANGLE BRACKET + 232A ; ON # Pe RIGHT-POINTING ANGLE BRACKET + 232B..2335 ; ON # So [11] ERASE TO THE LEFT..COUNTERSINK + 237B ; ON # So NOT CHECK MARK + 237C ; ON # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW + 237D..2394 ; ON # So [24] SHOULDERED OPEN BOX..SOFTWARE-FUNCTION SYMBOL + 2396..239A ; ON # So [5] DECIMAL SEPARATOR KEY SYMBOL..CLEAR SCREEN SYMBOL + 239B..23B3 ; ON # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM + 23B4..23DB ; ON # So [40] TOP SQUARE BRACKET..FUSE + 23DC..23E1 ; ON # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET + 23E2..2429 ; ON # So [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM + 2440..244A ; ON # So [11] OCR HOOK..OCR DOUBLE BACKSLASH + 2460..2487 ; ON # No [40] CIRCLED DIGIT ONE..PARENTHESIZED NUMBER TWENTY + 24EA..24FF ; ON # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO + 2500..25B6 ; ON # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE + 25B7 ; ON # Sm WHITE RIGHT-POINTING TRIANGLE + 25B8..25C0 ; ON # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK 
LEFT-POINTING TRIANGLE + 25C1 ; ON # Sm WHITE LEFT-POINTING TRIANGLE + 25C2..25F7 ; ON # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT + 25F8..25FF ; ON # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE + 2600..266E ; ON # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN + 266F ; ON # Sm MUSIC SHARP SIGN + 2670..26AB ; ON # So [60] WEST SYRIAC CROSS..MEDIUM BLACK CIRCLE + 26AD..2767 ; ON # So [187] MARRIAGE SYMBOL..ROTATED FLORAL HEART BULLET + 2768 ; ON # Ps MEDIUM LEFT PARENTHESIS ORNAMENT + 2769 ; ON # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT + 276A ; ON # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT + 276B ; ON # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT + 276C ; ON # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT + 276D ; ON # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT + 276E ; ON # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT + 276F ; ON # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT + 2770 ; ON # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT + 2771 ; ON # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT + 2772 ; ON # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT + 2773 ; ON # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT + 2774 ; ON # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT + 2775 ; ON # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT + 2776..2793 ; ON # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN + 2794..27BF ; ON # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP + 27C0..27C4 ; ON # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET + 27C5 ; ON # Ps LEFT S-SHAPED BAG DELIMITER + 27C6 ; ON # Pe RIGHT S-SHAPED BAG DELIMITER + 27C7..27E5 ; ON # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK + 27E6 ; ON # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET + 27E7 ; ON # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET + 27E8 ; ON # Ps MATHEMATICAL LEFT ANGLE BRACKET + 27E9 ; ON # Pe MATHEMATICAL RIGHT ANGLE BRACKET + 27EA ; ON # Ps MATHEMATICAL LEFT DOUBLE 
ANGLE BRACKET + 27EB ; ON # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET + 27EC ; ON # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET + 27ED ; ON # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET + 27EE ; ON # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS + 27EF ; ON # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS + 27F0..27FF ; ON # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW + 2900..2982 ; ON # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON + 2983 ; ON # Ps LEFT WHITE CURLY BRACKET + 2984 ; ON # Pe RIGHT WHITE CURLY BRACKET + 2985 ; ON # Ps LEFT WHITE PARENTHESIS + 2986 ; ON # Pe RIGHT WHITE PARENTHESIS + 2987 ; ON # Ps Z NOTATION LEFT IMAGE BRACKET + 2988 ; ON # Pe Z NOTATION RIGHT IMAGE BRACKET + 2989 ; ON # Ps Z NOTATION LEFT BINDING BRACKET + 298A ; ON # Pe Z NOTATION RIGHT BINDING BRACKET + 298B ; ON # Ps LEFT SQUARE BRACKET WITH UNDERBAR + 298C ; ON # Pe RIGHT SQUARE BRACKET WITH UNDERBAR + 298D ; ON # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER + 298E ; ON # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER + 298F ; ON # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER + 2990 ; ON # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER + 2991 ; ON # Ps LEFT ANGLE BRACKET WITH DOT + 2992 ; ON # Pe RIGHT ANGLE BRACKET WITH DOT + 2993 ; ON # Ps LEFT ARC LESS-THAN BRACKET + 2994 ; ON # Pe RIGHT ARC GREATER-THAN BRACKET + 2995 ; ON # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET + 2996 ; ON # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET + 2997 ; ON # Ps LEFT BLACK TORTOISE SHELL BRACKET + 2998 ; ON # Pe RIGHT BLACK TORTOISE SHELL BRACKET + 2999..29D7 ; ON # Sm [63] DOTTED FENCE..BLACK HOURGLASS + 29D8 ; ON # Ps LEFT WIGGLY FENCE + 29D9 ; ON # Pe RIGHT WIGGLY FENCE + 29DA ; ON # Ps LEFT DOUBLE WIGGLY FENCE + 29DB ; ON # Pe RIGHT DOUBLE WIGGLY FENCE + 29DC..29FB ; ON # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS + 29FC ; ON # Ps LEFT-POINTING CURVED ANGLE BRACKET + 29FD ; ON # Pe RIGHT-POINTING CURVED ANGLE BRACKET + 29FE..2AFF 
; ON # Sm [258] TINY..N-ARY WHITE VERTICAL BAR + 2B00..2B2F ; ON # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE + 2B30..2B44 ; ON # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET + 2B45..2B46 ; ON # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW + 2B47..2B4C ; ON # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR + 2B4D..2B73 ; ON # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR + 2B76..2BFF ; ON # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL + 2CE5..2CEA ; ON # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA + 2CF9..2CFC ; ON # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER + 2CFD ; ON # No COPTIC FRACTION ONE HALF + 2CFE..2CFF ; ON # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER + 2E00..2E01 ; ON # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER + 2E02 ; ON # Pi LEFT SUBSTITUTION BRACKET + 2E03 ; ON # Pf RIGHT SUBSTITUTION BRACKET + 2E04 ; ON # Pi LEFT DOTTED SUBSTITUTION BRACKET + 2E05 ; ON # Pf RIGHT DOTTED SUBSTITUTION BRACKET + 2E06..2E08 ; ON # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER + 2E09 ; ON # Pi LEFT TRANSPOSITION BRACKET + 2E0A ; ON # Pf RIGHT TRANSPOSITION BRACKET + 2E0B ; ON # Po RAISED SQUARE + 2E0C ; ON # Pi LEFT RAISED OMISSION BRACKET + 2E0D ; ON # Pf RIGHT RAISED OMISSION BRACKET + 2E0E..2E16 ; ON # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE + 2E17 ; ON # Pd DOUBLE OBLIQUE HYPHEN + 2E18..2E19 ; ON # Po [2] INVERTED INTERROBANG..PALM BRANCH + 2E1A ; ON # Pd HYPHEN WITH DIAERESIS + 2E1B ; ON # Po TILDE WITH RING ABOVE + 2E1C ; ON # Pi LEFT LOW PARAPHRASE BRACKET + 2E1D ; ON # Pf RIGHT LOW PARAPHRASE BRACKET + 2E1E..2E1F ; ON # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW + 2E20 ; ON # Pi LEFT VERTICAL BAR WITH QUILL + 2E21 ; ON # Pf RIGHT VERTICAL BAR WITH QUILL + 2E22 ; ON # 
Ps TOP LEFT HALF BRACKET + 2E23 ; ON # Pe TOP RIGHT HALF BRACKET + 2E24 ; ON # Ps BOTTOM LEFT HALF BRACKET + 2E25 ; ON # Pe BOTTOM RIGHT HALF BRACKET + 2E26 ; ON # Ps LEFT SIDEWAYS U BRACKET + 2E27 ; ON # Pe RIGHT SIDEWAYS U BRACKET + 2E28 ; ON # Ps LEFT DOUBLE PARENTHESIS + 2E29 ; ON # Pe RIGHT DOUBLE PARENTHESIS + 2E2A..2E2E ; ON # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK + 2E2F ; ON # Lm VERTICAL TILDE + 2E30..2E39 ; ON # Po [10] RING POINT..TOP HALF SECTION SIGN + 2E3A..2E3B ; ON # Pd [2] TWO-EM DASH..THREE-EM DASH + 2E3C..2E3F ; ON # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM + 2E40 ; ON # Pd DOUBLE HYPHEN + 2E41 ; ON # Po REVERSED COMMA + 2E42 ; ON # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK + 2E43..2E4F ; ON # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER + 2E50..2E51 ; ON # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR + 2E52..2E54 ; ON # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK + 2E55 ; ON # Ps LEFT SQUARE BRACKET WITH STROKE + 2E56 ; ON # Pe RIGHT SQUARE BRACKET WITH STROKE + 2E57 ; ON # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE + 2E58 ; ON # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE + 2E59 ; ON # Ps TOP HALF LEFT PARENTHESIS + 2E5A ; ON # Pe TOP HALF RIGHT PARENTHESIS + 2E5B ; ON # Ps BOTTOM HALF LEFT PARENTHESIS + 2E5C ; ON # Pe BOTTOM HALF RIGHT PARENTHESIS + 2E5D ; ON # Pd OBLIQUE HYPHEN + 2E80..2E99 ; ON # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP + 2E9B..2EF3 ; ON # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE + 2F00..2FD5 ; ON # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE + 2FF0..2FFF ; ON # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION + 3001..3003 ; ON # Po [3] IDEOGRAPHIC COMMA..DITTO MARK + 3004 ; ON # So JAPANESE INDUSTRIAL STANDARD SYMBOL + 3008 ; ON # Ps LEFT ANGLE BRACKET + 3009 ; ON # Pe RIGHT ANGLE BRACKET + 300A ; ON # Ps LEFT DOUBLE ANGLE BRACKET + 300B ; ON # Pe RIGHT DOUBLE ANGLE BRACKET + 
300C ; ON # Ps LEFT CORNER BRACKET + 300D ; ON # Pe RIGHT CORNER BRACKET + 300E ; ON # Ps LEFT WHITE CORNER BRACKET + 300F ; ON # Pe RIGHT WHITE CORNER BRACKET + 3010 ; ON # Ps LEFT BLACK LENTICULAR BRACKET + 3011 ; ON # Pe RIGHT BLACK LENTICULAR BRACKET + 3012..3013 ; ON # So [2] POSTAL MARK..GETA MARK + 3014 ; ON # Ps LEFT TORTOISE SHELL BRACKET + 3015 ; ON # Pe RIGHT TORTOISE SHELL BRACKET + 3016 ; ON # Ps LEFT WHITE LENTICULAR BRACKET + 3017 ; ON # Pe RIGHT WHITE LENTICULAR BRACKET + 3018 ; ON # Ps LEFT WHITE TORTOISE SHELL BRACKET + 3019 ; ON # Pe RIGHT WHITE TORTOISE SHELL BRACKET + 301A ; ON # Ps LEFT WHITE SQUARE BRACKET + 301B ; ON # Pe RIGHT WHITE SQUARE BRACKET + 301C ; ON # Pd WAVE DASH + 301D ; ON # Ps REVERSED DOUBLE PRIME QUOTATION MARK + 301E..301F ; ON # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK + 3020 ; ON # So POSTAL MARK FACE + 3030 ; ON # Pd WAVY DASH + 3036..3037 ; ON # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL + 303D ; ON # Po PART ALTERNATION MARK + 303E..303F ; ON # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE + 309B..309C ; ON # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + 30A0 ; ON # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN + 30FB ; ON # Po KATAKANA MIDDLE DOT + 31C0..31E5 ; ON # So [38] CJK STROKE T..CJK STROKE SZP + 31EF ; ON # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION + 321D..321E ; ON # So [2] PARENTHESIZED KOREAN CHARACTER OJEON..PARENTHESIZED KOREAN CHARACTER O HU + 3250 ; ON # So PARTNERSHIP SIGN + 3251..325F ; ON # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE + 327C..327E ; ON # So [3] CIRCLED KOREAN CHARACTER CHAMKO..CIRCLED HANGUL IEUNG U + 32B1..32BF ; ON # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY + 32CC..32CF ; ON # So [4] SQUARE HG..LIMITED LIABILITY SIGN + 3377..337A ; ON # So [4] SQUARE DM..SQUARE IU + 33DE..33DF ; ON # So [2] SQUARE V OVER M..SQUARE A OVER M + 33FF ; 
ON # So SQUARE GAL + 4DC0..4DFF ; ON # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION + A490..A4C6 ; ON # So [55] YI RADICAL QOT..YI RADICAL KE + A60D..A60F ; ON # Po [3] VAI COMMA..VAI QUESTION MARK + A673 ; ON # Po SLAVONIC ASTERISK + A67E ; ON # Po CYRILLIC KAVYKA + A67F ; ON # Lm CYRILLIC PAYEROK + A700..A716 ; ON # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR + A717..A71F ; ON # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK + A720..A721 ; ON # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE + A788 ; ON # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT + A828..A82B ; ON # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 + A874..A877 ; ON # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD + AB6A..AB6B ; ON # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK + FBC3..FBD2 ; ON # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH + FD3E ; ON # Pe ORNATE LEFT PARENTHESIS + FD3F ; ON # Ps ORNATE RIGHT PARENTHESIS + FD40..FD4F ; ON # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH + FD90..FD91 ; ON # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA + FDC8..FDCF ; ON # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA + FDFD..FDFF ; ON # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL + FE10..FE16 ; ON # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK + FE17 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET + FE18 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET + FE19 ; ON # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS + FE30 ; ON # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER + FE31..FE32 ; ON # Pd [2] PRESENTATION FORM FOR VERTICAL EM 
DASH..PRESENTATION FORM FOR VERTICAL EN DASH + FE33..FE34 ; ON # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE + FE35 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS + FE36 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS + FE37 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET + FE38 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET + FE39 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET + FE3A ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET + FE3B ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET + FE3C ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET + FE3D ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET + FE3E ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET + FE3F ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET + FE40 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET + FE41 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET + FE42 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET + FE43 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET + FE44 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET + FE45..FE46 ; ON # Po [2] SESAME DOT..WHITE SESAME DOT + FE47 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET + FE48 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET + FE49..FE4C ; ON # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE + FE4D..FE4F ; ON # Pc [3] DASHED LOW LINE..WAVY LOW LINE + FE51 ; ON # Po SMALL IDEOGRAPHIC COMMA + FE54 ; ON # Po SMALL SEMICOLON + FE56..FE57 ; ON # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK + FE58 ; ON # Pd SMALL EM DASH + FE59 ; ON # Ps SMALL LEFT PARENTHESIS + FE5A ; ON # Pe SMALL RIGHT PARENTHESIS + FE5B ; ON # Ps SMALL LEFT CURLY BRACKET + FE5C ; ON # Pe SMALL RIGHT CURLY BRACKET + FE5D ; ON # Ps SMALL LEFT TORTOISE 
SHELL BRACKET + FE5E ; ON # Pe SMALL RIGHT TORTOISE SHELL BRACKET + FE60..FE61 ; ON # Po [2] SMALL AMPERSAND..SMALL ASTERISK + FE64..FE66 ; ON # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN + FE68 ; ON # Po SMALL REVERSE SOLIDUS + FE6B ; ON # Po SMALL COMMERCIAL AT + FF01..FF02 ; ON # Po [2] FULLWIDTH EXCLAMATION MARK..FULLWIDTH QUOTATION MARK + FF06..FF07 ; ON # Po [2] FULLWIDTH AMPERSAND..FULLWIDTH APOSTROPHE + FF08 ; ON # Ps FULLWIDTH LEFT PARENTHESIS + FF09 ; ON # Pe FULLWIDTH RIGHT PARENTHESIS + FF0A ; ON # Po FULLWIDTH ASTERISK + FF1B ; ON # Po FULLWIDTH SEMICOLON + FF1C..FF1E ; ON # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN + FF1F..FF20 ; ON # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT + FF3B ; ON # Ps FULLWIDTH LEFT SQUARE BRACKET + FF3C ; ON # Po FULLWIDTH REVERSE SOLIDUS + FF3D ; ON # Pe FULLWIDTH RIGHT SQUARE BRACKET + FF3E ; ON # Sk FULLWIDTH CIRCUMFLEX ACCENT + FF3F ; ON # Pc FULLWIDTH LOW LINE + FF40 ; ON # Sk FULLWIDTH GRAVE ACCENT + FF5B ; ON # Ps FULLWIDTH LEFT CURLY BRACKET + FF5C ; ON # Sm FULLWIDTH VERTICAL LINE + FF5D ; ON # Pe FULLWIDTH RIGHT CURLY BRACKET + FF5E ; ON # Sm FULLWIDTH TILDE + FF5F ; ON # Ps FULLWIDTH LEFT WHITE PARENTHESIS + FF60 ; ON # Pe FULLWIDTH RIGHT WHITE PARENTHESIS + FF61 ; ON # Po HALFWIDTH IDEOGRAPHIC FULL STOP + FF62 ; ON # Ps HALFWIDTH LEFT CORNER BRACKET + FF63 ; ON # Pe HALFWIDTH RIGHT CORNER BRACKET + FF64..FF65 ; ON # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT + FFE2 ; ON # Sm FULLWIDTH NOT SIGN + FFE3 ; ON # Sk FULLWIDTH MACRON + FFE4 ; ON # So FULLWIDTH BROKEN BAR + FFE8 ; ON # So HALFWIDTH FORMS LIGHT VERTICAL + FFE9..FFEC ; ON # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW + FFED..FFEE ; ON # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE + FFF9..FFFB ; ON # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR + FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER + 10101 ; ON # Po AEGEAN 
WORD SEPARATOR DOT + 10140..10174 ; ON # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS + 10175..10178 ; ON # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN + 10179..10189 ; ON # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN + 1018A..1018B ; ON # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN + 1018C ; ON # So GREEK SINUSOID SIGN + 10190..1019C ; ON # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL + 101A0 ; ON # So GREEK SYMBOL TAU RHO + 1091F ; ON # Po PHOENICIAN WORD SEPARATOR + 10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION + 10D6E ; ON # Pd GARAY HYPHEN + 10ED0 ; ON # Po ARABIC BIBLICAL END OF VERSE + 10ED1..10ED8 ; ON # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH + 11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND + 11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT + 11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI + 11FE1..11FF1 ; ON # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA + 16FE2 ; ON # Po OLD CHINESE HOOK MARK + 1CC00..1CCD5 ; ON # So [214] UP-POINTING GO-KART..LOWER RIGHT QUADRANT STANDING KNIGHT + 1CCFA..1CCFC ; ON # So [3] SNAKE SYMBOL..NOSE SYMBOL + 1CD00..1CEB3 ; ON # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET + 1CEBA..1CED0 ; ON # So [23] FRAGILE SYMBOL..LEUKOTHEA + 1CEE0..1CEEF ; ON # So [16] GEOMANTIC FIGURE POPULUS..GEOMANTIC FIGURE VIA + 1CEF0 ; ON # Sm MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR + 1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON + 1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 + 1D245 ; ON # So GREEK MUSICAL LEIMMA + 1D300..1D356 ; ON # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING + 1D6C1 ; ON # Sm MATHEMATICAL BOLD NABLA + 1D6DB ; ON # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL + 1D6FB ; ON # Sm 
MATHEMATICAL ITALIC NABLA + 1D715 ; ON # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL + 1D735 ; ON # Sm MATHEMATICAL BOLD ITALIC NABLA + 1D74F ; ON # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL + 1D76F ; ON # Sm MATHEMATICAL SANS-SERIF BOLD NABLA + 1D789 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL + 1D7A9 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA + 1D7C3 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL + 1EEF0..1EEF1 ; ON # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL + 1F000..1F02B ; ON # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK + 1F030..1F093 ; ON # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 + 1F0A0..1F0AE ; ON # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES + 1F0B1..1F0BF ; ON # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER + 1F0C1..1F0CF ; ON # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER + 1F0D1..1F0F5 ; ON # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 + 1F10B..1F10C ; ON # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO + 1F10D..1F10F ; ON # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH + 1F12F ; ON # So COPYLEFT SYMBOL + 1F16A..1F16F ; ON # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE + 1F1AD ; ON # So MASK WORK SYMBOL + 1F260..1F265 ; ON # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI + 1F300..1F3FA ; ON # So [251] CYCLONE..AMPHORA + 1F3FB..1F3FF ; ON # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 + 1F400..1F6D8 ; ON # So [729] RAT..LANDSLIDE + 1F6DC..1F6EC ; ON # So [17] WIRELESS..AIRPLANE ARRIVING + 1F6F0..1F6FC ; ON # So [13] SATELLITE..ROLLER SKATE + 1F700..1F7D9 ; ON # So [218] ALCHEMICAL SYMBOL FOR QUINTESSENCE..NINE POINTED WHITE STAR + 1F7E0..1F7EB ; ON # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE + 1F7F0 ; ON # So HEAVY EQUALS SIGN + 
1F800..1F80B ; ON # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD + 1F810..1F847 ; ON # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW + 1F850..1F859 ; ON # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW + 1F860..1F887 ; ON # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW + 1F890..1F8AD ; ON # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS + 1F8B0..1F8BB ; ON # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR + 1F8C0..1F8C1 ; ON # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW + 1F8D0..1F8D8 ; ON # Sm [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE + 1F900..1FA57 ; ON # So [344] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS ALFIL + 1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER + 1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH + 1FA80..1FA8A ; ON # So [11] YO-YO..TROMBONE + 1FA8E..1FAC6 ; ON # So [57] TREASURE CHEST..FINGERPRINT + 1FAC8 ; ON # So HAIRY CREATURE + 1FACD..1FADC ; ON # So [16] ORCA..ROOT VEGETABLE + 1FADF..1FAEA ; ON # So [12] SPLATTER..DISTORTED FACE + 1FAEF..1FAF8 ; ON # So [10] FIGHT CLOUD..RIGHTWARDS PUSHING HAND + 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK + 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE + 1FBFA ; ON # So ALARM BELL SYMBOL + + # Total code points: 6854 + + # ================================================ + + # Bidi_Class=Boundary_Neutral + + 0000..0008 ; BN # Cc [9] .. + 000E..001B ; BN # Cc [14] .. + 007F..0084 ; BN # Cc [6] .. + 0086..009F ; BN # Cc [26] .. 
+ 00AD ; BN # Cf SOFT HYPHEN + 180E ; BN # Cf MONGOLIAN VOWEL SEPARATOR + 200B..200D ; BN # Cf [3] ZERO WIDTH SPACE..ZERO WIDTH JOINER + 2060..2064 ; BN # Cf [5] WORD JOINER..INVISIBLE PLUS + 2065 ; BN # Cn + 206A..206F ; BN # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES + FDD0..FDEF ; BN # Cn [32] .. + FEFF ; BN # Cf ZERO WIDTH NO-BREAK SPACE + FFF0..FFF8 ; BN # Cn [9] .. + FFFE..FFFF ; BN # Cn [2] .. + 1BCA0..1BCA3 ; BN # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP + 1D173..1D17A ; BN # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE + 1FFFE..1FFFF ; BN # Cn [2] .. + 2FFFE..2FFFF ; BN # Cn [2] .. + 3FFFE..3FFFF ; BN # Cn [2] .. + 4FFFE..4FFFF ; BN # Cn [2] .. + 5FFFE..5FFFF ; BN # Cn [2] .. + 6FFFE..6FFFF ; BN # Cn [2] .. + 7FFFE..7FFFF ; BN # Cn [2] .. + 8FFFE..8FFFF ; BN # Cn [2] .. + 9FFFE..9FFFF ; BN # Cn [2] .. + AFFFE..AFFFF ; BN # Cn [2] .. + BFFFE..BFFFF ; BN # Cn [2] .. + CFFFE..CFFFF ; BN # Cn [2] .. + DFFFE..E0000 ; BN # Cn [3] .. + E0001 ; BN # Cf LANGUAGE TAG + E0002..E001F ; BN # Cn [30] .. + E0020..E007F ; BN # Cf [96] TAG SPACE..CANCEL TAG + E0080..E00FF ; BN # Cn [128] .. + E01F0..E0FFF ; BN # Cn [3600] .. + EFFFE..EFFFF ; BN # Cn [2] .. + FFFFE..FFFFF ; BN # Cn [2] .. + 10FFFE..10FFFF; BN # Cn [2] .. 
+ + # Total code points: 4016 + + # ================================================ + + # Bidi_Class=Nonspacing_Mark + + 0300..036F ; NSM # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X + 0483..0487 ; NSM # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE + 0488..0489 ; NSM # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN + 0591..05BD ; NSM # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG + 05BF ; NSM # Mn HEBREW POINT RAFE + 05C1..05C2 ; NSM # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT + 05C4..05C5 ; NSM # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT + 05C7 ; NSM # Mn HEBREW POINT QAMATS QATAN + 0610..061A ; NSM # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA + 064B..065F ; NSM # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW + 0670 ; NSM # Mn ARABIC LETTER SUPERSCRIPT ALEF + 06D6..06DC ; NSM # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN + 06DF..06E4 ; NSM # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA + 06E7..06E8 ; NSM # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON + 06EA..06ED ; NSM # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM + 0711 ; NSM # Mn SYRIAC LETTER SUPERSCRIPT ALAPH + 0730..074A ; NSM # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH + 07A6..07B0 ; NSM # Mn [11] THAANA ABAFILI..THAANA SUKUN + 07EB..07F3 ; NSM # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE + 07FD ; NSM # Mn NKO DANTAYALAN + 0816..0819 ; NSM # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH + 081B..0823 ; NSM # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A + 0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U + 0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA + 0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK + 0897..089F ; NSM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER 
MADDA + 08CA..08E1 ; NSM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA + 08E3..0902 ; NSM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA + 093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE + 093C ; NSM # Mn DEVANAGARI SIGN NUKTA + 0941..0948 ; NSM # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI + 094D ; NSM # Mn DEVANAGARI SIGN VIRAMA + 0951..0957 ; NSM # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE + 0962..0963 ; NSM # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL + 0981 ; NSM # Mn BENGALI SIGN CANDRABINDU + 09BC ; NSM # Mn BENGALI SIGN NUKTA + 09C1..09C4 ; NSM # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR + 09CD ; NSM # Mn BENGALI SIGN VIRAMA + 09E2..09E3 ; NSM # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL + 09FE ; NSM # Mn BENGALI SANDHI MARK + 0A01..0A02 ; NSM # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI + 0A3C ; NSM # Mn GURMUKHI SIGN NUKTA + 0A41..0A42 ; NSM # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU + 0A47..0A48 ; NSM # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI + 0A4B..0A4D ; NSM # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA + 0A51 ; NSM # Mn GURMUKHI SIGN UDAAT + 0A70..0A71 ; NSM # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK + 0A75 ; NSM # Mn GURMUKHI SIGN YAKASH + 0A81..0A82 ; NSM # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA + 0ABC ; NSM # Mn GUJARATI SIGN NUKTA + 0AC1..0AC5 ; NSM # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E + 0AC7..0AC8 ; NSM # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI + 0ACD ; NSM # Mn GUJARATI SIGN VIRAMA + 0AE2..0AE3 ; NSM # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL + 0AFA..0AFF ; NSM # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE + 0B01 ; NSM # Mn ORIYA SIGN CANDRABINDU + 0B3C ; NSM # Mn ORIYA SIGN NUKTA + 0B3F ; NSM # Mn ORIYA VOWEL SIGN I + 0B41..0B44 ; NSM # Mn [4] ORIYA VOWEL SIGN 
U..ORIYA VOWEL SIGN VOCALIC RR + 0B4D ; NSM # Mn ORIYA SIGN VIRAMA + 0B55..0B56 ; NSM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK + 0B62..0B63 ; NSM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL + 0B82 ; NSM # Mn TAMIL SIGN ANUSVARA + 0BC0 ; NSM # Mn TAMIL VOWEL SIGN II + 0BCD ; NSM # Mn TAMIL SIGN VIRAMA + 0C00 ; NSM # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE + 0C04 ; NSM # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE + 0C3C ; NSM # Mn TELUGU SIGN NUKTA + 0C3E..0C40 ; NSM # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II + 0C46..0C48 ; NSM # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI + 0C4A..0C4D ; NSM # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA + 0C55..0C56 ; NSM # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK + 0C62..0C63 ; NSM # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL + 0C81 ; NSM # Mn KANNADA SIGN CANDRABINDU + 0CBC ; NSM # Mn KANNADA SIGN NUKTA + 0CCC..0CCD ; NSM # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA + 0CE2..0CE3 ; NSM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL + 0D00..0D01 ; NSM # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU + 0D3B..0D3C ; NSM # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA + 0D41..0D44 ; NSM # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR + 0D4D ; NSM # Mn MALAYALAM SIGN VIRAMA + 0D62..0D63 ; NSM # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL + 0D81 ; NSM # Mn SINHALA SIGN CANDRABINDU + 0DCA ; NSM # Mn SINHALA SIGN AL-LAKUNA + 0DD2..0DD4 ; NSM # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA + 0DD6 ; NSM # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA + 0E31 ; NSM # Mn THAI CHARACTER MAI HAN-AKAT + 0E34..0E3A ; NSM # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU + 0E47..0E4E ; NSM # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN + 0EB1 ; NSM # Mn LAO VOWEL SIGN MAI KAN + 0EB4..0EBC ; NSM # Mn [9] LAO 
VOWEL SIGN I..LAO SEMIVOWEL SIGN LO + 0EC8..0ECE ; NSM # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN + 0F18..0F19 ; NSM # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS + 0F35 ; NSM # Mn TIBETAN MARK NGAS BZUNG NYI ZLA + 0F37 ; NSM # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS + 0F39 ; NSM # Mn TIBETAN MARK TSA -PHRU + 0F71..0F7E ; NSM # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO + 0F80..0F84 ; NSM # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA + 0F86..0F87 ; NSM # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS + 0F8D..0F97 ; NSM # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA + 0F99..0FBC ; NSM # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA + 0FC6 ; NSM # Mn TIBETAN SYMBOL PADMA GDAN + 102D..1030 ; NSM # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU + 1032..1037 ; NSM # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW + 1039..103A ; NSM # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT + 103D..103E ; NSM # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA + 1058..1059 ; NSM # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL + 105E..1060 ; NSM # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA + 1071..1074 ; NSM # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE + 1082 ; NSM # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA + 1085..1086 ; NSM # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y + 108D ; NSM # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE + 109D ; NSM # Mn MYANMAR VOWEL SIGN AITON AI + 135D..135F ; NSM # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK + 1712..1714 ; NSM # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA + 1732..1733 ; NSM # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U + 1752..1753 ; NSM # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U + 1772..1773 ; NSM # 
Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U + 17B4..17B5 ; NSM # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA + 17B7..17BD ; NSM # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA + 17C6 ; NSM # Mn KHMER SIGN NIKAHIT + 17C9..17D3 ; NSM # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT + 17DD ; NSM # Mn KHMER SIGN ATTHACAN + 180B..180D ; NSM # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE + 180F ; NSM # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR + 1885..1886 ; NSM # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA + 18A9 ; NSM # Mn MONGOLIAN LETTER ALI GALI DAGALGA + 1920..1922 ; NSM # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U + 1927..1928 ; NSM # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O + 1932 ; NSM # Mn LIMBU SMALL LETTER ANUSVARA + 1939..193B ; NSM # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I + 1A17..1A18 ; NSM # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U + 1A1B ; NSM # Mn BUGINESE VOWEL SIGN AE + 1A56 ; NSM # Mn TAI THAM CONSONANT SIGN MEDIAL LA + 1A58..1A5E ; NSM # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA + 1A60 ; NSM # Mn TAI THAM SIGN SAKOT + 1A62 ; NSM # Mn TAI THAM VOWEL SIGN MAI SAT + 1A65..1A6C ; NSM # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW + 1A73..1A7C ; NSM # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN + 1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT + 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW + 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY + 1ABF..1ADD ; NSM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW + 1AE0..1AEB ; NSM # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE + 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG + 1B34 ; NSM # Mn BALINESE SIGN REREKAN + 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA + 
1B3C ; NSM # Mn BALINESE VOWEL SIGN LA LENGA + 1B42 ; NSM # Mn BALINESE VOWEL SIGN PEPET + 1B6B..1B73 ; NSM # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG + 1B80..1B81 ; NSM # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR + 1BA2..1BA5 ; NSM # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU + 1BA8..1BA9 ; NSM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG + 1BAB..1BAD ; NSM # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA + 1BE6 ; NSM # Mn BATAK SIGN TOMPI + 1BE8..1BE9 ; NSM # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE + 1BED ; NSM # Mn BATAK VOWEL SIGN KARO O + 1BEF..1BF1 ; NSM # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H + 1C2C..1C33 ; NSM # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T + 1C36..1C37 ; NSM # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA + 1CD0..1CD2 ; NSM # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA + 1CD4..1CE0 ; NSM # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA + 1CE2..1CE8 ; NSM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL + 1CED ; NSM # Mn VEDIC SIGN TIRYAK + 1CF4 ; NSM # Mn VEDIC TONE CANDRA ABOVE + 1CF8..1CF9 ; NSM # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE + 1DC0..1DFF ; NSM # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW + 20D0..20DC ; NSM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE + 20DD..20E0 ; NSM # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH + 20E1 ; NSM # Mn COMBINING LEFT RIGHT ARROW ABOVE + 20E2..20E4 ; NSM # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE + 20E5..20F0 ; NSM # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE + 2CEF..2CF1 ; NSM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS + 2D7F ; NSM # Mn 
TIFINAGH CONSONANT JOINER + 2DE0..2DFF ; NSM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS + 302A..302D ; NSM # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK + 3099..309A ; NSM # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + A66F ; NSM # Mn COMBINING CYRILLIC VZMET + A670..A672 ; NSM # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN + A674..A67D ; NSM # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK + A69E..A69F ; NSM # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E + A6F0..A6F1 ; NSM # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS + A802 ; NSM # Mn SYLOTI NAGRI SIGN DVISVARA + A806 ; NSM # Mn SYLOTI NAGRI SIGN HASANTA + A80B ; NSM # Mn SYLOTI NAGRI SIGN ANUSVARA + A825..A826 ; NSM # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E + A82C ; NSM # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA + A8C4..A8C5 ; NSM # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU + A8E0..A8F1 ; NSM # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA + A8FF ; NSM # Mn DEVANAGARI VOWEL SIGN AY + A926..A92D ; NSM # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU + A947..A951 ; NSM # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R + A980..A982 ; NSM # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR + A9B3 ; NSM # Mn JAVANESE SIGN CECAK TELU + A9B6..A9B9 ; NSM # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT + A9BC..A9BD ; NSM # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET + A9E5 ; NSM # Mn MYANMAR SIGN SHAN SAW + AA29..AA2E ; NSM # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE + AA31..AA32 ; NSM # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE + AA35..AA36 ; NSM # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA + AA43 ; NSM # Mn CHAM CONSONANT SIGN FINAL NG + AA4C 
; NSM # Mn CHAM CONSONANT SIGN FINAL M + AA7C ; NSM # Mn MYANMAR SIGN TAI LAING TONE-2 + AAB0 ; NSM # Mn TAI VIET MAI KANG + AAB2..AAB4 ; NSM # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U + AAB7..AAB8 ; NSM # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA + AABE..AABF ; NSM # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK + AAC1 ; NSM # Mn TAI VIET TONE MAI THO + AAEC..AAED ; NSM # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI + AAF6 ; NSM # Mn MEETEI MAYEK VIRAMA + ABE5 ; NSM # Mn MEETEI MAYEK VOWEL SIGN ANAP + ABE8 ; NSM # Mn MEETEI MAYEK VOWEL SIGN UNAP + ABED ; NSM # Mn MEETEI MAYEK APUN IYEK + FB1E ; NSM # Mn HEBREW POINT JUDEO-SPANISH VARIKA + FE00..FE0F ; NSM # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 + FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF + 101FD ; NSM # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE + 102E0 ; NSM # Mn COPTIC EPACT THOUSANDS MARK + 10376..1037A ; NSM # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII + 10A01..10A03 ; NSM # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R + 10A05..10A06 ; NSM # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O + 10A0C..10A0F ; NSM # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA + 10A38..10A3A ; NSM # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW + 10A3F ; NSM # Mn KHAROSHTHI VIRAMA + 10AE5..10AE6 ; NSM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW + 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI + 10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK + 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK + 10EFA..10EFF ; NSM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA + 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW + 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR 
COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW + 11001 ; NSM # Mn BRAHMI SIGN ANUSVARA + 11038..11046 ; NSM # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA + 11070 ; NSM # Mn BRAHMI SIGN OLD TAMIL VIRAMA + 11073..11074 ; NSM # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O + 1107F..11081 ; NSM # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA + 110B3..110B6 ; NSM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI + 110B9..110BA ; NSM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA + 110C2 ; NSM # Mn KAITHI VOWEL SIGN VOCALIC R + 11100..11102 ; NSM # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA + 11127..1112B ; NSM # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU + 1112D..11134 ; NSM # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA + 11173 ; NSM # Mn MAHAJANI SIGN NUKTA + 11180..11181 ; NSM # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA + 111B6..111BE ; NSM # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O + 111C9..111CC ; NSM # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK + 111CF ; NSM # Mn SHARADA SIGN INVERTED CANDRABINDU + 1122F..11231 ; NSM # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI + 11234 ; NSM # Mn KHOJKI SIGN ANUSVARA + 11236..11237 ; NSM # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA + 1123E ; NSM # Mn KHOJKI SIGN SUKUN + 11241 ; NSM # Mn KHOJKI VOWEL SIGN VOCALIC R + 112DF ; NSM # Mn KHUDAWADI SIGN ANUSVARA + 112E3..112EA ; NSM # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA + 11300..11301 ; NSM # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU + 1133B..1133C ; NSM # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA + 11340 ; NSM # Mn GRANTHA VOWEL SIGN II + 11366..1136C ; NSM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX + 11370..11374 ; NSM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA + 113BB..113C0 ; NSM # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL + 113CE ; NSM # Mn 
TULU-TIGALARI SIGN VIRAMA + 113D0 ; NSM # Mn TULU-TIGALARI CONJOINER + 113D2 ; NSM # Mn TULU-TIGALARI GEMINATION MARK + 113E1..113E2 ; NSM # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA + 11438..1143F ; NSM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI + 11442..11444 ; NSM # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA + 11446 ; NSM # Mn NEWA SIGN NUKTA + 1145E ; NSM # Mn NEWA SANDHI MARK + 114B3..114B8 ; NSM # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL + 114BA ; NSM # Mn TIRHUTA VOWEL SIGN SHORT E + 114BF..114C0 ; NSM # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA + 114C2..114C3 ; NSM # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA + 115B2..115B5 ; NSM # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR + 115BC..115BD ; NSM # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA + 115BF..115C0 ; NSM # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA + 115DC..115DD ; NSM # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU + 11633..1163A ; NSM # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI + 1163D ; NSM # Mn MODI SIGN ANUSVARA + 1163F..11640 ; NSM # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA + 116AB ; NSM # Mn TAKRI SIGN ANUSVARA + 116AD ; NSM # Mn TAKRI VOWEL SIGN AA + 116B0..116B5 ; NSM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU + 116B7 ; NSM # Mn TAKRI SIGN NUKTA + 1171D ; NSM # Mn AHOM CONSONANT SIGN MEDIAL LA + 1171F ; NSM # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA + 11722..11725 ; NSM # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU + 11727..1172B ; NSM # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER + 1182F..11837 ; NSM # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA + 11839..1183A ; NSM # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA + 1193B..1193C ; NSM # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU + 1193E ; NSM # Mn DIVES AKURU VIRAMA + 11943 ; NSM # Mn DIVES AKURU SIGN NUKTA + 119D4..119D7 ; NSM # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL 
SIGN VOCALIC RR + 119DA..119DB ; NSM # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI + 119E0 ; NSM # Mn NANDINAGARI SIGN VIRAMA + 11A01..11A06 ; NSM # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O + 11A09..11A0A ; NSM # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK + 11A33..11A38 ; NSM # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA + 11A3B..11A3E ; NSM # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA + 11A47 ; NSM # Mn ZANABAZAR SQUARE SUBJOINER + 11A51..11A56 ; NSM # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE + 11A59..11A5B ; NSM # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK + 11A8A..11A96 ; NSM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA + 11A98..11A99 ; NSM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER + 11B60 ; NSM # Mn SHARADA VOWEL SIGN OE + 11B62..11B64 ; NSM # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E + 11B66 ; NSM # Mn SHARADA VOWEL SIGN CANDRA E + 11C30..11C36 ; NSM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L + 11C38..11C3D ; NSM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA + 11C92..11CA7 ; NSM # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA + 11CAA..11CB0 ; NSM # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA + 11CB2..11CB3 ; NSM # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E + 11CB5..11CB6 ; NSM # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU + 11D31..11D36 ; NSM # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R + 11D3A ; NSM # Mn MASARAM GONDI VOWEL SIGN E + 11D3C..11D3D ; NSM # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O + 11D3F..11D45 ; NSM # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA + 11D47 ; NSM # Mn MASARAM GONDI RA-KARA + 11D90..11D91 ; NSM # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI 
+ 11D95 ; NSM # Mn GUNJALA GONDI SIGN ANUSVARA + 11D97 ; NSM # Mn GUNJALA GONDI VIRAMA + 11EF3..11EF4 ; NSM # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U + 11F00..11F01 ; NSM # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA + 11F36..11F3A ; NSM # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R + 11F40 ; NSM # Mn KAWI VOWEL SIGN EU + 11F42 ; NSM # Mn KAWI CONJOINER + 11F5A ; NSM # Mn KAWI SIGN NUKTA + 13440 ; NSM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY + 13447..13455 ; NSM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED + 1611E..16129 ; NSM # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK + 1612D..1612F ; NSM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA + 16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE + 16B30..16B36 ; NSM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM + 16F4F ; NSM # Mn MIAO SIGN CONSONANT MODIFIER BAR + 16F8F..16F92 ; NSM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW + 16FE4 ; NSM # Mn KHITAN SMALL SCRIPT FILLER + 1BC9D..1BC9E ; NSM # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK + 1CF00..1CF2D ; NSM # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT + 1CF30..1CF46 ; NSM # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG + 1D167..1D169 ; NSM # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 + 1D17B..1D182 ; NSM # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE + 1D185..1D18B ; NSM # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE + 1D1AA..1D1AD ; NSM # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO + 1D242..1D244 ; NSM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME + 1DA00..1DA36 ; NSM # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN 
+ 1DA3B..1DA6C ; NSM # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT + 1DA75 ; NSM # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS + 1DA84 ; NSM # Mn SIGNWRITING LOCATION HEAD NECK + 1DA9B..1DA9F ; NSM # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 + 1DAA1..1DAAF ; NSM # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 + 1E000..1E006 ; NSM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE + 1E008..1E018 ; NSM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU + 1E01B..1E021 ; NSM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI + 1E023..1E024 ; NSM # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS + 1E026..1E02A ; NSM # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA + 1E08F ; NSM # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 1E130..1E136 ; NSM # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D + 1E2AE ; NSM # Mn TOTO SIGN RISING TONE + 1E2EC..1E2EF ; NSM # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI + 1E4EC..1E4EF ; NSM # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH + 1E5EE..1E5EF ; NSM # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR + 1E6E3 ; NSM # Mn TAI YO SIGN UE + 1E6E6 ; NSM # Mn TAI YO SIGN AU + 1E6EE..1E6EF ; NSM # Mn [2] TAI YO SIGN AY..TAI YO SIGN ANG + 1E6F5 ; NSM # Mn TAI YO SIGN OM + 1E8D0..1E8D6 ; NSM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS + 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA + E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + + # Total code points: 2067 + + # ================================================ + + # Bidi_Class=Arabic_Letter + + 0608 ; AL # Sm ARABIC RAY + 060B ; AL # Sc AFGHANI SIGN + 060D ; AL # Po ARABIC DATE SEPARATOR + 061B ; AL # Po ARABIC SEMICOLON + 061C ; AL # Cf ARABIC LETTER MARK + 
061D..061F ; AL # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK + 0620..063F ; AL # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE + 0640 ; AL # Lm ARABIC TATWEEL + 0641..064A ; AL # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH + 066D ; AL # Po ARABIC FIVE POINTED STAR + 066E..066F ; AL # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF + 0671..06D3 ; AL # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE + 06D4 ; AL # Po ARABIC FULL STOP + 06D5 ; AL # Lo ARABIC LETTER AE + 06E5..06E6 ; AL # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH + 06EE..06EF ; AL # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V + 06FA..06FC ; AL # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW + 06FD..06FE ; AL # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN + 06FF ; AL # Lo ARABIC LETTER HEH WITH INVERTED V + 0700..070D ; AL # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS + 070F ; AL # Cf SYRIAC ABBREVIATION MARK + 0710 ; AL # Lo SYRIAC LETTER ALAPH + 0712..072F ; AL # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH + 074D..07A5 ; AL # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU + 07B1 ; AL # Lo THAANA LETTER NAA + 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA + 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT + 0888 ; AL # Sk ARABIC RAISED ROUND DOT + 0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE + 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF + 08C9 ; AL # Lm ARABIC SMALL FARSI YEH + FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM + FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE + FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG 
ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM + FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM + FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM + FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU + FDFC ; AL # Sc RIAL SIGN + FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM + FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM + 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA + 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW + 10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW + 10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW + 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN + 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED + 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT + 1EC71..1ECAB ; AL # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE + 1ECAC ; AL # So INDIC SIYAQ PLACEHOLDER + 1ECAD..1ECAF ; AL # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS + 1ECB0 ; AL # Sc INDIC SIYAQ RUPEE MARK + 1ECB1..1ECB4 ; AL # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK + 1ED01..1ED2D ; AL # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND + 1ED2E ; AL # So OTTOMAN SIYAQ MARRATAN + 1ED2F..1ED3D ; AL # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH + 1EE00..1EE03 ; AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL + 1EE05..1EE1F ; AL # 
Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF + 1EE21..1EE22 ; AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM + 1EE24 ; AL # Lo ARABIC MATHEMATICAL INITIAL HEH + 1EE27 ; AL # Lo ARABIC MATHEMATICAL INITIAL HAH + 1EE29..1EE32 ; AL # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF + 1EE34..1EE37 ; AL # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH + 1EE39 ; AL # Lo ARABIC MATHEMATICAL INITIAL DAD + 1EE3B ; AL # Lo ARABIC MATHEMATICAL INITIAL GHAIN + 1EE42 ; AL # Lo ARABIC MATHEMATICAL TAILED JEEM + 1EE47 ; AL # Lo ARABIC MATHEMATICAL TAILED HAH + 1EE49 ; AL # Lo ARABIC MATHEMATICAL TAILED YEH + 1EE4B ; AL # Lo ARABIC MATHEMATICAL TAILED LAM + 1EE4D..1EE4F ; AL # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN + 1EE51..1EE52 ; AL # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF + 1EE54 ; AL # Lo ARABIC MATHEMATICAL TAILED SHEEN + 1EE57 ; AL # Lo ARABIC MATHEMATICAL TAILED KHAH + 1EE59 ; AL # Lo ARABIC MATHEMATICAL TAILED DAD + 1EE5B ; AL # Lo ARABIC MATHEMATICAL TAILED GHAIN + 1EE5D ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON + 1EE5F ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF + 1EE61..1EE62 ; AL # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM + 1EE64 ; AL # Lo ARABIC MATHEMATICAL STRETCHED HEH + 1EE67..1EE6A ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF + 1EE6C..1EE72 ; AL # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF + 1EE74..1EE77 ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH + 1EE79..1EE7C ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH + 1EE7E ; AL # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH + 1EE80..1EE89 ; AL # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH + 1EE8B..1EE9B ; AL # Lo [17] ARABIC 
MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN + 1EEA1..1EEA3 ; AL # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL + 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH + 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + + # The above property value applies to 253 code points not listed here. + # Total code points: 1731 + + # ================================================ + + # Bidi_Class=Left_To_Right_Override + + 202D ; LRO # Cf LEFT-TO-RIGHT OVERRIDE + + # Total code points: 1 + + # ================================================ + + # Bidi_Class=Right_To_Left_Override + + 202E ; RLO # Cf RIGHT-TO-LEFT OVERRIDE + + # Total code points: 1 + + # ================================================ + + # Bidi_Class=Left_To_Right_Embedding + + 202A ; LRE # Cf LEFT-TO-RIGHT EMBEDDING + + # Total code points: 1 + + # ================================================ + + # Bidi_Class=Right_To_Left_Embedding + + 202B ; RLE # Cf RIGHT-TO-LEFT EMBEDDING + + # Total code points: 1 + + # ================================================ + + # Bidi_Class=Pop_Directional_Format + + 202C ; PDF # Cf POP DIRECTIONAL FORMATTING + + # Total code points: 1 + + # ================================================ + + # Bidi_Class=Left_To_Right_Isolate + + 2066 ; LRI # Cf LEFT-TO-RIGHT ISOLATE + + # Total code points: 1 + + # ================================================ + + # Bidi_Class=Right_To_Left_Isolate + + 2067 ; RLI # Cf RIGHT-TO-LEFT ISOLATE + + # Total code points: 1 + + # ================================================ + + # Bidi_Class=First_Strong_Isolate + + 2068 ; FSI # Cf FIRST STRONG ISOLATE + + # Total code points: 1 + + # ================================================ + + # Bidi_Class=Pop_Directional_Isolate + + 2069 ; PDI # Cf POP DIRECTIONAL ISOLATE + + # Total code points: 1 + + # EOF +`.trim(); diff 
--git a/packages/test-utils/src/mock-store/file-tree.ts b/packages/test-utils/src/mock-store/file-tree.ts new file mode 100644 index 000000000..99778f992 --- /dev/null +++ b/packages/test-utils/src/mock-store/file-tree.ts @@ -0,0 +1,116 @@ +import type { UnicodeFileTreeNode } from "@ucdjs/schemas"; + +/** + * A file tree node with optional content for mocking file downloads. + * The `_content` property is used by the files handler to return content. + */ +export type FileTreeNodeWithContent = UnicodeFileTreeNode & { _content?: string }; + +/** + * Input format for creating file trees. + * - String values represent file content + * - Nested objects represent directories with their contents + * + * @example + * ```ts + * // Simple flat files + * { + * "UnicodeData.txt": "file content", + * "Blocks.txt": "blocks content" + * } + * + * // Nested directories + * { + * "UnicodeData.txt": "content", + * "auxiliary": { + * "GraphemeBreakProperty.txt": "content", + * "WordBreakProperty.txt": "content" + * } + * } + * + * // Deeply nested + * { + * "extracted": { + * "DerivedBidiClass.txt": "content", + * "nested": { + * "DeepFile.txt": "content" + * } + * } + * } + * ``` + */ +export interface FileTreeInput { + [name: string]: string | FileTreeInput; +} + +/** + * Creates a file tree structure from a simplified input format. + * + * This utility converts a nested object structure into the UnicodeFileTreeNode format + * used by mockStoreApi. String values become files with that content, nested objects + * become directories. 
+ * + * @param input - Object mapping names to content (files) or nested objects (directories) + * @param parentPath - Internal parameter for building nested paths (don't pass this) + * @returns Array of FileTreeNodeWithContent for use with mockStoreApi + * + * @example + * ```ts + * // Flat files + * createFileTree({ + * "UnicodeData.txt": "0041;LATIN CAPITAL LETTER A", + * "Blocks.txt": "0000..007F; Basic Latin" + * }) + * + * // With directories + * createFileTree({ + * "UnicodeData.txt": "content", + * "auxiliary": { + * "GraphemeBreakProperty.txt": "content" + * } + * }) + * + * // Use with mockStoreApi + * mockStoreApi({ + * versions: ["16.0.0"], + * files: { + * "*": createFileTree({ + * "UnicodeData.txt": "content", + * "Blocks.txt": "content" + * }) + * } + * }) + * ``` + */ +export function createFileTree( + input: FileTreeInput, + parentPath = "", +): FileTreeNodeWithContent[] { + const result: FileTreeNodeWithContent[] = []; + + for (const [name, value] of Object.entries(input)) { + const path = parentPath ? 
`${parentPath}/${name}` : name; + + if (typeof value === "string") { + // It's a file with content + result.push({ + type: "file", + name, + path, + _content: value, + lastModified: 0, + }); + } else { + // It's a directory - recursively process children + result.push({ + type: "directory", + name, + path, + children: createFileTree(value, path), + lastModified: 0, + }); + } + } + + return result; +} diff --git a/packages/test-utils/src/mock-store/handlers/file-tree.ts b/packages/test-utils/src/mock-store/handlers/file-tree.ts index 5e51af31c..650ec1219 100644 --- a/packages/test-utils/src/mock-store/handlers/file-tree.ts +++ b/packages/test-utils/src/mock-store/handlers/file-tree.ts @@ -1,5 +1,23 @@ +import type { MockStoreNode } from "../types"; import { HttpResponse } from "../../msw"; +import { addPathsToFileNodes } from "../add-paths"; import { defineMockRouteHandler } from "../define"; +import { omitContentRecursively } from "../utils"; + +// Aligns with files handler: always return contents of the synthetic "ucd" folder +// and keep paths prefixed with /{version}/ucd/... +function normalizeFileTree(nodes: MockStoreNode[]): { nodes: MockStoreNode[]; basePath: string } { + if ( + nodes.length === 1 + && nodes[0]?.type === "directory" + && nodes[0]?.name === "ucd" + ) { + return { nodes: (nodes[0].children ?? []) as MockStoreNode[], basePath: "ucd" }; + } + + // Even without an explicit ucd directory, we still prefix paths with "ucd" + return { nodes, basePath: "ucd" }; +} export const fileTreeRoute = defineMockRouteHandler({ endpoint: "/api/v1/versions/{version}/file-tree", @@ -20,19 +38,23 @@ export const fileTreeRoute = defineMockRouteHandler({ mockFetch([ ["GET", url, ({ params }) => { if (shouldUseDefaultValue) { - // If the only key in files is "*", we will - // just return the files object as is. 
- if (Object.keys(files).length === 1 && Object.keys(files)[0] === "*") { - return HttpResponse.json(files["*"]); - } - - // If there is multiple keys in files we will try and match the version const version = params.version as string; - if (version && files[version]) { - return HttpResponse.json(files[version]); + + const useWildcardOnly = Object.keys(files).length === 1 && Object.keys(files)[0] === "*"; + + // Prefer version-specific data; fall back to wildcard; then nothing + const filesData = useWildcardOnly + ? files["*"] + : files[version] || files["*"]; + + if (!filesData) { + return HttpResponse.json([]); } - return HttpResponse.json([]); + const { nodes, basePath } = normalizeFileTree(filesData); + const filesWithPaths = addPathsToFileNodes(nodes, version, basePath || undefined); + + return HttpResponse.json(omitContentRecursively(filesWithPaths)); } return HttpResponse.json(providedResponse); diff --git a/packages/test-utils/src/mock-store/handlers/files.ts b/packages/test-utils/src/mock-store/handlers/files.ts index 56acb19dd..f999da526 100644 --- a/packages/test-utils/src/mock-store/handlers/files.ts +++ b/packages/test-utils/src/mock-store/handlers/files.ts @@ -1,6 +1,19 @@ +import type { ApiError } from "@ucdjs/schemas"; +import type { MockStoreNodeWithPath } from "../types"; +import { findFileByPath } from "@ucdjs-internal/shared"; +import { + UCD_STAT_CHILDREN_DIRS_HEADER, + UCD_STAT_CHILDREN_FILES_HEADER, + UCD_STAT_CHILDREN_HEADER, + UCD_STAT_SIZE_HEADER, + UCD_STAT_TYPE_HEADER, +} from "@ucdjs/env"; import { HttpResponse } from "../../msw"; +import { addPathsToFileNodes } from "../add-paths"; import { defineMockRouteHandler } from "../define"; +import { omitChildrenAndContent } from "../utils"; +const DEFAULT_FILE_RESPONSE_CONTENT = "This is a default file response."; export const filesRoute = defineMockRouteHandler({ endpoint: "/api/v1/files/{wildcard}", setup: ({ @@ -8,18 +21,160 @@ export const filesRoute = defineMockRouteHandler({ 
providedResponse, mockFetch, shouldUseDefaultValue, + files, }) => { if (typeof providedResponse === "function") { mockFetch([ - ["GET", url, providedResponse], + [["GET", "HEAD"], url, providedResponse], ]); return; } mockFetch([ - ["GET", url, () => { + [["GET", "HEAD"], url, ({ request, params }) => { if (shouldUseDefaultValue) { - return HttpResponse.text("Default file content"); + const wildcard = (params.wildcard as string) || ""; + const isHeadRequest = request.method === "HEAD"; + + // Extract version and file path from wildcard (e.g., "16.0.0/ucd/UnicodeData.txt") + const [firstPart, ...pathParts] = wildcard.split("/"); + + // Check if the first part is a valid version key in files + const isVersionKey = firstPart && firstPart in files; + + let version = ""; + let filePath: string; + let versionFiles: MockStoreNodeWithPath[] = []; + let lookupPath: string; + + // Determine which files to use based on the request path + // If the first part is a version key, use that version's files + // Otherwise fall back to wildcard ("*") which serves as a default for all versions + // For root paths without a version prefix, use the "root" entry + if (isVersionKey || (firstPart && files["*"])) { + version = firstPart; + filePath = pathParts.join("/"); + const versionFilesRaw = files[firstPart] || files["*"]; + + if (!versionFilesRaw || !Array.isArray(versionFilesRaw)) { + return HttpResponse.text(DEFAULT_FILE_RESPONSE_CONTENT); + } + + // Build file tree with version-prefixed paths and "ucd" subdirectory (e.g., /16.0.0/ucd/file.txt) + versionFiles = addPathsToFileNodes(versionFilesRaw, version, "ucd"); + lookupPath = filePath ? 
`/${version}/${filePath}` : ""; + } else { + filePath = wildcard; + const rootFilesRaw = files.root; + + if (!rootFilesRaw || !Array.isArray(rootFilesRaw)) { + return HttpResponse.text(DEFAULT_FILE_RESPONSE_CONTENT); + } + + // Build file tree without version or subdirectory prefix (e.g., /test.txt) + versionFiles = addPathsToFileNodes(rootFilesRaw, "", ""); + lookupPath = filePath ? `/${filePath}` : ""; + } + + // If no specific path requested, return the listing of root-level files for this context + if (!lookupPath) { + // For versioned requests, expose the synthetic "ucd" directory at the top level + // so that listing "/{version}" yields just that folder. + if (version) { + const ucdDir: MockStoreNodeWithPath = { + type: "directory", + name: "ucd", + path: `/${version}/ucd`, + lastModified: 0, + children: [], + }; + const stripped = omitChildrenAndContent([ucdDir]); + return HttpResponse.json(stripped, { + headers: { + [UCD_STAT_TYPE_HEADER]: "directory", + [UCD_STAT_CHILDREN_HEADER]: "1", + [UCD_STAT_CHILDREN_FILES_HEADER]: "0", + [UCD_STAT_CHILDREN_DIRS_HEADER]: "1", + }, + }); + } + + const stripped = omitChildrenAndContent(versionFiles); + return HttpResponse.json(stripped, { + headers: { + [UCD_STAT_TYPE_HEADER]: "directory", + [UCD_STAT_CHILDREN_HEADER]: `${stripped.length}`, + [UCD_STAT_CHILDREN_FILES_HEADER]: `${stripped.filter((f) => f.type === "file").length}`, + [UCD_STAT_CHILDREN_DIRS_HEADER]: `${stripped.filter((f) => f.type === "directory").length}`, + }, + }); + } + + // Special-case the synthetic "ucd" directory: it's implicit in the generated paths + // for versioned requests, so listing "/{version}/ucd" should return the top-level files. 
+ if (version && filePath === "ucd") { + const stripped = omitChildrenAndContent(versionFiles); + return HttpResponse.json(stripped, { + headers: { + [UCD_STAT_TYPE_HEADER]: "directory", + [UCD_STAT_CHILDREN_HEADER]: `${stripped.length}`, + [UCD_STAT_CHILDREN_FILES_HEADER]: `${stripped.filter((f) => f.type === "file").length}`, + [UCD_STAT_CHILDREN_DIRS_HEADER]: `${stripped.filter((f) => f.type === "directory").length}`, + }, + }); + } + + // Locate the requested file or directory within the tree + const fileNode = findFileByPath(versionFiles, lookupPath); + + // If it's a directory, return its children (or empty array) + if (fileNode && fileNode.type === "directory") { + const stripped = omitChildrenAndContent(fileNode.children ?? []); + return HttpResponse.json(stripped, { + headers: { + [UCD_STAT_TYPE_HEADER]: "directory", + [UCD_STAT_CHILDREN_HEADER]: `${stripped.length}`, + [UCD_STAT_CHILDREN_FILES_HEADER]: `${stripped.filter((f) => f.type === "file").length}`, + [UCD_STAT_CHILDREN_DIRS_HEADER]: `${stripped.filter((f) => f.type === "directory").length}`, + }, + }); + } + + // If it's a file with _content, return the content + if (fileNode && "_content" in fileNode && typeof fileNode._content === "string") { + let content = fileNode._content; + const contentLength = new TextEncoder().encode(content).length; + const headers: Record = { + "Content-Type": "text/plain; charset=utf-8", + [UCD_STAT_TYPE_HEADER]: "file", + }; + + // Check if the content ends with a newline; if not, add one for better terminal display + if (!content.endsWith("\n")) { + headers["X-Content-Warning"] = "Content did not end with a newline; added for display purposes."; + content += "\n"; + } + + // Only include size headers for HEAD requests (buffered response) + if (isHeadRequest) { + headers["Content-Length"] = `${contentLength}`; + headers[UCD_STAT_SIZE_HEADER] = `${contentLength}`; + } + + return HttpResponse.text(content, { headers }); + } + + // If file found but no _content, return 
the default response content + if (fileNode) { + console.warn(`Mock store: File "${filePath}" found but has no _content. Returning default response content.`); + return HttpResponse.text(DEFAULT_FILE_RESPONSE_CONTENT); + } + + return HttpResponse.json({ + message: "Resource not found", + status: 404, + timestamp: new Date().toISOString(), + } satisfies ApiError, { status: 404 }); } if (providedResponse instanceof ArrayBuffer || providedResponse instanceof Uint8Array) { diff --git a/packages/test-utils/src/mock-store/handlers/well-known.ts b/packages/test-utils/src/mock-store/handlers/well-known.ts index cd5ee085c..e85c8dddb 100644 --- a/packages/test-utils/src/mock-store/handlers/well-known.ts +++ b/packages/test-utils/src/mock-store/handlers/well-known.ts @@ -1,5 +1,7 @@ import { flattenFilePaths } from "@ucdjs-internal/shared"; +import { hasUCDFolderPath } from "@unicode-utils/core"; import { HttpResponse } from "../../msw"; +import { addPathsToFileNodes } from "../add-paths"; import { defineMockRouteHandler } from "../define"; export const wellKnownConfig = defineMockRouteHandler({ @@ -77,15 +79,14 @@ export const wellKnownStoreVersionManifest = defineMockRouteHandler({ // just return the files object as is. if (Object.keys(files).length === 1 && Object.keys(files)[0] === "*") { return HttpResponse.json({ - expectedFiles: flattenFilePaths(files["*"]!), + expectedFiles: flattenFilePaths(addPathsToFileNodes(files["*"]!, version, hasUCDFolderPath(version) ? "ucd" : undefined)), }); } // If there is multiple keys in files we will try and match the version - const version = params.version as string; if (version && files[version]) { return HttpResponse.json({ - expectedFiles: flattenFilePaths(files[version]), + expectedFiles: flattenFilePaths(addPathsToFileNodes(files[version]!, version, hasUCDFolderPath(version) ?
"ucd" : undefined)), }); } diff --git a/packages/test-utils/src/mock-store/index.ts b/packages/test-utils/src/mock-store/index.ts index 21f63a6f0..ad8c87e6f 100644 --- a/packages/test-utils/src/mock-store/index.ts +++ b/packages/test-utils/src/mock-store/index.ts @@ -3,7 +3,11 @@ import type { MockStoreConfig, MockStoreFiles } from "./types"; import { createDebugger, isApiError } from "@ucdjs-internal/shared"; import { HttpResponse } from "msw"; import { mockFetch } from "../msw"; +import { defaultArabicShapingFileContent } from "./default-files/arabic-shaping"; +import { defaultBidiBracketsFileContent } from "./default-files/bidi-brackets"; +import { defaultDerivedBidClassFileContent } from "./default-files/derived-bidi-class"; import { MOCK_ROUTES } from "./handlers"; + import { extractConfiguredMetadata, parseLatency, @@ -17,26 +21,25 @@ const DEFAULT_MOCK_STORE_FILES = { { type: "file", name: "ArabicShaping.txt", - path: "ArabicShaping.txt", lastModified: 1755287100000, + _content: defaultArabicShapingFileContent, }, { type: "file", name: "BidiBrackets.txt", - path: "BidiBrackets.txt", lastModified: 1755287100000, + _content: defaultBidiBracketsFileContent, }, { type: "directory", name: "extracted", - path: "extracted", lastModified: 1755287100000, children: [ { type: "file", name: "DerivedBidiClass.txt", - path: "extracted/DerivedBidiClass.txt", lastModified: 1755287100000, + _content: defaultDerivedBidClassFileContent, }, ], }, @@ -61,7 +64,7 @@ export function mockStoreApi(config?: MockStoreConfig): void { const endpoint = route.endpoint; // Every endpoint is optional, but by default enabled - const response = responses?.[endpoint as keyof typeof responses] ?? true; + const response = responses?.[endpoint as keyof typeof responses] ?? 
false; // If explicitly disabled, skip if (response === false) continue; @@ -167,4 +170,6 @@ function toMSWPath(endpoint: string): string { } export type { MockStoreConfig }; +export { createFileTree } from "./file-tree"; +export type { FileTreeInput, FileTreeNodeWithContent } from "./file-tree"; export { configure, unsafeResponse } from "./helpers"; diff --git a/packages/test-utils/src/mock-store/types.ts b/packages/test-utils/src/mock-store/types.ts index 485d077cb..0f3a6043b 100644 --- a/packages/test-utils/src/mock-store/types.ts +++ b/packages/test-utils/src/mock-store/types.ts @@ -1,6 +1,12 @@ import type { MockFetchFn } from "@luxass/msw-utils"; -import type { UnicodeFileTree } from "@ucdjs/schemas"; -import type { AsyncResponseResolverReturnType, DefaultBodyType, HttpResponseResolver, PathParams } from "msw"; +import type { Prettify } from "@luxass/utils"; +import type { UnicodeFileTreeNode } from "@ucdjs/schemas"; +import type { + AsyncResponseResolverReturnType, + DefaultBodyType, + HttpResponseResolver, + PathParams, +} from "msw"; import type { paths } from "../.generated/api"; import type { MOCK_ROUTES } from "./handlers"; import type { kConfiguredResponse } from "./helpers"; @@ -75,13 +81,21 @@ type DerivedResponses = Partial<{ }>; export type StoreVersionFileKey = "16.0.0" | "17.0.0"; -export type StoreFileKeyWildcard = "*"; +export type StoreFileKeyWildcard = "*" | "root"; type PartialRecord = { [P in K]?: T; }; -export type MockStoreFiles = PartialRecord; +type MockStoreKey = StoreVersionFileKey | StoreFileKeyWildcard | (string & {}); + +export type MockStoreNode = Prettify<(Omit & { type: "file"; path?: string; _content?: string }) + | (Omit & { type: "directory"; path?: string; _content?: string; children: MockStoreNode[] })>; + +export type MockStoreNodeWithPath = Prettify<(Omit & { type: "file"; _content?: string }) + | (Omit & { type: "directory"; _content?: string; children: MockStoreNodeWithPath[] })>; + +export type MockStoreFiles = 
PartialRecord; export interface MockStoreConfig { /** @@ -99,7 +113,7 @@ export interface MockStoreConfig { * If the value is `false`, then no handler will be used. * If the value provided is a specific response, then that response will be used. * - * By default, all endpoints will use the default handler. + * By default, all endpoints are disabled. */ responses?: DerivedResponses; diff --git a/packages/test-utils/src/mock-store/utils.ts b/packages/test-utils/src/mock-store/utils.ts index b310c9526..0c1242cc0 100644 --- a/packages/test-utils/src/mock-store/utils.ts +++ b/packages/test-utils/src/mock-store/utils.ts @@ -3,6 +3,7 @@ import type { HttpResponseResolver } from "msw"; import type { ConfiguredResponse, MockFetchType, + MockStoreNode, OnAfterMockFetchCallback, OnBeforeMockFetchCallback, OnRequestCallback, @@ -150,3 +151,32 @@ export function wrapMockFetch( return originalMockFetch(...args); }) as MockFetchFn; } + +export type DeepOmit = T extends object + ? T extends any[] + ? DeepOmit[] + : { + [P in Exclude]: DeepOmit; + } + : T; + +export function omitContentRecursively(nodes: T[]): DeepOmit[] { + return nodes.map((node) => { + if (node.type === "directory") { + return { + ...node, + children: omitContentRecursively(node.children), + } as DeepOmit; + } + + delete node._content; + return node as DeepOmit; + }); +} + +export function omitChildrenAndContent(nodes: T[]): Omit[] { + return nodes.map((node) => { + const { _content, children: _children, ...rest } = node as any; + return rest as Omit; + }); +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f6585fefe..b85744433 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -950,15 +950,24 @@ importers: '@luxass/msw-utils': specifier: catalog:prod version: 0.6.0(msw@2.12.4(@types/node@24.3.1)(typescript@5.9.3)) + '@luxass/utils': + specifier: catalog:prod + version: 2.7.2 '@ucdjs-internal/shared': specifier: workspace:* version: link:../shared + '@ucdjs/env': + specifier: workspace:* + version: 
link:../env '@ucdjs/fs-bridge': specifier: workspace:* version: link:../fs-bridge '@ucdjs/schemas': specifier: workspace:* version: link:../schemas + '@unicode-utils/core': + specifier: catalog:prod + version: 0.12.0-beta.18 msw: specifier: catalog:testing version: 2.12.4(@types/node@24.3.1)(typescript@5.9.3)