[Groonga-commit] groonga/groonga at 17085d8 [master] NormalizeNFKC100: support unify_to_romaji from katakana

アーカイブの一覧に戻る
Kouhei Sutou null+****@clear*****
Wed Nov 7 15:59:23 JST 2018


Kouhei Sutou	2018-11-07 15:59:23 +0900 (Wed, 07 Nov 2018)

  Revision: 17085d8cad78105891089c27dcc9665aaf6e4387
  https://github.com/groonga/groonga/commit/17085d8cad78105891089c27dcc9665aaf6e4387

  Message:
    NormalizeNFKC100: support unify_to_romaji from katakana

  Added files:
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.test
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.expected
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.test
  Copied files:
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.expected
      (from test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected)
    test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.expected
      (from test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected)
  Modified files:
    lib/romaji.c
    test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected

  Modified: lib/romaji.c (+7 -6)
===================================================================
--- lib/romaji.c    2018-11-07 15:25:55 +0900 (75e79deca)
+++ lib/romaji.c    2018-11-07 15:59:23 +0900 (5270bf725)
@@ -62,7 +62,7 @@ grn_romaji_convert_hepburn(grn_ctx *ctx,
                   next[2] == 0xa5 || /* U+30E5 KATAKANA LETTER SMALL YU */
                   next[2] == 0xa7)) { /* U+30E7 KATAKANA LETTER SMALL YO */
         next_small_y = GRN_TRUE;
-        next_small_yayuyo = auo[(next[2] - 3) % 5];
+        next_small_yayuyo = aiueo[(next[2] - 3) % 5];
       } else if (next[0] == 0xe3 &&
                  ((next[1] == 0x81 &&
                    (next[2] == 0xb0 || /* U+3070 HIRAGANA LETTER BA */
@@ -244,8 +244,8 @@ grn_romaji_convert_hepburn(grn_ctx *ctx,
           if (0x80 <= next[2] && next[2] <= 0x89) {
             /* U+30C0 KATAKANA LETTER DA ..
              * U+30C9 KATAKANA LETTER DO */
-            const char *tdtjxtztdtd = "tdtjxtztdtd";
-            next_consonant = tdtjxtztdtd[next[2] - 0x80];
+            const char *dtjxtztdtd = "dtjxtztdtd";
+            next_consonant = dtjxtztdtd[next[2] - 0x80];
           } else if (0x8a <= next[2] && next[2] <= 0x8e) {
             /* U+30CA KATAKANA LETTER NA ..
              * U+30CE KATAKANA LETTER NO */
@@ -557,7 +557,7 @@ grn_romaji_convert_hepburn(grn_ctx *ctx,
             /* U+30B7 KATAKANA LETTER SI */
             buffer[(*n_bytes)++] = 's';
             buffer[(*n_bytes)++] = 'h';
-          } else if (current[2] == 0x98) {
+          } else if (current[2] == 0xb8) {
             /* U+30B8 KATAKANA LETTER ZI */
             buffer[(*n_bytes)++] = 'j';
           } else {
@@ -602,6 +602,7 @@ grn_romaji_convert_hepburn(grn_ctx *ctx,
             /* U+30C2 KATAKANA LETTER DI */
             buffer[(*n_bytes)++] = 'j';
           } else if (current[2] == 0x83) {
+            /* U+30C3 KATAKANA LETTER SMALL TU */
             buffer[(*n_bytes)++] = 'x';
             buffer[(*n_bytes)++] = 't';
             buffer[(*n_bytes)++] = 's';
@@ -610,8 +611,8 @@ grn_romaji_convert_hepburn(grn_ctx *ctx,
             buffer[(*n_bytes)++] = 't';
             buffer[(*n_bytes)++] = 's';
           } else {
-            const char *td_____tdtd = "td_____tdtd";
-            buffer[(*n_bytes)++] = td_____tdtd[current[2] - 0x80];
+            const char *d____ztdtd = "d____ztdtd";
+            buffer[(*n_bytes)++] = d____ztdtd[current[2] - 0x80];
           }
           buffer[(*n_bytes)++] = aiiuuueeoo[current[2] - 0x80];
         }

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.expected (+62 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.expected    2018-11-07 15:59:23 +0900 (29b479993)
@@ -0,0 +1,62 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "アァイィウゥエェオォ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "axaixiuxuexeoxo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      3,
+      -1,
+      3,
+      3,
+      -1,
+      3,
+      3,
+      -1,
+      3,
+      3,
+      -1,
+      3,
+      3,
+      -1
+    ],
+    "offsets": [
+      0,
+      3,
+      3,
+      6,
+      9,
+      9,
+      12,
+      15,
+      15,
+      18,
+      21,
+      21,
+      24,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/a.test    2018-11-07 15:59:23 +0900 (92a1152d5)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "アァイィウゥエェオォ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.expected (+74 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.expected    2018-11-07 15:59:23 +0900 (37e040a74)
@@ -0,0 +1,74 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "バビブベボビャビュビョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "babibubebobyabyubyo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      15,
+      21,
+      21,
+      21,
+      27,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ba.test    2018-11-07 15:59:23 +0900 (02ea757eb)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "バビブベボビャビュビョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.expected (+65 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.expected    2018-11-07 15:59:23 +0900 (530f43242)
@@ -0,0 +1,65 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ダヂヅデドヂャヂュヂョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "dajizudedojajujo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      6,
+      -1,
+      6,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      21,
+      21,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/da.test    2018-11-07 15:59:23 +0900 (0120d3600)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ダヂヅデドヂャヂュヂョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.expected (+41 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.expected    2018-11-07 15:59:23 +0900 (959573377)
@@ -0,0 +1,41 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ヴゕゖ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "vuxkaxke",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      -1,
+      3,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      3,
+      6,
+      6,
+      6
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/etc.test    2018-11-07 15:59:23 +0900 (61196e28b)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ヴゕゖ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.expected (+74 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.expected    2018-11-07 15:59:23 +0900 (59cce624d)
@@ -0,0 +1,74 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ガギグゲゴギャギュギョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "gagigugegogyagyugyo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      15,
+      21,
+      21,
+      21,
+      27,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ga.test    2018-11-07 15:59:23 +0900 (97da668a0)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ガギグゲゴギャギュギョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.expected (+74 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.expected    2018-11-07 15:59:23 +0900 (e4d38086f)
@@ -0,0 +1,74 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ハヒフヘホヒャヒュヒョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "hahihuhehohyahyuhyo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      15,
+      21,
+      21,
+      21,
+      27,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ha.test    2018-11-07 15:59:23 +0900 (635e0b896)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ハヒフヘホヒャヒュヒョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.expected (+74 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.expected    2018-11-07 15:59:23 +0900 (121fbd658)
@@ -0,0 +1,74 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "カキクケコキャキュキョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "kakikukekokyakyukyo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      15,
+      21,
+      21,
+      21,
+      27,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ka.test    2018-11-07 15:59:23 +0900 (6d90372a0)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "カキクケコキャキュキョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.expected (+74 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.expected    2018-11-07 15:59:23 +0900 (60915283b)
@@ -0,0 +1,74 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "マミムメモミャミュミョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "mamimumemomyamyumyo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      15,
+      21,
+      21,
+      21,
+      27,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ma.test    2018-11-07 15:59:23 +0900 (65747c562)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "マミムメモミャミュミョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.expected (+65 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.expected    2018-11-07 15:59:23 +0900 (582b98bb2)
@@ -0,0 +1,65 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ンパンバンマンアンヤ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "mpambamman-an-ya",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "symbol",
+      "alpha",
+      "alpha",
+      "symbol",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      3,
+      -1,
+      3,
+      3,
+      -1,
+      3,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      3,
+      -1,
+      3,
+      -1
+    ],
+    "offsets": [
+      0,
+      3,
+      3,
+      6,
+      9,
+      9,
+      12,
+      15,
+      15,
+      18,
+      18,
+      21,
+      24,
+      24,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/n.test    2018-11-07 15:59:23 +0900 (6ab4df257)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ンパンバンマンアンヤ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.expected (+74 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.expected    2018-11-07 15:59:23 +0900 (086a22cbe)
@@ -0,0 +1,74 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ナニヌネノニャニュニョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "naninunenonyanyunyo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      15,
+      21,
+      21,
+      21,
+      27,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/na.test    2018-11-07 15:59:23 +0900 (a1bf5b6e3)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ナニヌネノニャニュニョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.expected (+74 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.expected    2018-11-07 15:59:23 +0900 (3b01567e7)
@@ -0,0 +1,74 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "パピプペポピャピュピョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "papipupepopyapyupyo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      15,
+      21,
+      21,
+      21,
+      27,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/pa.test    2018-11-07 15:59:23 +0900 (8542c3bd9)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "パピプペポピャピュピョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.expected (+74 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.expected    2018-11-07 15:59:23 +0900 (895e66df8)
@@ -0,0 +1,74 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ラリルレロリャリュリョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "rarirureroryaryuryo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1,
+      6,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      15,
+      21,
+      21,
+      21,
+      27,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ra.test    2018-11-07 15:59:23 +0900 (1529d78b2)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ラリルレロリャリュリョ" \
+  WITH_CHECKS|WITH_TYPES

  Copied: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.expected (+14 -55) 50%
===================================================================
--- test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected    2018-11-07 15:25:55 +0900 (67969a6ef)
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.expected    2018-11-07 15:59:23 +0900 (2970e7a02)
@@ -1,4 +1,4 @@
-normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "あイウェおざジたチなニぱピまミヽヾ漢字"   WITH_CHECKS|WITH_TYPES
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "サシスセソシャシュショ"   WITH_CHECKS|WITH_TYPES
 [
   [
     0,
@@ -6,7 +6,7 @@ normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "re
     0.0
   ],
   {
-    "normalized": "aiuxeozazitachinanipapimamiヽヾ漢字",
+    "normalized": "sashisusesoshashusho",
     "types": [
       "alpha",
       "alpha",
@@ -27,92 +27,51 @@ normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "re
       "alpha",
       "alpha",
       "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "katakana",
-      "katakana",
-      "kanji",
-      "kanji"
+      "alpha"
     ],
     "checks": [
       3,
-      3,
-      3,
-      3,
       -1,
       3,
-      3,
       -1,
-      3,
       -1,
       3,
       -1,
       3,
       -1,
-      -1,
       3,
       -1,
-      3,
+      6,
       -1,
-      3,
       -1,
-      3,
+      6,
       -1,
-      3,
       -1,
-      3,
+      6,
       -1,
-      3,
+      -1
+    ],
+    "offsets": [
       0,
       0,
       3,
-      0,
-      0,
       3,
-      0,
-      0,
-      3,
-      0,
-      0
-    ],
-    "offsets": [
-      0,
       3,
       6,
+      6,
       9,
       9,
       12,
+      12,
       15,
       15,
-      18,
-      18,
+      15,
+      21,
       21,
       21,
-      24,
-      24,
-      24,
       27,
       27,
-      30,
-      30,
-      33,
-      33,
-      36,
-      36,
-      39,
-      39,
-      42,
-      42,
-      45,
-      48,
-      51,
-      54
+      27
     ]
   }
 ]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/sa.test    2018-11-07 15:59:23 +0900 (edeb01007)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "サシスセソシャシュショ" \
+  WITH_CHECKS|WITH_TYPES

  Copied: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.expected (+15 -50) 55%
===================================================================
--- test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected    2018-11-07 15:25:55 +0900 (67969a6ef)
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.expected    2018-11-07 15:59:23 +0900 (b4e0a58a5)
@@ -1,4 +1,4 @@
-normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "あイウェおざジたチなニぱピまミヽヾ漢字"   WITH_CHECKS|WITH_TYPES
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "タチツテトッチャチュチョ"   WITH_CHECKS|WITH_TYPES
 [
   [
     0,
@@ -6,7 +6,7 @@ normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "re
     0.0
   ],
   {
-    "normalized": "aiuxeozazitachinanipapimamiヽヾ漢字",
+    "normalized": "tachitsutetotchachucho",
     "types": [
       "alpha",
       "alpha",
@@ -29,29 +29,13 @@ normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "re
       "alpha",
       "alpha",
       "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "alpha",
-      "katakana",
-      "katakana",
-      "kanji",
-      "kanji"
+      "alpha"
     ],
     "checks": [
       3,
-      3,
-      3,
-      3,
       -1,
       3,
-      3,
       -1,
-      3,
-      -1,
-      3,
       -1,
       3,
       -1,
@@ -61,58 +45,39 @@ normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "re
       3,
       -1,
       3,
+      6,
       -1,
-      3,
       -1,
-      3,
+      6,
       -1,
-      3,
       -1,
-      3,
-      0,
-      0,
-      3,
+      6,
+      -1,
+      -1
+    ],
+    "offsets": [
       0,
       0,
       3,
-      0,
-      0,
       3,
-      0,
-      0
-    ],
-    "offsets": [
-      0,
       3,
       6,
+      6,
+      6,
       9,
       9,
       12,
+      12,
       15,
-      15,
       18,
       18,
-      21,
-      21,
+      18,
       24,
       24,
       24,
-      27,
-      27,
       30,
       30,
-      33,
-      33,
-      36,
-      36,
-      39,
-      39,
-      42,
-      42,
-      45,
-      48,
-      51,
-      54
+      30
     ]
   }
 ]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ta.test    2018-11-07 15:59:23 +0900 (44b9c8f1d)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "タチツテトッチャチュチョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.expected (+53 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.expected    2018-11-07 15:59:23 +0900 (cfa6b9a80)
@@ -0,0 +1,53 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ヮワヰヱヲン"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "xwawawiwewon",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3
+    ],
+    "offsets": [
+      0,
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/wa.test    2018-11-07 15:59:23 +0900 (9bfab8337)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ヮワヰヱヲン" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.expected (+62 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.expected    2018-11-07 15:59:23 +0900 (811449dc2)
@@ -0,0 +1,62 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ヤユヨャュョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "yayuyoxyaxyuxyo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      -1,
+      3,
+      -1,
+      -1,
+      3,
+      -1,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      9,
+      12,
+      12,
+      12,
+      15,
+      15,
+      15
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/ya.test    2018-11-07 15:59:23 +0900 (d9d065cba)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ヤユヨャュョ" \
+  WITH_CHECKS|WITH_TYPES

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.expected (+65 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.expected    2018-11-07 15:59:23 +0900 (ae1c4bc8a)
@@ -0,0 +1,65 @@
+normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "report_source_offset", true)'   "ザジズゼゾジャジュジョ"   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "zajizuzezojajujo",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha"
+    ],
+    "checks": [
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      3,
+      -1,
+      6,
+      -1,
+      6,
+      -1,
+      6,
+      -1
+    ],
+    "offsets": [
+      0,
+      0,
+      3,
+      3,
+      6,
+      6,
+      9,
+      9,
+      12,
+      12,
+      15,
+      15,
+      21,
+      21,
+      27,
+      27
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/katakana/za.test    2018-11-07 15:59:23 +0900 (2143bdba8)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("unify_to_romaji", true, \
+                     "report_source_offset", true)' \
+  "ザジズゼゾジャジュジョ" \
+  WITH_CHECKS|WITH_TYPES

  Modified: test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected (+1 -1)
===================================================================
--- test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected    2018-11-07 15:25:55 +0900 (67969a6ef)
+++ test/command/suite/normalizers/nfkc100/unify_to_romaji/mixed.expected    2018-11-07 15:59:23 +0900 (8fdba3329)
@@ -6,7 +6,7 @@ normalize   'NormalizerNFKC100("unify_to_romaji", true,                      "re
     0.0
   ],
   {
-    "normalized": "aiuxeozazitachinanipapimamiヽヾ漢字",
+    "normalized": "aiuxeozajitachinanipapimamiヽヾ漢字",
     "types": [
       "alpha",
       "alpha",
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181107/da8e7199/attachment-0001.html>


More information about the Groonga-commit mailing list
アーカイブの一覧に戻る