• R/O
  • SSH
  • HTTPS

qrobosdk: コミット


コミットメタ情報

リビジョン: 1785 (tree)
日時: 2010-04-17 23:34:28
作者: satofumi

ログメッセージ

テストを実装

変更サマリ

差分

--- trunk/libs/input/TestConvertToJp.cpp (revision 1784)
+++ trunk/libs/input/TestConvertToJp.cpp (revision 1785)
@@ -9,6 +9,7 @@
99
1010 #include "TestConvertToJp.h"
1111 #include "convertToJp.h"
12+#include "Utf8.h"
1213
1314 using namespace qrk;
1415 using namespace std;
@@ -20,13 +21,46 @@
2021 // ローマ字変換のテスト
2122 void TestConvertToJp::romanConvertTest(void)
2223 {
23- CPPUNIT_ASSERT_EQUAL(string("あ"), convertToJp("a"));
24- CPPUNIT_ASSERT_EQUAL(string("あさ"), convertToJp("asa"));
25- CPPUNIT_ASSERT_EQUAL(string("あsy"), convertToJp("asy"));
26- CPPUNIT_ASSERT_EQUAL(string("あしゃ"), convertToJp("asya"));
27- CPPUNIT_ASSERT_EQUAL(string("ぁ"), convertToJp("xa"));
28- CPPUNIT_ASSERT_EQUAL(string("cば"), convertToJp("cba"));
29- CPPUNIT_ASSERT_EQUAL(string("cか"), convertToJp("kka"));
30- CPPUNIT_ASSERT_EQUAL(string("し"), convertToJp("shi"));
31- CPPUNIT_ASSERT_EQUAL(string("し"), convertToJp("si"));
24+ Utf8 actual = Utf8(convertToJp("a"));
25+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), actual.size());
26+ CPPUNIT_ASSERT_EQUAL(Utf8("あ")[0], actual[0]);
27+
28+ actual = Utf8(convertToJp("asa"));
29+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
30+ CPPUNIT_ASSERT_EQUAL(Utf8("あ")[0], actual[0]);
31+ CPPUNIT_ASSERT_EQUAL(Utf8("さ")[0], actual[1]);
32+
33+ actual = Utf8(convertToJp("asy"));
34+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
35+ CPPUNIT_ASSERT_EQUAL(Utf8("あ")[0], actual[0]);
36+ CPPUNIT_ASSERT_EQUAL(Utf8("s")[0], actual[1]);
37+ CPPUNIT_ASSERT_EQUAL(Utf8("y")[0], actual[2]);
38+
39+ actual = Utf8(convertToJp("asya"));
40+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
41+ CPPUNIT_ASSERT_EQUAL(Utf8("あ")[0], actual[0]);
42+ CPPUNIT_ASSERT_EQUAL(Utf8("し")[0], actual[1]);
43+ CPPUNIT_ASSERT_EQUAL(Utf8("ゃ")[0], actual[2]);
44+
45+ actual = Utf8(convertToJp("xa"));
46+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), actual.size());
47+ CPPUNIT_ASSERT_EQUAL(Utf8("ぁ")[0], actual[0]);
48+
49+ actual = Utf8(convertToJp("cba"));
50+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
51+ CPPUNIT_ASSERT_EQUAL(Utf8("c")[0], actual[0]);
52+ CPPUNIT_ASSERT_EQUAL(Utf8("ば")[0], actual[1]);
53+
54+ actual = Utf8(convertToJp("kka"));
55+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
56+ CPPUNIT_ASSERT_EQUAL(Utf8("c")[0], actual[0]);
57+ CPPUNIT_ASSERT_EQUAL(Utf8("か")[0], actual[1]);
58+
59+ actual = Utf8(convertToJp("shi"));
60+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), actual.size());
61+ CPPUNIT_ASSERT_EQUAL(Utf8("し")[0], actual[0]);
62+
63+ actual = Utf8(convertToJp("si"));
64+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), actual.size());
65+ CPPUNIT_ASSERT_EQUAL(Utf8("し")[0], actual[0]);
3266 }
--- trunk/libs/input/convertToJp.cpp (revision 1784)
+++ trunk/libs/input/convertToJp.cpp (revision 1785)
@@ -10,14 +10,89 @@
1010 #include "convertToJp.h"
1111 #include "roman_table.h"
1212 #include "kana_table.h"
13+#include <vector>
14+#include <cstring>
1315
1416 using namespace qrk;
17+using namespace std;
1518
1619
17-std::string qrk::convertToJp(const char* raw_text)
20+namespace
1821 {
19- (void)raw_text;
20- // !!!
22+ // !!! convertToRoman() 内の関数と共通にすべき
23+ size_t short_strlen(const unsigned short* text)
24+ {
25+ size_t size = 0;
26+ while (text[size] != 0x0) {
27+ ++size;
28+ }
29+ return size;
30+ }
31+}
2132
22- return "";
33+
34+std::string qrk::convertToJp(const char* raw_text, bool is_roman)
35+{
36+ //unsigned short[][2][ROMAN_CONVERT_SIZE_MAX] table
37+ const unsigned short* table =
38+ (is_roman) ? &RomanTable[0][0][0] : &KanaTable[0][0][0];
39+ const int pattern_max_size = (is_roman) ?
40+ static_cast<int>(ROMAN_CONVERT_SIZE_MAX) : KANA_CONVERT_SIZE_MAX;
41+
42+ vector<unsigned short> converted;
43+ size_t raw_text_size = strlen(raw_text);
44+ // !!! copy() を使うべき
45+ for (size_t i = 0; i < raw_text_size; ++i) {
46+ converted.push_back(raw_text[i]);
47+ }
48+
49+ // 先頭文字から比較と変換を行う
50+ for (size_t match_start = 0; match_start < converted.size(); ++match_start) {
51+ for (const unsigned short* pattern = table;
52+ *pattern != 0x0; pattern += pattern_max_size) {
53+
54+ bool matched = true;
55+ int match_index = 0;
56+ for (; *(pattern + match_index) != 0x0; ++match_index) {
57+
58+ unsigned short ch = *(table + match_index);
59+ if (ch != converted[match_start + match_index]) {
60+ matched = false;
61+ break;
62+ }
63+ }
64+
65+ // パターンが一致していたら、置換する
66+ if (matched) {
67+ // 変換後にデータが縮む場合は、あらかじめ縮小しておく
68+ size_t matched_size = match_index;
69+ size_t replaced_size = short_strlen(pattern + pattern_max_size);
70+
71+ vector<unsigned short>::iterator it = converted.begin() + match_start;
72+ for (size_t i = 0; i < (matched_size - replaced_size); ++i) {
73+ it = converted.erase(it);
74+ }
75+
76+ // 置換する
77+ for (size_t i = 0; i < replaced_size; ++i) {
78+ converted[match_start + i] = *(pattern + pattern_max_size + i);
79+ }
80+ break;
81+ }
82+ }
83+ }
84+
85+ string converted_string;
86+ size_t n = converted.size();
87+ for (size_t i = 0; i < n; ++i) {
88+ const unsigned short ch = converted[i];
89+ if (ch & 0xff00) {
90+ converted_string.push_back(0xe3);
91+ converted_string.push_back((ch >> 8) & 0xff);
92+ converted_string.push_back(ch & 0xff);
93+ } else {
94+ converted_string.push_back(ch);
95+ }
96+ }
97+ return converted_string;
2398 }
--- trunk/libs/input/convertToRoman.cpp (revision 1784)
+++ trunk/libs/input/convertToRoman.cpp (revision 1785)
@@ -33,13 +33,11 @@
3333 Utf8 input(jp_text);
3434 size_t input_size = input.size();
3535
36- //fprintf(stderr, "\n");
3736 vector<unsigned short> converted;
37+ // !!! copy() を使うべき
3838 for (size_t i = 0; i < input_size; ++i) {
39- //fprintf(stderr, " %x[%d],", input[i], i);
4039 converted.push_back(input[i]);
4140 }
42- //fprintf(stderr, "\n");
4341
4442 for (int match_start = converted.size() - 1;
4543 match_start >= 0; --match_start) {
@@ -46,8 +44,8 @@
4644 for (int pattern_index = 0;
4745 RomanTable[pattern_index][1][0] != 0x0; ++pattern_index) {
4846
47+ bool matched = true;
4948 int match_index = 0;
50- bool matched = true;
5149 for (; RomanTable[pattern_index][1][match_index] != 0x0; ++match_index) {
5250 unsigned short ch = RomanTable[pattern_index][1][match_index];
5351 if (ch != converted[match_start + match_index]) {
@@ -54,21 +52,14 @@
5452 matched = false;
5553 break;
5654 }
57-
58- //fprintf(stderr, " [(%d, %d, %d), (%d), %x, %x],", match_start, pattern_index, match_index, match_start + match_index, ch, converted[match_start + match_index]);
5955 }
60- //fprintf(stderr, "\n");
6156
6257 // パターンが一致していたら、置換する
6358 if (matched) {
64-
59+ // nba -> んば のように置換後に伸びる場合は、置換前に伸長しておく
6560 size_t matched_size = match_index;
66- //fprintf(stderr, " matched !\n");
67- // nba -> んば のようにパターンが伸びる場合は、置換前に伸長しておく
6861 size_t replaced_size = short_strlen(&RomanTable[pattern_index][0][0]);
6962
70- //fprintf(stderr, "[%d, %d], ", matched_size, replaced_size);
71-
7263 vector<unsigned short>::iterator it = converted.begin() + match_start;
7364 for (size_t i = 0; i < (replaced_size - matched_size); ++i) {
7465 converted.insert(it, 0x0);
@@ -78,16 +69,7 @@
7869 for (size_t i = 0; i < replaced_size; ++i) {
7970 converted[match_start + i] = RomanTable[pattern_index][0][i];
8071 }
81-
82- string converted_string;
83- size_t n = converted.size();
84- for (size_t i = 0; i < n; ++i) {
85- converted_string.push_back(converted[i]);
86- }
87- //fprintf(stderr, "[%s]", converted_string.c_str());
88-
89- // nba -> んば、といった変換用のために、1文字分戻る
90- // !!!
72+ break;
9173 }
9274 }
9375 }
--- trunk/libs/input/TestConvertToJp.h (revision 1784)
+++ trunk/libs/input/TestConvertToJp.h (revision 1785)
@@ -9,6 +9,7 @@
99
1010 $Id$
1111
12+ \todo 実装する
1213 \todo かな入力のテストを追加する
1314 */
1415
--- trunk/libs/input/convertToJp.h (revision 1784)
+++ trunk/libs/input/convertToJp.h (revision 1785)
@@ -16,7 +16,7 @@
1616 namespace qrk
1717 {
1818 //! 日本語への変換
19- extern std::string convertToJp(const char* raw_text);
19+ extern std::string convertToJp(const char* raw_text, bool is_roman = true);
2020 }
2121
2222 #endif /* !QRK_CONVERT_TO_JP_H */
旧リポジトリブラウザで表示