[vConnect/trunk/stand2.0] add: EncodingConverter
@@ -0,0 +1,50 @@ | ||
1 | +#ifndef TEST_EncodingConverterTest | |
2 | +#define TEST_EncodingConverterTest | |
#include <fstream>
#include <string>

#include <cppunit/extensions/HelperMacros.h>

#include "../EncodingConverter.h"
5 | + | |
6 | +using namespace vconnect; | |
7 | +using namespace std; | |
8 | + | |
9 | +class EncodingConverterTest : public CppUnit::TestFixture | |
10 | +{ | |
11 | +public: | |
12 | + void testConvert() | |
13 | + { | |
14 | + EncodingConverter shiftJISConverter( "Shift_JIS", "UTF-8" ); | |
15 | + string actual = shiftJISConverter.convert( getFixture( "fixture/EncodingConverter/shift_jis.txt" ) ); | |
16 | + string expected = "だ・い・じ・け・ん"; | |
17 | + CPPUNIT_ASSERT_EQUAL( expected, actual ); | |
18 | + | |
19 | + EncodingConverter utf16leConverter( "UTF-16LE", "UTF-8" ); | |
20 | + actual = utf16leConverter.convert( getFixture( "fixture/EncodingConverter/utf16le.txt" ) ); | |
21 | + expected = "社会復帰できなくなっちゃうよ"; | |
22 | + CPPUNIT_ASSERT_EQUAL( expected, actual ); | |
23 | + | |
24 | + EncodingConverter utf32leConverter( "UTF-32LE", "UTF-8" ); | |
25 | + actual = utf32leConverter.convert( getFixture( "fixture/EncodingConverter/utf32le.txt" ) ); | |
26 | + expected = "尾骶骨"; | |
27 | + CPPUNIT_ASSERT_EQUAL( expected, actual ); | |
28 | + } | |
29 | + | |
30 | + /** | |
31 | + * 指定されたパスに置かれたファイルを文字列に読み込む | |
32 | + * @param path 読み込むファイルのパス | |
33 | + * @return 読み込んだデータ | |
34 | + */ | |
35 | + string getFixture( string path ) | |
36 | + { | |
37 | + ifstream stream( path.c_str() ); | |
38 | + string result; | |
39 | + stream >> result; | |
40 | + stream.close(); | |
41 | + return result; | |
42 | + } | |
43 | + | |
44 | + CPPUNIT_TEST_SUITE( EncodingConverterTest ); | |
45 | + CPPUNIT_TEST( testConvert ); | |
46 | + CPPUNIT_TEST_SUITE_END(); | |
47 | +}; | |
48 | + | |
49 | +CPPUNIT_TEST_SUITE_REGISTRATION( EncodingConverterTest ); | |
50 | +#endif |
@@ -0,0 +1 @@ | ||
1 | +だ・い・じ・け・ん |
@@ -0,0 +1 @@ | ||
1 | +>yOゥ_0^g0M0j0O0j0c0a0?F0? | |
\ No newline at end of file |
@@ -0,0 +1,24 @@ | ||
1 | +#include <cppunit/TestRunner.h> | |
2 | +#include <cppunit/TestResult.h> | |
3 | +#include <cppunit/TestResultCollector.h> | |
4 | +#include <cppunit/extensions/HelperMacros.h> | |
5 | +#include <cppunit/BriefTestProgressListener.h> | |
6 | +#include <cppunit/extensions/TestFactoryRegistry.h> | |
7 | + | |
8 | +#include "EncodingConverterTest.h" | |
9 | + | |
10 | +int main( int argc, char* argv[] ) | |
11 | +{ | |
12 | + CppUnit::TestResult controller; | |
13 | + CppUnit::TestResultCollector results; | |
14 | + controller.addListener( &results ); | |
15 | + | |
16 | + CppUnit::BriefTestProgressListener progress; | |
17 | + controller.addListener( &progress ); | |
18 | + | |
19 | + CppUnit::TestRunner runner; | |
20 | + runner.addTest( CppUnit::TestFactoryRegistry::getRegistry().makeTest() ); | |
21 | + runner.run( controller ); | |
22 | + | |
23 | + return results.wasSuccessful() ? 0 : 1; | |
24 | +} |
@@ -0,0 +1,3 @@ | ||
# Build the CppUnit test runner and execute it.
#
# `all` names an action, not a file. Declaring it phony makes sure the tests
# are rebuilt and run even if a file called `all` exists in this directory.
.PHONY: all
all: *.cpp *.h
	g++ AllTests.cpp ../../libiconv-1.13/lib/*.o -lcppunit -o run
	./run
@@ -0,0 +1,154 @@ | ||
1 | +#include "stand.h" | |
2 | + | |
3 | +using namespace std; | |
4 | + | |
namespace vconnect
{
    /**
     * Converts byte strings from one character encoding to another with iconv.
     *
     * The conversion descriptor is opened in the constructor and closed in the
     * destructor.
     *
     * NOTE(review): the class holds a raw iconv_t and defines a destructor but
     * no copy operations, so copying an instance would close the descriptor
     * twice. Instances should not be copied.
     */
    class EncodingConverter
    {
    private:
        /** iconv conversion descriptor; equals (iconv_t)-1 when iconv_open failed. */
        iconv_t converter;

    public:
        /**
         * Opens a converter.
         *
         * @param from name of the source encoding
         * @param to name of the destination encoding
         */
        EncodingConverter( const char *from, const char *to )
        {
            // iconv_open takes the destination encoding first.
            this->converter = iconv_open( to, from );
        }

        ~EncodingConverter()
        {
            // Only a successfully opened descriptor may be passed to iconv_close.
            if( isValidConverter( this->converter ) ){
                iconv_close( this->converter );
            }
        }

        /**
         * Converts a byte string from the source to the destination encoding.
         *
         * Error handling is best-effort and never throws on bad input:
         *  - a byte that starts an invalid sequence is copied through as-is
         *    and conversion resumes at the next byte;
         *  - an incomplete multibyte sequence at the end of the input is dropped;
         *  - on any other iconv error the remaining input is appended unconverted;
         *  - if the converter could not be opened, the source is returned unchanged.
         *
         * @param source bytes in the source encoding (may contain NUL bytes)
         * @return the converted bytes
         */
        std::string convert( std::string source )
        {
            if( false == isValidConverter( this->converter ) ){
                return source;
            }

            std::string result;
            // source is a private by-value copy, so handing iconv a mutable pointer is safe.
            char *input = const_cast<char *>( source.c_str() );
            size_t remainingInputBytes = source.size();

            // Fixed-size scratch buffer. It must not be sized from the input:
            // a character may need more output bytes than input bytes, and a
            // buffer too small for a single character would report E2BIG forever.
            char buffer[4096];
            const size_t bufferSize = sizeof( buffer );

            while( remainingInputBytes > 0 ){
                char *output = buffer;
                size_t remainingOutputBytes = bufferSize;
                const size_t n = iconv( this->converter, &input, &remainingInputBytes, &output, &remainingOutputBytes );
                // errno is only meaningful when iconv reported a failure.
                const int error = (n == static_cast<size_t>( -1 )) ? errno : 0;

                // Keep whatever was produced before iconv stopped. The
                // (pointer, count) overload is required: the buffer is not
                // NUL-terminated and may legitimately contain NUL bytes.
                result.append( buffer, bufferSize - remainingOutputBytes );

                if( error == 0 ){
                    // Success: the whole input has been consumed.
                    break;
                }
                if( error == E2BIG ){
                    // Output buffer full; it has been flushed above, so continue.
                    continue;
                }
                if( error == EILSEQ ){
                    // Invalid sequence: pass the offending byte through and move on.
                    result.append( input, 1 );
                    input++;
                    remainingInputBytes--;
                    continue;
                }
                if( error == EINVAL ){
                    // Incomplete multibyte sequence at the end of the input: drop it.
                    break;
                }
                // Unexpected error: give up converting and keep the rest verbatim.
                result.append( input, remainingInputBytes );
                break;
            }

            // Flush any pending shift sequence and reset the descriptor to its
            // initial state so the converter can be reused.
            char *output = buffer;
            size_t remainingOutputBytes = bufferSize;
            if( iconv( this->converter, NULL, NULL, &output, &remainingOutputBytes ) != static_cast<size_t>( -1 ) ){
                result.append( buffer, bufferSize - remainingOutputBytes );
            }
            return result;
        }

    private:
        /** Not implemented: a converter always needs its two encodings. */
        EncodingConverter();

        /**
         * Maps a numeric codepage identifier to an iconv charset name.
         *
         * @param codepage codepage number
         * @return charset name, or an empty string for an unknown codepage
         */
        static const char *getCharsetFromCodepage( unsigned int codepage )
        {
            switch( codepage ){
                case 932:
                    return "Shift_JIS";
                case 51932:
                    return "euc-jp";
                case 50220:
                    return "iso-2022-jp";
                case 1208:
                case 1209:
                    return "UTF-8";
                case 1202:
                case 1203:
                    return "UTF-16LE";
                case 1200:
                case 1201:
                    return "UTF-16BE";
                case 1204:
                case 1205:
                    return "UTF-16";
                case 1234:
                case 1235:
                    return "UTF-32LE";
                case 1232:
                case 1233:
                    return "UTF-32BE";
                case 1236:
                case 1237:
                    return "UTF-32";
            }
            return "";
        }

        /**
         * @param converter descriptor returned by iconv_open
         * @return false when the descriptor is the iconv_open failure value
         */
        static bool isValidConverter( iconv_t converter )
        {
            const iconv_t invalid = (iconv_t) - 1;
            return converter != invalid;
        }

        /**
         * Checks whether a conversion between two encodings can be opened.
         * The probe descriptor is closed again when it was opened.
         */
        static bool canOpen( const char *to, const char *from )
        {
            const iconv_t probe = iconv_open( to, from );
            if( false == isValidConverter( probe ) ){
                // Nothing was opened, so there is nothing to close.
                return false;
            }
            iconv_close( probe );
            return true;
        }

        /**
         * Checks whether text in the given codepage can be converted to and
         * from UTF-8.
         *
         * @param codepage codepage number
         * @return true when both conversion directions can be opened
         */
        static bool isValidEncoding( unsigned int codepage )
        {
            // First check that UTF-8 itself is usable.
            if( false == canOpen( "UTF-8", "UTF-8" ) ){
                return false;
            }

            const char *charset_name = getCharsetFromCodepage( codepage );
            if( charset_name[0] == '\0' ){
                // Unknown codepage. Do not pass "" to iconv_open: some iconv
                // implementations treat an empty name as the locale's encoding.
                return false;
            }

            return canOpen( "UTF-8", charset_name ) && canOpen( charset_name, "UTF-8" );
        }
    };
}