Revision: 9207 https://osdn.net/projects/ttssh2/scm/svn/commits/9207 Author: nmaya Date: 2021-04-13 07:53:32 +0900 (Tue, 13 Apr 2021) Log Message: ----------- 文字コード関連のスクリプトを teraterm/teraterm/unicode に移動・説明ファイルを追加 branches/move_code_script から trunk へのマージ r9189 相当 Revision Links: -------------- https://osdn.net/projects/ttssh2/scm/svn/commits/9189 Added Paths: ----------- branches/4-stable/teraterm/teraterm/unicode/ branches/4-stable/teraterm/teraterm/unicode/conv_combining.md branches/4-stable/teraterm/teraterm/unicode/conv_combining.pl branches/4-stable/teraterm/teraterm/unicode/conv_sjis.md branches/4-stable/teraterm/teraterm/unicode/conv_sjis.pl branches/4-stable/teraterm/teraterm/unicode/conv_uni.md branches/4-stable/teraterm/teraterm/unicode/conv_uni.pl branches/4-stable/teraterm/teraterm/unicode/readme.md Removed Paths: ------------- branches/4-stable/installer/conv.pl branches/4-stable/installer/hfs_conv.pl branches/4-stable/installer/rev_conv.pl -------------- next part -------------- Deleted: branches/4-stable/installer/conv.pl =================================================================== --- branches/4-stable/installer/conv.pl 2021-04-12 14:16:18 UTC (rev 9206) +++ branches/4-stable/installer/conv.pl 2021-04-12 22:53:32 UTC (rev 9207) @@ -1,34 +0,0 @@ - -# -# Unicodeをキーとして昇順にソートする -# - -$file = 'SHIFTJIS_TXT.htm'; - -&read_mapfile($file); -exit(); - -sub read_mapfile { - my($file) = @_; - my(%table, $val, $key); - - open(FP, $file) || die "error"; - while ($line = <FP>) { - if ($line =~ /^\#/) {next;} - if ($line =~ m+^\/+) {next;} - if ($line =~ m+^\<+) {next;} - $line =~ s/^\s+//; - @column = split(/\s+/, $line); - - $val = int(hex($column[1])); # Unicode -# print "$column[0] -> $column[1] ($val)\n"; - $table{$val} = hex($column[0]); -# printf "%d => %x\n", $val, $table{$val}; - } - close(FP); - - foreach $key (sort {$a <=> $b} keys %table) { - printf " { 0x%04X, 0x%04X },\n", $key, 
$table{$key}; - } -} - Deleted: branches/4-stable/installer/hfs_conv.pl =================================================================== --- branches/4-stable/installer/hfs_conv.pl 2021-04-12 14:16:18 UTC (rev 9206) +++ branches/4-stable/installer/hfs_conv.pl 2021-04-12 22:53:32 UTC (rev 9207) @@ -1,83 +0,0 @@ -# -# Unicodeをキーとして昇順にソートする -# -# >perl hfs_conv.pl > hfs_plus.map - -$file = 'UNICODE DECOMPOSITION TABLE.htm'; - -print <<EOD; -typedef struct hfsplus_codemap { - unsigned short illegal_code; - unsigned short first_code; - unsigned short second_code; -} hfsplus_codemap_t; - -/* - * cf. http://developer.apple.com/technotes/tn/tn1150table.html - * - */ -static hfsplus_codemap_t mapHFSPlusUnicode[] = { -EOD - -&read_mapfile($file); - -print <<EOD; -}; -EOD - -exit(); - -sub read_mapfile { - my($file) = @_; - my(%table, $val, $key); - - $illegal = ''; - $first = ''; - $second = ''; - - open(FP, $file) || die "error"; - while ($line = <FP>) { -# print "$line\n"; - if ($line =~ m|<P>0x(....)</P>|) { - $illegal = hex($1); - } - if ($line =~ m|<P>0x(....) 
0x(....)</P>|) { - $first = hex($1); - $second = hex($2); - - push(@key, "$illegal,$first,$second"); - - # printf " {0x%04X, 0x%04X, 0x%04X},\n", $illegal, $first, $second; - } - } - close(FP); - - # $firstをキーとしてソートする - $k = @key - 1; - while ($k >= 0) { - $j = -1; - for ($i = 1; $i <= $k; $i++) { - @m = split(/,/, $key[$i-1]); - @n = split(/,/, $key[$i]); - if ($m[1] > $n[1]) { - $j = $i - 1; - $t = $key[$j]; - $key[$j] = $key[$i]; - $key[$i] = $t; - } - } - $k = $j; - } - - foreach $s (@key) { - @n = split(/,/, $s); - printf " {0x%04X, 0x%04X, 0x%04X},\n", $n[0], $n[1], $n[2]; - } - -# foreach $key (sort {$a <=> $b} keys %table) { -# @n = split(/,/, $table{$key}); -# printf " {0x%04X, 0x%04X, 0x%04X},\n", $n[0], $key, $n[1]; -# } - -} - Deleted: branches/4-stable/installer/rev_conv.pl =================================================================== --- branches/4-stable/installer/rev_conv.pl 2021-04-12 14:16:18 UTC (rev 9206) +++ branches/4-stable/installer/rev_conv.pl 2021-04-12 22:53:32 UTC (rev 9207) @@ -1,34 +0,0 @@ - -# -# Shift_JISをキーとして昇順に出力する -# - -$file = 'SHIFTJIS_TXT.htm'; - -&read_mapfile($file); -exit(); - -sub read_mapfile { - my($file) = @_; - my(%table, $val, $key); - - open(FP, $file) || die "error"; - while ($line = <FP>) { - if ($line =~ /^\#/) {next;} - if ($line =~ m+^\/+) {next;} - if ($line =~ m+^\<+) {next;} - $line =~ s/^\s+//; - @column = split(/\s+/, $line); - - $val = int(hex($column[0])); # Unicode -# print "$column[0] -> $column[1] ($val)\n"; - $table{$val} = hex($column[1]); -# printf "%d => %x\n", $val, $table{$val}; - } - close(FP); - - foreach $key (sort {$a <=> $b} keys %table) { - printf " { 0x%04X, 0x%04X },\n", $key, $table{$key}; - } -} - Added: branches/4-stable/teraterm/teraterm/unicode/conv_combining.md =================================================================== --- 
branches/4-stable/teraterm/teraterm/unicode/conv_combining.md (rev 0) +++ branches/4-stable/teraterm/teraterm/unicode/conv_combining.md 2021-04-12 22:53:32 UTC (rev 9207) @@ -0,0 +1,28 @@ +# conv_combining.pl について + +- hfs_plus.map (現在は uni_combining.map) を出力するために作成された +- conv_combining.pl の前は installer/hfs_conv.pl だった +- uni_combining.map は直接人手で修正している + - https://ja.osdn.net/cvs/view/ttssh2/teraterm/source/teraterm/hfs_plus.map?hideattic=0&view=log + - https://ja.osdn.net/projects/ttssh2/svn/view/trunk/teraterm/teraterm/uni_combining.map?root=ttssh2&view=log + - r6514 にて hfs_plus.map -> uni_combining.map +- 現在はこのスクリプトの出力をそのまま使用していない + +## 入力ファイル + +- 現在の入手先 + - https://developer.apple.com/library/archive/technotes/tn/tn1150table.html + +- 以前 + - スクリプト作成当時 + - UNICODE DECOMPOSITION TABLE.htm + - 移動 + - http://developer.apple.com/technotes/tn/tn1150table.html + +## 使い方 + +``` +wget https://developer.apple.com/library/archive/technotes/tn/tn1150table.html +mv tn1150table.html "UNICODE DECOMPOSITION TABLE.htm" +perl conv_combining.pl > uni_combining.map +``` Copied: branches/4-stable/teraterm/teraterm/unicode/conv_combining.pl (from rev 9204, branches/4-stable/installer/hfs_conv.pl) =================================================================== --- branches/4-stable/teraterm/teraterm/unicode/conv_combining.pl (rev 0) +++ branches/4-stable/teraterm/teraterm/unicode/conv_combining.pl 2021-04-12 22:53:32 UTC (rev 9207) @@ -0,0 +1,83 @@ +# +# Unicodeをキーとして昇順にソートする +# +# >perl hfs_conv.pl > hfs_plus.map + +$file = 'UNICODE DECOMPOSITION TABLE.htm'; + +print <<EOD; +typedef struct hfsplus_codemap { + unsigned short illegal_code; + unsigned short first_code; + unsigned short second_code; +} hfsplus_codemap_t; + +/* + * cf. 
http://developer.apple.com/technotes/tn/tn1150table.html + * + */ +static hfsplus_codemap_t mapHFSPlusUnicode[] = { +EOD + +&read_mapfile($file); + +print <<EOD; +}; +EOD + +exit(); + +sub read_mapfile { + my($file) = @_; + my(%table, $val, $key); + + $illegal = ''; + $first = ''; + $second = ''; + + open(FP, $file) || die "error"; + while ($line = <FP>) { +# print "$line\n"; + if ($line =~ m|<P>0x(....)</P>|) { + $illegal = hex($1); + } + if ($line =~ m|<P>0x(....) 0x(....)</P>|) { + $first = hex($1); + $second = hex($2); + + push(@key, "$illegal,$first,$second"); + + # printf " {0x%04X, 0x%04X, 0x%04X},\n", $illegal, $first, $second; + } + } + close(FP); + + # $firstをキーとしてソートする + $k = @key - 1; + while ($k >= 0) { + $j = -1; + for ($i = 1; $i <= $k; $i++) { + @m = split(/,/, $key[$i-1]); + @n = split(/,/, $key[$i]); + if ($m[1] > $n[1]) { + $j = $i - 1; + $t = $key[$j]; + $key[$j] = $key[$i]; + $key[$i] = $t; + } + } + $k = $j; + } + + foreach $s (@key) { + @n = split(/,/, $s); + printf " {0x%04X, 0x%04X, 0x%04X},\n", $n[0], $n[1], $n[2]; + } + +# foreach $key (sort {$a <=> $b} keys %table) { +# @n = split(/,/, $table{$key}); +# printf " {0x%04X, 0x%04X, 0x%04X},\n", $n[0], $key, $n[1]; +# } + +} + Added: branches/4-stable/teraterm/teraterm/unicode/conv_sjis.md =================================================================== --- branches/4-stable/teraterm/teraterm/unicode/conv_sjis.md (rev 0) +++ branches/4-stable/teraterm/teraterm/unicode/conv_sjis.md 2021-04-12 22:53:32 UTC (rev 9207) @@ -0,0 +1,22 @@ +# conv_sjis.pl について + +- uni2sjis.map を作るために作成された +- conv_sjis.pl の前は installer/conv.pl だった +- uni2sjis.map は直接人手で修正している + - https://ja.osdn.net/cvs/view/ttssh2/teraterm/source/teraterm/uni2sjis.map?hideattic=0&view=log + - https://ja.osdn.net/projects/ttssh2/svn/view/trunk/teraterm/teraterm/uni2sjis.map?root=ttssh2&view=log +- 現在はこのスクリプトの出力をそのまま使用していない + +## 入力ファイル + +- 最新はこれか? 
+ - ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT +- 作成当時 + - SHIFTJIS_TXT.htm + +## 使い方 + +``` +wget ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT +perl conv_sjis.pl > uni2sjis.map +``` Copied: branches/4-stable/teraterm/teraterm/unicode/conv_sjis.pl (from rev 9204, branches/4-stable/installer/conv.pl) =================================================================== --- branches/4-stable/teraterm/teraterm/unicode/conv_sjis.pl (rev 0) +++ branches/4-stable/teraterm/teraterm/unicode/conv_sjis.pl 2021-04-12 22:53:32 UTC (rev 9207) @@ -0,0 +1,34 @@ + +# +# Unicodeをキーとして昇順にソートする +# + +$file = 'SHIFTJIS_TXT.htm'; + +&read_mapfile($file); +exit(); + +sub read_mapfile { + my($file) = @_; + my(%table, $val, $key); + + open(FP, $file) || die "error"; + while ($line = <FP>) { + if ($line =~ /^\#/) {next;} + if ($line =~ m+^\/+) {next;} + if ($line =~ m+^\<+) {next;} + $line =~ s/^\s+//; + @column = split(/\s+/, $line); + + $val = int(hex($column[1])); # Unicode +# print "$column[0] -> $column[1] ($val)\n"; + $table{$val} = hex($column[0]); +# printf "%d => %x\n", $val, $table{$val}; + } + close(FP); + + foreach $key (sort {$a <=> $b} keys %table) { + printf " { 0x%04X, 0x%04X },\n", $key, $table{$key}; + } +} + Added: branches/4-stable/teraterm/teraterm/unicode/conv_uni.md =================================================================== --- branches/4-stable/teraterm/teraterm/unicode/conv_uni.md (rev 0) +++ branches/4-stable/teraterm/teraterm/unicode/conv_uni.md 2021-04-12 22:53:32 UTC (rev 9207) @@ -0,0 +1,19 @@ +# conv_uni.pl について + +- sjis2uni.map を作るために作成されたと思われる +- conv_uni.pl の前は installer/rev_conv.pl だった +- sjis2uni.map はソースツリーに存在しないし、使用されていない + +## 入力ファイル + +- 最新はこれか? 
+ - ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT +- 作成当時 + - SHIFTJIS_TXT.htm + +## 使い方 + +``` +wget ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT +perl conv_uni.pl > sjis2uni.map +``` Copied: branches/4-stable/teraterm/teraterm/unicode/conv_uni.pl (from rev 9204, branches/4-stable/installer/rev_conv.pl) =================================================================== --- branches/4-stable/teraterm/teraterm/unicode/conv_uni.pl (rev 0) +++ branches/4-stable/teraterm/teraterm/unicode/conv_uni.pl 2021-04-12 22:53:32 UTC (rev 9207) @@ -0,0 +1,34 @@ + +# +# Shift_JISをキーとして昇順に出力する +# + +$file = 'SHIFTJIS_TXT.htm'; + +&read_mapfile($file); +exit(); + +sub read_mapfile { + my($file) = @_; + my(%table, $val, $key); + + open(FP, $file) || die "error"; + while ($line = <FP>) { + if ($line =~ /^\#/) {next;} + if ($line =~ m+^\/+) {next;} + if ($line =~ m+^\<+) {next;} + $line =~ s/^\s+//; + @column = split(/\s+/, $line); + + $val = int(hex($column[0])); # Unicode +# print "$column[0] -> $column[1] ($val)\n"; + $table{$val} = hex($column[1]); +# printf "%d => %x\n", $val, $table{$val}; + } + close(FP); + + foreach $key (sort {$a <=> $b} keys %table) { + printf " { 0x%04X, 0x%04X },\n", $key, $table{$key}; + } +} + Added: branches/4-stable/teraterm/teraterm/unicode/readme.md =================================================================== --- branches/4-stable/teraterm/teraterm/unicode/readme.md (rev 0) +++ branches/4-stable/teraterm/teraterm/unicode/readme.md 2021-04-12 22:53:32 UTC (rev 9207) @@ -0,0 +1,29 @@ +# 文字コード関連のマップファイル,スクリプトファイルについて + +## [uni_combining.map](../uni_combining.map) + +- NFDで分解されている文字を結合するために使用 +``` + // WideCharToMultiByte() では結合処理は行われない + // 自力で結合処理を行う。ただし、最初の2文字だけ + // 例1: + // U+307B(ほ) + U+309A(゜) は + // Shift jis の 0x82d9(ほ) と 0x814b(゜) に変換され + // 0x82db(ぽ) には変換されない + // 予め U+307D(ぽ)に正規化しておく +``` +- 
[conv_combining.md](conv_combining.md) + +## [uni2sjis.map](../uni2sjis.map) + +- UnicodeからShift JISに変換するために使用 +- [conv_sjis.md](conv_sjis.md) + +## sjis2uni.map はソースツリーに存在しない + +- Shift JISからUnicodeに変換できると思われるが、使用されていない +- [conv_uni.md](conv_uni.md) + +## [unisym2decsp.map](../unisym2decsp.map) + +- UnicodeからDEC special文字コードに変換するために使用