• R/O
  • SSH
  • HTTPS

zerochplus: Commit


Commit MetaInfo

Revisão: 538 (tree)
Hora: 2014-09-29 17:37:53
Autor: iroiro

Mensagem de Log

triptest
・文字コードテスト

Sumário das Mudanças

Diff

--- triptest/trunk/testenc.pl (nonexistent)
+++ triptest/trunk/testenc.pl (revision 538)
@@ -0,0 +1,102 @@
1+use utf8;
2+use strict;
3+use Encode;
4+
5+sub main {
6+ # 31 32 33 34 : 1234 / 1234 / 1234
7+ test('31 32 33 34', '31 32 33 34'); # as ascii
8+ # E3 81 82 E3 81 84 : あい / 縺ゅ> / ?や?
9+ test('E3 81 82 E3 81 84', '82 A0 82 A2'); # from utf8 to sjis
10+ # EF BD B1 EF BD B2 : アイ / ?ア?イ / 鐔縁讐
11+ test('EF BD B1 EF BD B2', 'E8 5C 89 8F 8F 51'); # from eucjp to sjis
12+
13+ # E4 BF A1 E8 BB BD : 信軽 / 菫。霆ス / 篆∴蚕
14+ test('E4 BF A1 E8 BB BD', '90 4D 8C 79'); # from utf8 to sjis
15+ test('E8 8F AB EF BD A1 E9 9C 86 EF BD BD', 'E4 BF A1 E8 BB BD'); # from utf8 to sjis
16+ test('E7 AF 86 E2 88 B4 E8 9A 95', 'E2 BD 81 88 8E 5C'); # from utf8 to sjis
17+ # EB A7 A8 E3 B9 AD : 맨㹭 / ?ィ羯ュ / 襷?広
18+ test('EB A7 A8 E3 B9 AD', '3F A8 E3 B9 AD'); # as sjis
19+ # E5 AD A5 EB B0 B4 : 孥밴 / 蟄??エ / 絖ル梓
20+ test('E5 AD A5 EB B0 B4', 'E3 4C 83 8B 88 B2'); # from eucjp to sjis
21+ test('E7 B5 96 E3 83 AB E6 A2 93', 'E3 4C 83 8B 88 B2'); # from utf8 to sjis
22+ # E5 B3 B1 E3 A2 BE : 峱㢾 / 蟲ア罎セ / 絣宴⊂
23+ test('E5 B3 B1 E3 A2 BE', 'E3 52 89 83 81 BC'); # from eucjp to sjis
24+ test('E8 9F B2 EF BD B1 E7 BD 8E EF BD BE', 'E5 B3 B1 E3 A2 BE'); # from utf8 to sjis
25+ test('E7 B5 A3 E5 AE B4 E2 8A 82', 'E3 52 89 83 81 BC'); # from utf8 to sjis
26+ # E7 BC BA E5 BF B3 : 缺忳 / 郛コ蠢ウ / 膽阪審
27+ test('E7 BC BA E5 BF B3', 'E7 BC BA E5 BF B3'); # as utf8
28+ test('E9 83 9B EF BD BA E8 A0 A2 EF BD B3', 'E7 BC BA E5 BF B3'); # from utf8 to sjis
29+ test('E8 86 BD E9 98 AA E5 AF A9', 'E4 5B 8D E3 90 52'); # from utf8 to sjis
30+ # E6 AD A3 E8 B9 AE : 正蹮 / 豁」雹ョ / 罩h庚
31+ test('E6 AD A3 E8 B9 AE', 'E6 AD A3 E8 B9 AE'); # as utf8
32+ test('E8 B1 81 EF BD A3 E9 9B B9 EF BD AE', 'E6 AD A3 E8 B9 AE'); # from utf8 to sjis
33+ test('E7 BD A9 EF BD 88 E5 BA 9A', 'E3 AB 82 88 8D 4D'); # from utf8 to sjis
34+ # E6 A4 BB E4 BE AA : 椻侪 / 讀サ萓ェ / 罎私妾
35+ test('E6 A4 BB E4 BE AA', 'E6 A4 BB E4 BE AA'); # as utf8
36+ test('E8 AE 80 EF BD BB E8 90 93 EF BD AA', 'E6 A4 BB E4 BE AA'); # from utf8 to sjis
37+ test('E7 BD 8E E7 A7 81 E5 A6 BE', 'E3 A2 8E 84 8F A8'); # from utf8 to sjis
38+ # E5 AA BE E9 A1 B6 : 媾顶 / 蟐セ鬘カ / 紵冗ゞ
39+ test('E5 AA BE E9 A1 B6', 'E5 AA BE E9 A1 B6'); # as utf8
40+ test('E8 9F 90 EF BD BE E9 AC 98 EF BD B6', 'E5 AA BE E9 A1 B6'); # from utf8 to sjis
41+ test('E7 B4 B5 E5 86 97 E3 82 9E', 'E3 49 8F E7 81 55'); # from utf8 to sjis
42+ # E3 B6 BD EC B9 AA : 㶽칪 / 羝ス?ェ / 禧曙巧
43+ test('E3 B6 BD EC B9 AA', 'E2 55 8F 8C 8D 49'); # from eucjp to sjis
44+ test('E7 A6 A7 E6 9B 99 E5 B7 A7', 'E2 55 8F 8C 8D 49'); # from utf8 to sjis
45+ # EC B9 A8 E5 A4 BB : 침夻 / ?ィ螟サ / 豺?せ
46+ test('EC B9 A8 E5 A4 BB', '3F A8 E5 A4 BB'); # as sjis
47+ # E7 B4 A6 E5 BC A9 : 紦弩 / 邏ヲ蠑ゥ / 膣?而
48+ test('E7 B4 A6 E5 BC A9', 'E7 B4 A6 E5 BC A9'); # as utf8
49+ test('E9 82 8F EF BD A6 E8 A0 91 EF BD A9', 'E7 B4 A6 E5 BC A9'); # from utf8 to sjis
50+
51+ return 0;
52+}
53+
54+sub test {
55+ my ($str, $expect) = @_;
56+ $str =~ s/([0-9A-F]{2}) ?/chr hex $1/egi;
57+ $expect =~ s/([0-9A-F]{2}) ?/chr hex $1/egi;
58+ print 'string: '.str2hex($str)."\n";
59+ my $result = enc($str);
60+ if ($result ne $expect) {
61+ print ' result: '.str2hex($result)."\n";
62+ print ' expect: '.str2hex($expect)."\n";
63+ return 0;
64+ }
65+ return 1;
66+}
67+
68+sub str2hex {
69+ my ($str) = @_;
70+ $str =~ s/(.)/sprintf('%02X ', ord($1))/ge;
71+ $str =~ s/ $//;
72+ return $str;
73+}
74+
75+sub enc {
76+ my ($utf8) = @_;
77+
78+ my $str = decode('utf8', $utf8);
79+
80+ my $utf8sjis = 0;
81+ my $sjissjis = 0;
82+ my $eucjpsjis = 0;
83+
84+ encode('sjis', $str, sub { $utf8sjis++; '' });
85+ decode('sjis', $utf8, sub { $sjissjis++; '' });
86+ decode('eucjp', $utf8, sub { $eucjpsjis++; '' });
87+
88+ if ($utf8sjis <= $sjissjis && $utf8sjis <= $eucjpsjis) {
89+ return encode('sjis', $str);
90+ }
91+ if ($sjissjis <= $utf8sjis && $sjissjis <= $eucjpsjis) {
92+ return encode('sjis', decode('sjis', $utf8));
93+ }
94+ if ($eucjpsjis <= $utf8sjis && $eucjpsjis <= $sjissjis) {
95+ return encode('sjis', decode('eucjp', $utf8));
96+ }
97+
98+ # unreachable
99+ return $utf8;
100+}
101+
102+exit(main()) if (!defined caller);
--- triptest/trunk/test.pl (revision 537)
+++ triptest/trunk/test.pl (revision 538)
@@ -1,10 +1,10 @@
1 1 use utf8;
2 2 use strict;
3+use Encode;
3 4
4 5 sub main {
5 6 require 'trip.cgi';
6 7 binmode(STDOUT);
7- binmode(STDOUT, ':encoding(cp932)');
8 8
9 9 test('',
10 10 'net' => 'jPpg5.obl6',
@@ -538,18 +538,107 @@
538 538 'bban' => 'AOGu5v68Us', # ##8300000000000000..
539 539 );
540 540
541- test('孥밴',
542- 'atchs' => 'M2oL7qG6dM', ##e34c838b88b20000L.
541+ # E4 BF A1 E8 BB BD : 信軽 / 菫。霆ス / 篆∴蚕
542+ test('信軽', # E4 BF A1 E8 BB BD => 90 4D 8C 79 (from utf8 to sjis)
543+ 'atchs' => '7kFWX/qQYo', # ##904d8c7900000000M.
543 544 );
544- test('絖ル梓',
545- 'atchs' => 'M2oL7qG6dM', ##e34c838b88b20000L.
545+ test('菫。霆ス', # E8 8F AB EF BD A1 E9 9C 86 EF BD BD => E4 BF A1 E8 BB BD (from utf8 to sjis)
546+ 'atchs' => 'APAvffTKNY', # ##e4bfa1e8bbbd0000..
546 547 );
547- test("\t孥밴",
548- 'atchs' => 'IA.fJI5mHQ', ##09e34c838b88b200.L
548+ test('篆∴蚕', # E7 AF 86 E2 88 B4 E8 9A 95 => E2 BD 81 88 8E 5C (from utf8 to sjis)
549+ 'atchs' => 'xV10Cw674M', # ##e2bd81888e5c0000..
549 550 );
550- test('맨㹭',
551- 'atchs' => 'l9OGNUEGNg', ##3f2863392d000000..
551+ # EB A7 A8 E3 B9 AD : 맨㹭 / ?ィ羯ュ / 襷?広
552+ test('맨㹭', # EB A7 A8 E3 B9 AD => 3F A8 E3 B9 AD (as sjis)
553+ 'atchs' => 'l9OGNUEGNg', # ##3fa8e3b9ad000000.. #?ィ羯ュ
552 554 );
555+ # E5 AD A5 EB B0 B4 : 孥밴 / 蟄??エ / 絖ル梓
556+ test('孥밴', # E5 AD A5 EB B0 B4 => E3 4C 83 8B 88 B2 (from eucjp to sjis)
557+ 'atchs' => 'M2oL7qG6dM', # ##e34c838b88b20000L.
558+ );
559+ test('絖ル梓', # E7 B5 96 E3 83 AB E6 A2 93 => E3 4C 83 8B 88 B2 (from utf8 to sjis)
560+ 'atchs' => 'M2oL7qG6dM', # ##e34c838b88b20000L.
561+ );
562+ # E5 B3 B1 E3 A2 BE : 峱㢾 / 蟲ア罎セ / 絣宴⊂
563+ test('峱㢾', # E5 B3 B1 E3 A2 BE => E3 52 89 83 81 BC (from eucjp to sjis)
564+ 'atchs' => 'V3HdQo70SA', # ##e352898381bc0000R.
565+ );
566+ test('蟲ア罎セ', # E8 9F B2 EF BD B1 E7 BD 8E EF BD BE => E5 B3 B1 E3 A2 BE (from utf8 to sjis)
567+ 'atchs' => 'AABymBxNrU', # ##e5b3b1e3a2be0000..
568+ );
569+ test('絣宴⊂', # E7 B5 A3 E5 AE B4 E2 8A 82 => E3 52 89 83 81 BC (from utf8 to sjis)
570+ 'atchs' => 'V3HdQo70SA', # ##e352898381bc0000R.
571+ );
572+ # E7 BC BA E5 BF B3 : 缺忳 / 郛コ蠢ウ / 膽阪審
573+ test('缺忳', # E7 BC BA E5 BF B3 (as utf8)
574+ 'atchs' => 'WvHcc0tL/g', # ##e7bcbae5bfb30000..
575+ );
576+ test('郛コ蠢ウ', # E9 83 9B EF BD BA E8 A0 A2 EF BD B3 => E7 BC BA E5 BF B3 (from utf8 to sjis)
577+ 'atchs' => 'WvHcc0tL/g', # ##e7bcbae5bfb30000..
578+ );
579+ test('膽阪審', # E8 86 BD E9 98 AA E5 AF A9 => E4 5B 8D E3 90 52 (from utf8 to sjis)
580+ 'atchs' => 'nXqoPlEsW.', # ##e45b8de390520000a.
581+ );
582+ # E6 AD A3 E8 B9 AE : 正蹮 / 豁」雹ョ / 罩h庚
583+ test('正蹮', # E6 AD A3 E8 B9 AE (as utf8)
584+ 'atchs' => 'sJaNondzgA', # ##e6ada3e8b9ae0000..
585+ );
586+ test('豁」雹ョ', # E8 B1 81 EF BD A3 E9 9B B9 EF BD AE => E6 AD A3 E8 B9 AE (from utf8 to sjis)
587+ 'atchs' => 'sJaNondzgA', # ##e6ada3e8b9ae0000..
588+ );
589+ test('罩h庚', # E7 BD A9 EF BD 88 E5 BA 9A => E3 AB 82 88 8D 4D (from utf8 to sjis)
590+ 'atchs' => 'JF6srNhdqE', # ##e3ab82888d4d0000..
591+ );
592+ # E6 A4 BB E4 BE AA : 椻侪 / 讀サ萓ェ / 罎私妾
593+ test('椻侪', # E6 A4 BB E4 BE AA (as utf8)
594+ 'atchs' => '/ZlVOBaCDY', # ##e6a4bbe4beaa0000..
595+ );
596+ test('讀サ萓ェ', # E8 AE 80 EF BD BB E8 90 93 EF BD AA => E6 A4 BB E4 BE AA (from utf8 to sjis)
597+ 'atchs' => '/ZlVOBaCDY', # ##e6a4bbe4beaa0000..
598+ );
599+ test('罎私妾', # E7 BD 8E E7 A7 81 E5 A6 BE => E3 A2 8E 84 8F A8 (from utf8 to sjis)
600+ 'atchs' => 'by3Lhxz1H6', # ##e3a28e848fa80000..
601+ );
602+ # E5 AA BE E9 A1 B6 : 媾顶 / 蟐セ鬘カ / 紵冗ゞ
603+ test('媾顶', # E5 AA BE E9 A1 B6 (as utf8)
604+ 'atchs' => '4o.Bpl1ORc', # ##e5aabee9a1b60000..
605+ );
606+ test('蟐セ鬘カ', # E8 9F 90 EF BD BE E9 AC 98 EF BD B6 => E5 AA BE E9 A1 B6 (from utf8 to sjis)
607+ 'atchs' => '4o.Bpl1ORc', # ##e5aabee9a1b60000..
608+ );
609+ test('紵冗ゞ', # E7 B4 B5 E5 86 97 E3 82 9E => E3 49 8F E7 81 55 (from utf8 to sjis)
610+ 'atchs' => 'p.CsKTRhlE', # ##e3498fe781550000I.
611+ );
612+ # E3 B6 BD EC B9 AA : 㶽칪 / 羝ス?ェ / 禧曙巧
613+ test('㶽칪', # E3 B6 BD EC B9 AA => E2 55 8F 8C 8D 49 (from eucjp to sjis)
614+ 'atchs' => 'KLCGM8uxXI', # ##e2558f8c8d490000U.
615+ );
616+ test('禧曙巧', # E7 A6 A7 E6 9B 99 E5 B7 A7 => E2 55 8F 8C 8D 49 (from utf8 to sjis)
617+ 'atchs' => 'KLCGM8uxXI', # ##e2558f8c8d490000U.
618+ );
619+ # EC B9 A8 E5 A4 BB : 침夻 / ?ィ螟サ / 豺?せ
620+ test('침夻', # EC B9 A8 E5 A4 BB => 3F A8 E5 A4 BB (as sjis)
621+ 'atchs' => 'Hyot5W8Vhw', # ##3fa8e5a4bb000000.. #?ィ螟サ
622+ );
623+ # E7 B4 A6 E5 BC A9 : 紦弩 / 邏ヲ蠑ゥ / 膣?而
624+ test('紦弩', # E7 B4 A6 E5 BC A9 (as utf8)
625+ 'atchs' => '53rES.iYkg', # ##e7b4a6e5bca90000..
626+ );
627+ test('邏ヲ蠑ゥ', # E9 82 8F EF BD A6 E8 A0 91 EF BD A9 => E7 B4 A6 E5 BC A9 (from utf8 to sjis)
628+ 'atchs' => '53rES.iYkg', # ##e7b4a6e5bca90000..
629+ );
630+# #
631+# test('', #
632+# 'atchs' => '', #
633+# );
634+# test('', #
635+# 'atchs' => '', #
636+# );
637+# test('', #
638+# 'atchs' => '', #
639+# );
640+ print "end.\n";
641+ return 0;
553 642 }
554 643
555 644 sub test {
@@ -565,15 +654,15 @@
565 654 $diff = 1;
566 655 }
567 656 if ($diff) {
568- print "key #$key\n";
657+ print encode('cp932', "key #$key\n");
569 658 print " mode $mode\n";
570 659 if (defined $result) {
571- print " result $result\n";
660+ print encode('cp932', " result $result\n");
572 661 } else {
573 662 print " result [no trip]\n";
574 663 }
575 664 if (defined $expects{$mode}) {
576- print " expect $expects{$mode}\n";
665+ print encode('cp932', " expect $expects{$mode}\n");
577 666 } else {
578 667 print " expect [no trip]\n";
579 668 }
Show on old repository browser