Ruby GTK3移行後のメインリポジトリ
Revisão | 27d4bd3b68e6c66eb0688832fec7254a17d6c339 (tree) |
---|---|
Hora | 2015-12-11 23:30:08 |
Autor | Shyouzou Sugitani <shy@user...> |
Committer | Shyouzou Sugitani |
update misaka.rb
@@ -1,3 +1,6 @@ | ||
1 | +Fri December 11 2015 Shyouzou Sugitani <shy@users.osdn.me> | |
2 | + * 美坂互換モジュールにCharlockHolmesによる文字コードの自動判定を実装. | |
3 | + | |
1 | 4 | Sun December 6 2015 Shyouzou Sugitani <shy@users.osdn.me> |
2 | 5 | * Ruby移行以降の修正で漏れがあったもの対応. |
3 | 6 | (さくらスクリプト処理, 美坂互換モジュールの動作改善) |
@@ -41,7 +41,7 @@ module HTTPC | ||
41 | 41 | end |
42 | 42 | |
43 | 43 | def check_import |
44 | - if @__sakura != nil and chardet != nil | |
44 | + if @__sakura != nil | |
45 | 45 | return 1 |
46 | 46 | else |
47 | 47 | return 0 |
@@ -23,11 +23,11 @@ require "stringio" | ||
23 | 23 | require_relative "../home" |
24 | 24 | require_relative "../logging" |
25 | 25 | |
26 | -## FIXME | |
27 | -##try: | |
28 | -## import chardet.universaldetector | |
29 | -##except: | |
30 | -## chardet = nil | |
26 | +begin | |
27 | + require 'charlock_holmes' | |
28 | +rescue LoadError | |
29 | + CharlockHolmes = nil | |
30 | +end | |
31 | 31 | |
32 | 32 | module Misaka |
33 | 33 |
@@ -1265,41 +1265,35 @@ module Misaka | ||
1265 | 1265 | @misaka_error = error |
1266 | 1266 | global_variables = [] |
1267 | 1267 | global_constants = [] |
1268 | - ## FIXME | |
1269 | - ### charset auto-detection | |
1270 | - ##if chardet != nil | |
1271 | - ## detector = chardet.universaldetector.UniversalDetector() | |
1272 | - ## for filename in filelist | |
1273 | - ## path = File.join(@misaka_dir, filename) | |
1274 | - ## begin | |
1275 | - ## f = open(path, 'rb') | |
1276 | - ## rescue #except IOError: | |
1277 | - ## Logging::Logging.debug('cannot read ' + filename.to_s) | |
1278 | - ## next | |
1279 | - ## end | |
1280 | - ## ext = File.extname(filename) | |
1281 | - ## if ext == '.__1' | |
1282 | - ## f = io.StringIO(crypt(f.read())) | |
1283 | - ## end | |
1284 | - ## detector.reset() | |
1285 | - ## for line in f | |
1286 | - ## detector.feed(line) | |
1287 | - ## if detector.done | |
1288 | - ## break | |
1289 | - ## end | |
1290 | - ## end | |
1291 | - ## detector.close() | |
1292 | - ## f.close() | |
1293 | - ## if detector.result['confidence'] > 0.98 and \ | |
1294 | - ## detector.result['encoding'] != 'ascii' # XXX | |
1295 | - ## @charset = detector.result['encoding'] | |
1296 | - ## if @charset == 'SHIFT_JIS' | |
1297 | - ## @charset = 'CP932' # XXX | |
1298 | - ## end | |
1299 | - ## break | |
1300 | - ## end | |
1301 | - ## end | |
1302 | - ##end | |
1268 | + # charset auto-detection | |
1269 | + if CharlockHolmes != nil | |
1270 | + detector = CharlockHolmes::EncodingDetector.new | |
1271 | + for filename in filelist | |
1272 | + path = File.join(@misaka_dir, filename) | |
1273 | + begin | |
1274 | + f = open(path, 'rb') | |
1275 | + rescue #except IOError: | |
1276 | + Logging::Logging.debug('cannot read ' + filename.to_s) | |
1277 | + next | |
1278 | + end | |
1279 | + ext = File.extname(filename) | |
1280 | + if ext == '.__1' | |
1281 | + result = detector.detect(crypt(f.read())) | |
1282 | + else | |
1283 | + result = detector.detect(f.read()) | |
1284 | + end | |
1285 | + f.close() | |
1286 | + if result[:confidence] > 98 and \ | |
1287 | + result[:encoding] != 'ISO-8859-1' # XXX | |
1288 | + @charset = result[:encoding] | |
1289 | + if @charset == 'Shift_JIS' | |
1290 | + @charset = 'CP932' # XXX | |
1291 | + end | |
1292 | + print("CharlockHolmes:, ", @charset, "\n") | |
1293 | + break | |
1294 | + end | |
1295 | + end | |
1296 | + end | |
1303 | 1297 | for filename in filelist |
1304 | 1298 | path = File.join(@misaka_dir, filename) |
1305 | 1299 | basename = File.basename(filename, ".*") |