Kouhei Sutou
null+****@clear*****
Mon Nov 6 12:26:54 JST 2017
Kouhei Sutou 2017-11-06 12:26:54 +0900 (Mon, 06 Nov 2017) New Revision: 41dd9640026a330c6fb5216875f1aa9b10336953 https://github.com/groonga/groonga/commit/41dd9640026a330c6fb5216875f1aa9b10336953 Message: Add a tool to list symbol characters in UCD Added files: tools/ucd-symbol-list.rb Modified files: tools/Makefile.am Modified: tools/Makefile.am (+2 -1) =================================================================== --- tools/Makefile.am 2017-11-02 09:27:32 +0900 (6027eee18) +++ tools/Makefile.am 2017-11-06 12:26:54 +0900 (49ffaae3f) @@ -1,7 +1,8 @@ noinstall_ruby_scripts = \ groonga-memory-leak-checker.rb \ groonga-object-list-checker.rb \ - prepare-sphinx-html.rb + prepare-sphinx-html.rb \ + ucd-symbol-list.rb EXTRA_DIST = \ $(noinstall_ruby_scripts) Added: tools/ucd-symbol-list.rb (+77 -0) 100755 =================================================================== --- /dev/null +++ tools/ucd-symbol-list.rb 2017-11-06 12:26:54 +0900 (a95dfc561) @@ -0,0 +1,77 @@ +#!/usr/bin/env ruby + +base_dir = ARGV[0] + +@targets = {} +def register(character_code, description) + @targets[character_code] = description +end + +property_aliases = {} +File.open("#{base_dir}/PropertyValueAliases.txt") do |file| + file.each_line do |line| + case line + when /\A[a-z]/i + target, abbrev, name, = line.chomp.split(/\s*;\s*/) + next if abbrev == "n/a" + property_aliases[abbrev] = name + end + end +end + +File.open("#{base_dir}/PropList.txt") do |file| + file.each_line do |line| + case line.chomp + when /\A([\da-f]{4,5})(?:\.\.([\da-f]{4,5})) +; .+? \# (.{2})/i + start = $1 + last = $2 + property_value_alias = $3 + property_value = property_aliases[property_value_alias] + property_value ||= property_value_alias + case property_value + when "Dash_Punctuation", + "Open_Punctuation", + "Close_Punctuation", + "Connector_Punctuation", + "Other_Punctuation", + "Math_Symbol", + "Currency_Symbol", + "Modifier_Symbol", + "Other_Symbol" + if last.nil? 
+ register(start.to_i(16), property_value) + else + (start.to_i(16)..last.to_i(16)).each do |character_code| + register(character_code, property_value) + end + end + end + end + end +end + +File.open("#{base_dir}/Blocks.txt") do |file| + file.each_line do |line| + case line.chomp + when /\A([\da-f]{4,5})\.\.([\da-f]{4,5}); (.+)\z/i + start = $1 + last = $2 + description = $3 + case description + when "CJK Symbols and Punctuation", + "Enclosed CJK Letters and Months", + "CJK Compatibility", + "CJK Compatibility Forms" + (start.to_i(16)..last.to_i(16)).each do |character_code| + register(character_code, description) + end + end + end + end +end + +@targets.keys.sort.each do |character_code| + description = @targets[character_code] + character = [character_code].pack("U") + puts("%#x: %s: %s" % [character_code, character, description]) +end -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20171106/ce7e0461/attachment-0001.htm