Hiroyuki Komatsu
komat****@users*****
2004年 12月 17日 (金) 23:11:32 JST
Index: prime/lib/session.rb diff -u prime/lib/session.rb:1.4.4.11 prime/lib/session.rb:1.4.4.12 --- prime/lib/session.rb:1.4.4.11 Tue Dec 14 17:16:31 2004 +++ prime/lib/session.rb Fri Dec 17 23:11:32 2004 @@ -1,5 +1,5 @@ # session.rb -# $Id: session.rb,v 1.4.4.11 2004/12/14 08:16:31 komatsu Exp $ +# $Id: session.rb,v 1.4.4.12 2004/12/17 14:11:32 komatsu Exp $ # # Copyright (C) 2001 Satoru Takabayashi <sator****@namaz*****> # Copyright (C) 2002, 2003, 2004 Hiroyuki Komatsu <komat****@taiya*****> @@ -423,8 +423,8 @@ ## Conversion methods ## def conv_convert (session) - candidates = @prime.conv_convert(session) - return reply_successful(candidates.to_text) + conversions = @prime.conv_convert(session) + return reply_successful( conversions.to_text() ) end def conv_predict (session, method = nil) Index: prime/lib/prime2.rb diff -u prime/lib/prime2.rb:1.1.2.1 prime/lib/prime2.rb:1.1.2.2 --- prime/lib/prime2.rb:1.1.2.1 Tue Dec 14 17:16:31 2004 +++ prime/lib/prime2.rb Fri Dec 17 23:11:32 2004 @@ -1,5 +1,5 @@ # prime2.rb: Module for PRIME2 protocol. -# $Id: prime2.rb,v 1.1.2.1 2004/12/14 08:16:31 komatsu Exp $ +# $Id: prime2.rb,v 1.1.2.2 2004/12/17 14:11:32 komatsu Exp $ # # Copyright (C) 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved.
@@ -141,7 +141,7 @@ words_compact = convert_compact(composer, context) words_overall = convert_overall(composer, context) -# words_japanese = convert_japanese(composer, context) + words_japanese = convert_japanese(composer, context) results_compact = PrimeWordList::merge_with_label(@context, words_compact) results_conversion = PrimeWordList::merge_with_label(@context, @@ -150,8 +150,25 @@ candidates = PrimeWordList::concat(results_compact | results_conversion) end - session_set_candidates(session, candidates) - return candidates + conversions = _adhoc_wordlist_to_conversionlist(query_string, candidates) + + session_set_conversions(session, conversions) + return conversions +# session_set_candidates(session, candidates) +# return candidates + end + + def _adhoc_wordlist_to_conversionlist (reading, wordlist) + conversion_list = [] + wordlist.length.times { | index | + + segment = PrimeSegment.new(reading) + segment.set_candidates(wordlist, index) + score = wordlist[index].score + conversion = PrimeConversion.new( [segment], score ) + conversion_list.push(conversion) + } + return PrimeConversionList.new(conversion_list) end def conv_predict (session, method = nil) @@ -168,6 +185,8 @@ candidates = PrimeWordList::merge_with_label(@context, words_compact) end + conversion = PrimeConversion(candidates) + session_set_candidates(session, candidates) return candidates end @@ -204,6 +223,13 @@ @sessions[session].set_candidates(wordlist) end + def session_set_conversions (session, conversions) + @sessions[session].set_conversions(conversions) + end + def session_get_conversions (session) + return @sessions[session].get_conversions() + end + def session_set_selection (session, index) @sessions[session].set_selection(index) end @@ -262,6 +288,13 @@ # words_compact = PrimeWordList::merge(words_prefix, words_japanese)[0,1] words_compact = PrimeWordList::merge(words_prefix)[0,1] + ## Ruby 1.6 does not keep the class PrimeWordList word[0,1] if the + ## value of word is [], and the 
class of the result of word[0,1] + ## becomes Array which is a super class of PrimeWordList. + if words_compact.empty? then + words_compact = PrimeWordList.new() + end + if words_compact.length > 0 then predict_with_multi_clauses!(words_compact) @@ -272,12 +305,98 @@ return words_compact end + def convert_japanese_process_segments_list (composer) + string = composer.edit_get_surface_string() + (depth, segments_list) = guess_clauses_internal(string) + + conversions = [] + min_length = depth + + segments_list.each { | segments | + if segments.length < min_length then + conversions = [] + min_length = segments.length + elsif segments.length == min_length then + prime_segments = [] + segments.each { | (reading, pos, adjunct, pos_adjunct, engines) | + prime_segment = PrimeSegment.new(reading, pos, adjunct, pos_adjunct) + prime_segments.push(prime_segment) + } + conversions.push(prime_segments) + end + } + return conversions + end + + def convert_japanese_get_score (segments) + segment = segments[0] + pos = segment.pos + literal = segment.get_literal() + score = segment.get_score() + + segments[1..-1].each { | segment2 | + pos2 = segment2.pos + literal2 = segment2.get_literal() + score2 = segment2.get_score() + if pos2.nil?
then + cost = 0.9 ** literal2.length + else + cost = _get_connection_cost(pos, literal, pos2, literal2) + end + connection_key = [pos, pos2].join("\t") + pos = (@pos_connection_pos[connection_key] or pos2) + + score = (Math::sqrt(score * score2) * cost).to_i + } + return score + end + + def convert_japanese (composer, context) + rest = nil + + segments_list = convert_japanese_process_segments_list(composer) + + conversions = PrimeConversionList.new() + + segments_list.each { | segments | + if segments.length == 1 then + segment = segments[0] + reading = segment.reading + query = PrimeQuery.new( [reading], segment.pos ) + words = search(query) + + words.length.times { | index | + new_segment = segment.dup() + new_segment.set_candidates(words, index) + score = words[index].score + conversion = PrimeConversion.new( [new_segment], score ) + conversions.push(conversion) + } + else + segments.each { | segment | + query = PrimeQuery.new( [segment.reading], segment.pos ) + words = search(query) + index = (segment.pos == nil) ? 
-1 : 0 + segment.set_candidates(words, index) + } + score = convert_japanese_get_score(segments) + conversions.push( PrimeConversion.new(segments, score) ) + end + } +# conversions = PrimeConversionList.new(conversions) + puts "----" + puts conversions.to_text_debug() + puts "----" + return conversions + end + class PrimeSession def initialize () - @composer = initialize_composer() - @candidates = PrimeWordList.new() - @target = "" - @context = "" + @composer = initialize_composer() + @candidates = PrimeWordList.new() + @conversions = [] + @target = "" + @context = "" end def initialize_composer () @@ -323,9 +442,18 @@ return @composer end + def set_conversions (conversions) + @conversions = conversions + end + def get_conversions () + return @conversions + end + + ## Obsolete def set_candidates (wordlist) @candidates = wordlist end + ## Obsolete def get_candidates () return @candidates end @@ -346,3 +474,110 @@ end end end + +class PrimeConversionList < Array + def to_text () + texts = self.map { | conversion | conversion.to_text() } + return texts.join("\n") + end + + def to_text_debug () + texts = self.sort { | conversion1, conversion2 | + conversion1.score <=> conversion2.score + }.map { | conversion | conversion.to_text_debug() } + return texts.join("\n") + end +end + +class PrimeConversion + attr_reader :score + def initialize (segments, score = 0, position = -1) + @segments = segments + @position = position + @score = score + end + + def to_text_debug () + if @segments.length == 1 then + return "(#{score})\t" + @segments[0].to_text_data() # with annotations + else + texts = @segments.map { | segment | segment.to_text() } + return "(#{score})\t" + texts.join("|") + end + end + + def to_text () + if @segments.length == 1 then + return @segments[0].to_text_data() # with annotations + else + texts = @segments.map { | segment | segment.to_text() } + return texts.join() + end + end +end + +class PrimeSegment + attr_reader :reading, :pos + def initialize (reading,
pos = nil, adjunct = "", pos_adjunct = nil) + @reading = reading + @pos = pos + @adjunct = adjunct + @pos_adjunct = pos_adjunct + + @candidates = PrimeWordList.new() + @candidate_index = -1 + end + + def dup () + segment = PrimeSegment.new(@reading, @pos, @adjunct, @pos_adjunct) + segment.set_candidates(@candidates, @candidate_index) + return segment + end + + def set_candidates (candidates, candidate_index = -1) + @candidates = candidates + @candidate_index = candidate_index + end + def set_candidate_index (candidate_index) + @candidate_index = candidate_index + end + + def get_literal () + if @candidate_index == -1 then + return @reading + else + return @candidates[@candidate_index].to_text_literal() + end + end + def get_score () + if @candidate_index == -1 then + ## FIXME: Remove the magic number. + ## FIXME: (2004-12-17) <Hiro> + return 8000 + else + return @candidates[@candidate_index].score + end + end + + def to_text_data () + if @candidate_index == -1 then + text = @reading + @adjunct + else + text = \ + [ @candidates[@candidate_index].to_text_literal() + @adjunct, + @candidates[@candidate_index].to_text_data() ].join("\t") + end + return text + end + + def to_text () + return ( get_literal() + @adjunct ) + end +end + +class PrimeConvertMultiSegments + ## This returns a PrimeConversionList + def PrimeConvertMultiSegments::convert (composer) + string = composer.edit_get_surface_string() + end +end Index: prime/lib/prime.rb diff -u prime/lib/prime.rb:1.7.4.9 prime/lib/prime.rb:1.7.4.10 --- prime/lib/prime.rb:1.7.4.9 Tue Dec 14 17:16:31 2004 +++ prime/lib/prime.rb Fri Dec 17 23:11:32 2004 @@ -1,5 +1,5 @@ # prime/prime.rb -# $Id: prime.rb,v 1.7.4.9 2004/12/14 08:16:31 komatsu Exp $ +# $Id: prime.rb,v 1.7.4.10 2004/12/17 14:11:32 komatsu Exp $ # # Copyright (C) 2002, 2003, 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved.
@@ -315,6 +315,13 @@ # words_japanese = search_japanese_prefix(string) words_compact = PrimeWordList::merge(words_prefix, words_japanese)[0,1] + ## Ruby 1.6 does not keep the class PrimeWordList word[0,1] if the + ## value of word is [], and the class of the result of word[0,1] + ## becomes Array which is a super class of PrimeWordList. + if words_compact.empty? then + words_compact = PrimeWordList.new() + end + if words_compact.length > 0 then predict_with_multi_clauses!(words_compact) @@ -448,6 +455,16 @@ ] return data_list.compact.join("\t") end + + ## This is for the PRIME2 protocol. + def to_text_data + data_list = [ + ( "form=#{@data['annotation']}" if @data.has_key?('annotation') ), + ( "usage=#{@data['usage']}" if @data.has_key?('usage') ), + ( "comment=#{@data['usage']}" if @data.has_key?('comment') ), + ] + return data_list.compact.join("\t") + end end class PrimeWordList < Array