null+****@clear*****
null+****@clear*****
2012年 5月 30日 (水) 10:02:53 JST
Susumu Yata 2012-05-30 10:02:53 +0900 (Wed, 30 May 2012)

  New Revision: 0a6b50d9e250f4829a18faa0f8061f290d6574df

  Log:
    Update TokenKytea for KyTea version 0.4.2.

  Modified files:
    plugins/tokenizers/kytea.cpp

  Modified: plugins/tokenizers/kytea.cpp (+4 -2)
===================================================================
--- plugins/tokenizers/kytea.cpp 2012-05-30 00:00:35 +0900 (58efb58)
+++ plugins/tokenizers/kytea.cpp 2012-05-30 10:02:53 +0900 (2e12f9b)
@@ -187,7 +187,9 @@ grn_obj *grn_kytea_init(grn_ctx *ctx, int num_args, grn_obj **args,
   grn_plugin_mutex_lock(ctx, kytea_mutex);
   try {
     const std::string str(query->ptr, query->length);
-    tokenizer->sentence = kytea::KyteaSentence(kytea_util->mapString(str));
+    const kytea::KyteaString &surface_str = kytea_util->mapString(str);
+    const kytea::KyteaString &normalized_str = kytea_util->normalize(surface_str);
+    tokenizer->sentence = kytea::KyteaSentence(surface_str, normalized_str);
     kytea_tagger->calculateWS(tokenizer->sentence);
   } catch (...) {
     grn_plugin_mutex_unlock(ctx, kytea_mutex);
@@ -200,7 +202,7 @@ grn_obj *grn_kytea_init(grn_ctx *ctx, int num_args, grn_obj **args,
   try {
     for (std::size_t i = 0; i < tokenizer->sentence.words.size(); ++i) {
       const std::string &token =
-          kytea_util->showString(tokenizer->sentence.words[i].surf);
+          kytea_util->showString(tokenizer->sentence.words[i].surface);
       const char *ptr = token.c_str();
       unsigned int left = static_cast<unsigned int>(token.length());
       while (left > 0) {