[Groonga-commit] groonga/groonga [master] Update TokenKytea for KyTea version 0.4.2.

Back to archive index

null+****@clear***** null+****@clear*****
2012年 5月 30日 (水) 10:02:53 JST


Susumu Yata	2012-05-30 10:02:53 +0900 (Wed, 30 May 2012)

  New Revision: 0a6b50d9e250f4829a18faa0f8061f290d6574df

  Log:
    Update TokenKytea for KyTea version 0.4.2.

  Modified files:
    plugins/tokenizers/kytea.cpp

  Modified: plugins/tokenizers/kytea.cpp (+4 -2)
===================================================================
--- plugins/tokenizers/kytea.cpp    2012-05-30 00:00:35 +0900 (58efb58)
+++ plugins/tokenizers/kytea.cpp    2012-05-30 10:02:53 +0900 (2e12f9b)
@@ -187,7 +187,9 @@ grn_obj *grn_kytea_init(grn_ctx *ctx, int num_args, grn_obj **args,
   grn_plugin_mutex_lock(ctx, kytea_mutex);
   try {
     const std::string str(query->ptr, query->length);
-    tokenizer->sentence = kytea::KyteaSentence(kytea_util->mapString(str));
+    const kytea::KyteaString &surface_str = kytea_util->mapString(str);
+    const kytea::KyteaString &normalized_str = kytea_util->normalize(surface_str);
+    tokenizer->sentence = kytea::KyteaSentence(surface_str, normalized_str);
     kytea_tagger->calculateWS(tokenizer->sentence);
   } catch (...) {
     grn_plugin_mutex_unlock(ctx, kytea_mutex);
@@ -200,7 +202,7 @@ grn_obj *grn_kytea_init(grn_ctx *ctx, int num_args, grn_obj **args,
   try {
     for (std::size_t i = 0; i < tokenizer->sentence.words.size(); ++i) {
       const std::string &token =
-          kytea_util->showString(tokenizer->sentence.words[i].surf);
+          kytea_util->showString(tokenizer->sentence.words[i].surface);
       const char *ptr = token.c_str();
       unsigned int left = static_cast<unsigned int>(token.length());
       while (left > 0) {




Groonga-commit メーリングリストの案内
Back to archive index