[Groonga-commit] groonga/groonga [master] Revert "[normalizer][nfkc] NormalizerUTF8NFKC -> NormalizerNFKC51."

Back to archive index

null+****@clear***** null+****@clear*****
2012年 2月 14日 (火) 13:47:45 JST


Kouhei Sutou	2012-02-14 13:47:45 +0900 (Tue, 14 Feb 2012)

  New Revision: 046eb60ae9aba8734799aaa19447b36ec88960d6

  Log:
    Revert "[normalizer][nfkc] NormalizerUTF8NFKC -> NormalizerNFKC51."
    
    This reverts commit 730d953118097be6b05bffc286bce19a2df50685.

  Modified files:
    include/groonga.h
    lib/normalizer.c
    plugins/normalizers/Makefile.am
    plugins/normalizers/nfkc.c
    test/benchmark/bench-normalize.c
    test/unit/core/test-command-dump.c
  Renamed files:
    plugins/normalizers/nfkc-core.c
      (from plugins/normalizers/nfkc-unicode-5.1.c)
    plugins/normalizers/nfkc.h
      (from plugins/normalizers/nfkc-unicode-5.1.h)

  Modified: include/groonga.h (+1 -1)
===================================================================
--- include/groonga.h    2012-02-13 21:15:49 +0900 (750e37e)
+++ include/groonga.h    2012-02-14 13:47:45 +0900 (17a0b28)
@@ -593,11 +593,11 @@ typedef enum {
 
 typedef enum {
   GRN_DB_NORMALIZER_ASCII = 96,
+  GRN_DB_NORMALIZER_UTF8_NFKC,       /* Normalization Form KC */
   GRN_DB_NORMALIZER_EUC_JP,
   GRN_DB_NORMALIZER_SJIS,
   GRN_DB_NORMALIZER_LATIN1,
   GRN_DB_NORMALIZER_KOI8R,
-  GRN_DB_NORMALIZER_NFKC51,          /* Normalization Form KC for Unicode 5.1 */
   GRN_DB_NORMALIZER_UTF8_UCA         /* Unicode Collation Algorithm */
 } grn_builtin_normalizer;
 

  Modified: lib/normalizer.c (+9 -9)
===================================================================
--- lib/normalizer.c    2012-02-13 21:15:49 +0900 (dbd140a)
+++ lib/normalizer.c    2012-02-14 13:47:45 +0900 (3462498)
@@ -32,7 +32,7 @@ grn_normalizer_find(grn_ctx *ctx, grn_encoding encoding)
     break;
   case GRN_ENC_UTF8 :
 #ifdef WITH_NFKC
-    normalizer_id = GRN_DB_NORMALIZER_NFKC51;
+    normalizer_id = GRN_DB_NORMALIZER_UTF8_NFKC;
 #else /* WITH_NFKC */
     normalizer_id = GRN_DB_NORMALIZER_ASCII;
 #endif /* WITH_NFKC */
@@ -1123,6 +1123,14 @@ grn_db_init_builtin_normalizers(grn_ctx *ctx)
   if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_ASCII) {
     return GRN_FILE_CORRUPT;
   }
+#ifdef WITH_NFKC
+  if (grn_plugin_register(ctx, "normalizers/nfkc")) {
+    ERRCLR(ctx);
+#endif
+    grn_obj_register(ctx, grn_ctx_db(ctx), "NormalizerUTF8NFKC", 18);
+#ifdef WITH_NFKC
+  }
+#endif
   obj = DEF_NORMALIZERIZER("NormalizerEUCJP", eucjp_normalize);
   if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_EUC_JP) {
     return GRN_FILE_CORRUPT;
@@ -1139,14 +1147,6 @@ grn_db_init_builtin_normalizers(grn_ctx *ctx)
   if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_KOI8R) {
     return GRN_FILE_CORRUPT;
   }
-#ifdef WITH_NFKC
-  if (grn_plugin_register(ctx, "normalizers/nfkc")) {
-    ERRCLR(ctx);
-#endif
-    grn_obj_register(ctx, grn_ctx_db(ctx), "NormalizerNFKC51", 16);
-#ifdef WITH_NFKC
-  }
-#endif
   /* obj = DEF_NORMALIZERIZER("NormalizerUTF8UCA", utf8_uca_normalize); */
   /* if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_UTF8_UCA) { */
   /*   return GRN_FILE_CORRUPT; */

  Modified: plugins/normalizers/Makefile.am (+2 -5)
===================================================================
--- plugins/normalizers/Makefile.am    2012-02-13 21:15:49 +0900 (9113882)
+++ plugins/normalizers/Makefile.am    2012-02-14 13:47:45 +0900 (7652acc)
@@ -20,8 +20,5 @@ if WITH_NFKC
 normalizer_plugins_LTLIBRARIES += nfkc.la
 endif
 
-nfkc_la_SOURCES =				\
-	nfkc.c					\
-	nfkc-unicode-5.1.c
-noinst_HEADERS =				\
-	nfkc-unicode-5.1.h
+nfkc_la_SOURCES = nfkc.c nfkc-core.c
+noinst_HEADERS = nfkc.h

  Renamed: plugins/normalizers/nfkc-core.c (+5 -5) 99%
===================================================================
--- plugins/normalizers/nfkc-unicode-5.1.c    2012-02-13 21:15:49 +0900 (3d08aae)
+++ plugins/normalizers/nfkc-core.c    2012-02-14 13:47:45 +0900 (18092d9)
@@ -1,5 +1,5 @@
 /* -*- c-basic-offset: 2 -*- */
-/* Copyright(C) 2010-2012 Brazil
+/* Copyright(C) 2010 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -17,10 +17,10 @@
 don't edit this file by hand. it generated automatically by nfkc.rb
 */
 
-#include "nfkc-unicode-5.1.h"
+#include "nfkc.h"
 
 unsigned char
-grn_nfkc_unicode_51_ctype(const unsigned char *str)
+grn_nfkc_ctype(const unsigned char *str)
 {
 switch (str[0]) {
 case 0x01 :
@@ -9687,7 +9687,7 @@ default :
 }
 
 const char *
-grn_nfkc_unicode_51_map1(const unsigned char *str)
+grn_nfkc_map1(const unsigned char *str)
 {
 switch (str[0]) {
 case 0x41 :
@@ -24472,7 +24472,7 @@ case 0xF0 :
 }
 
 const char *
-grn_nfkc_unicode_51_map2(const unsigned char *prefix, const unsigned char *suffix)
+grn_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix)
 {
 switch (suffix[0]) {
 case 0xCC :

  Modified: plugins/normalizers/nfkc.c (+11 -11)
===================================================================
--- plugins/normalizers/nfkc.c    2012-02-13 21:15:49 +0900 (5dc5136)
+++ plugins/normalizers/nfkc.c    2012-02-14 13:47:45 +0900 (cc6ff51)
@@ -20,7 +20,7 @@
 #include <string.h>
 
 #include <groonga/normalizer.h>
-#include "nfkc-unicode-5.1.h"
+#include "nfkc.h"
 
 static grn_obj *
 utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
@@ -45,7 +45,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
   if (!(norm = GRN_PLUGIN_MALLOC(ctx, ds + 1))) {
     GRN_PLUGIN_ERROR(ctx,
                      GRN_NO_MEMORY_AVAILABLE,
-                     "[normalizer][nfkc][unicode5.1] "
+                     "[normalizer][utf8][nfkc] "
                      "failed to allocate normalized text space");
     return NULL;
   }
@@ -54,7 +54,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
       GRN_PLUGIN_FREE(ctx, norm);
       GRN_PLUGIN_ERROR(ctx,
                        GRN_NO_MEMORY_AVAILABLE,
-                       "[normalizer][nfkc][unicode5.1] "
+                       "[normalizer][utf8][nfkc] "
                        "failed to allocate checks space");
       return NULL;
     }
@@ -66,7 +66,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
       GRN_PLUGIN_FREE(ctx, norm);
       GRN_PLUGIN_ERROR(ctx,
                        GRN_NO_MEMORY_AVAILABLE,
-                       "[normalizer][nfkc][unicode5.1] "
+                       "[normalizer][utf8][nfkc] "
                        "failed to allocate character types space");
       return NULL;
     }
@@ -80,13 +80,13 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
     if (!(ls = grn_charlen_utf8(ctx, s, e))) {
       break;
     }
-    if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) {
+    if ((p = (unsigned char *)grn_nfkc_map1(s))) {
       pe = p + strlen((char *)p);
     } else {
       p = s;
       pe = p + ls;
     }
-    if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) {
+    if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) {
       p = p2;
       pe = p + strlen((char *)p);
       if (cp) { cp--; }
@@ -113,7 +113,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
             GRN_PLUGIN_FREE(ctx, norm);
             GRN_PLUGIN_ERROR(ctx,
                              GRN_NO_MEMORY_AVAILABLE,
-                             "[normalizer][nfkc][unicode5.1] "
+                             "[normalizer][utf8][nfkc] "
                              "failed to reallocate normalized text space");
             return NULL;
           }
@@ -129,7 +129,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
               GRN_PLUGIN_FREE(ctx, norm);
               GRN_PLUGIN_ERROR(ctx,
                                GRN_NO_MEMORY_AVAILABLE,
-                               "[normalizer][nfkc][unicode5.1] "
+                               "[normalizer][utf8][nfkc] "
                                "failed to reallocate checks space");
               return NULL;
             }
@@ -144,7 +144,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
               GRN_PLUGIN_FREE(ctx, norm);
               GRN_PLUGIN_ERROR(ctx,
                                GRN_NO_MEMORY_AVAILABLE,
-                               "[normalizer][nfkc][unicode5.1] "
+                               "[normalizer][utf8][nfkc] "
                                "failed to reallocate character types space");
               return NULL;
             }
@@ -156,7 +156,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
         d_ = d;
         d += lp;
         length++;
-        if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); }
+        if (cp) { *cp++ = grn_nfkc_ctype(p); }
         if (ch) {
           size_t i;
           if (s_ == s + ls) {
@@ -192,7 +192,7 @@ GRN_PLUGIN_REGISTER(grn_ctx *ctx)
 {
   grn_obj *normalizer;
 
-  normalizer = GRN_NORMALIZER_REGISTER(ctx, "NormalizerNFKC51",
+  normalizer = GRN_NORMALIZER_REGISTER(ctx, "NormalizerUTF8NFKC",
                                        NULL, utf8_nfkc_normalize, NULL);
   if (normalizer) {
     return GRN_SUCCESS;

  Renamed: plugins/normalizers/nfkc.h (+4 -5) 74%
===================================================================
--- plugins/normalizers/nfkc-unicode-5.1.h    2012-02-13 21:15:49 +0900 (8eb6bd0)
+++ plugins/normalizers/nfkc.h    2012-02-14 13:47:45 +0900 (077391d)
@@ -1,5 +1,5 @@
 /* -*- c-basic-offset: 2 -*- */
-/* Copyright(C) 2009-2012 Brazil
+/* Copyright(C) 2009 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -23,10 +23,9 @@
 extern "C" {
 #endif
 
-unsigned char grn_nfkc_unicode_51_ctype(const unsigned char *str);
-const char *grn_nfkc_unicode_51_map1(const unsigned char *str);
-const char *grn_nfkc_unicode_51_map2(const unsigned char *prefix,
-                                     const unsigned char *suffix);
+unsigned char grn_nfkc_ctype(const unsigned char *str);
+const char *grn_nfkc_map1(const unsigned char *str);
+const char *grn_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix);
 
 #ifdef __cplusplus
 }

  Modified: test/benchmark/bench-normalize.c (+16 -16)
===================================================================
--- test/benchmark/bench-normalize.c    2012-02-13 21:15:49 +0900 (97e0fbd)
+++ test/benchmark/bench-normalize.c    2012-02-14 13:47:45 +0900 (5b989d8)
@@ -44,11 +44,11 @@
 #include <groonga.h>
 #include <groonga_in.h>
 
-#define grn_nfkc_unicode_51_ctype bundle_grn_nfkc_ctype
-#define grn_nfkc_unicode_51_map1 bundle_grn_nfkc_map1
-#define grn_nfkc_unicode_51_map2 bundle_grn_nfkc_map2
+#define grn_nfkc_ctype bundle_grn_nfkc_ctype
+#define grn_nfkc_map1 bundle_grn_nfkc_map1
+#define grn_nfkc_map2 bundle_grn_nfkc_map2
 
-#include "plugins/normalizers/nfkc-unicode-5.1.c"
+#include "plugins/normalizers/nfkc-core.c"
 
 #define GRN_STR_REMOVEBLANK  (0x01<<0)
 #define GRN_STR_WITH_TYPES   (0x01<<1)
@@ -110,13 +110,13 @@ utf8_nfkc_normalize_original(grn_ctx *ctx, grn_str *nstr)
     if (!(ls = grn_charlen_utf8(ctx, s, e))) {
       break;
     }
-    if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) {
+    if ((p = (unsigned char *)grn_nfkc_map1(s))) {
       pe = p + strlen((char *)p);
     } else {
       p = s;
       pe = p + ls;
     }
-    if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) {
+    if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) {
       p = p2;
       pe = p + strlen((char *)p);
       if (cp) { cp--; }
@@ -182,7 +182,7 @@ utf8_nfkc_normalize_original(grn_ctx *ctx, grn_str *nstr)
         d_ = d;
         d += lp;
         length++;
-        if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); }
+        if (cp) { *cp++ = grn_nfkc_ctype(p); }
         if (ch) {
           size_t i;
           if (s_ == s + ls) {
@@ -247,13 +247,13 @@ utf8_nfkc_normalize_short(grn_ctx *ctx, grn_str *nstr)
     if (!(ls = grn_charlen_utf8(ctx, s, e))) {
       break;
     }
-    if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) {
+    if ((p = (unsigned char *)grn_nfkc_map1(s))) {
       pe = p + strlen((char *)p);
     } else {
       p = s;
       pe = p + ls;
     }
-    if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) {
+    if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) {
       p = p2;
       pe = p + strlen((char *)p);
       if (cp) { cp--; }
@@ -319,7 +319,7 @@ utf8_nfkc_normalize_short(grn_ctx *ctx, grn_str *nstr)
         d_ = d;
         d += lp;
         length++;
-        if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); }
+        if (cp) { *cp++ = grn_nfkc_ctype(p); }
         if (ch) {
           size_t i;
           if (s_ == s + ls) {
@@ -384,13 +384,13 @@ utf8_nfkc_normalize_unsigned_char(grn_ctx *ctx, grn_str *nstr)
     if (!(ls = grn_charlen_utf8(ctx, s, e))) {
       break;
     }
-    if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) {
+    if ((p = (unsigned char *)grn_nfkc_map1(s))) {
       pe = p + strlen((char *)p);
     } else {
       p = s;
       pe = p + ls;
     }
-    if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) {
+    if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) {
       p = p2;
       pe = p + strlen((char *)p);
       if (cp) { cp--; }
@@ -456,7 +456,7 @@ utf8_nfkc_normalize_unsigned_char(grn_ctx *ctx, grn_str *nstr)
         d_ = d;
         d += lp;
         length++;
-        if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); }
+        if (cp) { *cp++ = grn_nfkc_ctype(p); }
         if (ch) {
           size_t i;
           if (s_ == s + ls) {
@@ -521,13 +521,13 @@ utf8_nfkc_normalize_local(grn_ctx *ctx, grn_str *nstr)
     if (!(ls = grn_charlen_utf8(ctx, s, e))) {
       break;
     }
-    if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) {
+    if ((p = (unsigned char *)grn_nfkc_map1(s))) {
       pe = p + strlen((char *)p);
     } else {
       p = s;
       pe = p + ls;
     }
-    if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) {
+    if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) {
       p = p2;
       pe = p + strlen((char *)p);
       if (cp) { cp--; }
@@ -593,7 +593,7 @@ utf8_nfkc_normalize_local(grn_ctx *ctx, grn_str *nstr)
         d_ = d;
         d += lp;
         length++;
-        if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); }
+        if (cp) { *cp++ = grn_nfkc_ctype(p); }
         if (ch) {
           size_t i;
           if (s_ == s + ls) {

  Modified: test/unit/core/test-command-dump.c (+6 -6)
===================================================================
--- test/unit/core/test-command-dump.c    2012-02-13 21:15:49 +0900 (04bb916)
+++ test/unit/core/test-command-dump.c    2012-02-14 13:47:45 +0900 (9d2a2b4)
@@ -160,14 +160,14 @@ data_hash_table_create(void)
            NULL);
   ADD_DATA("hash - key normalize",
            "table_create Blog TABLE_HASH_KEY ShortText "
-           "--normalizer NormalizerNFKC51",
+           "--normalizer NormalizerUTF8NFKC",
            "Blog",
            GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_KEY_NORMALIZE,
            "ShortText",
            NULL);
   ADD_DATA("hash - key normalize - value",
            "table_create Blog TABLE_HASH_KEY ShortText Int32 "
-           "--normalizer NormalizerNFKC51",
+           "--normalizer NormalizerUTF8NFKC",
            "Blog",
            GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_KEY_NORMALIZE,
            "ShortText",
@@ -191,14 +191,14 @@ data_patricia_trie_create(void)
            NULL);
   ADD_DATA("patricia trie - key normalize",
            "table_create Blog TABLE_PAT_KEY ShortText "
-           "--normalizer NormalizerNFKC51",
+           "--normalizer NormalizerUTF8NFKC",
            "Blog",
            GRN_OBJ_TABLE_PAT_KEY | GRN_OBJ_KEY_NORMALIZE,
            "ShortText",
            NULL);
   ADD_DATA("patricia trie - key normalize - value",
            "table_create Blog TABLE_PAT_KEY ShortText Int32 "
-           "--normalizer NormalizerNFKC51",
+           "--normalizer NormalizerUTF8NFKC",
            "Blog",
            GRN_OBJ_TABLE_PAT_KEY | GRN_OBJ_KEY_NORMALIZE,
            "ShortText",
@@ -222,14 +222,14 @@ data_double_array_trie_create(void)
            NULL);
   ADD_DATA("double-array trie - key normalize",
            "table_create Blog TABLE_DAT_KEY ShortText "
-           "--normalizer NormalizerNFKC51",
+           "--normalizer NormalizerUTF8NFKC",
            "Blog",
            GRN_OBJ_TABLE_DAT_KEY | GRN_OBJ_KEY_NORMALIZE,
            "ShortText",
            NULL);
   ADD_DATA("double-array trie - key normalize - value",
            "table_create Blog TABLE_DAT_KEY ShortText Int32 "
-           "--normalizer NormalizerNFKC51",
+           "--normalizer NormalizerUTF8NFKC",
            "Blog",
            GRN_OBJ_TABLE_DAT_KEY | GRN_OBJ_KEY_NORMALIZE,
            "ShortText",




Groonga-commit メーリングリストの案内
Back to archive index