null+****@clear*****
null+****@clear*****
2012年 2月 14日 (火) 13:47:45 JST
Kouhei Sutou 2012-02-14 13:47:45 +0900 (Tue, 14 Feb 2012) New Revision: 046eb60ae9aba8734799aaa19447b36ec88960d6 Log: Revert "[normalizer][nfkc] NormalizerUTF8NFKC -> NormalizerNFKC51." This reverts commit 730d953118097be6b05bffc286bce19a2df50685. Modified files: include/groonga.h lib/normalizer.c plugins/normalizers/Makefile.am plugins/normalizers/nfkc.c test/benchmark/bench-normalize.c test/unit/core/test-command-dump.c Renamed files: plugins/normalizers/nfkc-core.c (from plugins/normalizers/nfkc-unicode-5.1.c) plugins/normalizers/nfkc.h (from plugins/normalizers/nfkc-unicode-5.1.h) Modified: include/groonga.h (+1 -1) =================================================================== --- include/groonga.h 2012-02-13 21:15:49 +0900 (750e37e) +++ include/groonga.h 2012-02-14 13:47:45 +0900 (17a0b28) @@ -593,11 +593,11 @@ typedef enum { typedef enum { GRN_DB_NORMALIZER_ASCII = 96, + GRN_DB_NORMALIZER_UTF8_NFKC, /* Normalization Form KC */ GRN_DB_NORMALIZER_EUC_JP, GRN_DB_NORMALIZER_SJIS, GRN_DB_NORMALIZER_LATIN1, GRN_DB_NORMALIZER_KOI8R, - GRN_DB_NORMALIZER_NFKC51, /* Normalization Form KC for Unicode 5.1 */ GRN_DB_NORMALIZER_UTF8_UCA /* Unicode Collation Algorithm */ } grn_builtin_normalizer; Modified: lib/normalizer.c (+9 -9) =================================================================== --- lib/normalizer.c 2012-02-13 21:15:49 +0900 (dbd140a) +++ lib/normalizer.c 2012-02-14 13:47:45 +0900 (3462498) @@ -32,7 +32,7 @@ grn_normalizer_find(grn_ctx *ctx, grn_encoding encoding) break; case GRN_ENC_UTF8 : #ifdef WITH_NFKC - normalizer_id = GRN_DB_NORMALIZER_NFKC51; + normalizer_id = GRN_DB_NORMALIZER_UTF8_NFKC; #else /* WITH_NFKC */ normalizer_id = GRN_DB_NORMALIZER_ASCII; #endif /* WITH_NFKC */ @@ -1123,6 +1123,14 @@ grn_db_init_builtin_normalizers(grn_ctx *ctx) if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_ASCII) { return GRN_FILE_CORRUPT; } +#ifdef WITH_NFKC + if (grn_plugin_register(ctx, "normalizers/nfkc")) { + ERRCLR(ctx); +#endif + grn_obj_register(ctx, grn_ctx_db(ctx), "NormalizerUTF8NFKC", 18); +#ifdef WITH_NFKC + } +#endif obj = DEF_NORMALIZERIZER("NormalizerEUCJP", eucjp_normalize); if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_EUC_JP) { return GRN_FILE_CORRUPT; @@ -1139,14 +1147,6 @@ grn_db_init_builtin_normalizers(grn_ctx *ctx) if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_KOI8R) { return GRN_FILE_CORRUPT; } -#ifdef WITH_NFKC - if (grn_plugin_register(ctx, "normalizers/nfkc")) { - ERRCLR(ctx); -#endif - grn_obj_register(ctx, grn_ctx_db(ctx), "NormalizerNFKC51", 16); -#ifdef WITH_NFKC - } -#endif /* obj = DEF_NORMALIZERIZER("NormalizerUTF8UCA", utf8_uca_normalize); */ /* if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_UTF8_UCA) { */ /* return GRN_FILE_CORRUPT; */ Modified: plugins/normalizers/Makefile.am (+2 -5) =================================================================== --- plugins/normalizers/Makefile.am 2012-02-13 21:15:49 +0900 (9113882) +++ plugins/normalizers/Makefile.am 2012-02-14 13:47:45 +0900 (7652acc) @@ -20,8 +20,5 @@ if WITH_NFKC normalizer_plugins_LTLIBRARIES += nfkc.la endif -nfkc_la_SOURCES = \ - nfkc.c \ - nfkc-unicode-5.1.c -noinst_HEADERS = \ - nfkc-unicode-5.1.h +nfkc_la_SOURCES = nfkc.c nfkc-core.c +noinst_HEADERS = nfkc.h Renamed: plugins/normalizers/nfkc-core.c (+5 -5) 99% =================================================================== --- plugins/normalizers/nfkc-unicode-5.1.c 2012-02-13 21:15:49 +0900 (3d08aae) +++ plugins/normalizers/nfkc-core.c 2012-02-14 13:47:45 +0900 (18092d9) @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2010-2012 Brazil +/* Copyright(C) 2010 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -17,10 +17,10 @@ don't edit this file by hand. it generated automatically by nfkc.rb */ -#include "nfkc-unicode-5.1.h" +#include "nfkc.h" unsigned char -grn_nfkc_unicode_51_ctype(const unsigned char *str) +grn_nfkc_ctype(const unsigned char *str) { switch (str[0]) { case 0x01 : @@ -9687,7 +9687,7 @@ default : } const char * -grn_nfkc_unicode_51_map1(const unsigned char *str) +grn_nfkc_map1(const unsigned char *str) { switch (str[0]) { case 0x41 : @@ -24472,7 +24472,7 @@ case 0xF0 : } const char * -grn_nfkc_unicode_51_map2(const unsigned char *prefix, const unsigned char *suffix) +grn_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix) { switch (suffix[0]) { case 0xCC : Modified: plugins/normalizers/nfkc.c (+11 -11) =================================================================== --- plugins/normalizers/nfkc.c 2012-02-13 21:15:49 +0900 (5dc5136) +++ plugins/normalizers/nfkc.c 2012-02-14 13:47:45 +0900 (cc6ff51) @@ -20,7 +20,7 @@ #include <string.h> #include <groonga/normalizer.h> -#include "nfkc-unicode-5.1.h" +#include "nfkc.h" static grn_obj * utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, @@ -45,7 +45,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, if (!(norm = GRN_PLUGIN_MALLOC(ctx, ds + 1))) { GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, - "[normalizer][nfkc][unicode5.1] " + "[normalizer][utf8][nfkc] " "failed to allocate normalized text space"); return NULL; } @@ -54,7 +54,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, GRN_PLUGIN_FREE(ctx, norm); GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, - "[normalizer][nfkc][unicode5.1] " + "[normalizer][utf8][nfkc] " "failed to allocate checks space"); return NULL; } @@ -66,7 +66,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, GRN_PLUGIN_FREE(ctx, norm); GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, - "[normalizer][nfkc][unicode5.1] " + "[normalizer][utf8][nfkc] " "failed to allocate character types space"); return NULL; } @@ -80,13 +80,13 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, if (!(ls = grn_charlen_utf8(ctx, s, e))) { break; } - if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) { + if ((p = (unsigned char *)grn_nfkc_map1(s))) { pe = p + strlen((char *)p); } else { p = s; pe = p + ls; } - if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) { + if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) { p = p2; pe = p + strlen((char *)p); if (cp) { cp--; } @@ -113,7 +113,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, GRN_PLUGIN_FREE(ctx, norm); GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, - "[normalizer][nfkc][unicode5.1] " + "[normalizer][utf8][nfkc] " "failed to reallocate normalized text space"); return NULL; } @@ -129,7 +129,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, GRN_PLUGIN_FREE(ctx, norm); GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, - "[normalizer][nfkc][unicode5.1] " + "[normalizer][utf8][nfkc] " "failed to reallocate checks space"); return NULL; } @@ -144,7 +144,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, GRN_PLUGIN_FREE(ctx, norm); GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, - "[normalizer][nfkc][unicode5.1] " + "[normalizer][utf8][nfkc] " "failed to reallocate character types space"); return NULL; } @@ -156,7 +156,7 @@ utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, d_ = d; d += lp; length++; - if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); } + if (cp) { *cp++ = grn_nfkc_ctype(p); } if (ch) { size_t i; if (s_ == s + ls) { @@ -192,7 +192,7 @@ GRN_PLUGIN_REGISTER(grn_ctx *ctx) { grn_obj *normalizer; - normalizer = GRN_NORMALIZER_REGISTER(ctx, "NormalizerNFKC51", + normalizer = GRN_NORMALIZER_REGISTER(ctx, "NormalizerUTF8NFKC", NULL, utf8_nfkc_normalize, NULL); if (normalizer) { return GRN_SUCCESS; Renamed: plugins/normalizers/nfkc.h (+4 -5) 74% =================================================================== --- plugins/normalizers/nfkc-unicode-5.1.h 2012-02-13 21:15:49 +0900 (8eb6bd0) +++ plugins/normalizers/nfkc.h 2012-02-14 13:47:45 +0900 (077391d) @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009-2012 Brazil +/* Copyright(C) 2009 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -23,10 +23,9 @@ extern "C" { #endif -unsigned char grn_nfkc_unicode_51_ctype(const unsigned char *str); -const char *grn_nfkc_unicode_51_map1(const unsigned char *str); -const char *grn_nfkc_unicode_51_map2(const unsigned char *prefix, - const unsigned char *suffix); +unsigned char grn_nfkc_ctype(const unsigned char *str); +const char *grn_nfkc_map1(const unsigned char *str); +const char *grn_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix); #ifdef __cplusplus } Modified: test/benchmark/bench-normalize.c (+16 -16) =================================================================== --- test/benchmark/bench-normalize.c 2012-02-13 21:15:49 +0900 (97e0fbd) +++ test/benchmark/bench-normalize.c 2012-02-14 13:47:45 +0900 (5b989d8) @@ -44,11 +44,11 @@ #include <groonga.h> #include <groonga_in.h> -#define grn_nfkc_unicode_51_ctype bundle_grn_nfkc_ctype -#define grn_nfkc_unicode_51_map1 bundle_grn_nfkc_map1 -#define grn_nfkc_unicode_51_map2 bundle_grn_nfkc_map2 +#define grn_nfkc_ctype bundle_grn_nfkc_ctype +#define grn_nfkc_map1 bundle_grn_nfkc_map1 +#define grn_nfkc_map2 bundle_grn_nfkc_map2 -#include "plugins/normalizers/nfkc-unicode-5.1.c" +#include "plugins/normalizers/nfkc-core.c" #define GRN_STR_REMOVEBLANK (0x01<<0) #define GRN_STR_WITH_TYPES (0x01<<1) @@ -110,13 +110,13 @@ utf8_nfkc_normalize_original(grn_ctx *ctx, grn_str *nstr) if (!(ls = grn_charlen_utf8(ctx, s, e))) { break; } - if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) { + if ((p = (unsigned char *)grn_nfkc_map1(s))) { pe = p + strlen((char *)p); } else { p = s; pe = p + ls; } - if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) { + if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) { p = p2; pe = p + strlen((char *)p); if (cp) { cp--; } @@ -182,7 +182,7 @@ utf8_nfkc_normalize_original(grn_ctx *ctx, grn_str *nstr) d_ = d; d += lp; length++; - if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); } + if (cp) { *cp++ = grn_nfkc_ctype(p); } if (ch) { size_t i; if (s_ == s + ls) { @@ -247,13 +247,13 @@ utf8_nfkc_normalize_short(grn_ctx *ctx, grn_str *nstr) if (!(ls = grn_charlen_utf8(ctx, s, e))) { break; } - if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) { + if ((p = (unsigned char *)grn_nfkc_map1(s))) { pe = p + strlen((char *)p); } else { p = s; pe = p + ls; } - if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) { + if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) { p = p2; pe = p + strlen((char *)p); if (cp) { cp--; } @@ -319,7 +319,7 @@ utf8_nfkc_normalize_short(grn_ctx *ctx, grn_str *nstr) d_ = d; d += lp; length++; - if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); } + if (cp) { *cp++ = grn_nfkc_ctype(p); } if (ch) { size_t i; if (s_ == s + ls) { @@ -384,13 +384,13 @@ utf8_nfkc_normalize_unsigned_char(grn_ctx *ctx, grn_str *nstr) if (!(ls = grn_charlen_utf8(ctx, s, e))) { break; } - if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) { + if ((p = (unsigned char *)grn_nfkc_map1(s))) { pe = p + strlen((char *)p); } else { p = s; pe = p + ls; } - if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) { + if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) { p = p2; pe = p + strlen((char *)p); if (cp) { cp--; } @@ -456,7 +456,7 @@ utf8_nfkc_normalize_unsigned_char(grn_ctx *ctx, grn_str *nstr) d_ = d; d += lp; length++; - if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); } + if (cp) { *cp++ = grn_nfkc_ctype(p); } if (ch) { size_t i; if (s_ == s + ls) { @@ -521,13 +521,13 @@ utf8_nfkc_normalize_local(grn_ctx *ctx, grn_str *nstr) if (!(ls = grn_charlen_utf8(ctx, s, e))) { break; } - if ((p = (unsigned char *)grn_nfkc_unicode_51_map1(s))) { + if ((p = (unsigned char *)grn_nfkc_map1(s))) { pe = p + strlen((char *)p); } else { p = s; pe = p + ls; } - if (d_ && (p2 = (unsigned char *)grn_nfkc_unicode_51_map2(d_, p))) { + if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) { p = p2; pe = p + strlen((char *)p); if (cp) { cp--; } @@ -593,7 +593,7 @@ utf8_nfkc_normalize_local(grn_ctx *ctx, grn_str *nstr) d_ = d; d += lp; length++; - if (cp) { *cp++ = grn_nfkc_unicode_51_ctype(p); } + if (cp) { *cp++ = grn_nfkc_ctype(p); } if (ch) { size_t i; if (s_ == s + ls) { Modified: test/unit/core/test-command-dump.c (+6 -6) =================================================================== --- test/unit/core/test-command-dump.c 2012-02-13 21:15:49 +0900 (04bb916) +++ test/unit/core/test-command-dump.c 2012-02-14 13:47:45 +0900 (9d2a2b4) @@ -160,14 +160,14 @@ data_hash_table_create(void) NULL); ADD_DATA("hash - key normalize", "table_create Blog TABLE_HASH_KEY ShortText " - "--normalizer NormalizerNFKC51", + "--normalizer NormalizerUTF8NFKC", "Blog", GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_KEY_NORMALIZE, "ShortText", NULL); ADD_DATA("hash - key normalize - value", "table_create Blog TABLE_HASH_KEY ShortText Int32 " - "--normalizer NormalizerNFKC51", + "--normalizer NormalizerUTF8NFKC", "Blog", GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_KEY_NORMALIZE, "ShortText", @@ -191,14 +191,14 @@ data_patricia_trie_create(void) NULL); ADD_DATA("patricia trie - key normalize", "table_create Blog TABLE_PAT_KEY ShortText " - "--normalizer NormalizerNFKC51", + "--normalizer NormalizerUTF8NFKC", "Blog", GRN_OBJ_TABLE_PAT_KEY | GRN_OBJ_KEY_NORMALIZE, "ShortText", NULL); ADD_DATA("patricia trie - key normalize - value", "table_create Blog TABLE_PAT_KEY ShortText Int32 " - "--normalizer NormalizerNFKC51", + "--normalizer NormalizerUTF8NFKC", "Blog", GRN_OBJ_TABLE_PAT_KEY | GRN_OBJ_KEY_NORMALIZE, "ShortText", @@ -222,14 +222,14 @@ data_double_array_trie_create(void) NULL); ADD_DATA("double-array trie - key normalize", "table_create Blog TABLE_DAT_KEY ShortText " - "--normalizer NormalizerNFKC51", + "--normalizer NormalizerUTF8NFKC", "Blog", GRN_OBJ_TABLE_DAT_KEY | GRN_OBJ_KEY_NORMALIZE, "ShortText", NULL); ADD_DATA("double-array trie - key normalize - value", "table_create Blog TABLE_DAT_KEY ShortText Int32 " - "--normalizer NormalizerNFKC51", + "--normalizer NormalizerUTF8NFKC", "Blog", GRN_OBJ_TABLE_DAT_KEY | GRN_OBJ_KEY_NORMALIZE, "ShortText",