Kouhei Sutou
null+****@clear*****
Fri Feb 12 00:13:22 JST 2016
Kouhei Sutou 2016-02-12 00:13:22 +0900 (Fri, 12 Feb 2016) New Revision: 72a261b0bb0722a9fe3af57751f496522486e9e8 https://github.com/pgroonga/pgroonga/commit/72a261b0bb0722a9fe3af57751f496522486e9e8 Message: Support multibyte column name in UTF-8 Added files: expected/column-name/japanese.out sql/column-name/japanese.sql src/pgrn_column_name.c src/pgrn_column_name.h Modified files: Makefile src/pgrn_create.c src/pgrn_groonga.c src/pgroonga.c Modified: Makefile (+1 -0) =================================================================== --- Makefile 2016-02-09 17:29:55 +0900 (2fea1cf) +++ Makefile 2016-02-12 00:13:22 +0900 (766b743) @@ -4,6 +4,7 @@ GROONGA_PKG = "groonga >= $(REQUIRED_GROONGA_VERSION)" MODULE_big = pgroonga SRCS = \ src/pgroonga.c \ + src/pgrn_column_name.c \ src/pgrn_convert.c \ src/pgrn_create.c \ src/pgrn_global.c \ Added: expected/column-name/japanese.out (+20 -0) 100644 =================================================================== --- /dev/null +++ expected/column-name/japanese.out 2016-02-12 00:13:22 +0900 (1493323) @@ -0,0 +1,20 @@ +CREATE TABLE メモ ( + id integer, + コンテンツ text +); +INSERT INTO メモ VALUES (1, 'PostgreSQLはRDBMSです。'); +INSERT INTO メモ VALUES (2, 'Groongaは高速な全文検索エンジンです。'); +INSERT INTO メモ VALUES (3, 'PGroongaはGroongaを使うPostgreSQLの拡張機能です。'); +CREATE INDEX 全文検索索引 ON メモ USING pgroonga (コンテンツ); +SET enable_seqscan = off; +SET enable_indexscan = on; +SET enable_bitmapscan = off; +SELECT id, コンテンツ + FROM メモ + WHERE コンテンツ %% '全文検索'; + id | コンテンツ +----+--------------------------------------- + 2 | Groongaは高速な全文検索エンジンです。 +(1 row) + +DROP TABLE メモ; Added: sql/column-name/japanese.sql (+20 -0) 100644 =================================================================== --- /dev/null +++ sql/column-name/japanese.sql 2016-02-12 00:13:22 +0900 (f522fb8) @@ -0,0 +1,20 @@ +CREATE TABLE メモ ( + id integer, + コンテンツ text +); + +INSERT INTO メモ VALUES (1, 'PostgreSQLはRDBMSです。'); +INSERT INTO メモ VALUES (2, 'Groongaは高速な全文検索エンジンです。'); +INSERT INTO メモ 
VALUES (3, 'PGroongaはGroongaを使うPostgreSQLの拡張機能です。'); + +CREATE INDEX 全文検索索引 ON メモ USING pgroonga (コンテンツ); + +SET enable_seqscan = off; +SET enable_indexscan = on; +SET enable_bitmapscan = off; + +SELECT id, コンテンツ + FROM メモ + WHERE コンテンツ %% '全文検索'; + +DROP TABLE メモ; Added: src/pgrn_column_name.c (+135 -0) 100644 =================================================================== --- /dev/null +++ src/pgrn_column_name.c 2016-02-12 00:13:22 +0900 (07c6c70) @@ -0,0 +1,135 @@ +#include "pgroonga.h" + +#include "pgrn_column_name.h" + +#include <groonga.h> + +#include <postgres.h> +#include <mb/pg_wchar.h> + +static const char *ENCODED_CHARACTER_FORMAT = "@%05x"; +static const int ENCODED_CHARACTER_LENGTH = 5; + +static bool +PGrnColumnNameIsUsableCharacterASCII(char character) +{ + return (character == '_' || + ('0' <= character && character <= '9') || + ('A' <= character && character <= 'Z') || + ('a' <= character && character <= 'z')); +} + +static void +PGrnColumnNameEncodeCharacterUTF8(const char *utf8Character, char *encodedName) +{ + pg_wchar codepoint; + codepoint = utf8_to_unicode((const unsigned char *)utf8Character); + snprintf(encodedName, + ENCODED_CHARACTER_LENGTH + 1, + ENCODED_CHARACTER_FORMAT, + codepoint); +} + +static void +checkSize(size_t size) +{ + if (size >= GRN_TABLE_MAX_KEY_SIZE) + ereport(ERROR, + (errcode(ERRCODE_NAME_TOO_LONG), + errmsg("pgroonga: encoded column name >= %d", + GRN_TABLE_MAX_KEY_SIZE))); +} + +static size_t +PGrnColumnNameEncodeUTF8(const char *name, char *encodedName) +{ + const char *current; + char *encodedCurrent; + size_t encodedNameSize = 0; + + current = name; + encodedCurrent = encodedName; + while (*current != '\0') + { + int length; + + length = pg_mblen(current); + + if (length == 1 && + PGrnColumnNameIsUsableCharacterASCII(*current) && + !(*current == '_' && current == name)) + { + checkSize(encodedNameSize + length + 1); + *encodedCurrent++ = *current; + encodedNameSize++; + } + else + { + checkSize(encodedNameSize 
+ ENCODED_CHARACTER_LENGTH + 1); + PGrnColumnNameEncodeCharacterUTF8(current, encodedCurrent); + encodedCurrent += ENCODED_CHARACTER_LENGTH; + encodedNameSize += ENCODED_CHARACTER_LENGTH; + } + + current += length; + } + + *encodedCurrent = '\0'; + + return encodedNameSize; +} + +size_t +PGrnColumnNameEncode(const char *name, char *encodedName) +{ + const char *current; + char *encodedCurrent; + size_t encodedNameSize = 0; + + if (GetDatabaseEncoding() == PG_UTF8) + return PGrnColumnNameEncodeUTF8(name, encodedName); + + current = name; + encodedCurrent = encodedName; + while (*current != '\0') + { + int length; + + length = pg_mblen(current); + if (length != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("pgroonga: multibyte character isn't supported " + "for column name except UTF-8 encoding: <%s>(%s)", + name, + GetDatabaseEncodingName()))); + + if (PGrnColumnNameIsUsableCharacterASCII(*current) && + !(*current == '_' && current == name)) + { + checkSize(encodedNameSize + length + 1); + *encodedCurrent++ = *current; + encodedNameSize++; + } + else + { + checkSize(encodedNameSize + ENCODED_CHARACTER_LENGTH + 1); + PGrnColumnNameEncodeCharacterUTF8(current, encodedCurrent); + encodedCurrent += ENCODED_CHARACTER_LENGTH; + encodedNameSize += ENCODED_CHARACTER_LENGTH; + } + + current++; + } + + *encodedCurrent = '\0'; + + return encodedNameSize; +} + +size_t +PGrnColumnNameDecode(const char *encodedName, char *name) +{ + /* TODO */ + return 0; +} Added: src/pgrn_column_name.h (+4 -0) 100644 =================================================================== --- /dev/null +++ src/pgrn_column_name.h 2016-02-12 00:13:22 +0900 (3001024) @@ -0,0 +1,4 @@ +#pragma once + +size_t PGrnColumnNameEncode(const char *name, char *encodedName); +size_t PGrnColumnNameDecode(const char *encodedName, char *name); Modified: src/pgrn_create.c (+10 -4) =================================================================== --- src/pgrn_create.c 2016-02-09 17:29:55 
+0900 (b84a796) +++ src/pgrn_create.c 2016-02-12 00:13:22 +0900 (1de8de3) @@ -1,5 +1,6 @@ #include "pgroonga.h" +#include "pgrn_column_name.h" #include "pgrn_create.h" #include "pgrn_global.h" #include "pgrn_groonga.h" @@ -57,10 +58,15 @@ PGrnCreateDataColumn(PGrnCreateData *data) } } - PGrnCreateColumn(data->sourcesTable, - data->desc->attrs[data->i]->attname.data, - flags, - grn_ctx_at(ctx, data->attributeTypeID)); + { + char columnName[GRN_TABLE_MAX_KEY_SIZE]; + PGrnColumnNameEncode(data->desc->attrs[data->i]->attname.data, + columnName); + PGrnCreateColumn(data->sourcesTable, + columnName, + flags, + grn_ctx_at(ctx, data->attributeTypeID)); + } } void Modified: src/pgrn_groonga.c (+5 -1) =================================================================== --- src/pgrn_groonga.c 2016-02-09 17:29:55 +0900 (c5d7788) +++ src/pgrn_groonga.c 2016-02-12 00:13:22 +0900 (cc56e90) @@ -1,5 +1,6 @@ #include "pgroonga.h" +#include "pgrn_column_name.h" #include "pgrn_global.h" #include "pgrn_groonga.h" @@ -80,9 +81,12 @@ PGrnLookup(const char *name, int errorLevel) grn_obj * PGrnLookupColumn(grn_obj *table, const char *name, int errorLevel) { + char columnName[GRN_TABLE_MAX_KEY_SIZE]; + size_t columnNameSize; grn_obj *column; - column = grn_obj_column(ctx, table, name, strlen(name)); + columnNameSize = PGrnColumnNameEncode(name, columnName); + column = grn_obj_column(ctx, table, columnName, columnNameSize); if (!column) { char tableName[GRN_TABLE_MAX_KEY_SIZE]; Modified: src/pgroonga.c (+2 -2) =================================================================== --- src/pgroonga.c 2016-02-09 17:29:55 +0900 (67d19a1) +++ src/pgroonga.c 2016-02-12 00:13:22 +0900 (e754c77) @@ -1610,8 +1610,8 @@ PGrnInsert(Relation index, if (isnull[i]) continue; - dataColumn = grn_obj_column(ctx, sourcesTable, - name->data, strlen(name->data)); + dataColumn = PGrnLookupColumn(sourcesTable, name->data, ERROR); + if (PGrnAttributeIsJSONB(attribute->atttypid)) { PGrnJSONBInsert(index, values, i, 
&(buffers->general)); -------------- next part -------------- HTMLの添付ファイルを保管しました... Download