Kouhei Sutou
null+****@clear*****
Tue Mar 10 17:32:19 JST 2015
Kouhei Sutou 2015-03-10 17:32:19 +0900 (Tue, 10 Mar 2015) New Revision: e8d829f7ec12c8df65bb68e96b8434d87452c5c6 https://github.com/groonga/groonga/commit/e8d829f7ec12c8df65bb68e96b8434d87452c5c6 Message: Add GRN_OP_REGEXP and grn_operator_exec_regexp() TODO: * Support executing some special patterns with inverted index. Special patterns are "\Aliteral" and "literal\z". They are beginning of string search and end of string search. "literal1.*literal2" may also be supportable. Modified files: include/groonga/groonga.h lib/operator.c test/unit/core/test-operator.c Modified: include/groonga/groonga.h (+4 -1) =================================================================== --- include/groonga/groonga.h 2015-03-10 16:50:28 +0900 (704778f) +++ include/groonga/groonga.h 2015-03-10 17:32:19 +0900 (18194d8) @@ -711,7 +711,8 @@ typedef enum { GRN_OP_TABLE_SORT, GRN_OP_TABLE_GROUP, GRN_OP_JSON_PUT, - GRN_OP_GET_MEMBER + GRN_OP_GET_MEMBER, + GRN_OP_REGEXP } grn_operator; GRN_API const char *grn_operator_to_string(grn_operator op); @@ -728,6 +729,8 @@ GRN_API grn_bool grn_operator_exec_match(grn_ctx *ctx, grn_obj *target, grn_obj *sub_text); GRN_API grn_bool grn_operator_exec_prefix(grn_ctx *ctx, grn_obj *target, grn_obj *prefix); +GRN_API grn_bool grn_operator_exec_regexp(grn_ctx *ctx, + grn_obj *target, grn_obj *pattern); struct _grn_table_group_result { grn_obj *table; Modified: lib/operator.c (+92 -1) =================================================================== --- lib/operator.c 2015-03-10 16:50:28 +0900 (65faa44) +++ lib/operator.c 2015-03-10 17:32:19 +0900 (0ac2320) @@ -23,6 +23,14 @@ #include <string.h> +#ifdef GRN_WITH_ONIGMO +# define GRN_SUPPORT_REGEXP +#endif + +#ifdef GRN_SUPPORT_REGEXP +# include <oniguruma.h> +#endif + static const char *operator_names[] = { "push", "pop", @@ -100,7 +108,8 @@ static const char *operator_names[] = { "table_sort", "table_group", "json_put", - "get_member" + "get_member", + "regexp" }; const char * @@ -630,6 +639,76 @@ 
string_have_prefix(grn_ctx *ctx, } static grn_bool +string_match_regexp(grn_ctx *ctx, + const char *target, unsigned int target_len, + const char *pattern, unsigned int pattern_len) +{ +#ifdef GRN_SUPPORT_REGEXP + OnigRegex regex; + OnigEncoding onig_encoding; + int onig_result; + OnigErrorInfo onig_error_info; + + if (ctx->encoding == GRN_ENC_NONE) { + return GRN_FALSE; + } + + switch (ctx->encoding) { + case GRN_ENC_EUC_JP : + onig_encoding = ONIG_ENCODING_EUC_JP; + break; + case GRN_ENC_UTF8 : + onig_encoding = ONIG_ENCODING_UTF8; + break; + case GRN_ENC_SJIS : + onig_encoding = ONIG_ENCODING_CP932; + break; + case GRN_ENC_LATIN1 : + onig_encoding = ONIG_ENCODING_ISO_8859_1; + break; + case GRN_ENC_KOI8R : + onig_encoding = ONIG_ENCODING_KOI8_R; + break; + default : + return GRN_FALSE; + } + + onig_result = onig_new(&regex, + pattern, + pattern + pattern_len, + ONIG_OPTION_NONE, + onig_encoding, + ONIG_SYNTAX_RUBY, + &onig_error_info); + if (onig_result != ONIG_NORMAL) { + char message[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(message, onig_result, onig_error_info); + ERR(GRN_INVALID_ARGUMENT, + "[operator][regexp] " + "failed to create regular expression object: <%.*s>: %s", + pattern_len, pattern, + message); + return GRN_FALSE; + } + + { + OnigPosition position; + position = onig_search(regex, + target, + target + target_len, + target, + target + target_len, + NULL, + ONIG_OPTION_NONE); + onig_free(regex); + return position != ONIG_MISMATCH; + } +#else + return GRN_FALSE; +#endif +} + +static grn_bool exec_text_operator(grn_ctx *ctx, grn_operator op, const char *target, @@ -646,6 +725,9 @@ exec_text_operator(grn_ctx *ctx, case GRN_OP_PREFIX : matched = string_have_prefix(ctx, target, target_len, query, query_len); break; + case GRN_OP_REGEXP : + matched = string_match_regexp(ctx, target, target_len, query, query_len); + break; default : matched = GRN_FALSE; break; @@ -819,3 +901,12 @@ grn_operator_exec_prefix(grn_ctx *ctx, grn_obj *target, grn_obj 
*prefix) matched = exec_text_operator_bulk_bulk(ctx, GRN_OP_PREFIX, target, prefix); GRN_API_RETURN(matched); } + +grn_bool +grn_operator_exec_regexp(grn_ctx *ctx, grn_obj *target, grn_obj *pattern) +{ + grn_bool matched; + GRN_API_ENTER; + matched = exec_text_operator_bulk_bulk(ctx, GRN_OP_REGEXP, target, pattern); + GRN_API_RETURN(matched); +} Modified: test/unit/core/test-operator.c (+66 -0) =================================================================== --- test/unit/core/test-operator.c 2015-03-10 16:50:28 +0900 (8adee4b) +++ test/unit/core/test-operator.c 2015-03-10 17:32:19 +0900 (016885b) @@ -16,6 +16,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "../../../config.h" + #include <groonga.h> #include <gcutter.h> @@ -57,6 +59,12 @@ void data_exec_prefix_true(void); void test_exec_prefix_true(gconstpointer data); void data_exec_prefix_false(void); void test_exec_prefix_false(gconstpointer data); +#ifdef GRN_WITH_ONIGMO +void data_exec_regexp_true(void); +void test_exec_regexp_true(gconstpointer data); +void data_exec_regexp_false(void); +void test_exec_regexp_false(gconstpointer data); +#endif static gchar *tmp_directory; @@ -576,3 +584,61 @@ test_exec_prefix_false(gconstpointer data) set_text(&rhs, "ell"); cut_assert_false(grn_operator_exec_prefix(context, &lhs, &rhs)); } + +#ifdef GRN_WITH_ONIGMO +void +data_exec_regexp_true(void) +{ +#define ADD_DATA(lhs_type, rhs_type) \ + gcut_add_datum(lhs_type " @~ " rhs_type, \ + "lhs_type", G_TYPE_STRING, lhs_type, \ + "rhs_type", G_TYPE_STRING, rhs_type, \ + NULL) + + ADD_DATA("text", "text"); + +#undef ADD_DATA +} + +void +test_exec_regexp_true(gconstpointer data) +{ + const gchar *lhs_type; + const gchar *rhs_type; + + lhs_type = gcut_data_get_string(data, "lhs_type"); + rhs_type = gcut_data_get_string(data, "rhs_type"); + + set_text(&lhs, "Hello"); + set_text(&rhs, "e.l"); + cut_assert_true(grn_operator_exec_regexp(context, &lhs, &rhs)); +} + +void 
+data_exec_regexp_false(void) +{ +#define ADD_DATA(lhs_type, rhs_type) \ + gcut_add_datum(lhs_type " @~ " rhs_type, \ + "lhs_type", G_TYPE_STRING, lhs_type, \ + "rhs_type", G_TYPE_STRING, rhs_type, \ + NULL) + + ADD_DATA("text", "text"); + +#undef ADD_DATA +} + +void +test_exec_regexp_false(gconstpointer data) +{ + const gchar *lhs_type; + const gchar *rhs_type; + + lhs_type = gcut_data_get_string(data, "lhs_type"); + rhs_type = gcut_data_get_string(data, "rhs_type"); + + set_text(&lhs, "Hello"); + set_text(&rhs, "llox\\z"); + cut_assert_false(grn_operator_exec_regexp(context, &lhs, &rhs)); +} +#endif -------------- next part -------------- HTML����������������������������... Download