[Groonga-commit] groonga/groonga at e8d829f [master] Add GRN_OP_REGEXP and grn_operator_exec_regexp()

Back to archive index

Kouhei Sutou null+****@clear*****
Tue Mar 10 17:32:19 JST 2015


Kouhei Sutou	2015-03-10 17:32:19 +0900 (Tue, 10 Mar 2015)

  New Revision: e8d829f7ec12c8df65bb68e96b8434d87452c5c6
  https://github.com/groonga/groonga/commit/e8d829f7ec12c8df65bb68e96b8434d87452c5c6

  Message:
    Add GRN_OP_REGEXP and grn_operator_exec_regexp()
    
    TODO:
    
      * Support executing some special patterns with an inverted index.
        The special patterns are "\Aliteral" and "literal\z". They are
        beginning-of-string search and end-of-string search,
        respectively. "literal1.*literal2" may also be supportable.

  Modified files:
    include/groonga/groonga.h
    lib/operator.c
    test/unit/core/test-operator.c

  Modified: include/groonga/groonga.h (+4 -1)
===================================================================
--- include/groonga/groonga.h    2015-03-10 16:50:28 +0900 (704778f)
+++ include/groonga/groonga.h    2015-03-10 17:32:19 +0900 (18194d8)
@@ -711,7 +711,8 @@ typedef enum {
   GRN_OP_TABLE_SORT,
   GRN_OP_TABLE_GROUP,
   GRN_OP_JSON_PUT,
-  GRN_OP_GET_MEMBER
+  GRN_OP_GET_MEMBER,
+  GRN_OP_REGEXP
 } grn_operator;
 
 GRN_API const char *grn_operator_to_string(grn_operator op);
@@ -728,6 +729,8 @@ GRN_API grn_bool grn_operator_exec_match(grn_ctx *ctx,
                                          grn_obj *target, grn_obj *sub_text);
 GRN_API grn_bool grn_operator_exec_prefix(grn_ctx *ctx,
                                           grn_obj *target, grn_obj *prefix);
+GRN_API grn_bool grn_operator_exec_regexp(grn_ctx *ctx,
+                                          grn_obj *target, grn_obj *pattern);
 
 struct _grn_table_group_result {
   grn_obj *table;

  Modified: lib/operator.c (+92 -1)
===================================================================
--- lib/operator.c    2015-03-10 16:50:28 +0900 (65faa44)
+++ lib/operator.c    2015-03-10 17:32:19 +0900 (0ac2320)
@@ -23,6 +23,14 @@
 
 #include <string.h>
 
+#ifdef GRN_WITH_ONIGMO
+# define GRN_SUPPORT_REGEXP
+#endif
+
+#ifdef GRN_SUPPORT_REGEXP
+# include <oniguruma.h>
+#endif
+
 static const char *operator_names[] = {
   "push",
   "pop",
@@ -100,7 +108,8 @@ static const char *operator_names[] = {
   "table_sort",
   "table_group",
   "json_put",
-  "get_member"
+  "get_member",
+  "regexp"
 };
 
 const char *
@@ -630,6 +639,76 @@ string_have_prefix(grn_ctx *ctx,
 }
 
 static grn_bool
+string_match_regexp(grn_ctx *ctx,
+                    const char *target, unsigned int target_len,
+                    const char *pattern, unsigned int pattern_len)
+{
+#ifdef GRN_SUPPORT_REGEXP
+  OnigRegex regex;
+  OnigEncoding onig_encoding;
+  int onig_result;
+  OnigErrorInfo onig_error_info;
+
+  if (ctx->encoding == GRN_ENC_NONE) {
+    return GRN_FALSE;
+  }
+
+  switch (ctx->encoding) {
+  case GRN_ENC_EUC_JP :
+    onig_encoding = ONIG_ENCODING_EUC_JP;
+    break;
+  case GRN_ENC_UTF8 :
+    onig_encoding = ONIG_ENCODING_UTF8;
+    break;
+  case GRN_ENC_SJIS :
+    onig_encoding = ONIG_ENCODING_CP932;
+    break;
+  case GRN_ENC_LATIN1 :
+    onig_encoding = ONIG_ENCODING_ISO_8859_1;
+    break;
+  case GRN_ENC_KOI8R :
+    onig_encoding = ONIG_ENCODING_KOI8_R;
+    break;
+  default :
+    return GRN_FALSE;
+  }
+
+  onig_result = onig_new(&regex,
+                         pattern,
+                         pattern + pattern_len,
+                         ONIG_OPTION_NONE,
+                         onig_encoding,
+                         ONIG_SYNTAX_RUBY,
+                         &onig_error_info);
+  if (onig_result != ONIG_NORMAL) {
+    char message[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str(message, onig_result, onig_error_info);
+    ERR(GRN_INVALID_ARGUMENT,
+        "[operator][regexp] "
+        "failed to create regular expression object: <%.*s>: %s",
+        pattern_len, pattern,
+        message);
+    return GRN_FALSE;
+  }
+
+  {
+    OnigPosition position;
+    position = onig_search(regex,
+                           target,
+                           target + target_len,
+                           target,
+                           target + target_len,
+                           NULL,
+                           ONIG_OPTION_NONE);
+    onig_free(regex);
+    return position != ONIG_MISMATCH;
+  }
+#else
+  return GRN_FALSE;
+#endif
+}
+
+static grn_bool
 exec_text_operator(grn_ctx *ctx,
                    grn_operator op,
                    const char *target,
@@ -646,6 +725,9 @@ exec_text_operator(grn_ctx *ctx,
   case GRN_OP_PREFIX :
     matched = string_have_prefix(ctx, target, target_len, query, query_len);
     break;
+  case GRN_OP_REGEXP :
+    matched = string_match_regexp(ctx, target, target_len, query, query_len);
+    break;
   default :
     matched = GRN_FALSE;
     break;
@@ -819,3 +901,12 @@ grn_operator_exec_prefix(grn_ctx *ctx, grn_obj *target, grn_obj *prefix)
   matched = exec_text_operator_bulk_bulk(ctx, GRN_OP_PREFIX, target, prefix);
   GRN_API_RETURN(matched);
 }
+
+grn_bool
+grn_operator_exec_regexp(grn_ctx *ctx, grn_obj *target, grn_obj *pattern)
+{
+  grn_bool matched;
+  GRN_API_ENTER;
+  matched = exec_text_operator_bulk_bulk(ctx, GRN_OP_REGEXP, target, pattern);
+  GRN_API_RETURN(matched);
+}

  Modified: test/unit/core/test-operator.c (+66 -0)
===================================================================
--- test/unit/core/test-operator.c    2015-03-10 16:50:28 +0900 (8adee4b)
+++ test/unit/core/test-operator.c    2015-03-10 17:32:19 +0900 (016885b)
@@ -16,6 +16,8 @@
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
+#include "../../../config.h"
+
 #include <groonga.h>
 
 #include <gcutter.h>
@@ -57,6 +59,12 @@ void data_exec_prefix_true(void);
 void test_exec_prefix_true(gconstpointer data);
 void data_exec_prefix_false(void);
 void test_exec_prefix_false(gconstpointer data);
+#ifdef GRN_WITH_ONIGMO
+void data_exec_regexp_true(void);
+void test_exec_regexp_true(gconstpointer data);
+void data_exec_regexp_false(void);
+void test_exec_regexp_false(gconstpointer data);
+#endif
 
 static gchar *tmp_directory;
 
@@ -576,3 +584,61 @@ test_exec_prefix_false(gconstpointer data)
   set_text(&rhs, "ell");
   cut_assert_false(grn_operator_exec_prefix(context, &lhs, &rhs));
 }
+
+#ifdef GRN_WITH_ONIGMO
+void
+data_exec_regexp_true(void)
+{
+#define ADD_DATA(lhs_type, rhs_type)                            \
+  gcut_add_datum(lhs_type " @~ " rhs_type,                      \
+                 "lhs_type", G_TYPE_STRING, lhs_type,           \
+                 "rhs_type", G_TYPE_STRING, rhs_type,           \
+                 NULL)
+
+  ADD_DATA("text", "text");
+
+#undef ADD_DATA
+}
+
+void
+test_exec_regexp_true(gconstpointer data)
+{
+  const gchar *lhs_type;
+  const gchar *rhs_type;
+
+  lhs_type = gcut_data_get_string(data, "lhs_type");
+  rhs_type = gcut_data_get_string(data, "rhs_type");
+
+  set_text(&lhs, "Hello");
+  set_text(&rhs, "e.l");
+  cut_assert_true(grn_operator_exec_regexp(context, &lhs, &rhs));
+}
+
+void
+data_exec_regexp_false(void)
+{
+#define ADD_DATA(lhs_type, rhs_type)                            \
+  gcut_add_datum(lhs_type " @~ " rhs_type,                      \
+                 "lhs_type", G_TYPE_STRING, lhs_type,           \
+                 "rhs_type", G_TYPE_STRING, rhs_type,           \
+                 NULL)
+
+  ADD_DATA("text", "text");
+
+#undef ADD_DATA
+}
+
+void
+test_exec_regexp_false(gconstpointer data)
+{
+  const gchar *lhs_type;
+  const gchar *rhs_type;
+
+  lhs_type = gcut_data_get_string(data, "lhs_type");
+  rhs_type = gcut_data_get_string(data, "rhs_type");
+
+  set_text(&lhs, "Hello");
+  set_text(&rhs, "llox\\z");
+  cut_assert_false(grn_operator_exec_regexp(context, &lhs, &rhs));
+}
+#endif
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index