null+****@clear*****
null+****@clear*****
2012年 4月 23日 (月) 21:43:18 JST
Kouhei Sutou 2012-04-23 21:43:18 +0900 (Mon, 23 Apr 2012) New Revision: ce283e3a7a9233b0d2613050beecc4ea245936b7 Log: Support GRN_OP_SIMILAR in grn_expr With this patch, similar search by select --filter "column *S 'TEXT'" is also available. fixes #1342 Added files: test/function/suite/select/filter/similar.expected test/function/suite/select/filter/similar.test Modified files: lib/db.c lib/expr.c lib/ii.c Modified: lib/db.c (+2 -0) =================================================================== --- lib/db.c 2012-04-23 19:00:50 +0900 (61471b1) +++ lib/db.c 2012-04-23 21:43:18 +0900 (b540ef8) @@ -8083,6 +8083,7 @@ grn_column_index(grn_ctx *ctx, grn_obj *obj, grn_operator op, case GRN_OP_PREFIX : case GRN_OP_SUFFIX : case GRN_OP_MATCH : + case GRN_OP_SIMILAR : for (hooks = DB_OBJ(obj)->hooks[GRN_HOOK_SET]; hooks; hooks = hooks->next) { default_set_value_hook_data *data = (void *)NEXT_ADDR(hooks); grn_obj *target = grn_ctx_at(ctx, data->target); @@ -8150,6 +8151,7 @@ grn_column_index(grn_ctx *ctx, grn_obj *obj, grn_operator op, } break; case GRN_OP_MATCH : + case GRN_OP_SIMILAR : { grn_accessor *a = (grn_accessor *)obj; if (a->action == GRN_ACCESSOR_GET_KEY) { Modified: lib/expr.c (+9 -1) =================================================================== --- lib/expr.c 2012-04-23 19:00:50 +0900 (d569318) +++ lib/expr.c 2012-04-23 21:43:18 +0900 (eb43802) @@ -942,6 +942,7 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, case GRN_OP_TABLE_CREATE : case GRN_OP_EXPR_GET_VAR : case GRN_OP_MATCH : + case GRN_OP_SIMILAR : case GRN_OP_PREFIX : case GRN_OP_SUFFIX : case GRN_OP_NOT_EQUAL : @@ -3579,6 +3580,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, for (stat = SCAN_START, c = e->codes, ce = &e->codes[e->codes_curr]; c < ce; c++) { switch (c->op) { case GRN_OP_MATCH : + case GRN_OP_SIMILAR : case GRN_OP_PREFIX : case GRN_OP_SUFFIX : case GRN_OP_EQUAL : @@ -3641,6 +3643,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, for (i = 0, stat = SCAN_START, c = e->codes, ce = &e->codes[e->codes_curr]; c < ce; c++) { switch (c->op) { case GRN_OP_MATCH : + case GRN_OP_SIMILAR : case GRN_OP_PREFIX : case GRN_OP_SUFFIX : case GRN_OP_EQUAL : @@ -4111,13 +4114,18 @@ grn_table_select(grn_ctx *ctx, grn_obj *table, grn_obj *expr, } break; case GRN_OP_MATCH : + case GRN_OP_SIMILAR : { grn_obj wv, **ip = &GRN_PTR_VALUE(&si->index); int j = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *); int32_t *wp = &GRN_INT32_VALUE(&si->wv); grn_search_optarg optarg; GRN_INT32_INIT(&wv, GRN_OBJ_VECTOR); - optarg.mode = GRN_OP_EXACT; + if (si->op == GRN_OP_MATCH) { + optarg.mode = GRN_OP_EXACT; + } else { + optarg.mode = si->op; + } optarg.similarity_threshold = 0; optarg.max_interval = 0; optarg.weight_vector = (int *)GRN_BULK_HEAD(&wv); Modified: lib/ii.c (+8 -3) =================================================================== --- lib/ii.c 2012-04-23 19:00:50 +0900 (32cdc3e) +++ lib/ii.c 2012-04-23 21:43:18 +0900 (caec749) @@ -6016,9 +6016,14 @@ grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len { grn_select_optarg arg = {GRN_OP_EXACT, 0, 0, NULL, 0, NULL, NULL, 0}; if (!s) { return GRN_INVALID_ARGUMENT; } - if (optarg && optarg->vector_size > 0) { - arg.weight_vector = optarg->weight_vector; - arg.vector_size = optarg->vector_size; + if (optarg) { + if (optarg->mode == GRN_OP_SIMILAR) { + arg.mode = optarg->mode; + } + if (optarg->vector_size > 0) { + arg.weight_vector = optarg->weight_vector; + arg.vector_size = optarg->vector_size; + } } /* todo : support subrec grn_rset_init(ctx, s, grn_rec_document, 0, grn_rec_none, 0, 0); Added: test/function/suite/select/filter/similar.expected (+66 -0) 100644 =================================================================== --- /dev/null +++ test/function/suite/select/filter/similar.expected 2012-04-23 21:43:18 +0900 (6804053) @@ -0,0 +1,66 @@ +table_create Documents TABLE_HASH_KEY ShortText +[[0,0.0,0.0],true] +column_create Documents content COLUMN_SCALAR Text +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram +[[0,0.0,0.0],true] +column_create Terms document_index COLUMN_INDEX|WITH_POSITION Documents content +[[0,0.0,0.0],true] +load --table Documents +[ +["_key", "content"], +["groonga ã®æ¦è¦", "groonga ã¯è»¢ç½®ç´¢å¼ãç¨ããé«éã»é«ç²¾åº¦ãªå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã§ãããç»é²ãããææ¸ãããã«æ¤ç´¢çµæã«åæ ã§ãã¾ããã¾ããåç §ããããã¯ããã«æ´æ°ã§ãããã¨ãããå³ææ´æ°ã®å¿ è¦ãªã¢ããªã±ã¼ã·ã§ã³ã«ããã¦ãé«ãæ§è½ãçºæ®ãã¾ãã\n\nå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã¨ãã¦éçºããã groonga ã§ãããç¬èªã®ã«ã©ã ã¹ãã¢ãæã¤åæåã®ãã¼ã¿ãã¼ã¹ã¨ãã¦ã®å´é¢ãæã£ã¦ãã¾ãããã®ãããMySQL ã PostgreSQL ãªã©ãæ¢åã®ä»£è¡¨çãªãã¼ã¿ãã¼ã¹ãè¦æã¨ããéè¨ã¯ã¨ãªãé«éã«å¦çã§ããã¨ããç¹å¾´ããããçµã¿åããã«ãã£ã¦å¼±ç¹ãè£ããããªä½¿ãæ¹ãã§ãã¾ãã\n\ngroonga ã®åºæ¬æ©è½ã¯ C ã©ã¤ãã©ãªã¨ãã¦æä¾ããã¦ãã¾ãããMySQL ã PostgreSQL ã¨é£æºãããããRuby ããå¼ã³åºããããããã¨ãã§ãã¾ãããã®ãããä»»æ ã®ã¢ããªã±ã¼ã·ã§ã³ã«çµã¿è¾¼ããã¨ãå¯è½ã§ãããå¤æ§ãªä½¿ãæ¹ãèãããã¾ãã èå³ã®ããæ¹ã¯ å©ç¨ä¾ ãã覧ãã ããã"], +["å ¨ææ¤ç´¢ã¨å³ææ´æ°", "ä¸è¬çãªãã¼ã¿ãã¼ã¹ã«ããã¦ã¯ã追å ã»åé¤ãªã©ã®æä½ãããã«åæ ããã¾ããä¸æ¹ãå ¨ææ¤ç´¢ã«ããã¦ã¯ã転置索å¼ãé次æ´æ°ã®é£ãããã¼ã¿æ§é ã§ãããã¨ãããææ¸ã®è¿½å ã»åé¤ã«å¯¾å¿ããªãã¨ã³ã¸ã³ãå°ãªãããã¾ããã\n\nããã«å¯¾ãã転置索å¼ãç¨ããå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã§ãããªãããgroonga ã¯ææ¸ãçæéã§è¿½å ã»åé¤ãããã¨ãã§ãã¾ãããã®ä¸ãæ´æ°ããªããã§ãæ¤ç´¢ã§ããã¨ããåªããç¹å¾´ãæã£ã¦ãããããå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã¨ãã¦ã¯ã¨ã¦ãæè»æ§ãããã¾ããã¾ããè¤æ°ã®è»¢ç½®ç´¢å¼ãçµ±åãããããªéãå¦çãå¿ è¦ã¨ããªãã®ã§ãå®å®ãã¦é«ãæ§è½ãçºæ®ãããã¨ãæå¾ ã§ãã¾ãã"], +["ã«ã©ã ã¹ãã¢ã¨éè¨ã¯ã¨ãª", "ç¾ä»£ã¯ãã¤ã³ã¿ã¼ããããæ å ±æºã¨ããã°ããããã§ãæ å ±ãåéã§ããæ代ã§ããããããè¨å¤§ãªæ å ±ããæçãªæ å ±ãå¼ãåºãã®ã¯å°é£ã§ãããå¤é¢çãªåæã«ãã試è¡é¯èª¤ãå¿ è¦ã¨ãªãã¾ãããã¨ãã°ãæ¥ä»ãæé帯ã«ããçµãè¾¼ãã§ã¿ãããå°åã«ããçµãè¾¼ãã§ã¿ãããæ§å¥ãå¹´é½¢ã«ããçµãè¾¼ãã§ã¿ãããããã¨ã§ããããããã¦ããã®ãããªã¨ãã«ä¾¿å©ãªåå¨ãéè¨ã¯ã¨ãªã§ãã\n\néè¨ã¯ã¨ãªã¨ã¯ãæå®ããã«ã©ã ã®å¤ã«ãã£ã¦ã¬ã³ã¼ããã°ã«ã¼ãåããåã°ã«ã¼ãã«å«ã¾ããã¬ã³ã¼ãã®æ°ãæ±ããã¯ã¨ãªã§ãããã¨ãã°ãå°åã® ID ãæ ¼ç´ãã¦ããã«ã©ã ãæå®ããã°ãå°åæ¯ã®ã¬ã³ã¼ãæ°ãæ±ã¾ãã¾ããæ¥ä»ã®ã«ã©ã ãæå®ããã¨ãã®åºåãã°ã©ãåããã°ãã¬ã³ã¼ãæ°ã®æéå¤åã è¦è¦åãããã¨ãã§ãã¾ããããã«ãå°åã«ããçµãè¾¼ã¿ã¨æ¥ä»ã«å¯¾ããéè¨ã¯ã¨ãªãçµã¿åãããã°ãç¹å®ã®å°åã«ãããã¬ã³ã¼ãæ°ã®æéå¤åãè¦è¦åãã¨ãå¯è½ã§ãããã®ããã«ã尺度ãèªç±ã«é¸æãã¦çµãè¾¼ã¿ã»éè¨ã§ãããã¨ã¯ãè¨å¤§ãªæ å ±ãæ±ãä¸ã§ã¨ã¦ãéè¦ã«ãªãã¾ãã\n\ngroonga ãéè¨ã¯ã¨ãªãé«éã«å¦çã§ããçç±ã¯ããã¼ã¿ãã¼ã¹ã®è«çæ§é ã«ã«ã©ã ã¹ãã¢ãæ¡ç¨ãã¦ããããã§ããéè¨ã¯ã¨ãªãåç §ããã®ã¯æå®ãããã«ã©ã ã®ã¿ã§ãããããã«ã©ã åä½ã§ãã¼ã¿ãæ ¼ç´ããåæåã®ãã¼ã¿ãã¼ã¹ã§ã¯ãå¿ è¦ãªã«ã©ã ã®ã¿ãç¡é§ãªãèªã¿åºãããã¨ãå©ç¹ã¨ãªãã¾ããä¸æ¹ãã¬ã³ã¼ãåä½ã§ãã¼ã¿ãæ ¼ç´ããè¡æåã®ãã¼ã¿ãã¼ã¹ã§ã¯ãé£æ¥ããã«ã©ã ãã¾ã¨ãã¦èªã¿åºãã¦ãã¾ããã¨ãæ¬ ç¹ã¨ãªãã¾ãã"], +["転置索å¼ã¨ãã¼ã¯ãã¤ã¶", "転置索å¼ã¯å¤§è¦æ¨¡ãªå ¨ææ¤ç´¢ã«ç¨ããããä¼çµ±çãªãã¼ã¿æ§é ã§ãã転置索å¼ãç¨ããå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã§ã¯ãææ¸ã追å ããã¨ãã«ç´¢å¼èªãè¨é²ãã¦ãããæ¤ç´¢ããã¨ãã¯ã¯ã¨ãªãç´¢å¼èªã«åå²ãã¦åºç¾ææ¸ãæ±ãã¾ãããã®ãããææ¸ãã¯ã¨ãªããç´¢å¼èªãæãåºãæ¹æ³ãéè¦ã«ãªãã¾ãã\n\nãã¼ã¯ãã¤ã¶ã¯ãæååããç´¢å¼èªãæãåºãã¢ã¸ã¥ã¼ã«ã§ããæ¥æ¬èªã対象ã¨ããå ¨ææ¤ç´¢ã«ããã¦ã¯ãå½¢æ ç´ ãç´¢å¼èªã¨ãã¦æãåºãæ¹å¼ã¨æå N-gram ãæãåºãæ¹å¼ã®ããããããããã¯ä¸¡æ¹ãç¨ããã®ãä¸è¬çã§ããå½¢æ ç´ æ¹å¼ã¯æ¤ç´¢æéãç´¢å¼ãµã¤ãºã®é¢ã§åªãã¦ããã»ããæ¤ç´¢çµæã«ä¸è¦ãªææ¸ãå«ã¾ãã«ããã¨ããå©ç¹ãæã£ã¦ãã¾ããä¸æ¹ãN-gram æ¹å¼ã«ã¯æ¤ç´¢æ¼ããçºçãã«ããã¨ããå©ç¹ã ãããç¶æ³ã«ãã£ã¦é©ããæ¹å¼ãé¸æãããã¨ãæã¾ããã¨ããã¦ãã¾ãã\n\ngroonga ã¯å½¢æ ç´ æ¹å¼ã¨ N-gram æ¹å¼ã®ä¸¡æ¹ã«å¯¾å¿ãã¦ãã¾ããåæç¶æ ã§å©ç¨ã§ãããã¼ã¯ãã¤ã¶ã¯ç©ºç½ãåºåãæåã¨ãã¦ç¨ããæ¹å¼ã¨ N-gram æ¹å¼ã®ã¿ã§ãããå½¢æ ç´ è§£æå¨ MeCab ãçµã¿è¾¼ãã ã¨ã㯠MeCab ã«ããåãã¡æ¸ãã®çµæãç¨ããå½¢æ ç´ æ¹å¼ãæå¹ã«ãªãã¾ãããã¼ã¯ãã¤ã¶ã¯ãã©ã°ã¤ã³ã¨ãã¦è¿½å ã§ãããããç¹å¾´çãªãã¼ã¯ã¼ãã®ã¿ãç´¢å¼èªã¨ãã¦æ¡ç¨ãããªã©ãç¬èªã®ãã¼ã¯ãã¤ã¶ãéçºãããã¨ãå¯è½ã§ãã"], +["å ±æå¯è½ãªã¹ãã¬ã¼ã¸ã¨åç §ããã¯ããªã¼", "CPU ã®ãã«ãã³ã¢åãé²ãã§ãããããåæã«è¤æ°ã®ã¯ã¨ãªãå®è¡ããããä¸ã¤ã®ã¯ã¨ãªãè¤æ°ã®ã¹ã¬ããã§å®è¡ããããããã¨ã®éè¦æ§ã¯ã¾ãã¾ãé«ã¾ã£ã¦ãã¾ãã\n\ngroonga ã®ã¹ãã¬ã¼ã¸ã¯ãè¤æ°ã®ã¹ã¬ããã»ããã»ã¹ã§å ±æãããã¨ãã§ãã¾ããã¾ããåç §ããã¯ããªã¼ãªãã¼ã¿æ§é ãæ¡ç¨ãã¦ãããããæ´æ°ã¯ã¨ãªãå®è¡ãã¦ããç¶æ³ã§ãåç §ã¯ã¨ãªãå®è¡ãããã¨ãã§ãã¾ããåç §ã¯ã¨ãªãå®è¡ã§ããç¶æ ãç¶æããªããæ´æ°ã¯ã¨ãªãå®è¡ã§ããã®ã§ããªã¢ã«ã¿ã¤ã ãªã·ã¹ãã ã«é©ãã¦ãã¾ããããã«ã¯ãMySQL ãä»ãã¦æ´æ°ã¯ã¨ãªãå®è¡ãã¦ããæä¸ã« groonga ã® HTTP ãµã¼ããä»ãã¦åç §ã¯ã¨ãªãå®è¡ãããªã©ãå¤å½©ãªéç¨ãå¯è½ã¨ãªã£ã¦ãã¾ãã"], +["ä½ç½®æ å ±ï¼ç·¯åº¦ã»çµåº¦ï¼æ¤ç´¢", "GPS ã«ä»£è¡¨ããã測ä½ã·ã¹ãã ãæè¼ããé«æ©è½ãªæºå¸¯ç«¯æ«ã®æ®åãªã©ã«ãã£ã¦ãä½ç½®æ å ±ãæ±ããµã¼ãã¹ã¯ã¾ãã¾ã便å©ã«ãªã£ã¦ãã¾ãããã¨ãã°ãè¿ãã«ããã¬ã¹ãã©ã³ãæ¢ãã¦ããã¨ãã¯ãç¾å¨å°ããã®è·é¢ãåºæºã¨ãã¦æ¤ç´¢ããããªããæ¤ç´¢çµæãå°å³ä¸ã«è¡¨ç¤ºãã¦ããããããªãµã¼ãã¹ã便å©ã§ãããã®ãããä½ç½®æ å ±æ¤ç´¢ãé«éã«å®ç¾ã§ãããã¨ãéè¦ã«ãªã£ã¦ãã¾ãã\n\ngroonga ã§ã¯è»¢ç½®ç´¢å¼ãå¿ç¨ãã¦é«éãªä½ç½®æ å ±æ¤ç´¢ãå®ç¾ãã¦ãã¾ããç©å½¢ã»åã«ããç¯å²æ¤ç´¢ã«å¯¾å¿ãã¦ããã»ããåºæºç¹ã®è¿ããåªå çã«æ¢ç´¢ããããã¨ãã§ãã¾ããã¾ããè·é¢è¨ç®ããµãã¼ããã¦ããã®ã§ãä½ç½®æ å ±æ¤ç´¢ã®çµæãåºæºç¹ããã®è·é¢ã«ãã£ã¦æ´åãããã¨ãå¯è½ã§ãã"], +["groonga ã©ã¤ãã©ãª", "Groonga ã®åºæ¬æ©è½ã¯ C ã©ã¤ãã©ãªã¨ãã¦æä¾ããã¦ããã®ã§ãä»»æã®ã¢ããªã±ã¼ã·ã§ã³ã«çµã¿è¾¼ãã§å©ç¨ãããã¨ãã§ãã¾ããC/C++ 以å¤ã«ã¤ãã¦ã¯ãRuby ãã groonga ãå©ç¨ããã©ã¤ãã©ãªãªã©ãé¢é£ããã¸ã§ã¯ãã«ããã¦æä¾ããã¦ãã¾ãã詳ãã㯠é¢é£ããã¸ã§ã¯ã ãåç §ãã¦ãã ããã"], +["groonga ãµã¼ã", "groonga ã«ã¯ãµã¼ãæ©è½ããããããã¬ã³ã¿ã«ãµã¼ããªã©ã®æ°ããã©ã¤ãã©ãªãã¤ã³ã¹ãã¼ã«ã§ããªãç°å¢ã«ããã¦ãå©ç¨ã§ãã¾ãã対å¿ãã¦ããã®ã¯ HTTP, memcached binary ãããã³ã«ãããã³ã« groonga ã®ç¬èªãããã³ã«ã§ãã gqtp ã§ãããµã¼ãã¨ãã¦å©ç¨ããã¨ãã¯ã¯ã¨ãªã®ãã£ãã·ã¥æ©è½ãæå¹ã«ãªããããåãã¯ã¨ãªãåãåã£ãã¨ãã¯å¿çæéãçããªãã¨ããç¹å¾´ãããã¾ãã"], +["groonga ã¹ãã¬ã¼ã¸ã¨ã³ã¸ã³", "groonga ã¯ç¬èªã®ã«ã©ã ã¹ãã¢ãæã¤åæåã®ãã¼ã¿ãã¼ã¹ã¨ãã¦ã®å´é¢ãæã£ã¦ãã¾ãããæ¢åã® RDBMS ã®ã¹ãã¬ã¼ã¸ã¨ã³ã¸ã³ã¨ãã¦å©ç¨ãããã¨ãã§ãã¾ãããã¨ãã°ãgroonga ããã¼ã¹ã¨ãã MySQL ã®ã¹ãã¬ã¼ã¸ã¨ã³ã¸ã³ã¨ã㦠mroonga ãéçºããã¦ãã¾ããmroonga 㯠MySQL ã®ãã©ã°ã¤ã³ã¨ãã¦åçã«ãã¼ããããã¨ãå¯è½ã§ãããgroonga ã®ã«ã©ã ã¹ãã¢ãã¹ãã¬ã¼ã¸ã¨ãã¦å©ç¨ããããå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã¨ã㦠groonga ã MyISAM ã InnoDB ã¨é£æºãããããããã¨ãã§ãã¾ããgroonga åä½ã§ã®å©ç¨ãããã³ã« MyISAM, InnoDB ã¨ã®é£æºã«ã¯ä¸é·ä¸çãããã®ã§ãç¨éã«å¿ãã¦é©åãªçµã¿åãããé¸ã¶ãã¨ã大åã§ãã詳ãã㯠é¢é£ããã¸ã§ã¯ã ãåç §ãã¦ãã ããã"] +] +[[0,0.0,0.0],9] +select Documents --filter 'content *S "MySQLã§å ¨ææ¤ç´¢"' --output_columns '_key, _score, content' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 3 + ], + [ + [ + "_key", + "ShortText" + ], + [ + "_score", + "Int32" + ], + [ + "content", + "Text" + ] + ], + [ + "groonga ã®æ¦è¦", + 419432, + "groonga ã¯è»¢ç½®ç´¢å¼ãç¨ããé«éã»é«ç²¾åº¦ãªå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã§ãããç»é²ãããææ¸ãããã«æ¤ç´¢çµæã«åæ ã§ãã¾ããã¾ããåç §ããããã¯ããã«æ´æ°ã§ãããã¨ãããå³ææ´æ°ã®å¿ è¦ãªã¢ããªã±ã¼ã·ã§ã³ã«ããã¦ãé«ãæ§è½ãçºæ®ãã¾ãã\n\nå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã¨ãã¦éçºããã groonga ã§ãããç¬èªã®ã«ã©ã ã¹ãã¢ãæã¤åæåã®ãã¼ã¿ãã¼ã¹ã¨ãã¦ã®å´é¢ãæã£ã¦ãã¾ãããã®ãããMySQL ã PostgreSQL ãªã©ãæ¢åã®ä»£è¡¨çãªãã¼ã¿ãã¼ã¹ãè¦æã¨ããéè¨ã¯ã¨ãªãé«éã«å¦çã§ããã¨ããç¹å¾´ããããçµã¿åããã«ãã£ã¦å¼±ç¹ãè£ããããªä½¿ãæ¹ãã§ãã¾ãã\n\ngroonga ã®åºæ¬æ©è½ã¯ C ã©ã¤ãã©ãªã¨ãã¦æä¾ããã¦ãã¾ãããMySQL ã PostgreSQL ã¨é£æºãããããRuby ããå¼ã³åºããããããã¨ãã§ãã¾ãããã®ãããä»»æã®ã¢ã㪠ã±ã¼ã·ã§ã³ã«çµã¿è¾¼ããã¨ãå¯è½ã§ãããå¤æ§ãªä½¿ãæ¹ãèãããã¾ãã èå³ã®ããæ¹ã¯ å©ç¨ä¾ ãã覧ãã ããã" + ], + [ + "å ±æå¯è½ãªã¹ãã¬ã¼ã¸ã¨åç §ããã¯ããªã¼", + 209716, + "CPU ã®ãã«ãã³ã¢åãé²ãã§ãããããåæã«è¤æ°ã®ã¯ã¨ãªãå®è¡ããããä¸ã¤ã®ã¯ã¨ãªãè¤æ°ã®ã¹ã¬ããã§å®è¡ããããããã¨ã®éè¦æ§ã¯ã¾ãã¾ãé«ã¾ã£ã¦ãã¾ãã\n\ngroonga ã®ã¹ãã¬ã¼ã¸ã¯ãè¤æ°ã®ã¹ã¬ããã»ããã»ã¹ã§å ±æãããã¨ãã§ãã¾ããã¾ããåç §ããã¯ããªã¼ãªãã¼ã¿æ§é ãæ¡ç¨ãã¦ãããããæ´æ°ã¯ã¨ãªãå®è¡ãã¦ããç¶æ³ã§ãåç §ã¯ã¨ãªãå®è¡ãããã¨ãã§ãã¾ããåç §ã¯ã¨ãªãå®è¡ã§ããç¶æ ãç¶æããªããæ´æ°ã¯ã¨ãªãå®è¡ã§ããã®ã§ããªã¢ã«ã¿ã¤ã ãªã·ã¹ãã ã«é©ãã¦ãã¾ããããã«ã¯ãMySQL ãä»ãã¦æ´æ°ã¯ã¨ãªãå®è¡ãã¦ããæä¸ã« groonga ã® HTTP ãµã¼ããä»ãã¦åç §ã¯ã¨ãªãå®è¡ãããªã©ãå¤å½©ãªéç¨ãå¯è½ã¨ãªã£ã¦ãã¾ãã" + ], + [ + "groonga ã¹ãã¬ã¼ã¸ã¨ã³ã¸ã³", + 419432, + "groonga ã¯ç¬èªã®ã«ã©ã ã¹ãã¢ãæã¤åæåã®ãã¼ã¿ãã¼ã¹ã¨ãã¦ã®å´é¢ãæã£ã¦ãã¾ãããæ¢åã® RDBMS ã®ã¹ãã¬ã¼ã¸ã¨ã³ã¸ã³ã¨ãã¦å©ç¨ãããã¨ãã§ãã¾ãããã¨ãã°ãgroonga ããã¼ã¹ã¨ãã MySQL ã®ã¹ãã¬ã¼ã¸ã¨ã³ã¸ã³ã¨ã㦠mroonga ãéçºããã¦ãã¾ããmroonga 㯠MySQL ã®ãã©ã°ã¤ã³ã¨ãã¦åçã«ãã¼ããããã¨ãå¯è½ã§ãããgroonga ã®ã«ã©ã ã¹ãã¢ãã¹ãã¬ã¼ã¸ã¨ãã¦å©ç¨ããããå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã¨ã㦠groonga ã MyISAM ã InnoDB ã¨é£æºãããããããã¨ãã§ãã¾ããgroonga åä½ã§ã®å©ç¨ãããã³ã« MyISAM, InnoDB ã¨ã®é£æºã«ã¯ä¸é·ä¸çãããã®ã§ãç¨éã«å¿ãã¦é©åãªçµã¿åãããé¸ã¶ãã¨ã大åã§ãã詳ãã㯠é¢é£ããã¸ã§ã¯ã ãåç §ãã¦ãã ããã" + ] + ] + ] +] Added: test/function/suite/select/filter/similar.test (+21 -0) 100644 =================================================================== --- /dev/null +++ test/function/suite/select/filter/similar.test 2012-04-23 21:43:18 +0900 (7451353) @@ -0,0 +1,21 @@ +table_create Documents TABLE_HASH_KEY ShortText +column_create Documents content COLUMN_SCALAR Text + +table_create Terms TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram +column_create Terms document_index COLUMN_INDEX|WITH_POSITION Documents content + +load --table Documents +[ +["_key", "content"], +["groonga ã®æ¦è¦", "groonga ã¯è»¢ç½®ç´¢å¼ãç¨ããé«éã»é«ç²¾åº¦ãªå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã§ãããç»é²ãããææ¸ãããã«æ¤ç´¢çµæã«åæ ã§ãã¾ããã¾ããåç §ããããã¯ããã«æ´æ°ã§ãããã¨ãããå³ææ´æ°ã®å¿ è¦ãªã¢ããªã±ã¼ã·ã§ã³ã«ããã¦ãé«ãæ§è½ãçºæ®ãã¾ãã\n\nå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã¨ãã¦éçºããã groonga ã§ãããç¬èªã®ã«ã©ã ã¹ãã¢ãæã¤åæåã®ãã¼ã¿ãã¼ã¹ã¨ãã¦ã®å´é¢ãæã£ã¦ãã¾ãããã®ãããMySQL ã PostgreSQL ãªã©ãæ¢åã®ä»£è¡¨çãªãã¼ã¿ãã¼ã¹ãè¦æã¨ããéè¨ã¯ã¨ãªãé«éã«å¦çã§ããã¨ããç¹å¾´ããããçµã¿åããã«ãã£ã¦å¼±ç¹ãè£ããããªä½¿ãæ¹ãã§ãã¾ãã\n\ngroonga ã®åºæ¬æ©è½ã¯ C ã©ã¤ãã©ãªã¨ãã¦æä¾ããã¦ãã¾ãããMySQL ã PostgreSQL ã¨é£æºãããããRuby ããå¼ã³åºããããããã¨ãã§ãã¾ãããã®ãããä»»æ ã®ã¢ããªã±ã¼ã·ã§ã³ã«çµã¿è¾¼ããã¨ãå¯è½ã§ãããå¤æ§ãªä½¿ãæ¹ãèãããã¾ãã èå³ã®ããæ¹ã¯ å©ç¨ä¾ ãã覧ãã ããã"], +["å ¨ææ¤ç´¢ã¨å³ææ´æ°", "ä¸è¬çãªãã¼ã¿ãã¼ã¹ã«ããã¦ã¯ã追å ã»åé¤ãªã©ã®æä½ãããã«åæ ããã¾ããä¸æ¹ãå ¨ææ¤ç´¢ã«ããã¦ã¯ã転置索å¼ãé次æ´æ°ã®é£ãããã¼ã¿æ§é ã§ãããã¨ãããææ¸ã®è¿½å ã»åé¤ã«å¯¾å¿ããªãã¨ã³ã¸ã³ãå°ãªãããã¾ããã\n\nããã«å¯¾ãã転置索å¼ãç¨ããå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã§ãããªãããgroonga ã¯ææ¸ãçæéã§è¿½å ã»åé¤ãããã¨ãã§ãã¾ãããã®ä¸ãæ´æ°ããªããã§ãæ¤ç´¢ã§ããã¨ããåªããç¹å¾´ãæã£ã¦ãããããå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã¨ãã¦ã¯ã¨ã¦ãæè»æ§ãããã¾ããã¾ããè¤æ°ã®è»¢ç½®ç´¢å¼ãçµ±åãããããªéãå¦çãå¿ è¦ã¨ããªãã®ã§ãå®å®ãã¦é«ãæ§è½ãçºæ®ãããã¨ãæå¾ ã§ãã¾ãã"], +["ã«ã©ã ã¹ãã¢ã¨éè¨ã¯ã¨ãª", "ç¾ä»£ã¯ãã¤ã³ã¿ã¼ããããæ å ±æºã¨ããã°ããããã§ãæ å ±ãåéã§ããæ代ã§ããããããè¨å¤§ãªæ å ±ããæçãªæ å ±ãå¼ãåºãã®ã¯å°é£ã§ãããå¤é¢çãªåæã«ãã試è¡é¯èª¤ãå¿ è¦ã¨ãªãã¾ãããã¨ãã°ãæ¥ä»ãæé帯ã«ããçµãè¾¼ãã§ã¿ãããå°åã«ããçµãè¾¼ãã§ã¿ãããæ§å¥ãå¹´é½¢ã«ããçµãè¾¼ãã§ã¿ãããããã¨ã§ããããããã¦ããã®ãããªã¨ãã«ä¾¿å©ãªåå¨ãéè¨ã¯ã¨ãªã§ãã\n\néè¨ã¯ã¨ãªã¨ã¯ãæå®ããã«ã©ã ã®å¤ã«ãã£ã¦ã¬ã³ã¼ããã°ã«ã¼ãåããåã°ã«ã¼ãã«å«ã¾ããã¬ã³ã¼ãã®æ°ãæ±ããã¯ã¨ãªã§ãããã¨ãã°ãå°åã® ID ãæ ¼ç´ãã¦ããã«ã©ã ãæå®ããã°ãå°åæ¯ã®ã¬ã³ã¼ãæ°ãæ±ã¾ãã¾ããæ¥ä»ã®ã«ã©ã ãæå®ããã¨ãã®åºåãã°ã©ãåããã°ãã¬ã³ã¼ãæ°ã®æéå¤åã è¦è¦åãããã¨ãã§ãã¾ããããã«ãå°åã«ããçµãè¾¼ã¿ã¨æ¥ä»ã«å¯¾ããéè¨ã¯ã¨ãªãçµã¿åãããã°ãç¹å®ã®å°åã«ãããã¬ã³ã¼ãæ°ã®æéå¤åãè¦è¦åãã¨ãå¯è½ã§ãããã®ããã«ã尺度ãèªç±ã«é¸æãã¦çµãè¾¼ã¿ã»éè¨ã§ãããã¨ã¯ãè¨å¤§ãªæ å ±ãæ±ãä¸ã§ã¨ã¦ãéè¦ã«ãªãã¾ãã\n\ngroonga ãéè¨ã¯ã¨ãªãé«éã«å¦çã§ããçç±ã¯ããã¼ã¿ãã¼ã¹ã®è«çæ§é ã«ã«ã©ã ã¹ãã¢ãæ¡ç¨ãã¦ããããã§ããéè¨ã¯ã¨ãªãåç §ããã®ã¯æå®ãããã«ã©ã ã®ã¿ã§ãããããã«ã©ã åä½ã§ãã¼ã¿ãæ ¼ç´ããåæåã®ãã¼ã¿ãã¼ã¹ã§ã¯ãå¿ è¦ãªã«ã©ã ã®ã¿ãç¡é§ãªãèªã¿åºãããã¨ãå©ç¹ã¨ãªãã¾ããä¸æ¹ãã¬ã³ã¼ãåä½ã§ãã¼ã¿ãæ ¼ç´ããè¡æåã®ãã¼ã¿ãã¼ã¹ã§ã¯ãé£æ¥ããã«ã©ã ãã¾ã¨ãã¦èªã¿åºãã¦ãã¾ããã¨ãæ¬ ç¹ã¨ãªãã¾ãã"], +["転置索å¼ã¨ãã¼ã¯ãã¤ã¶", "転置索å¼ã¯å¤§è¦æ¨¡ãªå ¨ææ¤ç´¢ã«ç¨ããããä¼çµ±çãªãã¼ã¿æ§é ã§ãã転置索å¼ãç¨ããå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã§ã¯ãææ¸ã追å ããã¨ãã«ç´¢å¼èªãè¨é²ãã¦ãããæ¤ç´¢ããã¨ãã¯ã¯ã¨ãªãç´¢å¼èªã«åå²ãã¦åºç¾ææ¸ãæ±ãã¾ãããã®ãããææ¸ãã¯ã¨ãªããç´¢å¼èªãæãåºãæ¹æ³ãéè¦ã«ãªãã¾ãã\n\nãã¼ã¯ãã¤ã¶ã¯ãæååããç´¢å¼èªãæãåºãã¢ã¸ã¥ã¼ã«ã§ããæ¥æ¬èªã対象ã¨ããå ¨ææ¤ç´¢ã«ããã¦ã¯ãå½¢æ ç´ ãç´¢å¼èªã¨ãã¦æãåºãæ¹å¼ã¨æå N-gram ãæãåºãæ¹å¼ã®ããããããããã¯ä¸¡æ¹ãç¨ããã®ãä¸è¬çã§ããå½¢æ ç´ æ¹å¼ã¯æ¤ç´¢æéãç´¢å¼ãµã¤ãºã®é¢ã§åªãã¦ããã»ããæ¤ç´¢çµæã«ä¸è¦ãªææ¸ãå«ã¾ãã«ããã¨ããå©ç¹ãæã£ã¦ãã¾ããä¸æ¹ãN-gram æ¹å¼ã«ã¯æ¤ç´¢æ¼ããçºçãã«ããã¨ããå©ç¹ã ãããç¶æ³ã«ãã£ã¦é©ããæ¹å¼ãé¸æãããã¨ãæã¾ããã¨ããã¦ãã¾ãã\n\ngroonga ã¯å½¢æ ç´ æ¹å¼ã¨ N-gram æ¹å¼ã®ä¸¡æ¹ã«å¯¾å¿ãã¦ãã¾ããåæç¶æ ã§å©ç¨ã§ãããã¼ã¯ãã¤ã¶ã¯ç©ºç½ãåºåãæåã¨ãã¦ç¨ããæ¹å¼ã¨ N-gram æ¹å¼ã®ã¿ã§ãããå½¢æ ç´ è§£æå¨ MeCab ãçµã¿è¾¼ãã ã¨ã㯠MeCab ã«ããåãã¡æ¸ãã®çµæãç¨ããå½¢æ ç´ æ¹å¼ãæå¹ã«ãªãã¾ãããã¼ã¯ãã¤ã¶ã¯ãã©ã°ã¤ã³ã¨ãã¦è¿½å ã§ãããããç¹å¾´çãªãã¼ã¯ã¼ãã®ã¿ãç´¢å¼èªã¨ãã¦æ¡ç¨ãããªã©ãç¬èªã®ãã¼ã¯ãã¤ã¶ãéçºãããã¨ãå¯è½ã§ãã"], +["å ±æå¯è½ãªã¹ãã¬ã¼ã¸ã¨åç §ããã¯ããªã¼", "CPU ã®ãã«ãã³ã¢åãé²ãã§ãããããåæã«è¤æ°ã®ã¯ã¨ãªãå®è¡ããããä¸ã¤ã®ã¯ã¨ãªãè¤æ°ã®ã¹ã¬ããã§å®è¡ããããããã¨ã®éè¦æ§ã¯ã¾ãã¾ãé«ã¾ã£ã¦ãã¾ãã\n\ngroonga ã®ã¹ãã¬ã¼ã¸ã¯ãè¤æ°ã®ã¹ã¬ããã»ããã»ã¹ã§å ±æãããã¨ãã§ãã¾ããã¾ããåç §ããã¯ããªã¼ãªãã¼ã¿æ§é ãæ¡ç¨ãã¦ãããããæ´æ°ã¯ã¨ãªãå®è¡ãã¦ããç¶æ³ã§ãåç §ã¯ã¨ãªãå®è¡ãããã¨ãã§ãã¾ããåç §ã¯ã¨ãªãå®è¡ã§ããç¶æ ãç¶æããªããæ´æ°ã¯ã¨ãªãå®è¡ã§ããã®ã§ããªã¢ã«ã¿ã¤ã ãªã·ã¹ãã ã«é©ãã¦ãã¾ããããã«ã¯ãMySQL ãä»ãã¦æ´æ°ã¯ã¨ãªãå®è¡ãã¦ããæä¸ã« groonga ã® HTTP ãµã¼ããä»ãã¦åç §ã¯ã¨ãªãå®è¡ãããªã©ãå¤å½©ãªéç¨ãå¯è½ã¨ãªã£ã¦ãã¾ãã"], +["ä½ç½®æ å ±ï¼ç·¯åº¦ã»çµåº¦ï¼æ¤ç´¢", "GPS ã«ä»£è¡¨ããã測ä½ã·ã¹ãã ãæè¼ããé«æ©è½ãªæºå¸¯ç«¯æ«ã®æ®åãªã©ã«ãã£ã¦ãä½ç½®æ å ±ãæ±ããµã¼ãã¹ã¯ã¾ãã¾ã便å©ã«ãªã£ã¦ãã¾ãããã¨ãã°ãè¿ãã«ããã¬ã¹ãã©ã³ãæ¢ãã¦ããã¨ãã¯ãç¾å¨å°ããã®è·é¢ãåºæºã¨ãã¦æ¤ç´¢ããããªããæ¤ç´¢çµæãå°å³ä¸ã«è¡¨ç¤ºãã¦ããããããªãµã¼ãã¹ã便å©ã§ãããã®ãããä½ç½®æ å ±æ¤ç´¢ãé«éã«å®ç¾ã§ãããã¨ãéè¦ã«ãªã£ã¦ãã¾ãã\n\ngroonga ã§ã¯è»¢ç½®ç´¢å¼ãå¿ç¨ãã¦é«éãªä½ç½®æ å ±æ¤ç´¢ãå®ç¾ãã¦ãã¾ããç©å½¢ã»åã«ããç¯å²æ¤ç´¢ã«å¯¾å¿ãã¦ããã»ããåºæºç¹ã®è¿ããåªå çã«æ¢ç´¢ããããã¨ãã§ãã¾ããã¾ããè·é¢è¨ç®ããµãã¼ããã¦ããã®ã§ãä½ç½®æ å ±æ¤ç´¢ã®çµæãåºæºç¹ããã®è·é¢ã«ãã£ã¦æ´åãããã¨ãå¯è½ã§ãã"], +["groonga ã©ã¤ãã©ãª", "Groonga ã®åºæ¬æ©è½ã¯ C ã©ã¤ãã©ãªã¨ãã¦æä¾ããã¦ããã®ã§ãä»»æã®ã¢ããªã±ã¼ã·ã§ã³ã«çµã¿è¾¼ãã§å©ç¨ãããã¨ãã§ãã¾ããC/C++ 以å¤ã«ã¤ãã¦ã¯ãRuby ãã groonga ãå©ç¨ããã©ã¤ãã©ãªãªã©ãé¢é£ããã¸ã§ã¯ãã«ããã¦æä¾ããã¦ãã¾ãã詳ãã㯠é¢é£ããã¸ã§ã¯ã ãåç §ãã¦ãã ããã"], +["groonga ãµã¼ã", "groonga ã«ã¯ãµã¼ãæ©è½ããããããã¬ã³ã¿ã«ãµã¼ããªã©ã®æ°ããã©ã¤ãã©ãªãã¤ã³ã¹ãã¼ã«ã§ããªãç°å¢ã«ããã¦ãå©ç¨ã§ãã¾ãã対å¿ãã¦ããã®ã¯ HTTP, memcached binary ãããã³ã«ãããã³ã« groonga ã®ç¬èªãããã³ã«ã§ãã gqtp ã§ãããµã¼ãã¨ãã¦å©ç¨ããã¨ãã¯ã¯ã¨ãªã®ãã£ãã·ã¥æ©è½ãæå¹ã«ãªããããåãã¯ã¨ãªãåãåã£ãã¨ãã¯å¿çæéãçããªãã¨ããç¹å¾´ãããã¾ãã"], +["groonga ã¹ãã¬ã¼ã¸ã¨ã³ã¸ã³", "groonga ã¯ç¬èªã®ã«ã©ã ã¹ãã¢ãæã¤åæåã®ãã¼ã¿ãã¼ã¹ã¨ãã¦ã®å´é¢ãæã£ã¦ãã¾ãããæ¢åã® RDBMS ã®ã¹ãã¬ã¼ã¸ã¨ã³ã¸ã³ã¨ãã¦å©ç¨ãããã¨ãã§ãã¾ãããã¨ãã°ãgroonga ããã¼ã¹ã¨ãã MySQL ã®ã¹ãã¬ã¼ã¸ã¨ã³ã¸ã³ã¨ã㦠mroonga ãéçºããã¦ãã¾ããmroonga 㯠MySQL ã®ãã©ã°ã¤ã³ã¨ãã¦åçã«ãã¼ããããã¨ãå¯è½ã§ãããgroonga ã®ã«ã©ã ã¹ãã¢ãã¹ãã¬ã¼ã¸ã¨ãã¦å©ç¨ããããå ¨ææ¤ç´¢ã¨ã³ã¸ã³ã¨ã㦠groonga ã MyISAM ã InnoDB ã¨é£æºãããããããã¨ãã§ãã¾ããgroonga åä½ã§ã®å©ç¨ãããã³ã« MyISAM, InnoDB ã¨ã®é£æºã«ã¯ä¸é·ä¸çãããã®ã§ãç¨éã«å¿ãã¦é©åãªçµã¿åãããé¸ã¶ãã¨ã大åã§ãã詳ãã㯠é¢é£ããã¸ã§ã¯ã ãåç §ãã¦ãã ããã"] +] + +select Documents --filter 'content *S "MySQLã§å ¨ææ¤ç´¢"' --output_columns '_key, _score, content'