Kouhei Sutou
null+****@clear*****
Thu Apr 9 14:30:30 JST 2015
Kouhei Sutou 2015-04-09 14:30:30 +0900 (Thu, 09 Apr 2015) New Revision: 19725397583fd61951e03a42bc7361d873127199 https://github.com/groonga/groonga/commit/19725397583fd61951e03a42bc7361d873127199 Message: Use the min estimated size in terms as estimated size for query. Because all terms must exist in the query, the total number of matched records must be less than or equal to the min number of matched records among the terms. Modified files: lib/ii.c test/unit/core/test-inverted-index.c Modified: lib/ii.c (+6 -1) =================================================================== --- lib/ii.c 2015-04-08 15:59:17 +0900 (488abe1) +++ lib/ii.c 2015-04-09 14:30:30 +0900 (8835236) @@ -19,6 +19,7 @@ #include <fcntl.h> #include <string.h> #include <sys/stat.h> +#include <math.h> #include "grn_ii.h" #include "grn_ctx_impl.h" @@ -6505,7 +6506,11 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii, token_info *ti = tis[i]; double term_estimated_size; term_estimated_size = ((double)ti->size / ti->ntoken); - estimated_size += (term_estimated_size - estimated_size) / (i + 1); + if (i == 0) { + estimated_size = term_estimated_size; + } else { + estimated_size = fmin(estimated_size, term_estimated_size); + } } exit : Modified: test/unit/core/test-inverted-index.c (+38 -0) =================================================================== --- test/unit/core/test-inverted-index.c 2015-04-08 15:59:17 +0900 (6d0e631) +++ test/unit/core/test-inverted-index.c 2015-04-09 14:30:30 +0900 (b19dfd1) @@ -40,6 +40,7 @@ void test_scalar_index(void); void test_int_index(void); void test_mroonga_index(void); void test_mroonga_index_score(void); +void test_estimate_size_for_query(void); #define TYPE_SIZE 1024 @@ -934,3 +935,40 @@ test_mroonga_index_score(void) grn_obj_close(context, lc); grn_obj_close(context, t1); } + +void +test_estimate_size_for_query(void) +{ + grn_obj *index_column; + grn_ii *ii; + + grn_obj_close(context, db); + db = grn_db_create(context, + cut_build_path(tmp_directory, 
"estimate.grn", NULL), + NULL); + + assert_send_command("table_create Memos TABLE_NO_KEY"); + assert_send_command("column_create Memos content COLUMN_SCALAR Text"); + assert_send_command("table_create Terms TABLE_PAT_KEY ShortText " + "--default_tokenizer TokenBigramSplitSymbolAlphaDigit " + "--normalizer NormalizerAuto"); + assert_send_command("column_create Terms index COLUMN_INDEX|WITH_POSITION " + "Memos content"); + assert_send_command("load --table Memos\n" + "[" + "[\"content\"]," + "[\"Groonga\"]," + "[\"Rroonga\"]," + "[\"Mroonga\"]" + "]"); + + index_column = grn_ctx_get(context, "Terms.index", strlen("Terms.index")); + ii = (grn_ii *)index_column; + + cut_assert_equal_double(1, DBL_EPSILON, + grn_ii_estimate_size_for_query(context, + ii, + "Groonga", + strlen("Groonga"), + NULL)); +} -------------- next part -------------- HTML����������������������������... Download