[Groonga-commit] groonga/groonga at 1e870f3 [master] hash: increase max key size

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Apr 4 20:42:16 JST 2015


Kouhei Sutou	2015-04-04 20:42:16 +0900 (Sat, 04 Apr 2015)

  New Revision: 1e870f31a57dce5114e375df4da7ed78f71b41e8
  https://github.com/groonga/groonga/commit/1e870f31a57dce5114e375df4da7ed78f71b41e8

  Message:
    hash: increase max key size
    
    (0x1000)  4096 ->
    (0xffff) 65535

  Modified files:
    lib/db.c
    lib/grn_hash.h
    lib/hash.c
    lib/proc.c
    test/unit/core/test-hash-cursor.c
    test/unit/core/test-hash-sort.c
    test/unit/core/test-hash.c

  Modified: lib/db.c (+2 -2)
===================================================================
--- lib/db.c    2015-04-03 17:37:10 +0900 (960fdad)
+++ lib/db.c    2015-04-04 20:42:16 +0900 (9ed0864)
@@ -7744,7 +7744,7 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value)
       switch (DB_OBJ(obj)->header.type) {
       case GRN_TABLE_HASH_KEY :
         ((grn_hash *)obj)->tokenizer = value;
-        ((grn_hash *)obj)->header->tokenizer = grn_obj_id(ctx, value);
+        ((grn_hash *)obj)->header.common->tokenizer = grn_obj_id(ctx, value);
         rc = GRN_SUCCESS;
         break;
       case GRN_TABLE_PAT_KEY :
@@ -7765,7 +7765,7 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value)
       switch (DB_OBJ(obj)->header.type) {
       case GRN_TABLE_HASH_KEY :
         ((grn_hash *)obj)->normalizer = value;
-        ((grn_hash *)obj)->header->normalizer = grn_obj_id(ctx, value);
+        ((grn_hash *)obj)->header.common->normalizer = grn_obj_id(ctx, value);
         rc = GRN_SUCCESS;
         break;
       case GRN_TABLE_PAT_KEY :

  Modified: lib/grn_hash.h (+45 -21)
===================================================================
--- lib/grn_hash.h    2015-04-03 17:37:10 +0900 (44f5bcb)
+++ lib/grn_hash.h    2015-04-04 20:42:16 +0900 (d50836d)
@@ -1,5 +1,5 @@
 /* -*- c-basic-offset: 2 -*- */
-/* Copyright(C) 2009-2012 Brazil
+/* Copyright(C) 2009-2015 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -187,7 +187,15 @@ GRN_API grn_id grn_table_queue_tail(grn_table_queue *queue);
 /**** grn_hash ****/
 
 #define GRN_HASH_TINY         (0x01<<6)
-#define GRN_HASH_MAX_KEY_SIZE GRN_TABLE_MAX_KEY_SIZE
+#define GRN_HASH_MAX_KEY_SIZE_NORMAL GRN_TABLE_MAX_KEY_SIZE
+#define GRN_HASH_MAX_KEY_SIZE_LARGE  (0xffff)
+
+#define GRN_HASH_IS_LARGE_KEY(hash)\
+  ((hash)->key_size > GRN_HASH_MAX_KEY_SIZE_NORMAL)
+
+typedef struct _grn_hash_header_common grn_hash_header_common;
+typedef struct _grn_hash_header_normal grn_hash_header_normal;
+typedef struct _grn_hash_header_large  grn_hash_header_large;
 
 struct _grn_hash {
   grn_db_obj obj;
@@ -205,7 +213,11 @@ struct _grn_hash {
 
   /* For grn_io_hash. */
   grn_io *io;
-  struct grn_hash_header *header;
+  union {
+    grn_hash_header_common *common;
+    grn_hash_header_normal *normal;
+    grn_hash_header_large  *large;
+  } header;
   uint32_t *lock;
   // uint32_t nref;
   // unsigned int max_n_subrecs;
@@ -230,24 +242,36 @@ struct _grn_hash {
   grn_tiny_bitmap bitmap;
 };
 
-/* Header of grn_io_hash. */
-struct grn_hash_header {
-  uint32_t flags;
-  grn_encoding encoding;
-  uint32_t key_size;
-  uint32_t value_size;
-  grn_id tokenizer;
-  uint32_t curr_rec;
-  int32_t curr_key;
-  uint32_t idx_offset;
-  uint32_t entry_size;
-  uint32_t max_offset;
-  uint32_t n_entries;
-  uint32_t n_garbages;
-  uint32_t lock;
-  grn_id normalizer;
-  uint32_t reserved[15];
-  grn_id garbages[GRN_HASH_MAX_KEY_SIZE];
+#define GRN_HASH_HEADER_COMMON_FIELDS\
+  uint32_t flags;\
+  grn_encoding encoding;\
+  uint32_t key_size;\
+  uint32_t value_size;\
+  grn_id tokenizer;\
+  uint32_t curr_rec;\
+  int32_t curr_key;\
+  uint32_t idx_offset;\
+  uint32_t entry_size;\
+  uint32_t max_offset;\
+  uint32_t n_entries;\
+  uint32_t n_garbages;\
+  uint32_t lock;\
+  grn_id normalizer;\
+  uint32_t reserved[15]
+
+struct _grn_hash_header_common {
+  GRN_HASH_HEADER_COMMON_FIELDS;
+};
+
+struct _grn_hash_header_normal {
+  GRN_HASH_HEADER_COMMON_FIELDS;
+  grn_id garbages[GRN_HASH_MAX_KEY_SIZE_NORMAL];
+  grn_table_queue queue;
+};
+
+struct _grn_hash_header_large {
+  GRN_HASH_HEADER_COMMON_FIELDS;
+  grn_id garbages[GRN_HASH_MAX_KEY_SIZE_LARGE];
   grn_table_queue queue;
 };
 

  Modified: lib/hash.c (+61 -27)
===================================================================
--- lib/hash.c    2015-04-03 17:37:10 +0900 (22491c5)
+++ lib/hash.c    2015-04-04 20:42:16 +0900 (bfdc695)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2009-2012 Brazil
+  Copyright(C) 2009-2015 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -1167,7 +1167,11 @@ grn_array_unblock(grn_ctx *ctx, grn_array *array)
 /* grn_hash : hash table */
 
 #define GRN_HASH_MAX_SEGMENT  0x400
-#define GRN_HASH_HEADER_SIZE  0x9000
+#define GRN_HASH_HEADER_SIZE_NORMAL 0x9000
+#define GRN_HASH_HEADER_SIZE_LARGE\
+  (GRN_HASH_HEADER_SIZE_NORMAL +\
+   (sizeof(grn_id) *\
+    (GRN_HASH_MAX_KEY_SIZE_LARGE - GRN_HASH_MAX_KEY_SIZE_NORMAL)))
 #define GRN_HASH_SEGMENT_SIZE 0x400000
 #define W_OF_KEY_IN_A_SEGMENT 22
 #define IDX_MASK_IN_A_SEGMENT 0xfffff
@@ -1298,7 +1302,7 @@ inline static grn_id *
 grn_hash_idx_at(grn_ctx *ctx, grn_hash *hash, grn_id id)
 {
   if (grn_hash_is_io_hash(hash)) {
-    id = (id & *hash->max_offset) + hash->header->idx_offset;
+    id = (id & *hash->max_offset) + hash->header.common->idx_offset;
     return grn_io_hash_idx_at(ctx, hash, id);
   } else {
     return hash->index + (id & *hash->max_offset);
@@ -1380,15 +1384,18 @@ grn_io_hash_entry_put_key(grn_ctx *ctx, grn_hash *hash,
     key_offset = entry->key.offset;
   } else {
     uint32_t segment_id;
+    grn_hash_header_common *header;
+
+    header = hash->header.common;
     if (key_size >= GRN_HASH_SEGMENT_SIZE) {
       return GRN_INVALID_ARGUMENT;
     }
-    key_offset = hash->header->curr_key;
+    key_offset = header->curr_key;
     segment_id = (key_offset + key_size) >> W_OF_KEY_IN_A_SEGMENT;
     if ((key_offset >> W_OF_KEY_IN_A_SEGMENT) != segment_id) {
-      key_offset = hash->header->curr_key = segment_id << W_OF_KEY_IN_A_SEGMENT;
+      key_offset = header->curr_key = segment_id << W_OF_KEY_IN_A_SEGMENT;
     }
-    hash->header->curr_key += key_size;
+    header->curr_key += key_size;
     entry->key.offset = key_offset;
   }
 
@@ -1537,7 +1544,8 @@ grn_io_hash_calculate_entry_size(uint32_t key_size, uint32_t value_size,
 }
 
 static grn_io *
-grn_io_hash_create_io(grn_ctx *ctx, const char *path, uint32_t entry_size)
+grn_io_hash_create_io(grn_ctx *ctx, const char *path,
+                      uint32_t header_size, uint32_t entry_size)
 {
   uint32_t w_of_element = 0;
   grn_io_array_spec array_spec[4];
@@ -1555,7 +1563,7 @@ grn_io_hash_create_io(grn_ctx *ctx, const char *path, uint32_t entry_size)
   array_spec[GRN_HASH_INDEX_SEGMENT].max_n_segments = 1U << (30 - (22 - 2));
   array_spec[GRN_HASH_BITMAP_SEGMENT].w_of_element = 0;
   array_spec[GRN_HASH_BITMAP_SEGMENT].max_n_segments = 1U << (30 - (22 + 3));
-  return grn_io_create_with_array(ctx, path, GRN_HASH_HEADER_SIZE,
+  return grn_io_create_with_array(ctx, path, header_size,
                                   GRN_HASH_SEGMENT_SIZE,
                                   grn_io_auto, 4, array_spec);
 }
@@ -1566,12 +1574,17 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
                  grn_encoding encoding, uint32_t init_size)
 {
   grn_io *io;
-  struct grn_hash_header *header;
-  uint32_t entry_size, max_offset;
+  grn_hash_header_common *header;
+  uint32_t header_size, entry_size, max_offset;
 
+  if (key_size <= GRN_HASH_MAX_KEY_SIZE_NORMAL) {
+    header_size = GRN_HASH_HEADER_SIZE_NORMAL;
+  } else {
+    header_size = GRN_HASH_HEADER_SIZE_LARGE;
+  }
   entry_size = grn_io_hash_calculate_entry_size(key_size, value_size, flags);
 
-  io = grn_io_hash_create_io(ctx, path, entry_size);
+  io = grn_io_hash_create_io(ctx, path, header_size, entry_size);
   if (!io) {
     return GRN_NO_MEMORY_AVAILABLE;
   }
@@ -1587,6 +1600,8 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
     encoding = ctx->encoding;
   }
 
+  hash->key_size = key_size;
+
   header = grn_io_header(io);
   header->flags = flags;
   header->encoding = encoding;
@@ -1610,11 +1625,18 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
     header->normalizer = GRN_ID_NIL;
   }
   GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
-  grn_table_queue_init(ctx, &header->queue);
+  {
+    grn_table_queue *queue;
+    if (GRN_HASH_IS_LARGE_KEY(hash)) {
+      queue = &(((grn_hash_header_large *)(header))->queue);
+    } else {
+      queue = &(((grn_hash_header_normal *)(header))->queue);
+    }
+    grn_table_queue_init(ctx, queue);
+  }
 
   hash->obj.header.flags = header->flags;
   hash->ctx = ctx;
-  hash->key_size = key_size;
   hash->encoding = encoding;
   hash->value_size = value_size;
   hash->entry_size = entry_size;
@@ -1622,7 +1644,7 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
   hash->n_entries = &header->n_entries;
   hash->max_offset = &header->max_offset;
   hash->io = io;
-  hash->header = header;
+  hash->header.common = header;
   hash->lock = &header->lock;
   hash->tokenizer = NULL;
   return GRN_SUCCESS;
@@ -1711,7 +1733,7 @@ grn_hash_create(grn_ctx *ctx, const char *path, uint32_t key_size, uint32_t valu
   if (!ctx) {
     return NULL;
   }
-  if (key_size > GRN_HASH_MAX_KEY_SIZE) {
+  if (key_size > GRN_HASH_MAX_KEY_SIZE_LARGE) {
     return NULL;
   }
   hash = (grn_hash *)GRN_MALLOC(sizeof(grn_hash));
@@ -1732,7 +1754,7 @@ grn_hash_open(grn_ctx *ctx, const char *path)
   if (ctx) {
     grn_io * const io = grn_io_open(ctx, path, grn_io_auto);
     if (io) {
-      struct grn_hash_header * const header = grn_io_header(io);
+      grn_hash_header_common * const header = grn_io_header(io);
       if (grn_io_get_type(io) == GRN_TABLE_HASH_KEY) {
         grn_hash * const hash = (grn_hash *)GRN_MALLOC(sizeof(grn_hash));
         if (hash) {
@@ -1747,7 +1769,7 @@ grn_hash_open(grn_ctx *ctx, const char *path)
             hash->n_entries = &header->n_entries;
             hash->max_offset = &header->max_offset;
             hash->io = io;
-            hash->header = header;
+            hash->header.common = header;
             hash->lock = &header->lock;
             hash->tokenizer = grn_ctx_at(ctx, header->tokenizer);
             if (header->flags & GRN_OBJ_KEY_NORMALIZE) {
@@ -1911,7 +1933,7 @@ grn_hash_reset(grn_ctx *ctx, grn_hash *hash, uint32_t expected_n_entries)
 
   if (grn_hash_is_io_hash(hash)) {
     uint32_t i;
-    src_offset = hash->header->idx_offset;
+    src_offset = hash->header.common->idx_offset;
     dest_offset = MAX_INDEX_SIZE - src_offset;
     for (i = 0; i < new_index_size; i += (IDX_MASK_IN_A_SEGMENT + 1)) {
       /*
@@ -1979,7 +2001,7 @@ grn_hash_reset(grn_ctx *ctx, grn_hash *hash, uint32_t expected_n_entries)
   }
 
   if (grn_hash_is_io_hash(hash)) {
-    hash->header->idx_offset = dest_offset;
+    hash->header.common->idx_offset = dest_offset;
   } else {
     grn_id * const old_index = hash->index;
     hash->index = new_index;
@@ -2038,15 +2060,22 @@ grn_io_hash_add(grn_ctx *ctx, grn_hash *hash, uint32_t hash_value,
 {
   grn_id entry_id;
   grn_hash_entry *entry;
-  struct grn_hash_header * const header = hash->header;
+  grn_hash_header_common * const header = hash->header.common;
+  grn_id *garbages;
 
-  entry_id = header->garbages[key_size - 1];
+  if (GRN_HASH_IS_LARGE_KEY(hash)) {
+    garbages = hash->header.large->garbages;
+  } else {
+    garbages = hash->header.normal->garbages;
+  }
+
+  entry_id = garbages[key_size - 1];
   if (entry_id) {
     entry = grn_io_hash_entry_at(ctx, hash, entry_id, GRN_TABLE_ADD);
     if (!entry) {
       return GRN_ID_NIL;
     }
-    header->garbages[key_size - 1] = *(grn_id *)entry;
+    garbages[key_size - 1] = *(grn_id *)entry;
     if (hash->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) {
       /* keep entry->io_entry's hash_value, flag, key_size and key. */
       memset(entry->io_entry.value, 0, header->value_size);
@@ -2436,9 +2465,14 @@ grn_hash_set_value(grn_ctx *ctx, grn_hash *hash, grn_id id,
   *ep = GARBAGE;\
   if (grn_hash_is_io_hash(hash)) {\
     uint32_t size = key_size - 1;\
-    struct grn_hash_header *hh = hash->header;\
-    ee->key = hh->garbages[size];\
-    hh->garbages[size] = e;\
+    grn_id *garbages;\
+    if (GRN_HASH_IS_LARGE_KEY(hash)) {\
+      garbages = hash->header.large->garbages;\
+    } else {\
+      garbages = hash->header.normal->garbages;\
+    }\
+    ee->key = garbages[size];\
+    garbages[size] = e;\
     grn_io_array_bit_off(ctx, hash->io, GRN_HASH_BITMAP_SEGMENT, e);\
   } else {\
     ee->key = hash->garbages;\
@@ -2537,7 +2571,7 @@ grn_hash_cursor_close(grn_ctx *ctx, grn_hash_cursor *c)
 }
 
 #define HASH_CURR_MAX(hash) \
-  ((grn_hash_is_io_hash(hash)) ? (hash)->header->curr_rec : (hash)->a.max)
+  ((grn_hash_is_io_hash(hash)) ? (hash)->header.common->curr_rec : (hash)->a.max)
 
 grn_hash_cursor *
 grn_hash_cursor_open(grn_ctx *ctx, grn_hash *hash,
@@ -3061,7 +3095,7 @@ void
 grn_hash_check(grn_ctx *ctx, grn_hash *hash)
 {
   char buf[8];
-  struct grn_hash_header *h = hash->header;
+  grn_hash_header_common *h = hash->header.common;
   GRN_OUTPUT_ARRAY_OPEN("RESULT", 1);
   GRN_OUTPUT_MAP_OPEN("SUMMARY", 25);
   GRN_OUTPUT_CSTR("flags");

  Modified: lib/proc.c (+1 -1)
===================================================================
--- lib/proc.c    2015-04-03 17:37:10 +0900 (10ce09f)
+++ lib/proc.c    2015-04-04 20:42:16 +0900 (1142f53)
@@ -2659,7 +2659,7 @@ dump_plugins(grn_ctx *ctx, grn_obj *outbuf)
     return;
   }
 
-  processed_paths = grn_hash_create(ctx, NULL, GRN_HASH_MAX_KEY_SIZE, 0,
+  processed_paths = grn_hash_create(ctx, NULL, GRN_TABLE_MAX_KEY_SIZE, 0,
                                     GRN_OBJ_TABLE_HASH_KEY |
                                     GRN_OBJ_KEY_VAR_SIZE);
   if (!processed_paths) {

  Modified: test/unit/core/test-hash-cursor.c (+2 -2)
===================================================================
--- test/unit/core/test-hash-cursor.c    2015-04-03 17:37:10 +0900 (af49367)
+++ test/unit/core/test-hash-cursor.c    2015-04-04 20:42:16 +0900 (0fc8928)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2; coding: utf-8 -*- */
 /*
-  Copyright (C) 2008-2012  Kouhei Sutou <kou at clear-code.com>
+  Copyright (C) 2008-2015  Kouhei Sutou <kou at clear-code.com>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -53,7 +53,7 @@ cut_setup(void)
   keys_and_values = NULL;
 
   grn_test_hash_factory_set_flags(factory, GRN_OBJ_KEY_VAR_SIZE);
-  grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE);
+  grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE_NORMAL);
 
   sample_value = NULL;
 }

  Modified: test/unit/core/test-hash-sort.c (+5 -4)
===================================================================
--- test/unit/core/test-hash-sort.c    2015-04-03 17:37:10 +0900 (35cb548)
+++ test/unit/core/test-hash-sort.c    2015-04-04 20:42:16 +0900 (95d3734)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2; coding: utf-8 -*- */
 /*
-  Copyright (C) 2008-2009  Kouhei Sutou <kou at cozmixng.org>
+  Copyright (C) 2008-2015  Kouhei Sutou <kou at cozmixng.org>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -156,12 +156,13 @@ retrieve_all_keys (grn_array *array, grn_id n_entries)
   while (id != GRN_ID_NIL) {
     grn_id *hash_id;
     void *array_value;
-    gchar key[GRN_HASH_MAX_KEY_SIZE];
+    gchar key[GRN_HASH_MAX_KEY_SIZE_NORMAL];
     int size;
 
     grn_array_cursor_get_value(context, cursor, &array_value);
     hash_id = array_value;
-    size = grn_hash_get_key(context, hash, *hash_id, key, GRN_HASH_MAX_KEY_SIZE);
+    size = grn_hash_get_key(context, hash, *hash_id,
+                            key, GRN_HASH_MAX_KEY_SIZE_NORMAL);
     key[size] = '\0';
     keys = g_list_append(keys, g_strdup(key));
     id = grn_array_cursor_next(context, cursor);
@@ -656,7 +657,7 @@ test_sort_by_variable_size_key(gconstpointer data)
   const GList *node;
   int n_entries;
 
-  grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE);
+  grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE_NORMAL);
   grn_test_hash_factory_add_flags(factory, GRN_OBJ_KEY_VAR_SIZE);
 
   if (test_data->set_parameters)

  Modified: test/unit/core/test-hash.c (+2 -2)
===================================================================
--- test/unit/core/test-hash.c    2015-04-03 17:37:10 +0900 (f7392ca)
+++ test/unit/core/test-hash.c    2015-04-04 20:42:16 +0900 (4eefb54)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2; coding: utf-8 -*- */
 /*
-  Copyright (C) 2008-2012  Kouhei Sutou <kou at clear-code.com>
+  Copyright (C) 2008-2015  Kouhei Sutou <kou at clear-code.com>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -96,7 +96,7 @@ set_value_size_to_zero(void)
 static void
 set_variable_size(void)
 {
-  grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE);
+  grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE_NORMAL);
   grn_test_hash_factory_add_flags(factory, GRN_OBJ_KEY_VAR_SIZE);
 }
 
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index