[Groonga-commit] groonga/grnxx at 96d45a8 [master] Add a cache mechanism and defrag() to Patricia for other than Bytes.

Back to archive index

susumu.yata null+****@clear*****
Tue Aug 6 15:01:41 JST 2013


susumu.yata	2013-08-06 15:01:41 +0900 (Tue, 06 Aug 2013)

  New Revision: 96d45a832c8ed4f774bb87b711eade37e5ba7bb1
  https://github.com/groonga/grnxx/commit/96d45a832c8ed4f774bb87b711eade37e5ba7bb1

  Message:
    Add a cache mechanism and defrag() to Patricia for other than Bytes.

  Modified files:
    lib/grnxx/map/patricia.cpp
    lib/grnxx/map/patricia.hpp

  Modified: lib/grnxx/map/patricia.cpp (+144 -14)
===================================================================
--- lib/grnxx/map/patricia.cpp    2013-08-06 12:02:13 +0900 (04d4651)
+++ lib/grnxx/map/patricia.cpp    2013-08-06 15:01:41 +0900 (12467a3)
@@ -210,7 +210,10 @@ Patricia<T>::Patricia()
       storage_node_id_(STORAGE_INVALID_NODE_ID),
       header_(nullptr),
       nodes_(),
-      pool_() {}
+      old_nodes_(),
+      pool_(),
+      cache_(),
+      nodes_id_(0) {}
 
 template <typename T>
 Patricia<T>::~Patricia() {}
@@ -271,6 +274,7 @@ bool Patricia<T>::get(int64_t key_id, Key *key) {
 
 template <typename T>
 bool Patricia<T>::unset(int64_t key_id) {
+  refresh_nodes();
   Key key;
   if (!get(key_id, &key)) {
     // Not found.
@@ -302,6 +306,7 @@ bool Patricia<T>::unset(int64_t key_id) {
 
 template <typename T>
 bool Patricia<T>::reset(int64_t key_id, KeyArg dest_key) {
+  refresh_nodes();
   // Find the source key.
   Key src_key;
   if (!get(key_id, &src_key)) {
@@ -385,6 +390,7 @@ bool Patricia<T>::reset(int64_t key_id, KeyArg dest_key) {
 
 template <typename T>
 bool Patricia<T>::find(KeyArg key, int64_t *key_id) {
+  refresh_nodes();
   const Key normalized_key = Helper<T>::normalize(key);
   uint64_t node_id = ROOT_NODE_ID;
   Node node = nodes_->get(node_id);
@@ -411,7 +417,21 @@ bool Patricia<T>::find(KeyArg key, int64_t *key_id) {
 
 template <typename T>
 bool Patricia<T>::add(KeyArg key, int64_t *key_id) {
+  refresh_nodes();
   const Key normalized_key = Helper<T>::normalize(key);
+  const uint64_t hash_value = Hash<Key>()(normalized_key);
+  CacheEntry &cache = cache_->get_value(hash_value & (cache_->size() - 1));
+  if (cache && cache.test_hash_value(hash_value)) {
+    Key cached_key;
+    if (pool_->get(cache.key_id(), &cached_key)) {
+      if (key == cached_key) {
+        if (key_id) {
+          *key_id = cache.key_id();
+        }
+        return false;
+      }
+    }
+  }
   uint64_t node_id = ROOT_NODE_ID;
   Node *node = &nodes_->get_value(node_id);
   if (node->status() == NODE_DEAD) {
@@ -421,6 +441,7 @@ bool Patricia<T>::add(KeyArg key, int64_t *key_id) {
     if (key_id) {
       *key_id = next_key_id;
     }
+    cache.set(next_key_id, hash_value);
     return true;
   }
   Node *history[(sizeof(Key) * 8) + 1];
@@ -439,6 +460,7 @@ bool Patricia<T>::add(KeyArg key, int64_t *key_id) {
     if (key_id) {
       *key_id = node->key_id();
     }
+    cache.set(node->key_id(), hash_value);
     return false;
   }
   // Find the branching point in "history".
@@ -463,11 +485,13 @@ bool Patricia<T>::add(KeyArg key, int64_t *key_id) {
   if (key_id) {
     *key_id = next_key_id;
   }
+  cache.set(next_key_id, hash_value);
   return true;
 }
 
 template <typename T>
 bool Patricia<T>::remove(KeyArg key) {
+  refresh_nodes();
   const Key normalized_key = Helper<T>::normalize(key);
   uint64_t node_id = ROOT_NODE_ID;
   Node *node = &nodes_->get_value(node_id);
@@ -500,6 +524,7 @@ bool Patricia<T>::remove(KeyArg key) {
 
 template <typename T>
 bool Patricia<T>::replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id) {
+  refresh_nodes();
   const Key src_normalized_key = Helper<T>::normalize(src_key);
   uint64_t node_id = ROOT_NODE_ID;
   Node *src_node = &nodes_->get_value(node_id);
@@ -588,15 +613,46 @@ bool Patricia<T>::replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id) {
 
 template <typename T>
 void Patricia<T>::defrag(double usage_rate_threshold) {
-  // TODO
+  refresh_nodes();
+  if (max_key_id() == MAP_MIN_KEY_ID) {
+    // Nothing to do.
+    return;
+  }
+  defrag_nodes();
   pool_->defrag(usage_rate_threshold);
 }
 
 template <typename T>
 void Patricia<T>::truncate() {
-  Node &root_node = nodes_->get_value(ROOT_NODE_ID);
-  pool_->truncate();
-  root_node = Node::dead_node();
+  refresh_nodes();
+  if (max_key_id() == MAP_MIN_KEY_ID) {
+    // Nothing to do.
+    return;
+  }
+  std::unique_ptr<NodeArray> new_nodes(
+      NodeArray::create(storage_, storage_node_id_, NODE_ARRAY_SIZE));
+  try {
+    new_nodes->set(ROOT_NODE_ID, Node::dead_node());
+    new_nodes->set(DUMMY_NODE_ID, Node::dead_node());
+    for (uint64_t i = 0; i < CACHE_SIZE; ++i) {
+      cache_->set(i, CacheEntry::invalid_entry());
+    }
+    pool_->truncate();
+  } catch (...) {
+    NodeArray::unlink(storage_, new_nodes->storage_node_id());
+    throw;
+  }
+  {
+    // Validate a new nodes.
+    Lock lock(&header_->mutex);
+    header_->nodes_storage_node_id = new_nodes->storage_node_id();
+    header_->next_node_id = 2;
+    ++header_->nodes_id;
+    old_nodes_.swap(new_nodes);
+    nodes_.swap(old_nodes_);
+    nodes_id_ = header_->nodes_id;
+  }
+  NodeArray::unlink(storage_, old_nodes_->storage_node_id());
 }
 
 template <typename T>
@@ -611,8 +667,11 @@ void Patricia<T>::create_map(Storage *storage, uint32_t storage_node_id,
     *header_ = Header();
     nodes_.reset(NodeArray::create(storage, storage_node_id_, NODE_ARRAY_SIZE));
     pool_.reset(KeyPool<T>::create(storage, storage_node_id_));
+    cache_.reset(Cache::create(storage, storage_node_id_,
+                               CACHE_SIZE, CacheEntry::invalid_entry()));
     header_->nodes_storage_node_id = nodes_->storage_node_id();
     header_->pool_storage_node_id = pool_->storage_node_id();
+    header_->cache_storage_node_id = cache_->storage_node_id();
     nodes_->set(ROOT_NODE_ID, Node::dead_node());
     nodes_->set(DUMMY_NODE_ID, Node::dead_node());
   } catch (...) {
@@ -637,8 +696,85 @@ void Patricia<T>::open_map(Storage *storage, uint32_t storage_node_id) {
                   << ", actual = " << header_->common_header.format();
     throw LogicError();
   }
+  Lock lock(&header_->mutex);
   nodes_.reset(NodeArray::open(storage, header_->nodes_storage_node_id));
   pool_.reset(KeyPool<T>::open(storage, header_->pool_storage_node_id));
+  cache_.reset(Cache::open(storage, header_->cache_storage_node_id));
+  nodes_id_ = header_->nodes_id;
+}
+
+template <typename T>
+void Patricia<T>::defrag_nodes() {
+  // Create a new nodes.
+  std::unique_ptr<NodeArray> new_nodes(
+      NodeArray::create(storage_, storage_node_id_, NODE_ARRAY_SIZE));
+  uint64_t next_node_id = 2;
+  try {
+    next_node_id = rearrange_nodes(ROOT_NODE_ID, ROOT_NODE_ID,
+                                   next_node_id, new_nodes.get());
+    next_node_id = rearrange_nodes(DUMMY_NODE_ID, DUMMY_NODE_ID,
+                                   next_node_id, new_nodes.get());
+  } catch (...) {
+    NodeArray::unlink(storage_, new_nodes->storage_node_id());
+    throw;
+  }
+  {
+    // Validate a new nodes.
+    Lock lock(&header_->mutex);
+    header_->nodes_storage_node_id = new_nodes->storage_node_id();
+    header_->next_node_id = next_node_id;
+    ++header_->nodes_id;
+    old_nodes_.swap(new_nodes);
+    nodes_.swap(old_nodes_);
+    nodes_id_ = header_->nodes_id;
+  }
+  NodeArray::unlink(storage_, old_nodes_->storage_node_id());
+}
+
+template <typename T>
+uint64_t Patricia<T>::rearrange_nodes(uint64_t src_node_id,
+                                      uint64_t dest_node_id,
+                                      uint64_t next_node_id,
+                                      NodeArray *dest_nodes) {
+  const Node src_node = nodes_->get(src_node_id);
+  Node &dest_node = dest_nodes->get_value(dest_node_id);
+  switch (src_node.status()) {
+    case NODE_DEAD:
+    case NODE_LEAF: {
+      dest_node = src_node;
+      return next_node_id;
+    }
+    case NODE_BRANCH: {
+      dest_node = Node::branch_node(src_node.bit_pos(), next_node_id);
+      break;
+    }
+    case NODE_TERMINAL: {
+      dest_node = Node::terminal_node(src_node.bit_size(), next_node_id);
+      break;
+    }
+  }
+  src_node_id = src_node.offset();
+  dest_node_id = next_node_id;
+  next_node_id += 2;
+  next_node_id = rearrange_nodes(src_node_id, dest_node_id,
+                                 next_node_id, dest_nodes);
+  next_node_id = rearrange_nodes(src_node_id + 1, dest_node_id + 1,
+                                 next_node_id, dest_nodes);
+  return next_node_id;
+}
+
+template <typename T>
+void Patricia<T>::refresh_nodes() {
+  if (nodes_id_ != header_->nodes_id) {
+    Lock lock(&header_->mutex);
+    if (nodes_id_ != header_->nodes_id) {
+      std::unique_ptr<NodeArray> new_nodes(
+          NodeArray::open(storage_, header_->nodes_storage_node_id));
+      old_nodes_.swap(new_nodes);
+      nodes_.swap(old_nodes_);
+      nodes_id_ = header_->nodes_id;
+    }
+  }
 }
 
 template <typename T>
@@ -850,9 +986,7 @@ bool Patricia<Bytes>::reset(int64_t key_id, KeyArg dest_key) {
     src_prev_node = src_node;
   }
   if (header_->next_node_id >= NODE_ARRAY_SIZE) {
-    GRNXX_ERROR() << "too many nodes: next_node_id = " << header_->next_node_id
-                  << ", max_node_id = " << NODE_ARRAY_SIZE;
-    throw LogicError();
+    defrag_nodes();
   }
   // Add the destination key.
   constexpr std::size_t HISTORY_SIZE = 8;
@@ -1048,9 +1182,7 @@ bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) {
     }
   }
   if (header_->next_node_id >= NODE_ARRAY_SIZE) {
-    GRNXX_ERROR() << "too many nodes: next_node_id = " << header_->next_node_id
-                  << ", max_node_id = " << NODE_ARRAY_SIZE;
-    throw LogicError();
+    defrag_nodes();
   }
   uint64_t node_id = ROOT_NODE_ID;
   Node *node = &nodes_->get_value(node_id);
@@ -1279,9 +1411,7 @@ bool Patricia<Bytes>::replace(KeyArg src_key, KeyArg dest_key,
     src_node = &nodes_->get_value(src_node_id);
   }
   if (header_->next_node_id >= NODE_ARRAY_SIZE) {
-    GRNXX_ERROR() << "too many nodes: next_node_id = " << header_->next_node_id
-                  << ", max_node_id = " << NODE_ARRAY_SIZE;
-    throw LogicError();
+    defrag_nodes();
   }
   // Add the destination key.
   constexpr std::size_t HISTORY_SIZE = 8;

  Modified: lib/grnxx/map/patricia.hpp (+14 -0)
===================================================================
--- lib/grnxx/map/patricia.hpp    2013-08-06 12:02:13 +0900 (a9b3e8a)
+++ lib/grnxx/map/patricia.hpp    2013-08-06 15:01:41 +0900 (1e6235c)
@@ -44,6 +44,8 @@ class Patricia : public Map<T> {
   using Header = PatriciaHeader;
   using Node = PatriciaNode;
   using NodeArray = Array<Node, 65536, 8192>;
+  using CacheEntry = PatriciaCacheEntry;
+  using Cache = Array<CacheEntry>;
 
  public:
   using Key = typename Map<T>::Key;
@@ -81,12 +83,24 @@ class Patricia : public Map<T> {
   uint32_t storage_node_id_;
   Header *header_;
   std::unique_ptr<NodeArray> nodes_;
+  std::unique_ptr<NodeArray> old_nodes_;
   std::unique_ptr<KeyPool<T>> pool_;
+  std::unique_ptr<Cache> cache_;
+  uint64_t nodes_id_;
 
   void create_map(Storage *storage, uint32_t storage_node_id,
                   const MapOptions &options);
   void open_map(Storage *storage, uint32_t storage_node_id);
 
+  // Resize "nodes_" and "cache_".
+  void defrag_nodes();
+  // Recursively arrange nodes.
+  uint64_t rearrange_nodes(uint64_t src_node_id, uint64_t dest_node_id,
+                           uint64_t next_node_id, NodeArray *dest_nodes);
+
+  // Refresh "nodes_" and "cache_" if new ones are available.
+  void refresh_nodes();
+
   static uint64_t get_ith_bit(KeyArg key, uint64_t bit_pos);
   static uint64_t count_common_prefix_bits(KeyArg lhs, KeyArg rhs);
 };
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index