[Groonga-commit] groonga/grnxx at 575be14 [master] Add Column<Vector<Int>>. (#110)

Back to archive index

susumu.yata null+****@clear*****
Tue Dec 16 10:42:55 JST 2014


susumu.yata	2014-11-17 17:31:49 +0900 (Mon, 17 Nov 2014)

  New Revision: 575be14337664c835394c10a6b03b36324229d77
  https://github.com/groonga/grnxx/commit/575be14337664c835394c10a6b03b36324229d77

  Message:
    Add Column<Vector<Int>>. (#110)

  Modified files:
    lib/grnxx/impl/column/base.cpp
    lib/grnxx/impl/column/vector.hpp
    lib/grnxx/impl/column/vector/Makefile.am
    lib/grnxx/impl/column/vector/int.cpp
    lib/grnxx/impl/column/vector/int.hpp

  Modified: lib/grnxx/impl/column/base.cpp (+4 -4)
===================================================================
--- lib/grnxx/impl/column/base.cpp    2014-11-17 17:17:38 +0900 (97d4f55)
+++ lib/grnxx/impl/column/base.cpp    2014-11-17 17:31:49 +0900 (efec40f)
@@ -203,10 +203,10 @@ std::unique_ptr<ColumnBase> ColumnBase::create(
       column.reset(new impl::Column<Vector<Bool>>(table, name, options));
       break;
     }
-//    case INT_VECTOR_DATA: {
-//      column.reset(new impl::Column<Vector<Int>>(table, name, options));
-//      break;
-//    }
+    case INT_VECTOR_DATA: {
+      column.reset(new impl::Column<Vector<Int>>(table, name, options));
+      break;
+    }
 //    case FLOAT_VECTOR_DATA: {
 //      column.reset(new impl::Column<Vector<Float>>(table, name, options));
 //      break;

  Modified: lib/grnxx/impl/column/vector.hpp (+1 -1)
===================================================================
--- lib/grnxx/impl/column/vector.hpp    2014-11-17 17:17:38 +0900 (1721cf5)
+++ lib/grnxx/impl/column/vector.hpp    2014-11-17 17:31:49 +0900 (76c7d66)
@@ -4,7 +4,7 @@
 #include "grnxx/impl/column/vector/bool.hpp"
 //#include "grnxx/impl/column/vector/float.hpp"
 //#include "grnxx/impl/column/vector/geo_point.hpp"
-//#include "grnxx/impl/column/vector/int.hpp"
+#include "grnxx/impl/column/vector/int.hpp"
 //#include "grnxx/impl/column/vector/text.hpp"
 
 #endif  // GRNXX_IMPL_COLUMN_VECTOR_HPP

  Modified: lib/grnxx/impl/column/vector/Makefile.am (+2 -2)
===================================================================
--- lib/grnxx/impl/column/vector/Makefile.am    2014-11-17 17:17:38 +0900 (01b77d1)
+++ lib/grnxx/impl/column/vector/Makefile.am    2014-11-17 17:31:49 +0900 (c176333)
@@ -9,11 +9,11 @@ lib_LTLIBRARIES = libgrnxx_impl_column_vector.la
 libgrnxx_impl_column_vector_la_LDFLAGS = @AM_LTLDFLAGS@
 
 libgrnxx_impl_column_vector_la_SOURCES =	\
-	bool.cpp
+	bool.cpp				\
+	int.cpp
 
 #	float.cpp				\
 #	geo_point.cpp				\
-#	int.cpp					\
 #	text.cpp
 
 libgrnxx_impl_column_vector_includedir = ${includedir}/grnxx/impl/column/vector

  Modified: lib/grnxx/impl/column/vector/int.cpp (+181 -110)
===================================================================
--- lib/grnxx/impl/column/vector/int.cpp    2014-11-17 17:17:38 +0900 (c743c08)
+++ lib/grnxx/impl/column/vector/int.cpp    2014-11-17 17:31:49 +0900 (370cd73)
@@ -1,152 +1,223 @@
-#include "grnxx/impl/column/column_vector_int.hpp"
+#include "grnxx/impl/column/vector/int.hpp"
+
+#include <cstring>
 
-#include "grnxx/cursor.hpp"
 #include "grnxx/impl/db.hpp"
 #include "grnxx/impl/table.hpp"
+//#include "grnxx/impl/index.hpp"
 
 namespace grnxx {
 namespace impl {
 
-bool Column<Vector<Int>>::set(Error *error, Int row_id,
-                              const Datum &datum) {
-  if (datum.type() != INT_VECTOR_DATA) {
-    GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Wrong data type");
-    return false;
+Column<Vector<Int>>::Column(Table *table,
+                            const String &name,
+                            const ColumnOptions &options)
+    : ColumnBase(table, name, INT_VECTOR_DATA),
+      headers_(),
+      bodies_() {
+  if (!options.reference_table_name.is_empty()) {
+    reference_table_ = table->_db()->find_table(options.reference_table_name);
+    if (!reference_table_) {
+      throw "Table not found";  // TODO
+    }
   }
-  if (!table_->test_row(error, row_id)) {
-    return false;
+}
+
+Column<Vector<Int>>::~Column() {}
+
+void Column<Vector<Int>>::set(Int row_id, const Datum &datum) {
+  Vector<Int> new_value = parse_datum(datum);
+  if (!table_->test_row(row_id)) {
+    throw "Invalid row ID";  // TODO
   }
-  Vector<Int> value = datum.force_int_vector();
-  if (value.size() == 0) {
-    headers_[row_id] = 0;
-    return true;
+  if (new_value.is_na()) {
+    unset(row_id);
+    return;
   }
-  if (ref_table_) {
-    for (Int i = 0; i < value.size(); ++i) {
-      if (!ref_table_->test_row(error, value[i])) {
-        return false;
+  if (reference_table_) {
+    size_t new_value_size = new_value.size().value();
+    for (size_t i = 0; i < new_value_size; ++i) {
+      if (!reference_table_->test_row(new_value[i])) {
+        throw "Invalid reference";  // TODO
       }
     }
   }
-  Int offset = bodies_.size();
-  if (value.size() < 0xFFFF) {
-    if (!bodies_.resize(error, offset + value.size())) {
-      return false;
-    }
-    for (Int i = 0; i < value.size(); ++i) {
-      bodies_[offset + i] = value[i];
-    }
-    headers_[row_id] = (offset << 16) | value.size();
+  Vector<Int> old_value = get(row_id);
+  if ((old_value == new_value).is_true()) {
+    return;
+  }
+  if (!old_value.is_na()) {
+    // TODO: Remove the old value from indexes.
+//    for (size_t i = 0; i < num_indexes(); ++i) {
+//      indexes_[i]->remove(row_id, old_value);
+//    }
+  }
+  size_t value_id = row_id.value();
+  if (value_id >= headers_.size()) {
+    headers_.resize(value_id + 1, na_header());
+  }
+  // TODO: Insert the new value into indexes.
+//  for (size_t i = 0; i < num_indexes(); ++i) try {
+//    indexes_[i]->insert(row_id, datum)) {
+//  } catch (...) {
+//    for (size_t j = 0; j < i; ++i) {
+//      indexes_[j]->remove(row_id, datum);
+//    }
+//    throw;
+//  }
+  // TODO: Error handling.
+  size_t offset = bodies_.size();
+  size_t size = new_value.size().value();
+  uint64_t header;
+  if (size < 0xFFFF) {
+    bodies_.resize(offset + size);
+    std::memcpy(&bodies_[offset], new_value.data(), sizeof(Int) * size);
+    header = (offset << 16) | size;
   } else {
     // The size of a long vector is stored in front of the body.
-    if (!bodies_.resize(error, offset + 1 + value.size())) {
-      return false;
+    if ((offset % sizeof(uint64_t)) != 0) {
+      offset += sizeof(uint64_t) - (offset % sizeof(uint64_t));
     }
-    bodies_[offset] = value.size();
-    for (Int i = 0; i < value.size(); ++i) {
-      bodies_[offset + 1 + i] = value[i];
-    }
-    headers_[row_id] = (offset << 16) | 0xFFFF;
+    bodies_.resize(offset + sizeof(uint64_t) + size);
+    *reinterpret_cast<uint64_t *>(&bodies_[offset]) = size;
+    std::memcpy(&bodies_[offset + sizeof(uint64_t)],
+                new_value.data(), sizeof(Int) * size);
+    header = (offset << 16) | 0xFFFF;
   }
-  return true;
+  headers_[value_id] = header;
 }
 
-bool Column<Vector<Int>>::get(Error *error, Int row_id, Datum *datum) const {
-  if (!table_->test_row(error, row_id)) {
-    return false;
+void Column<Vector<Int>>::get(Int row_id, Datum *datum) const {
+  size_t value_id = row_id.value();
+  if (value_id >= headers_.size()) {
+    *datum = Vector<Int>::na();
+  } else {
+    // TODO
+    *datum = get(row_id);
   }
-  *datum = get(row_id);
-  return true;
 }
 
-unique_ptr<Column<Vector<Int>>> Column<Vector<Int>>::create(
-    Error *error,
-    Table *table,
-    const StringCRef &name,
-    const ColumnOptions &options) {
-  unique_ptr<Column> column(new (nothrow) Column);
-  if (!column) {
-    GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed");
-    return nullptr;
-  }
-  if (!column->initialize_base(error, table, name, INT_VECTOR_DATA, options)) {
-    return nullptr;
-  }
-  if (!column->headers_.resize(error, table->max_row_id() + 1, 0)) {
-    return nullptr;
-  }
-  if (column->ref_table()) {
-    if (!column->ref_table_->append_referrer_column(error, column.get())) {
-      return nullptr;
+bool Column<Vector<Int>>::contains(const Datum &datum) const {
+  // TODO: Use an index if exists.
+  Vector<Int> value = parse_datum(datum);
+  if (value.is_na()) {
+    for (size_t i = 0; i < headers_.size(); ++i) {
+      if (headers_[i] == na_header()) {
+        return true;
+      }
+    }
+  } else {
+    for (size_t i = 0; i < headers_.size(); ++i) {
+      // TODO: Improve this.
+      if ((get(Int(i)) == value).is_true()) {
+        return true;
+      }
     }
   }
-  return column;
+  return false;
 }
 
-Column<Vector<Int>>::~Column() {}
-
-bool Column<Vector<Int>>::set_default_value(Error *error, Int row_id) {
-  if (row_id >= headers_.size()) {
-    if (!headers_.resize(error, row_id + 1)) {
-      return false;
+Int Column<Vector<Int>>::find_one(const Datum &datum) const {
+  // TODO: Use an index if exists.
+  Vector<Int> value = parse_datum(datum);
+  if (value.is_na()) {
+    for (size_t i = 0; i < headers_.size(); ++i) {
+      if (headers_[i] == na_header()) {
+        return Int(i);
+      }
+    }
+  } else {
+    for (size_t i = 0; i < headers_.size(); ++i) {
+      // TODO: Improve this.
+      if ((get(Int(i)) == value).is_true()) {
+        return Int(i);
+      }
     }
   }
-  headers_[row_id] = 0;
-  return true;
+  return Int::na();
 }
 
 void Column<Vector<Int>>::unset(Int row_id) {
-  headers_[row_id] = 0;
+  Vector<Int> value = get(row_id);
+  if (!value.is_na()) {
+    // TODO: Update indexes if exist.
+//    for (size_t i = 0; i < num_indexes(); ++i) {
+//      indexes_[i]->remove(row_id, value);
+//    }
+    headers_[row_id.value()] = na_header();
+  }
 }
 
-void Column<Vector<Int>>::clear_references(Int row_id) {
-  auto cursor = table_->create_cursor(nullptr);
-  if (!cursor) {
-    // Error.
-    return;
+void Column<Vector<Int>>::read(ArrayCRef<Record> records,
+                               ArrayRef<Vector<Int>> values) const {
+  if (records.size() != values.size()) {
+    throw "Data size conflict";  // TODO
+  }
+  for (size_t i = 0; i < records.size(); ++i) {
+    values.set(i, get(records[i].row_id));
   }
-  Array<Record> records;
-  for ( ; ; ) {
-    auto result = cursor->read(nullptr, 1024, &records);
-    if (!result.is_ok) {
-      // Error.
-      return;
-    } else if (result.count == 0) {
-      return;
+}
+
+Vector<Int> Column<Vector<Int>>::parse_datum(const Datum &datum) {
+  switch (datum.type()) {
+    case NA_DATA: {
+      return Vector<Int>::na();
     }
-    for (Int i = 0; i < records.size(); ++i) {
-      Int value_row_id = records.get_row_id(i);
-      Int value_size = static_cast<Int>(headers_[value_row_id] & 0xFFFF);
-      if (value_size == 0) {
-        continue;
-      }
-      Int value_offset = static_cast<Int>(headers_[value_row_id] >> 16);
-      if (value_size >= 0xFFFF) {
-        value_size = bodies_[value_offset];
-        ++value_offset;
-      }
-      Int count = 0;
-      for (Int j = 0; j < value_size; ++j) {
-        if (bodies_[value_offset + j] != row_id) {
-          bodies_[value_offset + count] = bodies_[value_offset + j];
-          ++count;
-        }
-      }
-      if (count < value_size) {
-        if (count == 0) {
-          headers_[value_row_id] = 0;
-        } else if (count < 0xFFFF) {
-          headers_[value_row_id] = count | (value_offset << 16);
-        } else {
-          bodies_[value_offset - 1] = count;
-        }
-      }
+    case INT_VECTOR_DATA: {
+      return datum.as_int_vector();
+    }
+    default: {
+      throw "Wrong data type";  // TODO
     }
-    records.clear();
   }
 }
 
-Column<Vector<Int>>::Column() : ColumnBase(), headers_(), bodies_() {}
+//void Column<Vector<Int>>::clear_references(Int row_id) {
+//  auto cursor = table_->create_cursor(nullptr);
+//  if (!cursor) {
+//    // Error.
+//    return;
+//  }
+//  Array<Record> records;
+//  for ( ; ; ) {
+//    auto result = cursor->read(nullptr, 1024, &records);
+//    if (!result.is_ok) {
+//      // Error.
+//      return;
+//    } else if (result.count == 0) {
+//      return;
+//    }
+//    for (Int i = 0; i < records.size(); ++i) {
+//      Int value_row_id = records.get_row_id(i);
+//      Int value_size = static_cast<Int>(headers_[value_row_id] & 0xFFFF);
+//      if (value_size == 0) {
+//        continue;
+//      }
+//      Int value_offset = static_cast<Int>(headers_[value_row_id] >> 16);
+//      if (value_size >= 0xFFFF) {
+//        value_size = bodies_[value_offset];
+//        ++value_offset;
+//      }
+//      Int count = 0;
+//      for (Int j = 0; j < value_size; ++j) {
+//        if (bodies_[value_offset + j] != row_id) {
+//          bodies_[value_offset + count] = bodies_[value_offset + j];
+//          ++count;
+//        }
+//      }
+//      if (count < value_size) {
+//        if (count == 0) {
+//          headers_[value_row_id] = 0;
+//        } else if (count < 0xFFFF) {
+//          headers_[value_row_id] = count | (value_offset << 16);
+//        } else {
+//          bodies_[value_offset - 1] = count;
+//        }
+//      }
+//    }
+//    records.clear();
+//  }
+//}
 
 }  // namespace impl
 }  // namespace grnxx

  Modified: lib/grnxx/impl/column/vector/int.hpp (+99 -31)
===================================================================
--- lib/grnxx/impl/column/vector/int.hpp    2014-11-17 17:17:38 +0900 (e009fe5)
+++ lib/grnxx/impl/column/vector/int.hpp    2014-11-17 17:31:49 +0900 (9fec61e)
@@ -1,71 +1,139 @@
 #ifndef GRNXX_IMPL_COLUMN_VECTOR_INT_HPP
 #define GRNXX_IMPL_COLUMN_VECTOR_INT_HPP
 
-#include "grnxx/impl/column/column.hpp"
+#include <limits>
+#include <cstdint>
+
+#include "grnxx/impl/column/base.hpp"
 
 namespace grnxx {
 namespace impl {
 
-// TODO
+template <typename T> class Column;
+
 template <>
 class Column<Vector<Int>> : public ColumnBase {
  public:
-  // -- Public API --
+  // -- Public API (grnxx/column.hpp) --
 
-  bool set(Error *error, Int row_id, const Datum &datum);
-  bool get(Error *error, Int row_id, Datum *datum) const;
+  Column(Table *table, const String &name, const ColumnOptions &options);
+  ~Column();
 
-  // -- Internal API --
+  void set(Int row_id, const Datum &datum);
+  void get(Int row_id, Datum *datum) const;
 
-  // Create a new column.
-  //
-  // Returns a pointer to the column on success.
-  // On failure, returns nullptr and stores error information into "*error" if
-  // "error" != nullptr.
-  static unique_ptr<Column> create(Error *error,
-                                   Table *table,
-                                   const StringCRef &name,
-                                   const ColumnOptions &options);
+  bool contains(const Datum &datum) const;
+  Int find_one(const Datum &datum) const;
 
-  ~Column();
+  // -- Internal API (grnxx/impl/column/base.hpp) --
 
-  bool set_default_value(Error *error, Int row_id);
   void unset(Int row_id);
 
-  void clear_references(Int row_id);
+  // -- Internal API --
 
-  // Return a value identified by "row_id".
+  // Return a value.
   //
-  // Assumes that "row_id" is valid. Otherwise, the result is undefined.
+  // If "row_id" is valid, returns the stored value.
+  // If "row_id" is invalid, returns N/A.
+  //
+  // TODO: Vector cannot reuse allocated memory because of this interface.
   Vector<Int> get(Int row_id) const {
-    Int size = static_cast<Int>(headers_[row_id] & 0xFFFF);
+    size_t value_id = row_id.value();
+    if (value_id >= headers_.size()) {
+      return Vector<Int>::na();
+    }
+    if (headers_[value_id] == na_header()) {
+      return Vector<Int>::na();
+    }
+    size_t size = headers_[value_id] & 0xFFFF;
     if (size == 0) {
       return Vector<Int>(nullptr, 0);
     }
-    Int offset = static_cast<Int>(headers_[row_id] >> 16);
+    size_t offset = headers_[value_id] >> 16;
     if (size < 0xFFFF) {
       return Vector<Int>(&bodies_[offset], size);
     } else {
       // The size of a long vector is stored in front of the body.
-      size = bodies_[offset];
+      size = *reinterpret_cast<const uint64_t *>(&bodies_[offset]);
       return Vector<Int>(&bodies_[offset + 1], size);
     }
   }
-
   // Read values.
-  void read(ArrayCRef<Record> records, ArrayRef<Vector<Int>> values) const {
-    for (Int i = 0; i < records.size(); ++i) {
-      values.set(i, get(records.get_row_id(i)));
-    }
-  }
+  //
+  // On failure, throws an exception.
+  void read(ArrayCRef<Record> records, ArrayRef<Vector<Int>> values) const;
 
  private:
-  Array<UInt> headers_;
+  Array<uint64_t> headers_;
   Array<Int> bodies_;
 
-  Column();
+  static constexpr uint64_t na_header() {
+    return std::numeric_limits<uint64_t>::max();
+  }
+
+  static Vector<Int> parse_datum(const Datum &datum);
 };
 
+//// TODO
+//template <>
+//class Column<Vector<Int>> : public ColumnBase {
+// public:
+//  // -- Public API --
+
+//  bool set(Error *error, Int row_id, const Datum &datum);
+//  bool get(Error *error, Int row_id, Datum *datum) const;
+
+//  // -- Internal API --
+
+//  // Create a new column.
+//  //
+//  // Returns a pointer to the column on success.
+//  // On failure, returns nullptr and stores error information into "*error" if
+//  // "error" != nullptr.
+//  static unique_ptr<Column> create(Error *error,
+//                                   Table *table,
+//                                   const StringCRef &name,
+//                                   const ColumnOptions &options);
+
+//  ~Column();
+
+//  bool set_default_value(Error *error, Int row_id);
+//  void unset(Int row_id);
+
+//  void clear_references(Int row_id);
+
+//  // Return a value identified by "row_id".
+//  //
+//  // Assumes that "row_id" is valid. Otherwise, the result is undefined.
+//  Vector<Int> get(Int row_id) const {
+//    Int size = static_cast<Int>(headers_[row_id] & 0xFFFF);
+//    if (size == 0) {
+//      return Vector<Int>(nullptr, 0);
+//    }
+//    Int offset = static_cast<Int>(headers_[row_id] >> 16);
+//    if (size < 0xFFFF) {
+//      return Vector<Int>(&bodies_[offset], size);
+//    } else {
+//      // The size of a long vector is stored in front of the body.
+//      size = bodies_[offset];
+//      return Vector<Int>(&bodies_[offset + 1], size);
+//    }
+//  }
+
+//  // Read values.
+//  void read(ArrayCRef<Record> records, ArrayRef<Vector<Int>> values) const {
+//    for (Int i = 0; i < records.size(); ++i) {
+//      values.set(i, get(records.get_row_id(i)));
+//    }
+//  }
+
+// private:
+//  Array<UInt> headers_;
+//  Array<Int> bodies_;
+
+//  Column();
+//};
+
 }  // namespace impl
 }  // namespace grnxx
 
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index