Kouhei Sutou
null+****@clear*****
Wed May 17 22:07:45 JST 2017
Kouhei Sutou 2017-05-17 22:07:45 +0900 (Wed, 17 May 2017) New Revision: 5de8314b89f1209e7fc7e87a674dc9db006e7a92 https://github.com/groonga/groonga/commit/5de8314b89f1209e7fc7e87a674dc9db006e7a92 Message: Add grn_arrow_dump_columns() It can customize dump columns. Modified files: include/groonga/arrow.h lib/arrow.cpp Modified: include/groonga/arrow.h (+4 -0) =================================================================== --- include/groonga/arrow.h 2017-05-15 22:02:44 +0900 (57f3abf) +++ include/groonga/arrow.h 2017-05-17 22:07:45 +0900 (248b44d) @@ -28,6 +28,10 @@ GRN_API grn_rc grn_arrow_load(grn_ctx *ctx, GRN_API grn_rc grn_arrow_dump(grn_ctx *ctx, grn_obj *table, const char *path); +GRN_API grn_rc grn_arrow_dump_columns(grn_ctx *ctx, + grn_obj *table, + grn_obj *columns, + const char *path); #ifdef __cplusplus } Modified: lib/arrow.cpp (+52 -26) =================================================================== --- lib/arrow.cpp 2017-05-15 22:02:44 +0900 (ec3d35a) +++ lib/arrow.cpp 2017-05-17 22:07:45 +0900 (2231141) @@ -409,31 +409,20 @@ namespace grnarrow { class FileDumper { public: - FileDumper(grn_ctx *ctx, grn_obj *grn_table) + FileDumper(grn_ctx *ctx, grn_obj *grn_table, grn_obj *grn_columns) : ctx_(ctx), - grn_table_(grn_table) { - grn_columns_ = grn_hash_create(ctx_, - NULL, - sizeof(grn_id), - 0, - GRN_OBJ_TABLE_HASH_KEY | GRN_HASH_TINY); - grn_table_columns(ctx_, - grn_table_, - "", 0, - reinterpret_cast<grn_obj *>(grn_columns_)); + grn_table_(grn_table), + grn_columns_(grn_columns) { } ~FileDumper() { - grn_hash_close(ctx_, grn_columns_); } grn_rc dump(arrow::io::OutputStream *output) { std::vector<std::shared_ptr<arrow::Field>> fields; - GRN_HASH_EACH_BEGIN(ctx_, grn_columns_, cursor, id) { - void *key; - grn_hash_cursor_get_key(ctx_, cursor, &key); - auto column_id = static_cast<grn_id *>(key); - auto column = grn_ctx_at(ctx_, *column_id); + auto n_columns = GRN_BULK_VSIZE(grn_columns_) / sizeof(grn_obj *); + for (auto i = 0; i < n_columns; 
++i) { + auto column = GRN_PTR_VALUE_AT(grn_columns_, i); char column_name[GRN_TABLE_MAX_KEY_SIZE]; int column_name_size; @@ -492,7 +481,7 @@ namespace grnarrow { field_type, false); fields.push_back(field); - } GRN_HASH_EACH_END(ctx_, cursor); + }; auto schema = std::make_shared<arrow::Schema>(fields); @@ -525,17 +514,15 @@ namespace grnarrow { private: grn_ctx *ctx_; grn_obj *grn_table_; - grn_hash *grn_columns_; + grn_obj *grn_columns_; void write_record_batch(std::vector<grn_id> &ids, std::shared_ptr<arrow::Schema> &schema, std::shared_ptr<arrow::ipc::RecordBatchFileWriter> &writer) { std::vector<std::shared_ptr<arrow::Array>> columns; - GRN_HASH_EACH_BEGIN(ctx_, grn_columns_, cursor, id) { - void *key; - grn_hash_cursor_get_key(ctx_, cursor, &key); - auto grn_column_id = static_cast<grn_id *>(key); - auto grn_column = grn_ctx_at(ctx_, *grn_column_id); + auto n_columns = GRN_BULK_VSIZE(grn_columns_) / sizeof(grn_obj *); + for (auto i = 0; i < n_columns; ++i) { + auto grn_column = GRN_PTR_VALUE_AT(grn_columns_, i); arrow::Status status; std::shared_ptr<arrow::Array> column; @@ -588,7 +575,7 @@ namespace grnarrow { continue; } columns.push_back(column); - } GRN_HASH_EACH_END(ctx_, cursor); + } arrow::RecordBatch record_batch(schema, ids.size(), columns); writer->WriteRecordBatch(record_batch); @@ -802,6 +789,45 @@ grn_arrow_dump(grn_ctx *ctx, { GRN_API_ENTER; #ifdef GRN_WITH_ARROW + auto all_columns = + grn_hash_create(ctx, + NULL, + sizeof(grn_id), + 0, + GRN_OBJ_TABLE_HASH_KEY | GRN_HASH_TINY); + grn_table_columns(ctx, + table, + "", 0, + reinterpret_cast<grn_obj *>(all_columns)); + + grn_obj columns; + GRN_PTR_INIT(&columns, GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_HASH_EACH_BEGIN(ctx, all_columns, cursor, id) { + void *key; + grn_hash_cursor_get_key(ctx, cursor, &key); + auto column_id = static_cast<grn_id *>(key); + auto column = grn_ctx_at(ctx, *column_id); + GRN_PTR_PUT(ctx, &columns, column); + } GRN_HASH_EACH_END(ctx, cursor); + grn_hash_close(ctx, 
all_columns); + + grn_arrow_dump_columns(ctx, table, &columns, path); + GRN_OBJ_FIN(ctx, &columns); +#else /* GRN_WITH_ARROW */ + ERR(GRN_FUNCTION_NOT_IMPLEMENTED, + "[arrow][dump] Apache Arrow support isn't enabled"); +#endif /* GRN_WITH_ARROW */ + GRN_API_RETURN(ctx->rc); +} + +grn_rc +grn_arrow_dump_columns(grn_ctx *ctx, + grn_obj *table, + grn_obj *columns, + const char *path) +{ + GRN_API_ENTER; +#ifdef GRN_WITH_ARROW std::shared_ptr<arrow::io::FileOutputStream> output; auto status = arrow::io::FileOutputStream::Open(path, &output); if (!grnarrow::check_status(ctx, @@ -812,7 +838,7 @@ grn_arrow_dump(grn_ctx *ctx, GRN_API_RETURN(ctx->rc); } - grnarrow::FileDumper dumper(ctx, table); + grnarrow::FileDumper dumper(ctx, table, columns); dumper.dump(output.get()); #else /* GRN_WITH_ARROW */ ERR(GRN_FUNCTION_NOT_IMPLEMENTED, -------------- next part -------------- HTMLの添付ファイルを保管しました... Download