[Groonga-commit] groonga/groonga [master] work with UTF-8 BOM input. #448

Back to archive index

null+****@clear***** null+****@clear*****
2010年 9月 6日 (月) 15:55:19 JST


Kouhei Sutou	2010-09-06 06:55:19 +0000 (Mon, 06 Sep 2010)

  New Revision: c365d69a531a97d8ccc6c6fa1eef6351a9c330cb

  Log:
    work with UTF-8 BOM input. #448

  Added files:
    test/unit/command/test-bom.rb
  Modified files:
    src/groonga.c
    test/unit/command/Makefile.am

  Modified: src/groonga.c (+13 -3)
===================================================================
--- src/groonga.c    2010-09-04 09:39:22 +0000 (5960c81)
+++ src/groonga.c    2010-09-06 06:55:19 +0000 (855b6c0)
@@ -166,8 +166,9 @@ show_version(void)
 #define BUFSIZE 0x1000000
 
 inline static int
-prompt(char *buf)
+prompt(grn_ctx *ctx, char *buf)
 {
+  static int the_first_read = GRN_TRUE;
   int len;
   if (!batchmode) {
 #ifdef HAVE_LIBEDIT
@@ -204,6 +205,15 @@ prompt(char *buf)
       len = 0;
     }
   }
+  if (the_first_read && len > 0) {
+    const char bom[] = {0xef, 0xbb, 0xbf};
+    if (GRN_CTX_GET_ENCODING(ctx) == GRN_ENC_UTF8 &&
+        len > 3 && !memcmp(buf, bom, 3)) {
+      memmove(buf, buf + 3, len - 3);
+      len -= 3;
+    }
+    the_first_read = GRN_FALSE;
+  }
   return len;
 }
 
@@ -631,7 +641,7 @@ do_alone(int argc, char **argv)
       if (!rc) {
         char *buf = GRN_TEXT_VALUE(&text);
         int  len;
-        while ((len = prompt(buf))) {
+        while ((len = prompt(ctx, buf))) {
           uint32_t size = len - 1;
           grn_ctx_send(ctx, buf, size, 0);
           if (ctx->stat == GRN_CTX_QUIT) { break; }
@@ -715,7 +725,7 @@ g_client(int argc, char **argv)
         char *buf = GRN_TEXT_VALUE(&text);
         int   len;
         if (batchmode) { BATCHMODE(ctx); }
-        while ((len = prompt(buf))) {
+        while ((len = prompt(ctx, buf))) {
           uint32_t size = len - 1;
           grn_ctx_send(ctx, buf, size, 0);
           rc = ctx->rc;

  Modified: test/unit/command/Makefile.am (+2 -2)
===================================================================
--- test/unit/command/Makefile.am    2010-09-04 09:39:22 +0000 (4596f5a)
+++ test/unit/command/Makefile.am    2010-09-06 06:55:19 +0000 (020194b)
@@ -1,4 +1,4 @@
 EXTRA_DIST =					\
 	test-config-file.rb			\
-	test-option-help.rb			\
-	test-option-pid-file.rb
+	test-option.rb				\
+	test-bom.rb

  Added: test/unit/command/test-bom.rb (+46 -0) 100644
===================================================================
--- /dev/null
+++ test/unit/command/test-bom.rb    2010-09-06 06:55:19 +0000 (e8ffa67)
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2010  Kouhei Sutou <kou****@clear*****>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+class BOMTest < Test::Unit::TestCase
+  include GroongaTestUtils
+
+  def setup
+    setup_database_path
+    @input_file = File.join(@tmp_dir, "commands")
+  end
+
+  def teardown
+    teardown_database_path
+  end
+
+  def test_no_bom
+    open(@input_file, "w") do |file|
+      file.puts("defrag")
+    end
+    assert_equal("[[0,0.0,0.0],true]\n",
+                 run_groonga("--file", @input_file, "-n", @database_path))
+  end
+
+  def test_bom
+    open(@input_file, "w") do |file|
+      file.print("\xef\xbb\xbf")
+      file.puts("defrag")
+    end
+    assert_equal("[[0,0.0,0.0],true]\n",
+                 run_groonga("--file", @input_file, "-n", @database_path))
+  end
+end




Groonga-commit メーリングリストの案内
Back to archive index