[Groonga-commit] groonga/groonga [master] doc: add about snippet_html function

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Nov 22 15:12:12 JST 2012


Kouhei Sutou	2012-11-22 15:12:12 +0900 (Thu, 22 Nov 2012)

  New Revision: afe65fb5245e1de6b8a4e848d569f865cad02300
  https://github.com/groonga/groonga/commit/afe65fb5245e1de6b8a4e848d569f865cad02300

  Log:
    doc: add about snippet_html function

  Added files:
    doc/source/example/reference/functions/snippet_html/usage.log
    doc/source/example/reference/functions/snippet_html/usage_basic.log
    doc/source/example/reference/functions/snippet_html/usage_setup.log
    doc/source/example/reference/functions/snippet_html/usage_string_literal.log
    doc/source/reference/functions/snippet_html.txt
  Modified files:
    doc/files.am

  Modified: doc/files.am (+12 -0)
===================================================================
--- doc/files.am    2012-11-22 14:41:00 +0900 (5657ed4)
+++ doc/files.am    2012-11-22 15:12:12 +0900 (cf08257)
@@ -73,6 +73,10 @@ absolute_source_files = \
 	$(top_srcdir)/doc/source/example/reference/functions/geo_distance_location_sphere.log \
 	$(top_srcdir)/doc/source/example/reference/functions/geo_distance_setup_distance.log \
 	$(top_srcdir)/doc/source/example/reference/functions/geo_distance_setup_location.log \
+	$(top_srcdir)/doc/source/example/reference/functions/snippet_html/usage.log \
+	$(top_srcdir)/doc/source/example/reference/functions/snippet_html/usage_basic.log \
+	$(top_srcdir)/doc/source/example/reference/functions/snippet_html/usage_setup.log \
+	$(top_srcdir)/doc/source/example/reference/functions/snippet_html/usage_string_literal.log \
 	$(top_srcdir)/doc/source/example/reference/grn_expr/query_syntax/setup.log \
 	$(top_srcdir)/doc/source/example/reference/grn_expr/query_syntax/simple_equal.log \
 	$(top_srcdir)/doc/source/example/reference/grn_expr/query_syntax/simple_full_text_search.log \
@@ -341,6 +345,7 @@ absolute_source_files = \
 	$(top_srcdir)/doc/source/reference/functions/geo_in_rectangle.txt \
 	$(top_srcdir)/doc/source/reference/functions/now.txt \
 	$(top_srcdir)/doc/source/reference/functions/rand.txt \
+	$(top_srcdir)/doc/source/reference/functions/snippet_html.txt \
 	$(top_srcdir)/doc/source/reference/grn_expr.txt \
 	$(top_srcdir)/doc/source/reference/grn_expr/query_syntax.txt \
 	$(top_srcdir)/doc/source/reference/grn_expr/script_syntax.txt \
@@ -471,6 +476,10 @@ source_files_relative_from_doc_dir = \
 	source/example/reference/functions/geo_distance_location_sphere.log \
 	source/example/reference/functions/geo_distance_setup_distance.log \
 	source/example/reference/functions/geo_distance_setup_location.log \
+	source/example/reference/functions/snippet_html/usage.log \
+	source/example/reference/functions/snippet_html/usage_basic.log \
+	source/example/reference/functions/snippet_html/usage_setup.log \
+	source/example/reference/functions/snippet_html/usage_string_literal.log \
 	source/example/reference/grn_expr/query_syntax/setup.log \
 	source/example/reference/grn_expr/query_syntax/simple_equal.log \
 	source/example/reference/grn_expr/query_syntax/simple_full_text_search.log \
@@ -739,6 +748,7 @@ source_files_relative_from_doc_dir = \
 	source/reference/functions/geo_in_rectangle.txt \
 	source/reference/functions/now.txt \
 	source/reference/functions/rand.txt \
+	source/reference/functions/snippet_html.txt \
 	source/reference/grn_expr.txt \
 	source/reference/grn_expr/query_syntax.txt \
 	source/reference/grn_expr/script_syntax.txt \
@@ -955,6 +965,7 @@ html_files_relative_from_locale_dir = \
 	html/_sources/reference/functions/geo_in_rectangle.txt \
 	html/_sources/reference/functions/now.txt \
 	html/_sources/reference/functions/rand.txt \
+	html/_sources/reference/functions/snippet_html.txt \
 	html/_sources/reference/grn_expr.txt \
 	html/_sources/reference/grn_expr/query_syntax.txt \
 	html/_sources/reference/grn_expr/script_syntax.txt \
@@ -1104,6 +1115,7 @@ html_files_relative_from_locale_dir = \
 	html/reference/functions/geo_in_rectangle.html \
 	html/reference/functions/now.html \
 	html/reference/functions/rand.html \
+	html/reference/functions/snippet_html.html \
 	html/reference/grn_expr.html \
 	html/reference/grn_expr/query_syntax.html \
 	html/reference/grn_expr/script_syntax.html \

  Added: doc/source/example/reference/functions/snippet_html/usage.log (+28 -0) 100644
===================================================================
--- /dev/null
+++ doc/source/example/reference/functions/snippet_html/usage.log    2012-11-22 15:12:12 +0900 (62b7501)
@@ -0,0 +1,28 @@
+Execution example::
+
+  select Documents --output_columns 'snippet_html("Groonga is very fast fulltext search engine.")' --command_version 2 --match_columns content --query "fast performance"
+  # [
+  #   [
+  #     0, 
+  #     1337566253.89858, 
+  #     0.000355720520019531
+  #   ], 
+  #   [
+  #     [
+  #       [
+  #         1
+  #       ], 
+  #       [
+  #         [
+  #           "snippet_html", 
+  #           "null"
+  #         ]
+  #       ], 
+  #       [
+  #         [
+  #           "Groonga is very <span class=\"keyword\">fast</span> fulltext search engine."
+  #         ]
+  #       ]
+  #     ]
+  #   ]
+  # ]

  Added: doc/source/example/reference/functions/snippet_html/usage_basic.log (+29 -0) 100644
===================================================================
--- /dev/null
+++ doc/source/example/reference/functions/snippet_html/usage_basic.log    2012-11-22 15:12:12 +0900 (423d3fe)
@@ -0,0 +1,29 @@
+Execution example::
+
+  select Documents --output_columns "snippet_html(content)" --command_version 2 --match_columns content --query "fast performance"
+  # [
+  #   [
+  #     0, 
+  #     1337566253.89858, 
+  #     0.000355720520019531
+  #   ], 
+  #   [
+  #     [
+  #       [
+  #         1
+  #       ], 
+  #       [
+  #         [
+  #           "snippet_html", 
+  #           "null"
+  #         ]
+  #       ], 
+  #       [
+  #         [
+  #           "Groonga is a <span class=\"keyword\">fast</span> and accurate full text search engine based on inverted index. One of the characteristics of groonga is that a newly registered document instantly appears in search results. Also, gro", 
+  #           "onga allows updates without read locks. These characteristics result in superior <span class=\"keyword\">performance</span> on real-time applications."
+  #         ]
+  #       ]
+  #     ]
+  #   ]
+  # ]

  Added: doc/source/example/reference/functions/snippet_html/usage_setup.log (+17 -0) 100644
===================================================================
--- /dev/null
+++ doc/source/example/reference/functions/snippet_html/usage_setup.log    2012-11-22 15:12:12 +0900 (d696b2b)
@@ -0,0 +1,17 @@
+Execution example::
+
+  table_create Documents TABLE_NO_KEY
+  # [[0, 1337566253.89858, 0.000355720520019531], true]
+  column_create Documents content COLUMN_SCALAR Text
+  # [[0, 1337566253.89858, 0.000355720520019531], true]
+  table_create Terms TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram
+  # [[0, 1337566253.89858, 0.000355720520019531], true]
+  column_create Terms documents_content_index COLUMN_INDEX|WITH_POSITION Documents content
+  # [[0, 1337566253.89858, 0.000355720520019531], true]
+  load --table Documents
+  [
+  ["content"],
+  ["Groonga is a fast and accurate full text search engine based on inverted index. One of the characteristics of groonga is that a newly registered document instantly appears in search results. Also, groonga allows updates without read locks. These characteristics result in superior performance on real-time applications."],
+  ["Groonga is also a column-oriented database management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. Due to this advantage, groonga can cover weakness of row-oriented systems."]
+  ]
+  # [[0, 1337566253.89858, 0.000355720520019531], 2]

  Added: doc/source/example/reference/functions/snippet_html/usage_string_literal.log (+28 -0) 100644
===================================================================
--- /dev/null
+++ doc/source/example/reference/functions/snippet_html/usage_string_literal.log    2012-11-22 15:12:12 +0900 (62b7501)
@@ -0,0 +1,28 @@
+Execution example::
+
+  select Documents --output_columns 'snippet_html("Groonga is very fast fulltext search engine.")' --command_version 2 --match_columns content --query "fast performance"
+  # [
+  #   [
+  #     0, 
+  #     1337566253.89858, 
+  #     0.000355720520019531
+  #   ], 
+  #   [
+  #     [
+  #       [
+  #         1
+  #       ], 
+  #       [
+  #         [
+  #           "snippet_html", 
+  #           "null"
+  #         ]
+  #       ], 
+  #       [
+  #         [
+  #           "Groonga is very <span class=\"keyword\">fast</span> fulltext search engine."
+  #         ]
+  #       ]
+  #     ]
+  #   ]
+  # ]

  Added: doc/source/reference/functions/snippet_html.txt (+120 -0) 100644
===================================================================
--- /dev/null
+++ doc/source/reference/functions/snippet_html.txt    2012-11-22 15:12:12 +0900 (859fccd)
@@ -0,0 +1,120 @@
+.. -*- rst -*-
+
+.. highlightlang:: none
+
+.. groonga-command
+.. database: functions_snippet_html
+
+snippet_html
+============
+
+.. caution::
+
+   This feature is experimental. API will be changed.
+
+Summary
+-------
+
+``snippet_html`` generates snippets (``KWIC``, ``KeyWord In Context``)
+of search keywords. The snippets are prepared for embedding in
+HTML. Special characters such as ``<`` and ``>`` are escaped as
+``&lt;`` and ``&gt;``. Keyword is surrounded with ``<span
+class="keyword">`` and ``</span>``. For example, a snippet of ``I am a
+groonga user. <3`` for keyword ``groonga`` is ``I am a <span
+class="keyword">groonga</span> user. &lt;3``.
+
+Syntax
+------
+
+``snippet_html`` has only one parameter::
+
+  snippet_html(column)
+
+``snippet_html`` has many parameters internally but they can't be
+specified for now. You will be able to customize those parameters soon.
+
+Usage
+-----
+
+Here are a schema definition and sample data to show usage.
+
+.. groonga-command
+.. include:: ../../example/reference/functions/snippet_html/usage_setup.log
+.. table_create Documents TABLE_NO_KEY
+.. column_create Documents content COLUMN_SCALAR Text
+.. table_create Terms TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram
+.. column_create Terms documents_content_index COLUMN_INDEX|WITH_POSITION Documents content
+.. load --table Documents
+.. [
+.. ["content"],
+.. ["Groonga is a fast and accurate full text search engine based on inverted index. One of the characteristics of groonga is that a newly registered document instantly appears in search results. Also, groonga allows updates without read locks. These characteristics result in superior performance on real-time applications."],
+.. ["Groonga is also a column-oriented database management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. Due to this advantage, groonga can cover weakness of row-oriented systems."]
+.. ]
+
+``snippet_html`` can be used only in ``--output_columns`` in
+:doc:`/reference/commands/select`.
+
+You need to specify ``--command_version 2`` argument explicitly
+because function call in ``--output_columns`` is an experimental feature
+in groonga 2.0.9. It will be enabled by default soon.
+
+You also need to specify ``--query`` and/or ``--filter``. Keywords are
+extracted from ``--query`` and ``--filter`` arguments.
+
+The following example uses ``--query "fast performance"``. In this
+case, ``fast`` and ``performance`` are used as keywords.
+
+.. groonga-command
+.. include:: ../../example/reference/functions/snippet_html/usage_basic.log
+.. select Documents --output_columns "snippet_html(content)" --command_version 2 --match_columns content --query "fast performance"
+
+``--query "fast performance"`` matches only the first record's
+content. ``snippet_html(content)`` extracts two text parts that
+include the keywords ``fast`` or ``performance`` and surrounds the
+keywords with ``<span class="keyword">`` and ``</span>``.
+
+The max number of text parts is 3. If there are 4 or more text parts
+that include the keywords, only the leading 3 parts are used.
+
+The max size of a text part is 200 bytes. The unit is bytes, not
+characters. The size doesn't include inserted ``<span class="keyword">``
+and ``</span>``.
+
+Both the max number of text parts and the max size of a text part
+aren't customizable.
+
+You can specify string literal instead of column.
+
+.. groonga-command
+.. include:: ../../example/reference/functions/snippet_html/usage_string_literal.log
+.. select Documents --output_columns 'snippet_html("Groonga is very fast fulltext search engine.")' --command_version 2 --match_columns content --query "fast performance"
+
+Return value
+------------
+
+``snippet_html`` returns an array of strings. An element of array is a
+snippet::
+
+  [SNIPPET1, SNIPPET2, SNIPPET3]
+
+A snippet includes one or more keywords. The max byte size of a
+snippet except ``<span class="keyword">`` and ``</span>`` is 200
+bytes. The unit isn't the number of characters.
+
+The array size is larger than or equal to 1 and less than or equal
+to 3. The max size 3 will be customizable soon.
+
+TODO
+----
+
+* Make the max number of text parts customizable.
+* Make the max size of a text part customizable.
+* Make keywords customizable.
+* Make tag that surrounds a keyword customizable.
+* Make normalization customizable.
+* Support options by object literal.
+
+See also
+--------
+
+* :doc:`/reference/commands/select`
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index