csft.conf 配置为
# # Sphinx configuration file sample # # WARNING! While this sample file mentions all available options, # it contains (very) short helper descriptions only. Please refer to # doc/sphinx.html for details. #
############################################################################# ## data source definition #############################################################################
source src1 { # data source type. mandatory, no default value # known types are 'mysql', 'pgsql', 'mssql', 'xmlpipe', 'xmlpipe2' type = mysql
##################################################################### ## SQL settings (for 'mysql' and 'pgsql' types) #####################################################################
# some straightforward parameters for SQL source types sql_host = localhost sql_user = root sql_pass = 123456 sql_db = test sql_port = 3306 # optional, default is 3306
# UNIX socket name # optional, default is empty (reuse client library defaults) # usually '/var/lib/mysql/mysql.sock' on Linux # usually '/tmp/mysql.sock' on FreeBSD # # sql_sock = /tmp/mysql.sock
# MySQL specific client connection flags # optional, default is 0 # # mysql_connect_flags = 32 # enable compression
# MS SQL specific Windows authentication mode flag # MUST be in sync with charset_type index-level setting # optional, default is 0 # # mssql_winauth = 1 # use currently logged on user credentials
# MS SQL specific Unicode indexing flag # optional, default is 0 (request SBCS data) # # mssql_unicode = 1 # request Unicode data from server
# pre-query, executed before the main fetch query # multi-value, optional, default is empty list of queries # sql_query_pre = SET NAMES utf8 # sql_query_pre = SET SESSION query_cache_type=OFF
# main document fetch query # mandatory, integer document ID field MUST be the first selected column sql_query = \ SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \ FROM documents
# range query setup, query that must return min and max ID values # optional, default is empty # # sql_query will need to reference $start and $end boundaries # if using ranged query: # # sql_query = \ # SELECT doc.id, doc.id AS group, doc.title, doc.data \ # FROM documents doc \ # WHERE id>=$start AND id<=$end # # sql_query_range = SELECT MIN(id),MAX(id) FROM documents
# range query step # optional, default is 1024 # # sql_range_step = 1000
# unsigned integer attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # optional bit size can be specified, default is 32 # # sql_attr_uint = author_id # sql_attr_uint = forum_id:9 # 9 bits for forum_id sql_attr_uint = group_id
# boolean attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # equivalent to sql_attr_uint with 1-bit size # # sql_attr_bool = is_deleted
# bigint attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # declares a signed (unlike uint!) 64-bit attribute # # sql_attr_bigint = my_bigint_id
# UNIX timestamp attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # similar to integer, but can also be used in date functions # # sql_attr_timestamp = posted_ts # sql_attr_timestamp = last_edited_ts sql_attr_timestamp = date_added
# string ordinal attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # sorts strings (bytewise), and stores their indexes in the sorted list # sorting by this attr is equivalent to sorting by the original strings # # sql_attr_str2ordinal = author_name
# floating point attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # values are stored in single precision, 32-bit IEEE 754 format # # sql_attr_float = lat_radians # sql_attr_float = long_radians
# multi-valued attribute (MVA) attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # MVA values are variable length lists of unsigned 32-bit integers # # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY] # ATTR-TYPE is 'uint' or 'timestamp' # SOURCE-TYPE is 'field', 'query', or 'ranged-query' # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range' # # sql_attr_multi = uint tag from query; SELECT id, tag FROM tags # sql_attr_multi = uint tag from ranged-query; \ # SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \ # SELECT MIN(id), MAX(id) FROM tags
# post-query, executed on sql_query completion # optional, default is empty # # sql_query_post =
# post-index-query, executed on successful indexing completion # optional, default is empty # $maxid expands to max document ID actually fetched from DB # # sql_query_post_index = REPLACE INTO counters ( id, val ) \ # VALUES ( 'max_indexed_id', $maxid )
# ranged query throttling, in milliseconds # optional, default is 0 which means no delay # enforces given delay before each query step sql_ranged_throttle = 0
# document info query, ONLY for CLI search (ie. testing and debugging) # optional, default is empty # must contain $id macro and must fetch the document by that id sql_query_info = SELECT * FROM documents WHERE id=$id
# kill-list query, fetches the document IDs for kill-list # k-list will suppress matches from preceding indexes in the same query # optional, default is empty # # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
# columns to unpack on indexer side when indexing # multi-value, optional, default is empty list # # unpack_zlib = zlib_column # unpack_mysqlcompress = compressed_column # unpack_mysqlcompress = compressed_column_2
# maximum unpacked length allowed in MySQL COMPRESS() unpacker # optional, default is 16M # # unpack_mysqlcompress_maxsize = 16M
##################################################################### ## xmlpipe settings #####################################################################
# type = xmlpipe
# shell command to invoke xmlpipe stream producer # mandatory # # xmlpipe_command = cat @CONFDIR@/test.xml
##################################################################### ## xmlpipe2 settings #####################################################################
# type = xmlpipe2 # xmlpipe_command = cat @CONFDIR@/test2.xml
# xmlpipe2 field declaration # multi-value, optional, default is empty # # xmlpipe_field = subject # xmlpipe_field = content
# xmlpipe2 attribute declaration # multi-value, optional, default is empty # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX # # xmlpipe_attr_timestamp = published # xmlpipe_attr_uint = author_id }
# inherited source example # # all the parameters are copied from the parent source, # and may then be overridden in this source definition source src1throttled : src1 { sql_ranged_throttle = 100 }
############################################################################# ## index definition #############################################################################
# local index example # # this is an index which is stored locally in the filesystem # # all indexing-time options (such as morphology and charsets) # are configured per local index index test1 { # document source(s) to index # multi-value, mandatory # document IDs must be globally unique across all sources source = src1
# index files path and file name, without extension # mandatory, path must be writable, extensions will be auto-appended path = D:/csft/data/test1
# document attribute values (docinfo) storage mode # optional, default is 'extern' # known values are 'none', 'extern' and 'inline' docinfo = extern
# memory locking for cached data (.spa and .spi), to prevent swapping # optional, default is 0 (do not mlock) # requires searchd to be run from root mlock = 0
# a list of morphology preprocessors to apply # optional, default is empty # # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru', # 'soundex', and 'metaphone'; additional preprocessors available from # libstemmer are 'libstemmer_XXX', where XXX is algorithm code # (see libstemmer_c/libstemmer/modules.txt) # # morphology = stem_en, stem_ru, soundex # morphology = libstemmer_german # morphology = libstemmer_sv morphology = none
# minimum word length at which to enable stemming # optional, default is 1 (stem everything) # # min_stemming_len = 1
# stopword files list (space separated) # optional, default is empty # contents are plain text, charset_table and stemming are both applied # # stopwords = @CONFDIR@/data/stopwords.txt
# wordforms file, in "mapfrom > mapto" plain text format # optional, default is empty # # wordforms = @CONFDIR@/data/wordforms.txt
# tokenizing exceptions file # optional, default is empty # # plain text, case sensitive, space insensitive in map-from part # one "Map Several Words => ToASingleOne" entry per line # # exceptions = @CONFDIR@/data/exceptions.txt
# minimum indexed word length # default is 1 (index everything) min_word_len = 1
# charset encoding type # optional, default is 'sbcs' # known types are 'sbcs' (Single Byte CharSet) and 'utf-8' charset_type = zh_cn.utf-8 charset_dictpath = D:\csft3.1\bin\uni.lib
# charset definition and case folding rules "table" # optional, default value depends on charset_type # # defaults are configured to include English and Russian characters only # you need to change the table to include additional ones # this behavior MAY change i |