Coreseek®  
 | 首页 | 注册 | 回复 | 搜索 | 统计资料 |                 网站首页 | 产品服务 | 开放源码 | 安装使用 | 常见问题 | 中文手册 | 社区交流 | 联系我们 
中文分词 论坛首页 / 中文分词 /

部署后 执行indexer --all 出现 Unigram dictionary load Error错误

 
liu12358
会员
#1 | 发表时间: 2009 12 24 12:49
回复 
csft.conf 配置为

#
# Sphinx configuration file sample
#
# WARNING! While this sample file mentions all available options,
# it contains (very) short helper descriptions only. Please refer to
# doc/sphinx.html for details.
#

#############################################################################
## data source definition
#############################################################################

source src1
{
    # Base data source for this configuration: pulls rows from the MySQL
    # table 'documents' in database 'test'. It is referenced by index test1
    # and inherited by source src1throttled later in this file.

    # data source type. mandatory, no default value
    # known types are 'mysql', 'pgsql', 'mssql', 'xmlpipe', 'xmlpipe2'
    type                    = mysql

    #####################################################################
    ## SQL settings (for 'mysql' and 'pgsql' types)
    #####################################################################

    # some straightforward parameters for SQL source types
    #
    # NOTE(review): this connects as root with a plaintext password stored
    # in the config file; consider a dedicated account limited to SELECT
    # on the indexed tables
    sql_host                = localhost
    sql_user                = root
    sql_pass                = 123456
    sql_db                    = test
    sql_port                = 3306    # optional, default is 3306

    # UNIX socket name
    # optional, default is empty (reuse client library defaults)
    # usually '/var/lib/mysql/mysql.sock' on Linux
    # usually '/tmp/mysql.sock' on FreeBSD
    #
    # sql_sock                = /tmp/mysql.sock


    # MySQL specific client connection flags
    # optional, default is 0
    #
    # mysql_connect_flags    = 32 # enable compression


    # MS SQL specific Windows authentication mode flag
    # optional, default is 0
    #
    # mssql_winauth            = 1 # use currently logged on user credentials


    # MS SQL specific Unicode indexing flag
    # MUST be in sync with charset_type index-level setting
    # optional, default is 0 (request SBCS data)
    #
    # mssql_unicode            = 1 # request Unicode data from server


    # pre-query, executed before the main fetch query
    # multi-value, optional, default is empty list of queries
    #
    # SET NAMES utf8 makes the MySQL connection return UTF-8 text; index
    # test1 in this file is configured with charset_type = zh_cn.utf-8
    sql_query_pre            = SET NAMES utf8
    # sql_query_pre            = SET SESSION query_cache_type=OFF


    # main document fetch query
    # mandatory, integer document ID field MUST be the first selected column
    #
    # group_id and date_added are declared as attributes further down
    # (sql_attr_uint / sql_attr_timestamp); the remaining columns, title and
    # content, are presumably indexed as full-text fields
    sql_query                = \
        SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
        FROM documents

    # range query setup, query that must return min and max ID values
    # optional, default is empty
    #
    # sql_query will need to reference $start and $end boundaries
    # if using ranged query:
    #
    # sql_query                = \
    #    SELECT doc.id, doc.id AS group, doc.title, doc.data \
    #    FROM documents doc \
    #    WHERE id>=$start AND id<=$end
    #
    # sql_query_range        = SELECT MIN(id),MAX(id) FROM documents


    # range query step
    # optional, default is 1024
    #
    # sql_range_step        = 1000


    # unsigned integer attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # optional bit size can be specified, default is 32
    #
    # sql_attr_uint            = author_id
    # sql_attr_uint            = forum_id:9 # 9 bits for forum_id
    sql_attr_uint            = group_id

    # boolean attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # equivalent to sql_attr_uint with 1-bit size
    #
    # sql_attr_bool            = is_deleted


    # bigint attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # declares a signed (unlike uint!) 64-bit attribute
    #
    # sql_attr_bigint            = my_bigint_id


    # UNIX timestamp attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # similar to integer, but can also be used in date functions
    #
    # date_added is converted with UNIX_TIMESTAMP() in sql_query above
    #
    # sql_attr_timestamp    = posted_ts
    # sql_attr_timestamp    = last_edited_ts
    sql_attr_timestamp        = date_added

    # string ordinal attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # sorts strings (bytewise), and stores their indexes in the sorted list
    # sorting by this attr is equivalent to sorting by the original strings
    #
    # sql_attr_str2ordinal    = author_name


    # floating point attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # values are stored in single precision, 32-bit IEEE 754 format
    #
    # sql_attr_float = lat_radians
    # sql_attr_float = long_radians


    # multi-valued attribute (MVA) attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # MVA values are variable length lists of unsigned 32-bit integers
    #
    # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
    # ATTR-TYPE is 'uint' or 'timestamp'
    # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
    # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
    # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
    #
    # sql_attr_multi    = uint tag from query; SELECT id, tag FROM tags
    # sql_attr_multi    = uint tag from ranged-query; \
    #    SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
    #    SELECT MIN(id), MAX(id) FROM tags


    # post-query, executed on sql_query completion
    # optional, default is empty
    #
    # sql_query_post        =

    
    # post-index-query, executed on successful indexing completion
    # optional, default is empty
    # $maxid expands to max document ID actually fetched from DB
    #
    # sql_query_post_index = REPLACE INTO counters ( id, val ) \
    #    VALUES ( 'max_indexed_id', $maxid )


    # ranged query throttling, in milliseconds
    # optional, default is 0 which means no delay
    # enforces given delay before each query step
    # (set explicitly to the default here; src1throttled overrides it)
    sql_ranged_throttle    = 0

    # document info query, ONLY for CLI search (ie. testing and debugging)
    # optional, default is empty
    # must contain $id macro and must fetch the document by that id
    sql_query_info        = SELECT * FROM documents WHERE id=$id

    # kill-list query, fetches the document IDs for kill-list
    # k-list will suppress matches from preceding indexes in the same query
    # optional, default is empty
    #
    # sql_query_killlist    = SELECT id FROM documents WHERE edited>=@last_reindex


    # columns to unpack on indexer side when indexing
    # multi-value, optional, default is empty list
    #
    # unpack_zlib = zlib_column
    # unpack_mysqlcompress = compressed_column
    # unpack_mysqlcompress = compressed_column_2


    # maximum unpacked length allowed in MySQL COMPRESS() unpacker
    # optional, default is 16M
    #
    # unpack_mysqlcompress_maxsize = 16M


    #####################################################################
    ## xmlpipe settings
    #####################################################################

    # type                = xmlpipe

    # shell command to invoke xmlpipe stream producer
    # mandatory
    #
    # xmlpipe_command    = cat @CONFDIR@/test.xml

    #####################################################################
    ## xmlpipe2 settings
    #####################################################################

    # type                = xmlpipe2
    # xmlpipe_command    = cat @CONFDIR@/test2.xml


    # xmlpipe2 field declaration
    # multi-value, optional, default is empty
    #
    # xmlpipe_field                = subject
    # xmlpipe_field                = content


    # xmlpipe2 attribute declaration
    # multi-value, optional, default is empty
    # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
    #
    # xmlpipe_attr_timestamp    = published
    # xmlpipe_attr_uint            = author_id
}


# inherited source example
#
# all the parameters are copied from the parent source,
# and may then be overridden in this source definition
source src1throttled : src1
{
    # only the throttle is overridden; every other setting comes from src1
    # value is the delay, in milliseconds, enforced before each query step
    # NOTE(review): src1 leaves sql_query_range commented out, so this delay
    # presumably has no effect until a ranged query is configured - confirm
    sql_ranged_throttle            = 100
}

#############################################################################
## index definition
#############################################################################

# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index test1
{
    # document source(s) to index
    # multi-value, mandatory
    # document IDs must be globally unique across all sources
    source            = src1

    # index files path and file name, without extension
    # mandatory, path must be writable, extensions will be auto-appended
    path            = D:/csft/data/test1

    # document attribute values (docinfo) storage mode
    # optional, default is 'extern'
    # known values are 'none', 'extern' and 'inline'
    docinfo            = extern

    # memory locking for cached data (.spa and .spi), to prevent swapping
    # optional, default is 0 (do not mlock)
    # requires searchd to be run from root
    mlock            = 0

    # a list of morphology preprocessors to apply
    # optional, default is empty
    #
    # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
    # 'soundex', and 'metaphone'; additional preprocessors available from
    # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
    # (see libstemmer_c/libstemmer/modules.txt)
    #
    # morphology     = stem_en, stem_ru, soundex
    # morphology    = libstemmer_german
    # morphology    = libstemmer_sv
    morphology        = none

    # minimum word length at which to enable stemming
    # optional, default is 1 (stem everything)
    #
    # min_stemming_len    = 1


    # stopword files list (space separated)
    # optional, default is empty
    # contents are plain text, charset_table and stemming are both applied
    #
    # stopwords            = @CONFDIR@/data/stopwords.txt


    # wordforms file, in "mapfrom > mapto" plain text format
    # optional, default is empty
    #
    # wordforms            = @CONFDIR@/data/wordforms.txt


    # tokenizing exceptions file
    # optional, default is empty
    #
    # plain text, case sensitive, space insensitive in map-from part
    # one "Map Several Words => ToASingleOne" entry per line
    #
    # exceptions        = @CONFDIR@/data/exceptions.txt


    # minimum indexed word length
    # default is 1 (index everything)
    min_word_len        = 1

    # charset encoding type
    # optional, default is 'sbcs'
    # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
    charset_type = zh_cn.utf-8
    charset_dictpath = D:\csft3.1\bin\uni.lib

    # charset definition and case folding rules "table"
    # optional, default value depends on charset_type
    #
    # defaults are configured to include English and Russian characters only
    # you need to change the table to include additional ones
    # this behavior MAY change i
liu12358
会员
#2 | 发表时间: 2009 12 24 12:51
回复 
请知道的朋友说一下问题的原因,在线等。错误信息:Unigram dictionary load Error
liu12358
会员
#3 | 发表时间: 2009 12 24 13:05
回复 
疯了,问题已解决,路径错误
reboot
会员
#4 | 发表时间: 2010 08 26 17:33
回复 
哪个路径错了?我也出现这种情况了!请说明,检查了我的路径,应该没问题的
HonestQiao
会员
#5 | 发表时间: 2010 08 27 11:23
回复 
 
回复
Bold Style  Italic Style  Image 链接  URL 链接 
发帖注意:
  • 网址中请去掉http://开头,例如:您需要输入www.coreseek.cn,而不是http://www.coreseek.cn
  • 咨询问题,请贴出详细的操作系统版本、Coreseek版本(Linux环境请给出编译参数)
  • 请仔细查看中文手册和本站安装指南,确认操作正确
  • 请仔细查看常见问题解答,也许你的问题已经有解决方法

» 帐号  » 密码 
发帖前请登录, 或者 注册 .