Coreseek®  
 | 首页 | 注册 | 回复 | 搜索 | 统计资料 |                 网站首页产品服务开放源码安装使用常见问题中文手册社区交流联系我们 
全文检索 论坛首页 / 全文检索 /

有没有JAVA版本的实例呀!配置文件和JAVA API调用搜索的例子!

 
catastiger
会员
#1 | 发表时间: 2010 08 19 17:27
回复 
有没有JAVA版本的实例呀!配置文件和JAVA API调用搜索的例子!
HonestQiao
会员
#2 | 发表时间: 2010 08 19 17:36
回复 
源代码的api目录
catastiger
会员
#3 | 发表时间: 2010 08 19 18:05
回复 
# Source definition
# Data source "src1": pulls the rows to be indexed from a MySQL table.
source src1
{
    # Connection settings for the MySQL server that holds the content table.
    type                    = mysql
    sql_host                = 192.168.93.173
    sql_user                = t_develop
    sql_pass                = t_develop
    sql_db                    = twioo_develop
    sql_port                = 3306    # optional, default is 3306
    # Executed before the main query so that text is exchanged as UTF-8.
    sql_query_pre                = SET NAMES UTF8
    
    # Disabled: would record the current MAX(content_id) in a counter table
    # (used together with the commented-out WHERE clause below the main query).
    #sql_query_pre = REPLACE INTO twioo_sphinx.twioo_sph_counter SELECT 1, MAX(content_id) FROM twioo_content
    # Main fetch query. content_id is the first selected column; post_time is
    # exposed under the alias "posttime", which is the name the attribute
    # declaration below refers to.
    # NOTE(review): user_id is selected but not declared as an attribute, so it
    # will presumably be treated as a full-text field - confirm this is intended.
    sql_query                = \
        SELECT content_id,user_id, user_type as user_type , post_time as  posttime ,content_body FROM twioo_content
        #where content_id<=( SELECT max_doc_id FROM twioo_sphinx.twioo_sph_counter WHERE counter_id=1 )  
    # Columns stored as unsigned-integer attributes (usable for sorting/filtering).
    # NOTE(review): assumes post_time already holds an integer value such as a
    # UNIX timestamp - TODO confirm against the table schema.
    sql_attr_uint                =posttime
    sql_attr_uint                =user_type
    #sql_attr_uint                =content_body
    sql_ranged_throttle            = 0
}


# Index "content": built from source src1. CJK text is indexed character by
# character (ngram_len = 1 over ngram_chars) rather than with a word-segmentation
# dictionary; the dictionary-based settings are present but commented out.
index content
{
    source            = src1
    # Index files are written under this prefix (note the mixed \ and / separators).
    path            = D:\sphinx/data/content
    docinfo            = extern
    mlock            = 0
    morphology        = none
    min_word_len        = 1
    html_strip        = 0
    # Disabled dictionary-based Chinese segmentation settings; plain utf-8 is
    # used instead.
    #charset_dictpath = /usr/local/coreseek/
    #charset_type=zh_cn.utf-8
    charset_type  = utf-8

    # Single-character n-grams for the characters listed in ngram_chars;
    # prefix indexing is off (0) and infix indexing uses a minimum length of 1.
    ngram_len = 1
    min_prefix_len  = 0
    min_infix_len  = 1
    
    #stopwords   =D:\sphinx/stopwords.txt

    # Code points that are split into n-grams (CJK ideographs, kana, hangul, ...).
    ngram_chars =    U+4E00..U+9FBB, U+3400..U+4DB5, U+20000..U+2A6D6, U+FA0E, U+FA0F, \
            U+FA11, U+FA13, U+FA14, U+FA1F, U+FA21, U+FA23, U+FA24, U+FA27, U+FA28, U+FA29, \
            U+3105..U+312C, U+31A0..U+31B7, U+3041, U+3043, U+3045, U+3047, U+3049, U+304B, \
            U+304D, U+304F, U+3051, U+3053, U+3055, U+3057, U+3059, U+305B, U+305D, U+305F, \
            U+3061, U+3063, U+3066, U+3068, U+306A..U+306F, U+3072, U+3075, U+3078, U+307B, \
            U+307E..U+3083, U+3085, U+3087, U+3089..U+308E, U+3090..U+3093, U+30A1, U+30A3, \
            U+30A5, U+30A7, U+30A9, U+30AD, U+30AF, U+30B3, U+30B5, U+30BB, U+30BD, U+30BF, \
            U+30C1, U+30C3, U+30C4, U+30C6, U+30CA, U+30CB, U+30CD, U+30CE, U+30DE, U+30DF, \
            U+30E1, U+30E2, U+30E3, U+30E5, U+30E7, U+30EE, U+30F0..U+30F3, U+30F5, U+30F6, \
            U+31F0, U+31F1, U+31F2, U+31F3, U+31F4, U+31F5, U+31F6, U+31F7, U+31F8, U+31F9, \
            U+31FA, U+31FB, U+31FC, U+31FD, U+31FE, U+31FF, U+AC00..U+D7A3, U+1100..U+1159, \
            U+1161..U+11A2, U+11A8..U+11F9, U+A000..U+A48C, U+A492..U+A4C6

    # Accepted characters and their folding rules: fullwidth digits and letters
    # are mapped to ASCII, upper case is mapped to lower case (A..Z->a..z and
    # the accented/Cyrillic/Greek/Armenian pairs), followed by the plain ranges
    # for the other scripts that are accepted unchanged.
    charset_table = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z,\
                        A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6,\
                        U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101,\
                        U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109,\
                        U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F,\
                        U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, \
                        U+0116->U+0117,U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D,\
                        U+011D,U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, \
                        U+0134->U+0135,U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, \
                        U+013C,U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, \
                        U+0143->U+0144,U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, \
                        U+014B,U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, \
                        U+0152->U+0153,U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159,\
                        U+0159,U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, \
                        U+0160->U+0161,U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, \
                        U+0167,U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, \
                        U+016E->U+016F,U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175,\
                        U+0175,U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, \
                        U+017B->U+017C,U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, \
                        U+0430..U+044F,U+05D0..U+05EA, U+0531..U+0556->U+0561..U+0586, U+0561..U+0587, \
                        U+0621..U+063A, U+01B9,U+01BF, U+0640..U+064A, U+0660..U+0669, U+066E, U+066F, \
                        U+0671..U+06D3, U+06F0..U+06FF,U+0904..U+0939, U+0958..U+095F, U+0960..U+0963, \
                        U+0966..U+096F, U+097B..U+097F,U+0985..U+09B9, U+09CE, U+09DC..U+09E3, U+09E6..U+09EF, \
                        U+0A05..U+0A39, U+0A59..U+0A5E,U+0A66..U+0A6F, U+0A85..U+0AB9, U+0AE0..U+0AE3, \
                        U+0AE6..U+0AEF, U+0B05..U+0B39,U+0B5C..U+0B61, U+0B66..U+0B6F, U+0B71, U+0B85..U+0BB9, \
                        U+0BE6..U+0BF2, U+0C05..U+0C39,U+0C66..U+0C6F, U+0C85..U+0CB9, U+0CDE..U+0CE3, \
                        U+0CE6..U+0CEF, U+0D05..U+0D39, U+0D60,U+0D61, U+0D66..U+0D6F, U+0D85..U+0DC6, \
                        U+1900..U+1938, U+1946..U+194F, U+A800..U+A805,U+A807..U+A822, U+0386->U+03B1, \
                        U+03AC->U+03B1, U+0388->U+03B5, U+03AD->U+03B5,U+0389->U+03B7, U+03AE->U+03B7, \
                        U+038A->U+03B9, U+0390->U+03B9, U+03AA->U+03B9,U+03AF->U+03B9, U+03CA->U+03B9, \
                        U+038C->U+03BF, U+03CC->U+03BF, U+038E->U+03C5,U+03AB->U+03C5, U+03B0->U+03C5, \
                        U+03CB->U+03C5, U+03CD->U+03C5, U+038F->U+03C9,U+03CE->U+03C9, U+03C2->U+03C3, \
                        U+0391..U+03A1->U+03B1..U+03C1,U+03A3..U+03A9->U+03C3..U+03C9, U+03B1..U+03C1, \
                        U+03C3..U+03C9, U+0E01..U+0E2E,U+0E30..U+0E3A, U+0E40..U+0E45, U+0E47, U+0E50..U+0E59, \
                        U+A000..U+A48F, U+4E00..U+9FBF,U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF, \
                        U+2F800..U+2FA1F, U+2E80..U+2EFF,U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF, \
                        U+3040..U+309F, U+30A0..U+30FF,U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, \
                        U+3130..U+318F, U+A000..U+A48F,U+A490..U+A4CF
}


# Settings for the indexer tool: caps the memory it may use while building indexes.
indexer
{
    mem_limit            = 128M
}

# searchd service definition
searchd
{
    # Address and port the daemon listens on; clients must connect to the same
    # host/port pair.
    listen                = 192.168.93.147:3312
    read_timeout            = 5
    max_children            = 30
    # Upper bound on the number of matches kept per query.
    max_matches            = 100000
    seamless_rotate            = 0
    preopen_indexes            = 0
    unlink_old            = 1
    # PID file and log locations (note the mixed \ and / separators).
    pid_file            = D:\sphinx\log/searchd.pid
    log                =D:\sphinx\log/searchd.log
    query_log            =D:\sphinx\log/query.log
}
catastiger
会员
#4 | 发表时间: 2010 08 19 18:07
回复 
/*
* $Id: test.java 2055 2009-11-06 23:09:58Z shodan $
*/

package com.xxxxxxxxxx.action;

import java.util.Date;

import org.sphx.api.SphinxClient;
import org.sphx.api.SphinxException;
import org.sphx.api.SphinxMatch;
import org.sphx.api.SphinxResult;
import org.sphx.api.SphinxWordInfo;

/**
* Test class for sphinx API
*/
public class test
{
    public static void main ( String[] argv ) throws SphinxException
    {


        StringBuffer q = new StringBuffer();
        String host = "192.168.93.147";
        int port = 3312;
        int mode = SphinxClient.SPH_MATCH_EXTENDED;
        String index = "*";
        int offset = 0;
        int limit = 20;
        int sortMode = SphinxClient.SPH_SORT_ATTR_DESC;
        String sortClause = "posttime";
        String groupBy = "";
        String groupSort = "";

        SphinxClient cl = new SphinxClient();
    
        q.append("中信证券|浦发银行");
        cl.SetServer ( host, port );
        cl.SetWeights ( new int[] { 100, 1 } );
        cl.SetMatchMode ( mode );
        cl.SetLimits ( offset, limit );
        cl.SetSortMode ( sortMode, sortClause );
        if ( groupBy.length()>0 )
            cl.SetGroupBy ( groupBy, SphinxClient.SPH_GROUPBY_ATTR, groupSort );

        SphinxResult res = cl.Query(q.toString(), index);
        if ( res==null )
        {
            System.err.println ( "Error: " + cl.GetLastError() );
            System.exit ( 1 );
        }
        if ( cl.GetLastWarning()!=null && cl.GetLastWarning().length()>0 )
            System.out.println ( "WARNING: " + cl.GetLastWarning() + "\n" );

        /* print me out */
        System.out.println ( "Query '" + q + "' retrieved " + res.total + " of " + res.totalFound + " matches in " + res.time + " sec." );
        System.out.println ( "Query stats:" );
        for ( int i=0; i<res.words.length; i++ )
        {
            SphinxWordInfo wordInfo = res.words[i];
            System.out.println ( "\t'" + wordInfo.word + "' found " + wordInfo.hits + " times in " + wordInfo.docs + " documents" );
        }

        System.out.println ( "\nMatches:" );
        for ( int i=0; i<res.matches.length; i++ )
        {
            SphinxMatch info = res.matches[i];
            System.out.print ( (i+1) + ". id=" + info.docId + ", weight=" + info.weight );

            if ( res.attrNames==null || res.attrTypes==null )
                continue;

            for ( int a=0; a<res.attrNames.length; a++ )
            {
                System.out.print ( ", " + res.attrNames[a] + "=" );

                if ( ( res.attrTypes[a] & SphinxClient.SPH_ATTR_MULTI )!=0 )
                {
                    System.out.print ( "(" );
                    long[] attrM = (long[]) info.attrValues.get(a);
                    if ( attrM!=null )
                        for ( int j=0; j<attrM.length; j++ )
                    {
                        if ( j!=0 )
                            System.out.print ( "," );
                        System.out.print ( attrM[j] );
                    }
                    System.out.print ( ")" );

                } else
                {
                    switch ( res.attrTypes[a] )
                    {
                        case SphinxClient.SPH_ATTR_INTEGER:
                        case SphinxClient.SPH_ATTR_ORDINAL:
                        case SphinxClient.SPH_ATTR_FLOAT:
                        case SphinxClient.SPH_ATTR_BIGINT:
                            /* longs or floats; print as is */
                            System.out.print ( info.attrValues.get(a) );
                            break;

                        case SphinxClient.SPH_ATTR_TIMESTAMP:
                            Long iStamp = (Long) info.attrValues.get(a);
                            Date date = new Date ( iStamp.longValue()*1000 );
                            System.out.print ( date.toString() );
                            break;

                        default:
                            System.out.print ( "(unknown-attr-type=" + res.attrTypes[a] + ")" );
                    }
                }
            }

            System.out.println();
        }
    }
}

/*
* $Id: test.java 2055 2009-11-06 23:09:58Z shodan $
*/
catastiger
会员
#5 | 发表时间: 2010 08 19 18:12 | 修改: catastiger
回复 
上面是我的配置文件和在test.java中进行的测试,
int mode = SphinxClient.SPH_MATCH_EXTENDED;
q.append("中信证券|浦发银行");
这样得到的结果只有十几条
不知道是我上面的代码写错了还是什么原因?请高手指教一下!
#######################################
实际的业务要求是,有几个主话题与子话题,要求用sphinx把主子话题相关的都搜索出来;相当于把“主话题|子话题|子话题”作为条件搜索,把话题相关的内容搜索出来!

现在单个的查询“中信证券”就会得到很多数据,“中信证券|浦发银行”这两个一起查就没几行数据
catastiger
会员
#6 | 发表时间: 2010 08 20 09:09
回复 
没有人回答么?
HonestQiao
会员
#7 | 发表时间: 2010 08 20 10:24
回复 
(中信证券)|(浦发银行)

这么看看结果如何呢?
catastiger
会员
#8 | 发表时间: 2010 08 20 14:36
回复 
谢谢,HonestQiao,现在好了,但为什么要加上()呢?
HonestQiao
会员
#9 | 发表时间: 2010 08 21 21:52
回复 
|会优先处理
catastiger
会员
#10 | 发表时间: 2010 09 01 10:06
回复 
就上面的配置文件,用java版本的api下提供的test.java,搜索“a股”,好像只查了“股”这个关键字,
结果如下:
Query 'a股' retrieved 508 of 508 matches in 0.058 sec.
Query stats:
    '股' found 371200 times in 308161 documents
而我单独查“股”,结果如下:
Query '股' retrieved 508 of 508 matches in 0.058 sec.
Query stats:
    '股' found 371200 times in 308161 documents
两个结果好像完全一样的,现在问题是
1.为什么查a股,只查了股呢?
2.如果要查a股一起,要怎么处理,是不是我配置文件那里写错了?
HonestQiao
会员
#11 | 发表时间: 2010 09 01 14:13
回复 
你使用的是一元切分而非中文分词模式,可能将单个字符丢弃了。

你可以参考本页面顶部导航的安装使用,设置中文分词。
我们实际测试,最新版本完全正常。
catastiger
会员
#12 | 发表时间: 2010 09 01 14:51
回复 
等一下试一下看看!谢谢!
 
回复
Bold Style  Italic Style  Image 链接  URL 链接 
发帖注意:
  • 网址中请去掉http://开头,例如:您需要输入www.coreseek.cn,而不是http://www.coreseek.cn
  • 咨询问题,请贴出详细的操作系统版本、Coreseek版本(Linux环境请给出编译参数)
  • 请仔细查看中文手册和本站安装指南,确认操作正确
  • 请仔细查看常见问题解答,也许你的问题已经有解决方法

» 帐号  » 密码 
发帖前请登录,或者注册。