00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00040
00041 #ifndef __SPHINXAPI_H__
00042 #define __SPHINXAPI_H__
00043
00044 #include <sphinxclient/sphinxclientquery.h>
00045 #include <sphinxclient/value.h>
00046 #include <sphinxclient/globals_public.h>
00047
00048 #include <sstream>
00049 #include <string>
00050 #include <vector>
00051 #include <list>
00052 #include <map>
00053 #include <stdint.h>
00054
00055 #include "error.h"
00056
00057 namespace Sphinx
00058 {
00059
00060
00061
00062
00063
/// Forward declarations of types referenced further down in this header.
/// Filter_t is only used through a pointer and Client_t as a friend, so a
/// forward declaration suffices for them here.
// NOTE(review): Query_t is also stored by value in MultiQuery_t and
// SourceQuery_t, which needs the complete type; presumably
// <sphinxclient/sphinxclientquery.h> provides it - confirm.
00064 class Query_t;
00065 class Client_t;
00066 class Filter_t;
00067
00068
/// Default values for the connectRetriesCount and connectRetryWait
/// parameters of the ConnectionConfig_t constructor.
// NOTE(review): the _MS suffix suggests the wait is in milliseconds - confirm
// against the implementation.
00069 #define DEFAULT_CONNECT_RETRIES 1
00070 #define CONNECT_RETRY_WAIT_DEFAULT_MS 300
00071
00072
00073
/// Query matching mode, selected through SearchConfig_t::setMatchMode().
/// The numeric values are spelled out explicitly and must stay stable.
// NOTE(review): the names mirror the match modes of the Sphinx searchd API
// (all words / any word / phrase / boolean / extended / full scan);
// SPH_MATCH_SZN looks like a vendor-specific extension - confirm its meaning.
00077 enum MatchMode_t {
00078 SPH_MATCH_ALL = 0,
00080 SPH_MATCH_ANY = 1,
00082 SPH_MATCH_PHRASE = 2,
00084 SPH_MATCH_BOOLEAN = 3,
00087 SPH_MATCH_EXTENDED = 4,
00089 SPH_MATCH_FULLSCAN = 5,
00091
00092 SPH_MATCH_EXTENDED2 = 6,
00094 SPH_MATCH_SZN = 7};
00095
/// Result sorting mode, selected through SearchConfig_t::setSorting().
/// Values 0-5 are contiguous; SPH_SORT_SZN sits apart at 100.
// NOTE(review): the names mirror the Sphinx searchd sort modes; SPH_SORT_SZN
// looks like a vendor-specific extension - confirm its meaning.
00099 enum SortMode_t { SPH_SORT_RELEVANCE = 0,
00100 SPH_SORT_DATE_DESC = 1,
00101
00102 SPH_SORT_DATE_ASC = 2,
00103
00104 SPH_SORT_TIME_SEGMENTS = 3,
00106 SPH_SORT_EXTENDED = 4,
00108 SPH_SORT_EXPR = 5,
00109 SPH_SORT_SZN = 100
00110 };
00111
/// Ranking mode, selected through SearchConfig_t::setRanking().
// NOTE(review): the names mirror the Sphinx searchd rankers. SPH_RANK_TOTAL
// is one past the last listed mode, so it is presumably a count/sentinel
// rather than a selectable ranker - confirm before passing it to setRanking().
00115 enum RankingMode_t {
00116
00117 SPH_RANK_PROXIMITY_BM25 = 0,
00119
00120 SPH_RANK_BM25 = 1,
00122 SPH_RANK_NONE = 2,
00124
00125 SPH_RANK_WORDCOUNT = 3,
00127 SPH_RANK_PROXIMITY = 4,
00129 SPH_RANK_MATCHANY = 5,
00131
00132 SPH_RANK_FIELDMASK = 6,
00133 SPH_RANK_SPH04 = 7,
00134 SPH_RANK_EXPR = 8,
00135 SPH_RANK_TOTAL = 9
00136 };
00137
/// Attribute type codes.
/// Scalar types use the small values 1-7. Multi-value types are formed by
/// OR-ing the SPH_ATTR_MULTI_FLAG bit (0x40000000) with a small code:
/// SPH_ATTR_MULTI is flag | 1 and SPH_ATTR_MULTI64 is flag | 2.
00143 enum AttributeType_t { SPH_ATTR_INTEGER = 1,
00144 SPH_ATTR_TIMESTAMP = 2,
00145 SPH_ATTR_ORDINAL = 3,
00146 SPH_ATTR_BOOL = 4,
00147 SPH_ATTR_FLOAT = 5,
00148 SPH_ATTR_BIGINT = 6,
00149 SPH_ATTR_STRING = 7,
00150 SPH_ATTR_MULTI_FLAG = 0x40000000,
00151 SPH_ATTR_MULTI = SPH_ATTR_MULTI_FLAG | 1,
00152 SPH_ATTR_MULTI64 = SPH_ATTR_MULTI_FLAG | 2
00153 };
00154
/// Grouping function, selected through SearchConfig_t::setGrouping().
// NOTE(review): by their names, DAY/WEEK/MONTH/YEAR presumably bucket a
// timestamp attribute while ATTR/ATTRPAIR group by attribute value(s) -
// confirm against the Sphinx searchd documentation.
00160 enum GroupFunction_t { SPH_GROUPBY_DAY = 0,
00161 SPH_GROUPBY_WEEK = 1,
00162 SPH_GROUPBY_MONTH = 2,
00163 SPH_GROUPBY_YEAR = 3,
00164 SPH_GROUPBY_ATTR = 4,
00165 SPH_GROUPBY_ATTRPAIR = 5 };
00166
00167
/// Connection settings for a search daemon: host, port, keep-alive flag,
/// connect/read/write timeouts and connect-retry policy.
/// The stored values are exposed read-only through the getters below; all
/// state sits behind the private pointer `d`, whose struct is only
/// forward-declared in this header.
00176 class ConnectionConfig_t
00177 {
// Implementation struct, defined outside this header.
00179 struct PrivateData_t;
00181 PrivateData_t *d;
00182 public:
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
/// Builds a configuration. Every parameter has a default, so this also
/// serves as the default constructor (localhost:3312, keep-alive on).
// NOTE(review): the timeout and retry-wait units are not visible here; the
// CONNECT_RETRY_WAIT_DEFAULT_MS macro name suggests milliseconds - confirm.
00195 ConnectionConfig_t(const std::string &host = "localhost",
00196 unsigned short port = 3312,
00197 bool keepAlive = true,
00198 int32_t connectTimeout = 1000,
00199 int32_t readTimeout = 3000,
00200 int32_t writeTimeout = 3000,
00201 int32_t connectRetriesCount = DEFAULT_CONNECT_RETRIES,
00202 int32_t connectRetryWait = CONNECT_RETRY_WAIT_DEFAULT_MS);
00203
00204
// Copy construction, copy assignment and destruction are user-declared,
// presumably because the object owns what `d` points to - confirm in the
// implementation file.
00205 ConnectionConfig_t(const ConnectionConfig_t &from);
00206
00207
00208 ConnectionConfig_t &operator=(const ConnectionConfig_t &from);
00209
00210 ~ConnectionConfig_t();
00211
00212
// Read-only accessors, one per constructor parameter.
00213 const std::string &getHost() const;
00214 unsigned short getPort() const;
00215 bool getKeepAlive() const;
00216 int32_t getConnectTimeout() const;
00217 int32_t getReadTimeout() const;
00218 int32_t getWriteTimeout() const;
00219 int32_t getConnectRetriesCount() const;
00220 int32_t getConnectRetryWait() const;
00221 };
00222
00223
/// Geographic anchor point: the names of the two attributes that hold a
/// document's latitude and longitude, plus the anchor's own coordinates.
/// Passed in bulk to SearchConfig_t::setGeoAnchorPoints().
///
/// The "lattitude" spelling is part of the public member names and is kept
/// for source compatibility.
// NOTE(review): the coordinate units (degrees vs. radians) are not visible in
// this header - confirm against the search daemon's expectations.
struct GeoAnchorPoint_t {
    /// Creates an anchor with empty attribute names and both coordinates set
    /// to zero, so a default-constructed point never carries indeterminate
    /// float values.
    GeoAnchorPoint_t() : lattitude(0.0f), longitude(0.0f) {}

    /// Creates a fully specified anchor.
    /// @param laAtt     name of the attribute holding the latitude
    /// @param loAtt     name of the attribute holding the longitude
    /// @param lattitude latitude of the anchor point
    /// @param longitude longitude of the anchor point
    GeoAnchorPoint_t(const std::string &laAtt, const std::string &loAtt,
                     float lattitude, float longitude)
        : lattitudeAttributeName(laAtt), longitudeAttributeName(loAtt),
          lattitude(lattitude), longitude(longitude) {}

    /// Names of the attributes holding latitude and longitude.
    std::string lattitudeAttributeName, longitudeAttributeName;
    /// Coordinates of the anchor point.
    float lattitude, longitude;
};
00239
/// Per-query search settings: filters, attribute overrides, paging, match /
/// sort / rank / group modes, limits, index and field weights, geo anchors,
/// query comment and select clause.
/// Setters and getters come in pairs. All state sits behind the private
/// pointer `dptr`, whose struct is only forward-declared in this header.
00247 struct SearchConfig_t
00248 {
// Copy construction, copy assignment and destruction are user-declared,
// presumably to manage what `dptr` points to - confirm in the implementation.
00251 SearchConfig_t(const SearchConfig_t &from);
00252
00255 SearchConfig_t &operator= (const SearchConfig_t &from);
00256
/// Creates a configuration for the given search command version
/// (default VER_COMMAND_SEARCH_2_0_5). Not `explicit`, so a
/// SearchCommandVersion_t converts implicitly to a SearchConfig_t.
00260 SearchConfig_t(SearchCommandVersion_t cmdVer = VER_COMMAND_SEARCH_2_0_5);
00261
00265 ~SearchConfig_t();
00266
// --- Filters ---------------------------------------------------------------
// Each add*Filter() takes the attribute name, the filter values and an
// excludeFlag defaulting to false.
// NOTE(review): presumably excludeFlag=true inverts the filter (reject
// matching documents) and the range bounds are inclusive - confirm.
00271 void addRangeFilter(const std::string &attrName, uint64_t minValue,
00272 uint64_t maxValue, bool excludeFlag=false);
00277 void addEnumFilter(const std::string &attrName, const Int64Array_t &values,
00278 bool excludeFlag=false);
00283 void addEnumFilter(const std::string &attrName, const IntArray_t &values,
00284 bool excludeFlag=false);
00289 void addFloatRangeFilter(const std::string &attrName, float minValue,
00290 float maxValue, bool excludeFlag=false);
00291
// --- Attribute overrides ---------------------------------------------------
// Two forms: a single (docId, value) pair, or a whole docId -> value map for
// one attribute of the given type.
00298 void addAttributeOverride(const std::string &attrName,
00299 AttributeType_t attrType,
00300 uint64_t docId, const Value_t &value);
00306 void addAttributeOverride(const std::string &attrName,
00307 AttributeType_t attrType,
00308 const std::map<uint64_t, Value_t> &values);
00309
// --- Filter inspection -----------------------------------------------------
// Three typed getFilter() overloads write the filter at `index` into their
// reference parameters (float range, integer range, value list).
// NOTE(review): the bool result presumably reports whether `index` is valid
// and the filter has the requested kind - confirm. `index` is a signed int
// while getFilterCount() returns unsigned.
00322 bool getFilter(int index, std::string &attrname,
00323 bool &exclude, float &minValue, float &maxValue) const;
00324
00337 bool getFilter(int index, std::string &attrname,
00338 bool &exclude, uint64_t &minValue, uint64_t &maxValue) const;
00339
00351 bool getFilter(int index, std::string &attrname,
00352 bool &exclude, Int64Array_t &values) const;
00353
00355 unsigned getFilterCount() const;
00356
// Raw access to a filter object; Filter_t is only forward-declared here.
// NOTE(review): the behaviour for an out-of-range index (null vs. undefined)
// is not visible here.
00358 const Filter_t *getFilter(int index) const;
00359
00361 SearchCommandVersion_t getCommandVersion() const;
00362
// --- Setters ---------------------------------------------------------------
00368 void setPaging(uint32_t msgOffset, uint32_t msgLimit);
00369
00371 void setMatchMode(MatchMode_t matchMode);
00372
// sortBy, rankExpr, groupBy and groupSort default to the empty string.
00378 void setSorting(SortMode_t sortMode, const std::string &sortBy = "");
00379
00385 void setRanking(RankingMode_t rankingMode, const std::string &rankExpr = "");
00386
00393 void setGrouping(
00394 GroupFunction_t groupFunction,
00395 const std::string &groupBy = "",
00396 const std::string &groupSort = "");
00397
00399 void setGroupDistinctAttribute(const std::string &attributeName);
00400
// Note: maxMatches is a signed int, unlike the other numeric limits (uint32_t).
00402 void setMaxMatches(int maxMatches);
00404 void setMaxQueryTime(uint32_t maxQueryTime);
00405
// NOTE(review): indexNames is a single string; the separator used for several
// index names is not visible here.
00407 void setSearchedIndexes(const std::string &indexNames);
00413 void setIndexWeight(const std::string &indexName, uint32_t weight);
00419 void setFieldWeight(const std::string &fieldName, uint32_t weight);
00420
00422 void setSearchCutoff(uint32_t searchCutOff);
00428 void setRetries(uint32_t distRetryCount, uint32_t distRetryDelay);
00429
00431 void setGeoAnchorPoints(const std::vector<GeoAnchorPoint_t> &anchorPoints);
00432
00434 void setQueryComment(const std::string &queryComment);
00435
00440 void setSelectClause(const std::string &selectClause);
00441
00442
// --- Getters ---------------------------------------------------------------
// String and container getters return const references; whether those stay
// valid after a later setter call depends on the implementation.
00444 uint32_t getPagingOffset() const;
00446 uint32_t getPagingLimit() const;
00447
00449 MatchMode_t getMatchMode() const;
00450
00452 SortMode_t getSortingMode() const;
00454 const std::string &getSortingExpr() const;
00455
00457 RankingMode_t getRankingMode() const;
00459 const std::string &getRankingExpr() const;
00460
00462 GroupFunction_t getGroupingFunction() const;
00464 const std::string &getGroupByExpr() const;
00466 const std::string &getGroupSortExpr() const;
00468 const std::string &getGroupDistinctAttribute() const;
00469
00471 int getMaxMatches() const;
00473 uint32_t getMaxQueryTime() const;
00474
00476 const std::string &getSearchedIndexes() const;
00478 const std::map<std::string, uint32_t> &getIndexWeights() const;
00480 const std::map<std::string, uint32_t> &getFieldWeights() const;
00481
00483 uint32_t getSearchCutoff() const;
00485 uint32_t getDistRetryCount() const;
00487 uint32_t getDistRetryDelay() const;
00488
00490 const std::vector<GeoAnchorPoint_t> &getGeoAnchorPoints() const;
00491
00493 const std::string &getQueryComment() const;
00494
00496 const std::string &getSelectClause() const;
00497
/// Map from a string key to (attribute type, map from a 64-bit key to value).
// NOTE(review): judging by addAttributeOverride(), the outer key is presumably
// the attribute name and the inner key the document id - confirm.
00499 typedef std::map<
00500 std::string,
00501 std::pair<AttributeType_t, std::map<uint64_t, Value_t> > >
00502 AttributeOverrides_t;
00503
00505 const AttributeOverrides_t &getAttributeOverrides() const;
00506
00507 private:
// Implementation struct, defined outside this header.
00508 struct Dptr_t;
00509 Dptr_t *dptr;
00510 };
00511
00512
00513
00514
00515
/// One matched document in a search response: document id, group id,
/// timestamp, weight, and a map of named attribute values.
00521 struct ResponseEntry_t
00522 {
00523 uint64_t documentId;
00524 uint32_t groupId;
00525 uint32_t timestamp;
00526 uint32_t weight;
00527
// Attribute values keyed by a string, presumably the attribute name.
00528 std::map<std::string, Value_t> attribute;
00529
/// Zero-initializes all numeric fields; the attribute map starts empty.
00530 ResponseEntry_t()
00531 : documentId(0), groupId(0), timestamp(0), weight(0) {}
00532 };
00533
/// Per-word statistics holding two counters.
/// There is no constructor, so a default-initialized instance leaves both
/// fields indeterminate; value-initialize (`WordStatistics_t()`) to get zeros.
// NOTE(review): by their names, docsHit is presumably the number of documents
// containing the word and totalHits the total occurrence count - confirm.
00539 struct WordStatistics_t
00540 {
00541 uint32_t docsHit;
00542 uint32_t totalHits;
00543 };
00544
/// Ordered list of (string, uint32_t) pairs; used as the type of
/// Response_t::attribute.
// NOTE(review): presumably (attribute name, AttributeType_t code) - confirm.
00545 typedef std::vector<std::pair<std::string, uint32_t> > AttributeTypes_t;
00546
/// Result of one search query: field names, attribute descriptors, matched
/// entries, per-word statistics and a few summary counters.
/// There is no constructor, so the scalar members are indeterminate in a
/// default-initialized instance until they are assigned.
00552 struct Response_t
00553 {
// List of strings, presumably the names of the index's full-text fields.
00555 std::vector<std::string> field;
00556
// Attribute descriptors (see AttributeTypes_t).
00558 AttributeTypes_t attribute;
00559
// Matched documents.
00561 std::vector<ResponseEntry_t> entry;
00562
// Statistics keyed by a string, presumably the query word.
00564 std::map<std::string, WordStatistics_t> word;
00565
00566
// NOTE(review): presumably entries returned, total entries found, query time
// (unit not visible here) and a flag for 64-bit document ids - confirm.
00567 uint32_t entriesGot;
00568 uint32_t entriesFound;
00569 uint32_t timeConsumed;
00570 uint32_t use64bitId;
00571
00572 SearchCommandVersion_t commandVersion;
00573
// Declared here, defined elsewhere; presumably resets the response to an
// empty state - confirm which members it touches.
00574 void clear();
00575 };
00576
/// One element of the vector returned by Client_t::getKeywords(): the
/// tokenized and normalized forms of a keyword plus its statistics.
// NOTE(review): `statistics` has no initializer of its own; presumably it is
// only meaningful when getKeywords() was called with getWordStatistics=true -
// confirm.
00578 struct KeywordResult_t {
00580 std::string tokenized;
00582 std::string normalized;
00584 WordStatistics_t statistics;
00585 };
00586
/// Batch of search queries, built with addQuery() and sent with
/// Client_t::query(const MultiQuery_t &, std::vector<Response_t> &).
/// Holds a single Query_t object plus a query counter.
00592 class MultiQuery_t
00593 {
00594 protected:
00595 SearchCommandVersion_t commandVersion;
// NOTE(review): one Query_t for the whole batch - presumably the serialized
// form of all added queries; confirm.
00596 Query_t queries;
00597 int queryCount;
00598
00599 public:
/// Creates an empty batch for the given command version. The default here is
/// VER_COMMAND_SEARCH_0_9_9, whereas SearchConfig_t defaults to
/// VER_COMMAND_SEARCH_2_0_5.
// NOTE(review): how a version mismatch between the batch and an added
// SearchConfig_t is handled is not visible here.
00604 MultiQuery_t(SearchCommandVersion_t cmdVersion = VER_COMMAND_SEARCH_0_9_9);
00605
// Presumably resets the batch and switches it to the given command version -
// confirm.
00607 void initQuery(SearchCommandVersion_t commandVersion);
00608
/// Adds one query string together with its search settings.
00620 void addQuery(const std::string& query, const SearchConfig_t &queryAttr);
00621
00622 int getQueryCount() const;
00623 const Query_t &getQueries() const;
00624
00625 SearchCommandVersion_t getCommandVersion() const;
00626 };
00627
00628
00629
00630
00631
00632
00633
/// One query as stored by MultiQueryOpt_t: a Query_t object, a hash string
/// and the sequence number it was given at construction.
00634 class SourceQuery_t {
00635 public:
00636
00637
00638
00639
00640
00641
/// Builds the entry from a query string, its search settings and a sequence
/// number. Defined outside this header.
// NOTE(review): how `hash` is derived from the inputs is not visible here.
00642 SourceQuery_t(const std::string &query, const SearchConfig_t &queryAttr,
00643 int seqNo);
00644
00645
00646
00647
/// Returns the stored hash string.
00648 const std::string &getHash() const {return hash;}
00649
00650
00651
/// Returns the stored sequence number.
00652 int getInputSeqNo() const {return inputSeqNo;}
00653
00654
00655
/// Returns the stored Query_t object.
00656 const Query_t &getQuery() const {return serializedQuery;}
00657
00658 private:
00660 Query_t serializedQuery;
00663 std::string hash;
00665 int inputSeqNo;
00666 };
00667
00668
/// Batch of search queries with an explicit optimise() step.
/// Only the constructor, optimise() and addQuery() are public; the remaining
/// members are protected and reachable by Client_t, which is a friend.
// NOTE(review): the container names suggest optimise() sorts the queries,
// forms groups of them and records how responses map back to the order in
// which queries were added - confirm in the implementation.
00694 class MultiQueryOpt_t
00695 {
00696 public:
/// Creates an empty batch for the given command version
/// (default VER_COMMAND_SEARCH_0_9_9).
00701 MultiQueryOpt_t(SearchCommandVersion_t cmdVersion = VER_COMMAND_SEARCH_0_9_9);
00702
00703
00704
00705
00706
00707
// NOTE(review): presumably has to be called after the last addQuery() and
// before the batch is handed to Client_t::query() - confirm.
00708 void optimise();
00709
/// Adds one query string together with its search settings.
00721 void addQuery(const std::string& query, const SearchConfig_t &queryAttr);
00722
00723 friend class Sphinx::Client_t;
00724 protected:
00725 SearchCommandVersion_t commandVersion;
// Queries held in a std::list.
00727 std::list<SourceQuery_t> sourceQueries;
// Non-owning pointers to SourceQuery_t, presumably into sourceQueries.
00730 std::vector<const SourceQuery_t *> sortedQueries;
// NOTE(review): the meaning of the two ints in each pair is not visible here.
00732 std::vector<std::pair<int, int> > responseIndex;
// NOTE(review): presumably per-group bookkeeping, such as the size or start
// offset of each group - confirm.
00735 std::vector<int> groupQueries;
00736
00738 void initQuery(SearchCommandVersion_t commandVersion);
00739
00740
00741
00742
00743
// Returns a Query_t by value for the given group index.
00744 Sphinx::Query_t getGroupQuery(size_t groupIndex) const;
00745
00746
00747
00748
00749
00750 size_t getGroupQueryCount() const;
00751
00753 int getQueryCount() const;
00754
00759 size_t getQueryCountAtGroup(size_t groupIndex) const;
00760
00766 size_t getResponseIndex(size_t sortedIndex) const;
00767
00769 SearchCommandVersion_t getCommandVersion() const;
00770 };
00771
00772
00773
/// Description of an attribute update request: the attribute names to update
/// and, per 64-bit key, a vector of values. Consumed by
/// Client_t::updateAttributes().
00777 struct AttributeUpdates_t
00778 {
00779 UpdateCommandVersion_t commandVersion;
// Names of the attributes being updated.
00780 std::vector<std::string> attributes;
// Value vectors keyed by a 64-bit id, presumably the document id; each vector
// presumably lines up position-by-position with `attributes` - confirm.
00781 std::map<uint64_t, std::vector<Value_t> > values;
00782
00784 AttributeUpdates_t();
00785
/// Sets the attribute list; the default argument is an empty vector.
00787 void setAttributeList(const std::vector<std::string> &attr
00788 = std::vector<std::string>());
/// Takes a single attribute name.
00790 void addAttribute(const std::string &);
00791
/// Adds the values for one id.
00797 void addDocument(uint64_t id, const std::vector<Value_t> &vals);
00798
/// C-style variadic overload.
// NOTE(review): the expected layout of the variadic arguments (presumably
// alternating ValueType_t tags and values, one per attribute) is not visible
// here; varargs bypass type checking, so prefer the vector overload unless
// the contract is confirmed.
00806 void addDocument(uint64_t id, ValueType_t t, ...);
00807
00809 void setCommandVersion(UpdateCommandVersion_t v);
00810 };
00811
00812
00813
00814
00815
/// Search client: runs single and batched queries, updates attributes and
/// fetches keyword information, using the connection settings it holds.
// NOTE(review): error reporting is not visible in these signatures (all
// operations return void or a value); presumably failures are signalled by
// exceptions declared in "error.h" - confirm.
00822 class Client_t
00823 {
00824 public:
/// Creates a client from the given connection settings. Not `explicit`, so a
/// ConnectionConfig_t converts implicitly to a Client_t.
00825 Client_t(const ConnectionConfig_t &connectionSettings);
00826
/// Runs one query with the given settings; the result is written to `response`.
00838 void query(const std::string& query,
00839 const SearchConfig_t &queryAttr,
00840 Response_t &response);
00841
/// Runs a batch; results are written to `response`.
// NOTE(review): presumably one Response_t per added query, in insertion
// order - confirm.
00853 void query(const MultiQuery_t &query, std::vector<Response_t> &response);
00854
/// Runs a MultiQueryOpt_t batch; results are written to `response`.
// NOTE(review): whether optimise() must have been called beforehand is not
// visible here.
00870 void query(const MultiQueryOpt_t &query, std::vector<Response_t> &response);
00871
/// Applies the given attribute updates to the named index.
00883 void updateAttributes(const std::string &index, const AttributeUpdates_t &at);
00884
/// Returns keyword information for `query` against `index`;
/// getWordStatistics defaults to false.
00895 std::vector<KeywordResult_t> getKeywords(
00896 const std::string &index,
00897 const std::string &query,
00898 bool getWordStatistics = false);
00899
00900 protected:
00901
// Connection settings held by value.
00902 ConnectionConfig_t connection;
00903 };
00904
00905
/// Returns an escaped copy of `query`; the input is taken by const reference
/// and left unchanged.
// NOTE(review): the set of escaped characters is defined by the
// implementation and is not visible in this header.
00911 std::string escapeQueryString(const std::string &query);
00912
00913 }
00914
// Function with C linkage and no parameters or result.
// NOTE(review): presumably exists only so build systems can detect the
// library by linking against an unmangled symbol - confirm.
00915 extern "C" {
00916 void sphinxClientDummy();
00917 }
00918
00919 #endif
00920