本文整理汇总了C++中Xapian::Document类的典型用法代码示例。如果您正苦于以下问题:C++ Document类的具体用法?C++ Document怎么用?C++ Document使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Document类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: main
int main(int argc, char **argv)
{
// Simplest possible options parsing: we just require three or more
// parameters.
if(argc < 4) {
cout << "usage: " << argv[0] <<
" <path to database> <document data> <document terms>" << endl;
exit(1);
}
// Catch any Xapian::Error exceptions thrown
try {
// Make the database
Xapian::WritableDatabase database(argv[1], Xapian::DB_CREATE_OR_OPEN);
// Make the document
Xapian::Document newdocument;
// Put the data in the document
newdocument.set_data(string(argv[2]));
// Put the terms into the document
for (int i = 3; i < argc; ++i) {
newdocument.add_posting(argv[i], i - 2);
}
// Add the document to the database
database.add_document(newdocument);
} catch(const Xapian::Error &error) {
cout << "Exception: " << error.get_msg() << endl;
}
}
开发者ID:IthacaDream,项目名称:Test,代码行数:32,代码来源:quickstartindex.cpp
示例2: addTermsToDocument
/// Adds the tokenizer's terms to a Xapian document as postings.
///
/// Tokens that start with an upper-case letter are additionally stored raw
/// with an "R" prefix. Every token is then lower-cased and stored under
/// `prefix`, raw and/or stemmed depending on `mode`.
///
/// @param tokens   source of terms; read until nextToken() returns false
/// @param doc      document the postings are added to
/// @param prefix   string prepended to each lower-cased term
/// @param termPos  position used for the next posting; advanced after each
///                 term stored under `prefix`
/// @param mode     selects raw terms, stemmed terms, or both
void XapianIndex::addTermsToDocument(Tokenizer &tokens, Xapian::Document &doc,
    const string &prefix, Xapian::termcount &termPos, StemmingMode mode) const
{
    // Owns the optional stemmer so that it is freed even if a Xapian call
    // below throws
    struct StemmerOwner
    {
        Xapian::Stem *m_pStem;
        StemmerOwner() : m_pStem(NULL) {}
        ~StemmerOwner() { delete m_pStem; }
    } stemmerOwner;
    string term;

    // Do we know what language to use for stemming ?
    if (m_stemLanguage.empty() == false)
    {
        stemmerOwner.m_pStem = new Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
    }
    Xapian::Stem *pStemmer = stemmerOwner.m_pStem;

    // Get the terms
    while (tokens.nextToken(term) == true)
    {
        if (term.empty() == true)
        {
            continue;
        }

        // Does it start with a capital letter ?
        // The cast keeps bytes above 0x7F from reaching isupper() as negative values
        if (isupper(static_cast<unsigned char>(term[0])) != 0)
        {
            // R-prefix the raw term
            doc.add_posting(string("R") + term, termPos);
        }

        // Lower case the term
        term = StringManip::toLowerCase(term);

        // Stem the term ?
        if ((mode == STORE_UNSTEM) ||
            (pStemmer == NULL))
        {
            doc.add_posting(limitTermLength(prefix + term), termPos++);
        }
        else if (mode == STORE_STEM)
        {
            string stemmedTerm = pStemmer->stem_word(term);

            doc.add_posting(limitTermLength(prefix + stemmedTerm), termPos++);
        }
        else if (mode == STORE_BOTH)
        {
            string stemmedTerm = pStemmer->stem_word(term);

            // Add both
            doc.add_posting(limitTermLength(prefix + term), termPos);
            // ...at the same position
            doc.add_posting(limitTermLength(prefix + stemmedTerm), termPos++);
        }
    }
#ifdef DEBUG
    cout << "XapianIndex::addTermsToDocument: added " << termPos << " terms" << endl;
#endif
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:59,代码来源:XapianIndex.cpp
示例3: getDocumentInfo
/// Returns a document's properties.
bool XapianIndex::getDocumentInfo(unsigned int docId, DocumentInfo &docInfo) const
{
bool foundDocument = false;
if (docId == 0)
{
return false;
}
XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
if (pDatabase == NULL)
{
cerr << "Bad index " << m_databaseName << endl;
return false;
}
try
{
Xapian::Database *pIndex = pDatabase->readLock();
if (pIndex != NULL)
{
Xapian::Document doc = pIndex->get_document(docId);
// Get the current document data
string record = doc.get_data();
if (record.empty() == false)
{
string language = Languages::toLocale(StringManip::extractField(record, "language=", ""));
docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"),
StringManip::extractField(record, "url=", "\n"),
StringManip::extractField(record, "type=", "\n"),
language);
docInfo.setTimestamp(StringManip::extractField(record, "timestamp=", "\n"));
#ifdef DEBUG
cout << "XapianIndex::getDocumentInfo: language is "
<< docInfo.getLanguage() << endl;
#endif
foundDocument = true;
}
}
}
catch (const Xapian::Error &error)
{
cerr << "Couldn't get document properties: " << error.get_msg() << endl;
}
catch (...)
{
cerr << "Couldn't get document properties, unknown exception occured" << endl;
}
pDatabase->unlock();
return foundDocument;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:55,代码来源:XapianIndex.cpp
示例4: setDocumentData
void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc,
const string &language) const
{
time_t timeT = TimeConverter::fromTimestamp(info.getTimestamp());
// Add this value to allow sorting by date
doc.add_value(0, StringManip::integerToBinaryString((uint32_t)timeT));
DocumentInfo docCopy(info);
docCopy.setLanguage(language);
doc.set_data(XapianDatabase::propsToRecord(&docCopy));
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:12,代码来源:XapianIndex.cpp
示例5: renameLabel
/// Renames a label.
///
/// Every document that carries the term "XLABEL:" + name has that term
/// removed and the length-limited term "XLABEL:" + newName added instead.
///
/// @param name     current label name
/// @param newName  replacement label name
/// @return true if the index could be write-locked and every matching
///         document was updated; false if the index is unavailable or an
///         exception was caught
bool XapianIndex::renameLabel(const string &name, const string &newName)
{
    bool renamedLabel = false;

    XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
    if (pDatabase == NULL)
    {
        cerr << "Bad index " << m_databaseName << endl;
        return false;
    }

    try
    {
        Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
        if (pIndex != NULL)
        {
            string term("XLABEL:");

            // Get documents that have this label
            term += name;
            // NOTE(review): documents are replaced while this term's posting
            // list is being iterated - confirm the backend tolerates that
            for (Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
                postingIter != pIndex->postlist_end(term); ++postingIter)
            {
                Xapian::docid docId = *postingIter;

                // Get the document
                Xapian::Document doc = pIndex->get_document(docId);
                // Remove the term
                doc.remove_term(term);
                // ...add the new one
                doc.add_term(limitTermLength(string("XLABEL:") + newName));
                // ...and update the document
                pIndex->replace_document(docId, doc);
            }

            renamedLabel = true;
        }
    }
    catch (const Xapian::Error &error)
    {
        // The messages used to say "delete label", which misreported the failing operation
        cerr << "Couldn't rename label: " << error.get_type() << ": " << error.get_msg() << endl;
    }
    catch (...)
    {
        cerr << "Couldn't rename label, unknown exception occured" << endl;
    }

    pDatabase->unlock();

    return renamedLabel;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:51,代码来源:XapianIndex.cpp
示例6: setDocumentData
/// Builds the omindex-style data record for a document and stores it,
/// together with a sortable date value, in the Xapian document.
///
/// The record holds the fields url, sample (left empty), caption, type,
/// modtime and language, one per line.
///
/// @param info      source of the location, title, type and timestamp
/// @param doc       document that receives the record and the value
/// @param language  language name; stored lower-cased
void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc,
    const string &language) const
{
    string title(info.getTitle());
    string timestamp(info.getTimestamp());
    char timeStr[64];
    time_t timeT = TimeConverter::fromTimestamp(timestamp);

    // Set the document data omindex-style
    string record = "url=";
    record += info.getLocation();
    // The sample will be generated at query time
    record += "\nsample=";
    record += "\ncaption=";
    if (badField(title) == true)
    {
        // Modify the title if necessary: '=' is replaced with a space
        string::size_type pos = title.find('=');
        while (pos != string::npos)
        {
            title[pos] = ' ';
            pos = title.find('=', pos + 1);
        }
#ifdef DEBUG
        cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
    }
    record += title;
    record += "\ntype=";
    record += info.getType();
    // Append a timestamp, in a format compatible with Omega
    record += "\nmodtime=";
    // time_t is not guaranteed to be long, so convert explicitly to match %ld
    snprintf(timeStr, sizeof(timeStr), "%ld", static_cast<long>(timeT));
    record += timeStr;
    // ...and the language
    record += "\nlanguage=";
    record += StringManip::toLowerCase(language);
#ifdef DEBUG
    cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
    doc.set_data(record);

    // Add this value to allow sorting by date
    doc.add_value(0, StringManip::integerToBinaryString((uint32_t)timeT));
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:45,代码来源:XapianIndex.cpp
示例7: saveMessage
void HistoryLogger::saveMessage(const Message* message)
{
if (message->flags() & MESSAGE_FLAG_ALARM)
return;
Xapian::Document doc;
quint32 flags = message->flags();
std::string plainText(message->plainText().toUtf8());
std::string confUser(message->getConfUser().constData());
std::string data;
if (flags & MESSAGE_FLAG_RTF)
data = message->rtfText().constData();
else
data = plainText;
std::cout << "HistoryLogger::saveMessage data = " << data << std::endl;
doc.set_data(data);
Xapian::TermGenerator termGen;
termGen.set_stemmer(Xapian::Stem("ru"));
termGen.set_document(doc);
termGen.index_text(plainText);
doc.add_value(0, message->dateTime().toString("yyyyMMdd").toStdString());
doc.add_value(1, message->dateTime().toString("hhmmss").toStdString());
doc.add_value(2, QString::number(flags, 16).toStdString());
doc.add_value(3, message->type() == Message::Outgoing? "o" : "i");
doc.add_value(4, confUser);
database->add_document(doc);
database->flush();
}
开发者ID:Andrsid,项目名称:myagent-im,代码行数:34,代码来源:historylogger.cpp
示例8: setDocumentData
/// Builds the omindex-style data record for a document and stores it,
/// together with a date value, in the Xapian document.
///
/// The record holds the fields url, sample (left empty), caption, type,
/// timestamp and language, one per line.
///
/// @param doc       document that receives the record and the value
/// @param info      source of the location, title, type and timestamp
/// @param language  language stored in the record as given
void XapianIndex::setDocumentData(Xapian::Document &doc, const DocumentInfo &info,
    const string &language) const
{
    string title(info.getTitle());
    string timestamp(info.getTimestamp());
    char timeStr[64];

    // Set the document data omindex-style
    string record = "url=";
    record += info.getLocation();
    // The sample will be generated at query time
    record += "\nsample=";
    record += "\ncaption=";
    if (badField(title) == true)
    {
        // Modify the title if necessary: '=' is replaced with a space
        string::size_type pos = title.find('=');
        while (pos != string::npos)
        {
            title[pos] = ' ';
            pos = title.find('=', pos + 1);
        }
#ifdef DEBUG
        cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
    }
    record += title;
    record += "\ntype=";
    record += info.getType();
    // Append a timestamp
    record += "\ntimestamp=";
    record += timestamp;
    // ...and the language
    record += "\nlanguage=";
    record += language;
#ifdef DEBUG
    cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
    doc.set_data(record);

    // Add this value to allow sorting by date.
    // The converted timestamp was passed to "%d" as is, which is undefined
    // wherever it is wider than int; convert explicitly and print as long.
    snprintf(timeStr, sizeof(timeStr), "%ld",
        static_cast<long>(TimeConverter::fromTimestamp(timestamp)));
    doc.add_value(0, timeStr);
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:44,代码来源:XapianIndex.cpp
示例9: main
int main(int argc, char **argv)
{
// Simplest possible options parsing: we just require two or more
// parameters.
if (argc < 3) {
cout << "usage: " << argv[0] << " <path to database> <search terms>" << endl;
exit(1);
}
// Catch any Xapian::Error exceptions thrown
try {
// Make the database
Xapian::Database db(argv[1]);
// Start an enquire session
Xapian::Enquire enquire(db);
// Set percent and/or weight cutoffs
enquire.set_cutoff(90,0.2);
// Set weighting schema
BM25Weight bm1(1.0,0.0,1.0,0.5,0.3);
enquire.set_weighting_scheme(bm1);
// Build the query object
Xapian::Query query(Xapian::Query::OP_AND, argv + 2, argv + argc);
cout << "Performing query" << query.get_description() << "'" << endl;
// Set Stopper
string stop[8]={"的","了","呵","吧","就","你","我","他"};
SimpleStopper *ss=new SimpleStopper;
for(int i=0;i<8;i++){
ss->add(stop[i]);
}
QueryParser qparser;
qparser.set_stopper(ss);
qparser.set_database(db);
// Give the query object to the enquire session
enquire.set_query(query);
// Get the top 10 results of the query
Xapian::MSet matches = enquire.get_mset(0, 10); //最多返回10个文档
// Display the results
cout << matches.size() << " results found" << endl;
for (Xapian::MSetIterator i = matches.begin();i != matches.end(); ++i) {
Xapian::Document doc = i.get_document();
cout << "Document ID " << *i << "\nPercent " <<i.get_percent() << "%\n" << doc.get_data() << "\n" << endl;
}
db.close();
} catch(const Xapian::Error &error) {
cout << "Exception: " << error.get_msg() << endl;
}
}
开发者ID:IthacaDream,项目名称:Test,代码行数:56,代码来源:xapian_test.cpp
示例10: removeFirstPostingsFromDocument
/// Removes the first posting of every term produced by the tokenizer.
///
/// The terms are derived from the tokens the same way they are when added:
/// an "R"-prefixed raw term for capitalised tokens, then the lower-cased
/// term under `prefix`, raw and/or stemmed depending on `mode`.
///
/// @param tokens    source of terms; read until nextToken() returns false
/// @param doc       document the postings are removed from
/// @param prefix    string prepended to each lower-cased term
/// @param language  stemming language; no stemming is done when empty
/// @param mode      selects raw terms, stemmed terms, or both
void XapianIndex::removeFirstPostingsFromDocument(Tokenizer &tokens, Xapian::Document &doc,
    const string &prefix, const string &language, StemmingMode mode) const
{
    // Owns the optional stemmer so that it is freed even if a call below throws
    struct StemmerOwner
    {
        Xapian::Stem *m_pStem;
        StemmerOwner() : m_pStem(NULL) {}
        ~StemmerOwner() { delete m_pStem; }
    } stemmerOwner;
    Xapian::TermIterator termListIter = doc.termlist_begin();
    string term;

    // Do we know what language to use for stemming ?
    if (language.empty() == false)
    {
        stemmerOwner.m_pStem = new Xapian::Stem(StringManip::toLowerCase(language));
    }
    Xapian::Stem *pStemmer = stemmerOwner.m_pStem;

    // Get the terms and remove the first posting for each
    while (tokens.nextToken(term) == true)
    {
        if (term.empty() == true)
        {
            continue;
        }

        // Does it start with a capital letter ?
        // The cast keeps bytes above 0x7F from reaching isupper() as negative values
        if (isupper(static_cast<unsigned char>(term[0])) != 0)
        {
            // R-prefix the raw term
            removeFirstPosting(doc, termListIter, string("R") + term);
        }

        // Lower case the term
        term = StringManip::toLowerCase(term);

        // Stem the term ?
        if ((mode == STORE_UNSTEM) ||
            (pStemmer == NULL))
        {
            removeFirstPosting(doc, termListIter, limitTermLength(prefix + term));
        }
        else if (mode == STORE_STEM)
        {
            removeFirstPosting(doc, termListIter, limitTermLength(prefix + pStemmer->stem_word(term)));
        }
        else if (mode == STORE_BOTH)
        {
            string stemmedTerm = pStemmer->stem_word(term);

            removeFirstPosting(doc, termListIter, limitTermLength(prefix + term));
            if (stemmedTerm != term)
            {
                removeFirstPosting(doc, termListIter, limitTermLength(prefix + stemmedTerm));
            }
        }
    }
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:56,代码来源:XapianIndex.cpp
示例11: requestImage
/// Returns the thumbnail for `id`, generating and caching it on a cache miss.
///
/// An id starting with 'Q' is looked up in the Xapian database to obtain the
/// file path; any other id is used as the file path itself.
///
/// @param id             cache key: a 'Q'-prefixed document id or a file path
/// @param size           if not null, receives the size of the returned image
///                       whenever a non-null image is returned
/// @param requestedSize  not used
/// @return the thumbnail, or a null image if none could be produced
QImage ThumbnailProvider::requestImage(const QString &id, QSize *size, const QSize &requestedSize)
{
    QImage image;

    if (!m_thumb32->findImage(id, &image)) {
        QString filePath;
        bool haveFilePath = true;

        // Guard against an empty id before looking at its first character
        if (!id.isEmpty() && id.at(0) == QLatin1Char('Q')) {
            Xapian::Document doc = m_xapianDB->findDocument(id);
            if (doc.get_docid() == 0) {
                // Unknown document: return the null image without caching it
                haveFilePath = false;
            } else {
                filePath = QString::fromStdString(doc.get_value(Database::FilePath));
            }
        } else {
            filePath = id;
        }

        if (haveFilePath) {
            // Load thumbnail
            KExiv2Iface::KExiv2 preview(filePath);
            image = preview.getExifThumbnail(true);

            if (image.isNull()) {
                // No embedded thumbnail: scale the full image down instead
                // TODO smooth or fast?
                image = QImage(filePath).scaled(160, 120, Qt::KeepAspectRatio);
                kWarning() << "Could not find preview image for" << filePath << image.isNull();
            }

            // Store the thumbnail into the cache file
            if (m_thumb32->insertImage(id, image)) {
                kWarning() << "Added preview for" << image.byteCount() << filePath << id;
            } else {
                kWarning() << "FAILED to add preview for" << filePath << id;
            }
        }
    }

    // Image providers are expected to report the image size through `size`
    if (size != 0 && !image.isNull()) {
        *size = image.size();
    }

    return image;
}
开发者ID:KDE,项目名称:photobook,代码行数:43,代码来源:ThumbnailProvider.cpp
示例12: text
/// Returns the text stored as the data of the document matching `queryId`.
///
/// @return the document data decoded as UTF-8, or "No Subject" when the data
///         is empty or could not be read
QString EmailSearchStore::text(int queryId)
{
    Xapian::Document matchedDoc = docForQuery(queryId);

    QMutexLocker lock(&m_mutex);

    std::string rawData;
    try {
        rawData = matchedDoc.get_data();
    } catch (const Xapian::Error &) {
        // Nothing to do, move along: rawData stays empty
    }

    const QString subject = QString::fromUtf8(rawData.c_str(), rawData.length());
    return subject.isEmpty() ? QStringLiteral("No Subject") : subject;
}
开发者ID:KDE,项目名称:akonadi-search,代码行数:19,代码来源:emailsearchstore.cpp
示例13: prepareDocument
bool XapianIndex::prepareDocument(const DocumentInfo &info, Xapian::Document &doc,
Xapian::termcount &termPos) const
{
string title(info.getTitle());
string location(info.getLocation());
Url urlObj(location);
// Add a magic term :-)
doc.add_term(MAGIC_TERM);
// Index the title with and without prefix S
if (title.empty() == false)
{
Document titleDoc;
titleDoc.setData(title.c_str(), title.length());
Tokenizer titleTokens(&titleDoc);
addTermsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
titleTokens.rewind();
addTermsToDocument(titleTokens, doc, "", termPos, m_stemMode);
}
// Index the full URL with prefix U
doc.add_term(limitTermLength(string("U") + location, true));
// ...the host name and included domains with prefix H
string hostName(StringManip::toLowerCase(urlObj.getHost()));
if (hostName.empty() == false)
{
doc.add_term(limitTermLength(string("H") + hostName, true));
string::size_type dotPos = hostName.find('.');
while (dotPos != string::npos)
{
doc.add_term(limitTermLength(string("H") + hostName.substr(dotPos + 1), true));
// Next
dotPos = hostName.find('.', dotPos + 1);
}
}
// ...and the file name with prefix P
string fileName(urlObj.getFile());
if (fileName.empty() == false)
{
doc.add_term(limitTermLength(string("P") + StringManip::toLowerCase(fileName), true));
}
// Finally, add the language code with prefix L
doc.add_term(string("L") + Languages::toCode(m_stemLanguage));
setDocumentData(doc, info, m_stemLanguage);
return true;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:50,代码来源:XapianIndex.cpp
示例14: removeFirstPosting
/// Removes the first posting of `term` from the document, if there is one.
///
/// @param doc           document to modify
/// @param termListIter  iterator over the document's term list; on return it
///                      points at `term`, past it, or at the end of the list
/// @param term          term whose first position is removed
static void removeFirstPosting(Xapian::Document &doc,
    Xapian::TermIterator &termListIter, const string &term)
{
    // skip_to() only advances, while callers pass terms in token order rather
    // than sorted order, so restart from the beginning of the current term list
    termListIter = doc.termlist_begin();
    termListIter.skip_to(term);

    // skip_to() stops at the first term that is not less than the one asked
    // for: make sure that is really this term before using its positions
    if ((termListIter == doc.termlist_end()) ||
        (*termListIter != term))
    {
        return;
    }

    Xapian::PositionIterator firstPosIter = termListIter.positionlist_begin();
    if (firstPosIter != termListIter.positionlist_end())
    {
        try
        {
            doc.remove_posting(term, *firstPosIter);
        }
        catch (const Xapian::Error &error)
        {
            // This posting may have been removed already
#ifdef DEBUG
            cout << "XapianIndex::removeFirstPosting: " << error.get_msg() << endl;
#endif
        }
    }
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:21,代码来源:XapianIndex.cpp
示例15: db
/// Indexes every row of a CSV file into a Xapian database.
///
/// @param datapath  path of the CSV file; its header row must carry
///                  "id_NUMBER", "TITLE" and "DESCRIPTION" at the expected columns
/// @param dbpath    path of the database, created if necessary
Indexer::Indexer(const string &datapath, const string &dbpath)
{
    // Column numbers within a CSV row, hardcoded for simplicity.
    const size_t FIELD_ID_NUMBER = 0;
    const size_t FIELD_TITLE = 2;
    const size_t FIELD_DESCRIPTION = 8;

    // Create or open the database we're going to be writing to.
    Xapian::WritableDatabase db(dbpath, Xapian::DB_CREATE_OR_OPEN);

    // Set up a TermGenerator that we'll use in indexing.
    Xapian::TermGenerator termgenerator;
    termgenerator.set_stemmer(Xapian::Stem("en"));

    ifstream csv(datapath.c_str());
    vector<string> fields;

    // The first row is the header: check it matches the hardcoded columns.
    csv_parse_line(csv, fields);
    const bool headerAsExpected =
        fields.at(FIELD_ID_NUMBER) == "id_NUMBER" &&
        fields.at(FIELD_TITLE) == "TITLE" &&
        fields.at(FIELD_DESCRIPTION) == "DESCRIPTION";
    if (!headerAsExpected) {
        // The CSV format doesn't match what we expect.
        cerr << "CSV format has changed!" << endl;
        exit(1);
    }

    while (csv_parse_line(csv, fields)) {
        // 'fields' maps a column number to its value. Using at() means a
        // missing column raises an exception instead of being read blindly.
        // Only DESCRIPTION, TITLE and id_NUMBER are used.
        const string & description = fields.at(FIELD_DESCRIPTION);
        const string & title = fields.at(FIELD_TITLE);
        const string & identifier = fields.at(FIELD_ID_NUMBER);

        // Each row becomes one document, filled in by the term generator.
        Xapian::Document doc;
        termgenerator.set_document(doc);

        // Prefixed terms, for searches restricted to one field.
        termgenerator.index_text(title, 1, "S");
        termgenerator.index_text(description, 1, "XD");

        // Unprefixed terms, for general search.
        termgenerator.index_text(title);
        termgenerator.increase_termpos();
        termgenerator.index_text(description);

        // Keep all the fields for display purposes.
        doc.set_data(identifier + "\n" + title + "\n" + description);

        // The identifier term makes sure each object ends up in the database
        // only once, no matter how many times the indexer is run.
        const string idterm = "Q" + identifier;
        doc.add_boolean_term(idterm);
        db.replace_document(idterm, doc);
    }
}
开发者ID:jainnidhi703,项目名称:xapianclusteringexample,代码行数:61,代码来源:indexer.cpp
示例16: QueryHandler
/// Handles a query message: segments the query text, searches the Xapian
/// database for documents matching all segments and sends the result list
/// back to the sender.
///
/// The query is converted from UTF-8 to GBK, segmented, and the segmented
/// text converted back to UTF-8. Each "word/tag" segment whose tag is not
/// "w" becomes a "K"-prefixed term. The function deletes message.query; the
/// result list is allocated here and handed to the response message.
///
/// @param message  carries the query (deleted here) and the result key
/// @param from     address the response is sent to
void QueryHandler(const QueryMessage &message, const Theron::Address from)
{
    search::QueryInfo qi = *(message.query);
    std::string resKey(message.resKey);
    delete message.query;

    std::string segString;

    // The buffers are zero-filled and one byte larger than the capacity given
    // to the converters, so they are NUL-terminated even for an empty query
    const size_t inputCapacity = qi.query.length() * 3;
    const size_t outputCapacity = qi.query.length() * 9;
    std::vector<char> input(inputCapacity + 1, 0);
    std::vector<char> output(outputCapacity + 1, 0);

    try
    {
        UErrorCode error = U_ZERO_ERROR;
        ucnv_convert("GBK", "UTF-8", &input[0], inputCapacity, qi.query.c_str(), qi.query.length(), &error);
        bool ret = result->ParagraphProcessing(&input[0], &output[0]);
        if (ret)
        {
            const int oLen = static_cast<int>(strlen(&output[0]));
            std::vector<char> utf8out(static_cast<size_t>(oLen) * 3 + 1, 0);
            ucnv_convert("UTF-8", "GBK", &utf8out[0], oLen * 3, &output[0], oLen, &error);
            segString = std::string(&utf8out[0]);
        }
    }
    catch (...) {
        // Best effort: a failed segmentation leaves segString empty and
        // yields an empty result list
    }

    // Turn "word/tag" segments into terms
    std::list<std::string> segList;
    if (segString.length() > 0)
    {
        std::vector<std::string> resv;
        boost::algorithm::split(resv, segString, boost::algorithm::is_any_of(" "));
        for (std::vector<std::string>::iterator it = resv.begin(); it != resv.end(); ++it)
        {
            std::vector<std::string> tmpv;
            boost::algorithm::split(tmpv, *it, boost::algorithm::is_any_of("/"));
            if (tmpv.size() > 1 && tmpv[1] != "w")
                segList.push_back(std::string("K") + tmpv[0]);
        }
    }

    search::DocList *dList = new search::DocList();
    if (segList.size() > 0)
    {
        Xapian::Query query(Xapian::Query::OP_AND, segList.begin(), segList.end());
        while (1)
        {
            try
            {
                db.reopen();
                Xapian::Enquire enquire(db);
                enquire.set_query(query);
                Xapian::MSet matches = enquire.get_mset(0, 100);
                for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
                    Xapian::Document doc = i.get_document();
                    search::IndexInfo info;
                    info.uid = doc.get_value(1);
                    info.attMap.insert(std::make_pair(std::string("title"), doc.get_value(2)));
                    info.content = doc.get_data();
                    dList->docList.push_back(info);
                }
                std::cout << "doc size:" << dList->docList.size() << std::endl;
                break;
            }
            catch (const Xapian::DatabaseModifiedError &)
            {
                // The database changed under us: drop what was collected so
                // far so the retry does not report documents twice
                dList->docList.clear();
                std::cout << "try agian" << std::endl;
            }
            catch (...)
            {
                break;
            }
        }
    }
    Send(QueryResponceMessage(dList, resKey.c_str()), from);
}
开发者ID:firememory,项目名称:dfwbi,代码行数:83,代码来源:QueryActor.hpp
示例17: indexDocument
/// Indexes the given data.
///
/// @param tokens  tokenizer over the document to index
/// @param labels  labels to attach, each stored as an "XLABEL:" term
/// @param docId   receives the ID of the newly added document
/// @return true if the document was added to the index
bool XapianIndex::indexDocument(Tokenizer &tokens, const std::set<std::string> &labels,
    unsigned int &docId)
{
    XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
    if (pDatabase == NULL)
    {
        cerr << "Bad index " << m_databaseName << endl;
        return false;
    }

    bool indexed = false;

    try
    {
        // Without a document there is nothing to index
        const Document *pDocument = tokens.getDocument();
        if (pDocument == NULL)
        {
#ifdef DEBUG
            cout << "XapianIndex::indexDocument: no document" << endl;
#endif
            return false;
        }

        // Work on a copy of the properties, with a canonical location
        DocumentInfo docInfo(pDocument->getTitle(), pDocument->getLocation(),
            pDocument->getType(), pDocument->getLanguage());
        docInfo.setTimestamp(pDocument->getTimestamp());
        docInfo.setLocation(Url::canonicalizeUrl(docInfo.getLocation()));

        // Scanning the content sets the language used for stemming
        unsigned int dataLength = 0;
        const char *pData = pDocument->getData(dataLength);
        if (pData != NULL)
        {
            m_stemLanguage = scanDocument(pData, dataLength, docInfo);
        }

        Xapian::Document doc;
        Xapian::termcount termPos = 0;

#ifdef DEBUG
        cout << "XapianIndex::indexDocument: adding terms" << endl;
#endif
        // Content terms first
        addPostingsToDocument(tokens, doc, "", termPos, m_stemMode);

        // ...then one term per label
        set<string>::const_iterator labelIter = labels.begin();
        while (labelIter != labels.end())
        {
            doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
            ++labelIter;
        }

        if (addCommonTerms(docInfo, doc, termPos) == true)
        {
            setDocumentData(docInfo, doc, m_stemLanguage);

            // The write lock is only requested once the document is complete
            Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
            if (pIndex != NULL)
            {
                docId = pIndex->add_document(doc);
                indexed = true;
            }
        }
    }
    catch (const Xapian::Error &error)
    {
        cerr << "Couldn't index document: " << error.get_type() << ": " << error.get_msg() << endl;
    }
    catch (...)
    {
        cerr << "Couldn't index document, unknown exception occured" << endl;
    }

    pDatabase->unlock();

    return indexed;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:77,代码来源:XapianIndex.cpp
示例18: addPostingsToDocument
/// Adds the tokenizer's terms to a Xapian document as postings.
///
/// Tokens that start with an upper-case letter are additionally stored raw,
/// prefixed with `prefix` if one is given and with "R" otherwise. Every token
/// is then lower-cased and stored under `prefix`, raw and/or stemmed
/// depending on `mode`.
///
/// @param tokens   source of terms; read until nextToken() returns false
/// @param doc      document the postings are added to
/// @param prefix   string prepended to each lower-cased term
/// @param termPos  position used for the next posting; incremented once per
///                 non-empty token
/// @param mode     selects raw terms, stemmed terms, or both
void XapianIndex::addPostingsToDocument(Tokenizer &tokens, Xapian::Document &doc,
    const string &prefix, Xapian::termcount &termPos, StemmingMode mode) const
{
    // Owns the optional stemmer so that it is freed even if a Xapian call
    // below throws
    struct StemmerOwner
    {
        Xapian::Stem *m_pStem;
        StemmerOwner() : m_pStem(NULL) {}
        ~StemmerOwner() { delete m_pStem; }
    } stemmerOwner;
    string upperCasePrefix("R");
    string term;

    // Do we know what language to use for stemming ?
    if (m_stemLanguage.empty() == false)
    {
        stemmerOwner.m_pStem = new Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
    }
    Xapian::Stem *pStemmer = stemmerOwner.m_pStem;

    // Terms starting with a capital letter are R-prefixed, unless a prefix is already defined
    if (prefix.empty() == false)
    {
        upperCasePrefix = prefix;
    }

    // Get the terms
    while (tokens.nextToken(term) == true)
    {
        if (term.empty() == true)
        {
            continue;
        }

        // Does it start with a capital letter ?
        // The cast keeps bytes above 0x7F from reaching isupper() as negative values
        if (isupper(static_cast<unsigned char>(term[0])) != 0)
        {
            doc.add_posting(upperCasePrefix + XapianDatabase::limitTermLength(term), termPos);
        }

        // Lower case the term
        term = StringManip::toLowerCase(term);

        // Stem the term ?
        if ((mode == STORE_UNSTEM) ||
            (pStemmer == NULL))
        {
            doc.add_posting(prefix + XapianDatabase::limitTermLength(term), termPos);
        }
        else if (mode == STORE_STEM)
        {
#if XAPIAN_MAJOR_VERSION==0
            string stemmedTerm(pStemmer->stem_word(term));
#else
            string stemmedTerm((*pStemmer)(term));
#endif

            doc.add_posting(prefix + XapianDatabase::limitTermLength(stemmedTerm), termPos);
        }
        else if (mode == STORE_BOTH)
        {
#if XAPIAN_MAJOR_VERSION==0
            string stemmedTerm(pStemmer->stem_word(term));
#else
            string stemmedTerm((*pStemmer)(term));
#endif

            // Add both at the same position
            doc.add_posting(prefix + XapianDatabase::limitTermLength(term), termPos);
            if (stemmedTerm != term)
            {
                // No point adding the same term twice
                doc.add_posting(prefix + XapianDatabase::limitTermLength(stemmedTerm), termPos);
            }
        }

        ++termPos;
    }
#ifdef DEBUG
    cout << "XapianIndex::addPostingsToDocument: added " << termPos << " terms" << endl;
#endif
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:78,代码来源:XapianIndex.cpp
示例19: addCommonTerms
/// Adds the terms every document gets, whatever its content.
///
/// Terms added: the magic term, the title tokens (with prefix "S" and again
/// without prefix), the URL ("U"), the base file of local URLs that have a
/// query part ("XFILE:"), the host name and its parent domains ("H"), the
/// location and its parent directories ("XDIR:"), the file name ("P") and
/// its extension ("XEXT:"), the date ("D", "M", "Y"), the language code
/// ("L") and the MIME type ("T").
///
/// @param info     source of the title, location, timestamp and type
/// @param doc      document to add terms to
/// @param termPos  running term position, passed on to addPostingsToDocument()
void XapianIndex::addCommonTerms(const DocumentInfo &info, Xapian::Document &doc,
    Xapian::termcount &termPos) const
{
    string title(info.getTitle());
    string location(info.getLocation());
    Url urlObj(location);

    // Add a magic term :-)
    doc.add_term(MAGIC_TERM);

    // Index the title with and without prefix S
    if (title.empty() == false)
    {
        Document titleDoc;

        titleDoc.setData(title.c_str(), title.length());
        Tokenizer titleTokens(&titleDoc);
        addPostingsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
        titleTokens.rewind();
        addPostingsToDocument(titleTokens, doc, "", termPos, m_stemMode);
    }

    // Index the full URL with prefix U
    doc.add_term(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(location), true));
    // ...the base file with XFILE:
    string::size_type qmPos = location.find('?');
    if ((urlObj.isLocal() == true) &&
        (qmPos != string::npos))
    {
        doc.add_term(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(location.substr(0, qmPos)), true));
    }
    // ...the host name and included domains with prefix H
    string hostName(StringManip::toLowerCase(urlObj.getHost()));
    if (hostName.empty() == false)
    {
        doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName, true));
        string::size_type dotPos = hostName.find('.');
        while (dotPos != string::npos)
        {
            doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName.substr(dotPos + 1), true));

            // Next
            dotPos = hostName.find('.', dotPos + 1);
        }
    }
    // ...the location (as is) and all directories with prefix XDIR:
    string tree(urlObj.getLocation());
    if (tree.empty() == false)
    {
        doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree), true));
        if (tree[0] == '/')
        {
            doc.add_term("XDIR:/");
        }
        // Start at 1 so that a leading slash does not yield an empty directory
        string::size_type slashPos = tree.find('/', 1);
        while (slashPos != string::npos)
        {
            doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree.substr(0, slashPos)), true));

            // Next
            slashPos = tree.find('/', slashPos + 1);
        }
    }
    // ...and the file name with prefix P
    string fileName(urlObj.getFile());
    if (fileName.empty() == false)
    {
        string extension;

        doc.add_term(string("P") + XapianDatabase::limitTermLength(Url::escapeUrl(fileName), true));

        // Does it have an extension ?
        string::size_type extPos = fileName.rfind('.');
        if ((extPos != string::npos) &&
            (extPos + 1 < fileName.length()))
        {
            extension = StringManip::toLowerCase(fileName.substr(extPos + 1));
        }
        // The term is added even when there is no extension
        doc.add_term(string("XEXT:") + XapianDatabase::limitTermLength(extension));
    }
    // Add the date terms D, M and Y
    time_t timeT = TimeConverter::fromTimestamp(info.getTimestamp());
    struct tm *tm = localtime(&timeT);
    // localtime() returns NULL when the time cannot be converted; skip the
    // date terms in that case instead of dereferencing the result
    if (tm != NULL)
    {
        string yyyymmdd = TimeConverter::toYYYYMMDDString(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday);
        if (yyyymmdd.length() == 8)
        {
            doc.add_term(string("D") + yyyymmdd);
            doc.add_term(string("M") + yyyymmdd.substr(0, 6));
            doc.add_term(string("Y") + yyyymmdd.substr(0, 4));
        }
    }
    // Finally, add the language code with prefix L
    doc.add_term(string("L") + Languages::toCode(m_stemLanguage));
    // ...and the MIME type with prefix T
    doc.add_term(string("T") + info.getType());
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:94,代码来源:XapianIndex.cpp
示例20: setDocumentLabels
/// Sets a document's labels.
bool XapianIndex::setDocumentLabels(unsigned int docId, const set<string> &labels,
bool resetLabels)
{
bool updatedLabels = false;
XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
if (pDatabase == NULL)
{
cerr << "Bad index " << m_databaseName << endl;
return false;
}
try
{
Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
if (pIndex != NULL)
{
Xapian::Document doc = pIndex->get_document(docId);
// Reset existing labels ?
if (resetLabels == true)
{
Xapian::TermIterator termIter = pIndex->termlist_begin(docId);
if (termIter != pIndex->termlist_end(docId))
{
for (termIter.skip_to("XLABEL:");
termIter != pIndex->termlist_end(docId); ++termIter)
{
// Is this a label ?
if (strncasecmp((*termIter).c_str(), "XLABEL:", min(7, (int)(*termIter).length())) == 0)
{
doc.remove_term(*termIter);
}
}
}
}
// Set new labels
for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end();
++labelIter)
{
if (labelIter->empty() == false)
{
doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
}
}
pIndex->replace_document(docId, doc);
updatedLabels = true;
}
}
catch (const Xapian::Error &error)
{
cerr << "Couldn't update document's labels: " << error.get_type() << ": " << error.get_msg() << endl;
}
catch (...)
{
cerr << "Couldn't update document's labels, unknown exception occured" << endl;
}
pDatabase->unlock();
return updatedLabels;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:64,代码来源:XapianIndex.cpp
注:本文中的Xapian::Document类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论