本文整理汇总了Java中org.apache.lucene.document.Field.TermVector类的典型用法代码示例。如果您正苦于以下问题:Java TermVector类的具体用法?Java TermVector怎么用?Java TermVector使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
TermVector类属于org.apache.lucene.document.Field包,在下文中一共展示了TermVector类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: createIndexWriter
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Indexes each URL's content into the supplied Lucene directory.
 * <p>
 * Each {@code URLContentObject} becomes one document with an analyzed,
 * stored "id" field and a tokenized (unstored) "contents" field, both with
 * term vectors enabled. Stop words are loaded from
 * {@code <baseDir>/data/stopwords/stopwords_en.txt}.
 *
 * @param urlContentList the URL contents to index
 * @param ramDirectory   the directory that receives the index
 * @throws IOException if the stop-word file cannot be read or indexing fails
 */
private void createIndexWriter(List<URLContentObject> urlContentList,
Directory ramDirectory) throws IOException
{
Set<String> stopWords = new HashSet<String>(FileUtils.readLines(new File(baseDir+
"data/stopwords/stopwords_en.txt")));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36,stopWords);
IndexWriter indexWriter = new IndexWriter(ramDirectory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
try
{
    for(URLContentObject d:urlContentList)
    {
        Document document = new Document();
        // "id" is both stored (retrievable) and analyzed (searchable), with term vectors.
        document.add(new Field("id", d.getId(), Field.Store.YES, Field.Index.ANALYZED,TermVector.YES));
        StringReader reader=new StringReader(d.getContent());
        try
        {
            // Reader-backed field: tokenized but never stored.
            document.add(new Field("contents", reader, TermVector.YES));
            indexWriter.addDocument(document);
        }
        finally
        {
            // The reader is fully consumed by addDocument; release it even on failure.
            reader.close();
        }
    }
    indexWriter.commit();
}
finally
{
    // Previously the writer leaked if addDocument/commit threw; always close it.
    indexWriter.close();
}
}
开发者ID:siddBanPsu,项目名称:WikiKreator,代码行数:27,代码来源:SummarizeWebData.java
示例2: createDocument
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Builds the Lucene document for a single ontology term (or one of its
 * synonyms).
 *
 * @param term    the ontology term being indexed
 * @param value   the text to index in the term field
 * @param synonym whether {@code value} is a synonym rather than the primary name
 * @return the populated, not-yet-added document
 */
private Document createDocument(Term term, String value, boolean synonym) {
    Document document = new Document();

    // Ontology name: stored for retrieval only; never searched, so norms and
    // term frequencies/positions are omitted to keep the index small.
    Field ontology = new Field(FIELD_ONTOLOGY, term.getOntology().getName(),
            Field.Store.YES, Field.Index.NO, TermVector.NO);
    ontology.setOmitNorms(true);
    ontology.setOmitTermFreqAndPositions(true);
    document.add(ontology);

    // Reference id: indexed as a single exact token for lookups.
    Field id = new Field(FIELD_ID, term.getReferenceId(),
            Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO);
    id.setOmitNorms(true);
    id.setOmitTermFreqAndPositions(true);
    document.add(id);

    // Term text: fully analyzed with positions and offsets so phrase queries
    // and highlighting work against it.
    document.add(new Field(FIELD_TERM, value,
            Field.Store.YES, Field.Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));

    // Synonym marker stored as a one-byte binary flag (1 = synonym, 0 = primary).
    byte[] flag = synonym ? new byte[] {1} : new byte[] {0};
    document.add(new Field(FIELD_SYNONYM, flag, Field.Store.YES));

    return document;
}
开发者ID:Novartis,项目名称:ontobrowser,代码行数:36,代码来源:OntologySearchServiceImpl.java
示例3: NonDictionaryField
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Creates a field definition for a field that is not driven by the data
 * dictionary; the caller supplies every indexing property explicitly.
 *
 * @param name        the index field name
 * @param store       whether the raw value is stored in the index
 * @param index       how (and whether) the value is indexed
 * @param termVector  which term vector data to record for the field
 * @param multiValued whether the field may hold multiple values
 */
public NonDictionaryField(String name, Store store, Index index, TermVector termVector, boolean multiValued)
{
this.name = name;
this.store = store;
this.index = index;
this.termVector = termVector;
this.multiValued = multiValued;
}
开发者ID:Alfresco,项目名称:community-edition-old,代码行数:12,代码来源:AlfrescoSolrDataModel.java
示例4: getCosineSimilarityMatrix
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Builds a normalized TF-IDF vector for each input sentence, suitable for
 * pairwise cosine-similarity comparison.
 * <p>
 * Each sentence is indexed as its own document (term vectors only, nothing
 * stored) using a {@code StopAnalyzer} with the stop list at
 * {@code lib/stoplists/en.txt}. Every distinct term in the "contents" field
 * is assigned a dense vector position shared by all documents.
 *
 * @param fileSentences the sentences to vectorize, one document each
 * @return one normalized {@code DocVector} per input sentence, in order
 * @throws IOException if the stop list cannot be read or indexing fails
 */
public static DocVector[] getCosineSimilarityMatrix(List<String> fileSentences) throws IOException{
RAMDirectory ramDir = new RAMDirectory();
// Previously this FileReader was never closed — a file-handle leak.
FileReader stopListReader = new FileReader(new File("lib/stoplists/en.txt"));
try {
    Analyzer analyzer = new StopAnalyzer(Version.LUCENE_36, stopListReader);
    IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_36, analyzer));
    try {
        for (String s : fileSentences)
        {
            Document doc = new Document();
            // Reader-backed field: tokenized, term vectors kept, value not stored.
            doc.add(new Field("contents", new StringReader(s), TermVector.YES));
            writer.addDocument(doc);
        }
    } finally {
        // close() commits pending documents and releases the writer.
        writer.close();
    }
} finally {
    stopListReader.close();
}

DocVector[] docs = new DocVector[fileSentences.size()];
IndexReader reader = IndexReader.open(ramDir);
try {
    // Assign every distinct "contents" term a dense vector position.
    Map<String,Integer> terms = new HashMap<String,Integer>();
    TermEnum termEnum = reader.terms(new Term("contents"));
    try {
        int pos = 0;
        while (termEnum.next()) {
            Term term = termEnum.term();
            // reader.terms(t) enumerates from t onward; stop at the next field.
            if (!"contents".equals(term.field()))
                break;
            terms.put(term.text(), pos++);
        }
    } finally {
        // TermEnum holds index resources and must be closed (was leaked before).
        termEnum.close();
    }

    for (int i = 0; i < fileSentences.size(); i++)
    {
        docs[i] = new DocVector(terms);
        TermFreqVector[] tfvs = reader.getTermFreqVectors(i);
        if (tfvs == null)
            continue; // document had no indexed terms (e.g. all stop words)
        for (TermFreqVector tfv : tfvs)
        {
            String[] termTexts = tfv.getTerms();
            int[] termFreqs = tfv.getTermFrequencies();
            for (int j = 0; j < termTexts.length; j++) {
                // TF-IDF weight: raw term frequency scaled by inverse document frequency.
                double idfValue = getIDF(reader, termTexts[j]);
                docs[i].setEntry(termTexts[j], termFreqs[j] * idfValue);
            }
        }
        docs[i].normalize();
    }
} finally {
    reader.close();
}
ramDir.close();
return docs;
}
开发者ID:siddBanPsu,项目名称:WikiKreator,代码行数:71,代码来源:CosineDocumentSimilarity.java
示例5: addNonDictionaryField
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Registers a non-dictionary field definition in the static
 * {@code nonDictionaryFields} map, keyed by field name.
 * A later registration with the same name replaces the earlier one.
 */
private static void addNonDictionaryField(String name, Store store, Index index, TermVector termVector, boolean multiValued)
{
nonDictionaryFields.put(name, new NonDictionaryField(name, store, index, termVector, multiValued));
}
开发者ID:Alfresco,项目名称:community-edition-old,代码行数:5,代码来源:AlfrescoSolrDataModel.java
示例6: addAdditionalContentField
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Registers an additional content field definition in the static
 * {@code additionalContentFields} map, keyed by field name.
 * A later registration with the same name replaces the earlier one.
 */
private static void addAdditionalContentField(String name, Store store, Index index, TermVector termVector, boolean multiValued)
{
additionalContentFields.put(name, new NonDictionaryField(name, store, index, termVector, multiValued));
}
开发者ID:Alfresco,项目名称:community-edition-old,代码行数:5,代码来源:AlfrescoSolrDataModel.java
示例7: addAdditionalTextField
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Registers an additional text field definition in the static
 * {@code additionalTextFields} map, keyed by field name.
 * A later registration with the same name replaces the earlier one.
 */
private static void addAdditionalTextField(String name, Store store, Index index, TermVector termVector, boolean multiValued)
{
additionalTextFields.put(name, new NonDictionaryField(name, store, index, termVector, multiValued));
}
开发者ID:Alfresco,项目名称:community-edition-old,代码行数:5,代码来源:AlfrescoSolrDataModel.java
示例8: addAdditionalMlTextField
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Registers an additional multilingual-text field definition in the static
 * {@code additionalMlTextFields} map, keyed by field name.
 * A later registration with the same name replaces the earlier one.
 */
private static void addAdditionalMlTextField(String name, Store store, Index index, TermVector termVector, boolean multiValued)
{
additionalMlTextFields.put(name, new NonDictionaryField(name, store, index, termVector, multiValued));
}
开发者ID:Alfresco,项目名称:community-edition-old,代码行数:5,代码来源:AlfrescoSolrDataModel.java
示例9: getFieldTermVec
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Returns the term vector setting to use when indexing the given schema
 * field. This model never records term vectors, regardless of the field.
 *
 * @param field the schema field being indexed (currently ignored)
 * @return always {@code TermVector.NO}
 */
public TermVector getFieldTermVec(SchemaField field)
{
return TermVector.NO;
}
开发者ID:Alfresco,项目名称:community-edition-old,代码行数:9,代码来源:AlfrescoSolrDataModel.java
示例10: getFieldTermVec
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Delegates the term vector decision for this field to the shared
 * {@code AlfrescoSolrDataModel} instance identified by {@code id};
 * {@code internalVal} is not consulted.
 */
@Override
protected TermVector getFieldTermVec(SchemaField field, String internalVal)
{
return AlfrescoSolrDataModel.getInstance(id).getFieldTermVec(field);
}
开发者ID:Alfresco,项目名称:community-edition-old,代码行数:6,代码来源:AlfrescoDataType.java
示例11: run
import org.apache.lucene.document.Field.TermVector; //导入依赖的package包/类
/**
 * Computes the cosine similarity between two documents.
 * <p>
 * Both strings are indexed into an in-memory index (field "content",
 * analyzed, with term vectors), a shared term-to-position map is built from
 * the index, and each document is turned into a normalized raw
 * term-frequency vector before the similarity is taken.
 *
 * @param doc1 the first document's text
 * @param doc2 the second document's text
 * @return the cosine similarity of the two documents' term-frequency vectors
 * @throws IOException if indexing or reading the in-memory index fails
 */
public double run(String doc1,String doc2) throws IOException
{
// Stash the inputs in the instance array that drives indexing below.
s[0]=doc1;
s[1]=doc2;
Directory index = new RAMDirectory();
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
IndexWriter writer = new IndexWriter(index, config);
try {
    for (String si : s) {
        Document doc = new Document();
        doc.add(new Field("content", si, Field.Store.YES, Field.Index.ANALYZED,TermVector.WITH_POSITIONS_OFFSETS));
        writer.addDocument(doc);
    }
} finally {
    // Previously the writer leaked if addDocument threw; always close it.
    writer.close();
}

IndexReader reader = IndexReader.open(index);
double cosim01;
try {
    // Map every distinct "content" term to a dense vector position.
    Map<String,Integer> terms = new HashMap<String,Integer>();
    TermEnum termEnum = reader.terms(new Term("content"));
    try {
        int pos = 0;
        while (termEnum.next()) {
            Term term = termEnum.term();
            // reader.terms(t) enumerates from t onward; stop at the next field.
            if (! "content".equals(term.field())) break;
            terms.put(term.text(), pos++);
        }
    } finally {
        // TermEnum holds index resources and must be closed (was leaked before).
        termEnum.close();
    }

    // Build a raw term-frequency vector per document, then normalize it.
    DocVector[] docs = new DocVector[s.length];
    for (int i=0; i<s.length; i++) {
        docs[i] = new DocVector(terms);
        TermFreqVector[] tfvs = reader.getTermFreqVectors(i);
        for (TermFreqVector tfv : tfvs) {
            String[] termTexts = tfv.getTerms();
            int[] termFreqs = tfv.getTermFrequencies();
            for (int j = 0; j < termTexts.length; j++) {
                docs[i].setEntry(termTexts[j], termFreqs[j]);
            }
        }
        docs[i].normalize();
    }
    cosim01 = getCosineSimilarity(docs[0], docs[1]);
} finally {
    // Previously the reader leaked if vector construction threw.
    reader.close();
}
return cosim01;
}
开发者ID:amark-india,项目名称:eventspotter,代码行数:78,代码来源:CosineSimilarity.java
注:本文中的org.apache.lucene.document.Field.TermVector类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论