本文整理汇总了Java中org.carrot2.core.Document类的典型用法代码示例。如果您正苦于以下问题:Java Document类的具体用法?Java Document怎么用?Java Document使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Document类属于org.carrot2.core包,在下文中一共展示了Document类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: process
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Echoes the input back as clusters: creates exactly one cluster per input document.
 *
 * <p>Each cluster's phrases are the document's title and summary, the name of its
 * language (when one is set), and the string form of every non-null field listed in
 * the comma-separated {@code customFields}. The document itself is the cluster's
 * only member.
 *
 * @throws ProcessingException declared by the overridden method; not thrown directly here
 */
@Override
public void process() throws ProcessingException {
    clusters = Lists.newArrayListWithCapacity(documents.size());

    for (Document doc : documents) {
        final Cluster echo = new Cluster();
        echo.addPhrases(doc.getTitle(), doc.getSummary());

        // The language is optional on a document.
        if (doc.getLanguage() != null) {
            echo.addPhrases(doc.getLanguage().name());
        }

        // Append the value of every requested custom field that the document carries.
        final String[] fieldNames = customFields.split(",");
        for (int i = 0; i < fieldNames.length; i++) {
            final Object fieldValue = doc.getField(fieldNames[i]);
            if (fieldValue == null) {
                continue;
            }
            echo.addPhrases(fieldValue.toString());
        }

        echo.addDocuments(doc);
        clusters.add(echo);
    }
}
开发者ID:europeana,项目名称:search,代码行数:21,代码来源:EchoClusteringAlgorithm.java
示例2: displayResults
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Prints a full overview of a processing result to the console: its documents,
 * its clusters and finally its attributes.
 *
 * @author GS
 * @param processingResult the result whose documents, clusters and attributes are printed
 */
public static void displayResults(ProcessingResult processingResult)
{
    // Fetch all three parts up front, then print them in order.
    final Collection<Document> allDocuments = processingResult.getDocuments();
    final Collection<Cluster> allClusters = processingResult.getClusters();
    final Map<String, Object> resultAttributes = processingResult.getAttributes();

    // Documents are printed only when the result carries them.
    if (allDocuments != null)
    {
        displayDocuments(allDocuments);
    }

    // Clusters are printed only when the result carries them.
    if (allClusters != null)
    {
        displayClusters(allClusters);
    }

    // Attributes are always printed.
    displayAttributes(resultAttributes);
}
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:27,代码来源:ConsoleFormatter.java
示例3: cluster
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Clusters every page read from the given file.
 *
 * <p>Each {@code Hit} supplied by the {@code JsonReader} is turned into a Carrot2
 * {@link Document} built from the page's title and content. The documents are then
 * clustered with {@code LingoClusteringAlgorithm}, the result is printed through
 * {@code ConsoleFormatter.displayResults} and finally returned.
 *
 * @author GS
 * @param docPath path of the file the {@code JsonReader} is opened on
 * @return the processing result produced by the controller
 * @throws IOException declared for the file-reading part of the method
 * @throws Exception declared by the original signature and kept for compatibility
 */
public ProcessingResult cluster(String docPath) throws IOException,
        Exception {
    @SuppressWarnings("unchecked")
    final Controller controller = ControllerFactory
            .createCachingPooling(IDocumentSource.class);
    // NOTE(review): a new controller is created on every call and never released here;
    // confirm whether it should be shared or disposed by the caller.

    final List<Document> documents = Lists.newArrayList();
    final JsonReader jr = new JsonReader(new File(docPath));
    try {
        while (jr.hasNext()) {
            final Hit h = jr.next();
            documents.add(new Document(h.getPagePOJO().getTitle(), h
                    .getPagePOJO().getContent()));
        }
    } finally {
        // Close the reader even when reading or document construction fails,
        // so the underlying file is not leaked on the error path.
        jr.close();
    }

    final Map<String, Object> attributes = Maps.newHashMap();
    CommonAttributesDescriptor.attributeBuilder(attributes).documents(
            documents);

    final ProcessingResult englishResult = controller.process(attributes,
            LingoClusteringAlgorithm.class);
    ConsoleFormatter.displayResults(englishResult); // print the outcome to the console
    return englishResult;
}
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:30,代码来源:Cluster.java
示例4: adapt
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Converts a Carrot2 {@link Cluster} into a {@code DocumentGroup}.
 *
 * <p>Copies the id, phrases, label, score and "other topics" flag, stores the string
 * id of every document in the cluster as a document reference, and converts the
 * subclusters through the list overload of {@code adapt}. A {@code null} subcluster
 * list is treated as an empty one.
 *
 * @param cluster the cluster to convert
 * @return the populated document group
 */
private DocumentGroup adapt(Cluster cluster) {
    final DocumentGroup result = new DocumentGroup();

    // Scalar properties and phrases.
    result.setId(cluster.getId());
    final List<String> phraseList = cluster.getPhrases();
    result.setPhrases(phraseList.toArray(new String[phraseList.size()]));
    result.setLabel(cluster.getLabel());
    result.setScore(cluster.getScore());
    result.setOtherTopics(cluster.isOtherTopics());

    // Documents are referenced by their string id only.
    final List<Document> members = cluster.getDocuments();
    final String[] references = new String[members.size()];
    int slot = 0;
    for (Document member : members) {
        references[slot++] = member.getStringId();
    }
    result.setDocumentReferences(references);

    // Nested clusters; fall back to an empty list when none are present.
    List<Cluster> children = cluster.getSubclusters();
    if (children == null) {
        children = Collections.emptyList();
    }
    result.setSubgroups(adapt(children));

    return result;
}
开发者ID:carrot2,项目名称:elasticsearch-carrot2,代码行数:23,代码来源:ClusteringAction.java
示例5: cluster
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Runs Carrot2 clustering over the given Solr documents.
 *
 * <p>The attribute map passed to the controller holds the converted documents, the
 * query string, the Solr field names used for clustering and any overriding
 * attributes found in the request parameters.
 *
 * @param query the query; its string form is passed to Carrot2
 * @param solrDocList the Solr documents to cluster
 * @param docIds mapping from Solr documents to integer ids, forwarded to {@code getDocuments}
 * @param sreq the current request
 * @return whatever {@code clustersToNamedList} produces for the resulting clusters
 * @throws SolrException with {@code SERVER_ERROR} when any step fails; the cause is preserved
 */
@Override
public Object cluster(Query query, SolrDocumentList solrDocList,
        Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
    try {
        // Assemble the attributes handed to the Carrot2 controller.
        final Map<String, Object> carrotAttributes = new HashMap<>();
        final List<Document> carrotDocuments = getDocuments(solrDocList, docIds, query, sreq);
        carrotAttributes.put(AttributeNames.DOCUMENTS, carrotDocuments);
        carrotAttributes.put(AttributeNames.QUERY, query.toString());

        // Fields on which clustering runs.
        carrotAttributes.put("solrFieldNames", getFieldsForClustering(sreq));

        // Request-level overrides, if any.
        extractCarrotAttributes(sreq.getParams(), carrotAttributes);

        // Carrot2 resolves certain classes (e.g. a custom tokenizer/stemmer) through the
        // current thread's context class loader at runtime. To make classes from contrib
        // JARs visible, the loader is swapped for the duration of the clustering call
        // and restored afterwards.
        final Thread currentThread = Thread.currentThread();
        final ClassLoader originalLoader = currentThread.getContextClassLoader();
        try {
            currentThread.setContextClassLoader(core.getResourceLoader().getClassLoader());
            return clustersToNamedList(
                    controller.process(carrotAttributes, clusteringAlgorithmClass).getClusters(),
                    sreq.getParams());
        } finally {
            currentThread.setContextClassLoader(originalLoader);
        }
    } catch (Exception e) {
        log.error("Carrot2 clustering failed", e);
        throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
    }
}
开发者ID:europeana,项目名称:search,代码行数:37,代码来源:CarrotClusteringEngine.java
示例6: cluster
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Runs Carrot2 clustering over the given Solr documents.
 *
 * <p>The attribute map passed to the controller holds the converted documents, the
 * query string, the Solr field names used for clustering and any overriding
 * attributes found in the request parameters.
 *
 * @param query the query; its string form is passed to Carrot2
 * @param solrDocList the Solr documents to cluster
 * @param docIds mapping from Solr documents to integer ids, forwarded to {@code getDocuments}
 * @param sreq the current request
 * @return whatever {@code clustersToNamedList} produces for the resulting clusters
 * @throws SolrException with {@code SERVER_ERROR} when any step fails; the cause is preserved
 */
@Override
public Object cluster(Query query, SolrDocumentList solrDocList,
        Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
    try {
        // Assemble the attributes handed to the Carrot2 controller.
        final Map<String, Object> carrotAttributes = new HashMap<String, Object>();
        final List<Document> carrotDocuments = getDocuments(solrDocList, docIds, query, sreq);
        carrotAttributes.put(AttributeNames.DOCUMENTS, carrotDocuments);
        carrotAttributes.put(AttributeNames.QUERY, query.toString());

        // Fields on which clustering runs, passed on to the
        // SolrStopwordsCarrot2LexicalDataFactory.
        carrotAttributes.put("solrFieldNames", getFieldsForClustering(sreq));

        // Request-level overrides, if any.
        extractCarrotAttributes(sreq.getParams(), carrotAttributes);

        // Carrot2 resolves certain classes (e.g. a custom tokenizer/stemmer) through the
        // current thread's context class loader at runtime. To make classes from contrib
        // JARs visible, the loader is swapped for the duration of the clustering call
        // and restored afterwards.
        final Thread currentThread = Thread.currentThread();
        final ClassLoader originalLoader = currentThread.getContextClassLoader();
        try {
            currentThread.setContextClassLoader(core.getResourceLoader().getClassLoader());
            return clustersToNamedList(
                    controller.process(carrotAttributes, clusteringAlgorithmClass).getClusters(),
                    sreq.getParams());
        } finally {
            currentThread.setContextClassLoader(originalLoader);
        }
    } catch (Exception e) {
        log.error("Carrot2 clustering failed", e);
        throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
    }
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:37,代码来源:CarrotClusteringEngine.java
示例7: cluster
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Runs Carrot2 clustering over the given Solr documents.
 *
 * <p>The attribute map passed to the controller holds the converted documents, the
 * query string, the Solr field names used for clustering and any overriding
 * attributes found in the request parameters.
 *
 * @param query the query; its string form is passed to Carrot2
 * @param solrDocList the Solr documents to cluster
 * @param docIds mapping from Solr documents to integer ids, forwarded to {@code getDocuments}
 * @param sreq the current request
 * @return whatever {@code clustersToNamedList} produces for the resulting clusters
 * @throws SolrException with {@code SERVER_ERROR} when any step fails; the cause is preserved
 */
@Override
public Object cluster(Query query, SolrDocumentList solrDocList,
        Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
    try {
        // Assemble the attributes handed to the Carrot2 controller.
        final Map<String, Object> carrotAttributes = new HashMap<String, Object>();
        final List<Document> carrotDocuments = getDocuments(solrDocList, docIds, query, sreq);
        carrotAttributes.put(AttributeNames.DOCUMENTS, carrotDocuments);
        carrotAttributes.put(AttributeNames.QUERY, query.toString());

        // Fields on which clustering runs.
        carrotAttributes.put("solrFieldNames", getFieldsForClustering(sreq));

        // Request-level overrides, if any.
        extractCarrotAttributes(sreq.getParams(), carrotAttributes);

        // Carrot2 resolves certain classes (e.g. a custom tokenizer/stemmer) through the
        // current thread's context class loader at runtime. To make classes from contrib
        // JARs visible, the loader is swapped for the duration of the clustering call
        // and restored afterwards.
        final Thread currentThread = Thread.currentThread();
        final ClassLoader originalLoader = currentThread.getContextClassLoader();
        try {
            currentThread.setContextClassLoader(core.getResourceLoader().getClassLoader());
            return clustersToNamedList(
                    controller.process(carrotAttributes, clusteringAlgorithmClass).getClusters(),
                    sreq.getParams());
        } finally {
            currentThread.setContextClassLoader(originalLoader);
        }
    } catch (Exception e) {
        log.error("Carrot2 clustering failed", e);
        throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
    }
}
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:37,代码来源:CarrotClusteringEngine.java
示例8: displayDocuments
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Prints the number of documents in the collection, then prints each document
 * at indent level 0 through {@code displayDocument}.
 *
 * @author GS
 * @param documents the documents to print
 */
public static void displayDocuments(final Collection<Document> documents)
{
    // Header line with the total document count.
    final String header = "Collected " + documents.size() + " documents\n";
    System.out.println(header);

    for (final Document current : documents)
    {
        displayDocument(0, current);
    }
}
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:14,代码来源:ConsoleFormatter.java
示例9: displayDocument
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Prints a single document: its string id and title on one line, followed by its
 * content URL on a second line when the URL is not blank, followed by an empty line.
 *
 * @author GS
 * @param level indent level, translated into a prefix by {@code getIndent}
 * @param document the document to print
 */
private static void displayDocument(final int level, Document document)
{
    final String indent = getIndent(level);

    // The indent is passed as an argument instead of being spliced into the format
    // string, so a '%' inside it can never be interpreted as a format specifier.
    System.out.printf("%s[%2s] ", indent, document.getStringId());

    // Binding the generically-typed field to Object selects println(Object)
    // explicitly; passing the call directly leaves the overload choice to inference.
    final Object title = document.getField(Document.TITLE);
    System.out.println(title);

    // The content URL is optional and printed only when present and non-blank.
    final String url = document.getField(Document.CONTENT_URL);
    if (StringUtils.isNotBlank(url))
    {
        System.out.println(indent + " " + url);
    }
    System.out.println();
}
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:20,代码来源:ConsoleFormatter.java
示例10: displayCluster
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Prints one cluster and, recursively, its subclusters.
 *
 * <p>Output consists of the cluster label with the formatter-supplied details, up to
 * {@code maxNumberOfDocumentsToShow} of the cluster's documents, and a
 * "... and N more" line when documents were left out.
 *
 * @author GS
 * @param level nesting depth of this cluster; drives the indentation
 * @param tag hierarchical tag of this cluster; it is extended with ".&lt;n&gt;" for
 *        each subcluster and passed down, but not printed by this method
 * @param cluster the cluster to print
 * @param maxNumberOfDocumentsToShow upper bound on documents printed per cluster
 * @param clusterDetailsFormatter produces the details text printed after the label
 */
private static void displayCluster(final int level, String tag, Cluster cluster,
        int maxNumberOfDocumentsToShow, ClusterDetailsFormatter clusterDetailsFormatter)
{
    final String label = cluster.getLabel();

    // Indent up to level and display this cluster's description phrase.
    for (int i = 0; i < level; i++)
    {
        System.out.print(" ");
    }
    System.out.println(label + " "
            + clusterDetailsFormatter.formatClusterDetails(cluster));

    // Display at most maxNumberOfDocumentsToShow of this cluster's documents,
    // one indent level deeper than the cluster itself.
    int documentsShown = 0;
    for (final Document document : cluster.getDocuments())
    {
        if (documentsShown >= maxNumberOfDocumentsToShow)
        {
            break;
        }
        displayDocument(level + 1, document);
        documentsShown++;
    }

    // Summarize the documents that were not shown.
    if (maxNumberOfDocumentsToShow > 0
            && (cluster.getDocuments().size() > documentsShown))
    {
        System.out.println(getIndent(level + 1) + "... and "
                + (cluster.getDocuments().size() - documentsShown) + " more\n");
    }

    // Finally, if this cluster has subclusters, descend into recursion. The counter
    // advances per subcluster so that each one receives a distinct tag suffix.
    int num = 1;
    for (final Cluster subcluster : cluster.getSubclusters())
    {
        displayCluster(level + 1, tag + "." + num, subcluster,
                maxNumberOfDocumentsToShow, clusterDetailsFormatter);
        num++;
    }
}
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:49,代码来源:ConsoleFormatter.java
示例11: clustersToNamedList
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Converts Carrot2 clusters into named-list entries appended to {@code parent}.
 *
 * <p>Each entry holds "labels" (at most {@code maxLabels} phrases), "score" when the
 * cluster has one, "other-topics" when the flag is set, "docs" (the
 * {@code SOLR_DOCUMENT_ID} field of each document) and, when subclusters are
 * requested and present, a nested "clusters" list built recursively.
 *
 * @param outputClusters the clusters to convert
 * @param parent the list receiving one entry per cluster
 * @param outputSubClusters whether subclusters are emitted; when {@code false} the
 *        "docs" entry is built from {@code getAllDocuments()} instead of {@code getDocuments()}
 * @param maxLabels maximum number of phrases kept under "labels"
 */
private void clustersToNamedList(List<Cluster> outputClusters,
        List<NamedList<Object>> parent, boolean outputSubClusters, int maxLabels) {
    for (Cluster source : outputClusters) {
        final NamedList<Object> entry = new SimpleOrderedMap<>();
        parent.add(entry);

        // Labels, truncated to the requested maximum.
        final List<String> phrases = source.getPhrases();
        entry.add("labels",
                phrases.size() > maxLabels ? phrases.subList(0, maxLabels) : phrases);

        // Score is optional.
        final Double clusterScore = source.getScore();
        if (clusterScore != null) {
            entry.add("score", clusterScore);
        }

        // Marker for the "other topics" cluster.
        if (source.isOtherTopics()) {
            entry.add("other-topics", source.isOtherTopics());
        }

        // Document ids: direct members when subclusters are output, all members otherwise.
        final List<Document> members;
        if (outputSubClusters) {
            members = source.getDocuments();
        } else {
            members = source.getAllDocuments();
        }
        final List<Object> memberIds = Lists.newArrayList();
        entry.add("docs", memberIds);
        for (Document member : members) {
            memberIds.add(member.getField(SOLR_DOCUMENT_ID));
        }

        // Nested clusters, only when requested and present.
        if (!outputSubClusters || source.getSubclusters().isEmpty()) {
            continue;
        }
        final List<NamedList<Object>> nested = Lists.newArrayList();
        entry.add("clusters", nested);
        clustersToNamedList(source.getSubclusters(), nested,
                outputSubClusters, maxLabels);
    }
}
开发者ID:europeana,项目名称:search,代码行数:42,代码来源:CarrotClusteringEngine.java
示例12: clustersToNamedList
import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Converts Carrot2 clusters into named-list entries appended to {@code parent}.
 *
 * <p>Each entry holds "labels" (at most {@code maxLabels} phrases), "score" when the
 * cluster has one, "other-topics" when the flag is set, "docs" (the
 * {@code SOLR_DOCUMENT_ID} field of each document) and, when subclusters are
 * requested and present, a nested "clusters" list built recursively.
 *
 * @param outputClusters the clusters to convert
 * @param parent the list receiving one entry per cluster
 * @param outputSubClusters whether subclusters are emitted; when {@code false} the
 *        "docs" entry is built from {@code getAllDocuments()} instead of {@code getDocuments()}
 * @param maxLabels maximum number of phrases kept under "labels"
 */
private void clustersToNamedList(List<Cluster> outputClusters,
        List<NamedList<Object>> parent, boolean outputSubClusters, int maxLabels) {
    for (Cluster source : outputClusters) {
        final NamedList<Object> entry = new SimpleOrderedMap<Object>();
        parent.add(entry);

        // Labels, truncated to the requested maximum.
        final List<String> phrases = source.getPhrases();
        entry.add("labels",
                phrases.size() > maxLabels ? phrases.subList(0, maxLabels) : phrases);

        // Score is optional.
        final Double clusterScore = source.getScore();
        if (clusterScore != null) {
            entry.add("score", clusterScore);
        }

        // Marker for the "other topics" cluster.
        if (source.isOtherTopics()) {
            entry.add("other-topics", source.isOtherTopics());
        }

        // Document ids: direct members when subclusters are output, all members otherwise.
        final List<Document> members;
        if (outputSubClusters) {
            members = source.getDocuments();
        } else {
            members = source.getAllDocuments();
        }
        final List<Object> memberIds = Lists.newArrayList();
        entry.add("docs", memberIds);
        for (Document member : members) {
            memberIds.add(member.getField(SOLR_DOCUMENT_ID));
        }

        // Nested clusters, only when requested and present.
        if (!outputSubClusters || source.getSubclusters().isEmpty()) {
            continue;
        }
        final List<NamedList<Object>> nested = Lists.newArrayList();
        entry.add("clusters", nested);
        clustersToNamedList(source.getSubclusters(), nested,
                outputSubClusters, maxLabels);
    }
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:42,代码来源:CarrotClusteringEngine.java
注:本文中的org.carrot2.core.Document类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论