This article collects typical usage examples of the Python class sumy.summarizers.lsa.LsaSummarizer. If you are wondering what exactly LsaSummarizer does, how to use it, or what real code that uses it looks like, the curated class code examples below should help.
The 20 LsaSummarizer code examples below are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.
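Before the examples, here is a minimal, self-contained sketch of the pattern that nearly all of the snippets below follow: parse some text, build an LsaSummarizer (optionally with a stemmer), assign stop words, then call the summarizer with a document and a sentence count. The sample text, the LANGUAGE constant, and the sentence count of 2 are illustrative assumptions for this sketch only and are not taken from any of the examples below.

# Minimal LsaSummarizer sketch (assumes sumy is installed, e.g. pip install sumy,
# along with the NLTK tokenizer data for the chosen language).
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.utils import get_stop_words

LANGUAGE = "english"  # assumed language for the tokenizer, stemmer and stop words
SAMPLE_TEXT = (
    "Sumy is a library for extractive text summarization. "
    "The LSA summarizer builds a term-by-sentence matrix and decomposes it with SVD. "
    "Sentences associated with the strongest latent topics are returned as the summary."
)

parser = PlaintextParser.from_string(SAMPLE_TEXT, Tokenizer(LANGUAGE))
summarizer = LsaSummarizer(Stemmer(LANGUAGE))    # the stemmer argument is optional
summarizer.stop_words = get_stop_words(LANGUAGE)

for sentence in summarizer(parser.document, 2):  # 2 = illustrative sentence count
    print(sentence)

The examples that follow vary mainly in where the text comes from (PlaintextParser.from_string, PlaintextParser.from_file, or HtmlParser.from_url) and in how the returned Sentence objects are joined back into a string.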
Example 1: summarizeFile

def summarizeFile(inputFile):
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    url = findURLS(inputFile)
    if url != None:
        if url[-1] == '.':
            url = url[0:-1]
        #print (url)
        #urlContent = 'Summary from URL ['+url+']: \n'
        urlContent = ''
        try:
            parser = HtmlParser.from_url(url, Tokenizer("english"))
            for sentence in summarizer(parser.document, 3):
                urlContent = urlContent + str(sentence) + '\n'
        except:
            #print (sys.exc_info()[0])
            urlContent = ''
    content = inputFile.read()
    parser = PlaintextParser.from_string(content, Tokenizer(LANGUAGE))
    #summarizer = LsaSummarizer(stem_word)
    #summarizer.stop_words = get_stop_words(LANGUAGE)
    #summary = 'Event Summary: \n'
    summary = ''
    try:
        for sentence in summarizer(parser.document, SENTENCES_COUNT_1):
            summary = summary + str(sentence) + '\n'
    except AssertionError:
        return None
    if url != None:
        return summary + urlContent
    return summary

Developer: kansal, Project: Sub-Event-Detection, Lines of code: 31, Source: content_url.py
Example 2: test_single_sentence

def test_single_sentence(self):
    document = build_document(("I am the sentence you like",))
    summarizer = LsaSummarizer()
    summarizer.stopwords = ("I", "am", "the",)
    sentences = summarizer(document, 10)
    self.assertEqual(len(sentences), 1)
    self.assertEqual(to_unicode(sentences[0]), "I am the sentence you like")

Developer: jgomezdans, Project: sumy, Lines of code: 8, Source: test_lsa.py
Example 3: test_single_sentence

def test_single_sentence():
    document = build_document(("I am the sentence you like",))
    summarizer = LsaSummarizer()
    summarizer.stopwords = ("I", "am", "the",)
    sentences = summarizer(document, 10)
    assert len(sentences) == 1
    assert to_unicode(sentences[0]) == "I am the sentence you like"

Developer: miso-belica, Project: sumy, Lines of code: 8, Source: test_lsa.py
Example 4: lsa

def lsa(comment, parser, num):
    summarizer = LsaSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    LSAstr = ''
    for sentence in summarizer(parser.document, num):
        LSAstr += str(sentence)
    return LSAstr

Developer: WangWenjun559, Project: Weiss, Lines of code: 9, Source: sumy.py
Example 5: summarize

def summarize(filename, num_sentences):
    with open(filename, "r") as myfile:
        data = myfile.read()
    parser = PlaintextParser.from_string(data, Tokenizer('english'))
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    summary = ""
    for sentence in summarizer(parser.document, num_sentences):
        # Python 2 idiom: drops non-ASCII characters and strips quotes from each sentence
        summary += sentence.__unicode__().encode('ascii', 'ignore').replace('\"', '').replace('\'', '').strip() + " "
    return summary

Developer: cestella, Project: senate_speech_investigation, Lines of code: 10, Source: generate_summaries.py
Example 6: summary

def summary(self, int1, int2):
    # int1, int2 are the places between which to look for
    # the summary to be taken (slicing the corpus as a string)
    parser = PlaintextParser(self.corpus[int1:int2], Tokenizer("english"))
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    self.summary_text = " ".join(
        map(lambda x: x._text,
            summarizer(parser.document, 20)))
    return self.summary_text

Developer: nalourie, Project: digital-humanities, Lines of code: 10, Source: analysis_script.py
Example 7: test_issue_5_sigma_can_multiply_matrix_v

def test_issue_5_sigma_can_multiply_matrix_v(self):
    """Source: https://github.com/miso-belica/sumy/issues/5"""
    parser = PlaintextParser.from_string(
        load_resource("articles/sigma_can_multiply_matrix_v.txt"),
        Tokenizer("english")
    )
    summarizer = LsaSummarizer(english_stemmer)
    summarizer.stop_words = get_stop_words("english")
    sentences = summarizer(parser.document, 20)
    self.assertEqual(len(sentences), 20)

Developer: abensrhir, Project: sumy, Lines of code: 11, Source: test_lsa.py
Example 8: test_article_example

def test_article_example():
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    parser = PlaintextParser.from_string(
        load_resource("articles/prevko_cz_1.txt"),
        Tokenizer("czech")
    )
    summarizer = LsaSummarizer(Stemmer("czech"))
    summarizer.stop_words = get_stop_words("czech")
    sentences = summarizer(parser.document, 20)
    assert len(sentences) == 20

Developer: miso-belica, Project: sumy, Lines of code: 11, Source: test_lsa.py
Example 9: test_real_example

def test_real_example(self):
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    parser = PlaintextParser.from_string(
        load_resource("snippets/prevko.txt"),
        Tokenizer("czech")
    )
    summarizer = LsaSummarizer(Stemmer("czech"))
    summarizer.stop_words = get_stop_words("czech")
    sentences = summarizer(parser.document, 2)
    self.assertEqual(len(sentences), 2)

Developer: likegitcoding, Project: sumy, Lines of code: 11, Source: test_lsa.py
Example 10: test_document

def test_document(self):
    document = build_document(
        ("I am the sentence you like", "Do you like me too",),
        ("This sentence is better than that above", "Are you kidding me",)
    )
    summarizer = LsaSummarizer()
    summarizer.stopwords = ("I", "am", "the", "you", "are", "me", "is", "than", "that", "this",)
    sentences = summarizer(document, 2)
    self.assertEqual(len(sentences), 2)
    self.assertEqual(to_unicode(sentences[0]), "I am the sentence you like")
    self.assertEqual(to_unicode(sentences[1]), "This sentence is better than that above")

Developer: jgomezdans, Project: sumy, Lines of code: 12, Source: test_lsa.py
Example 11: test_issue_5_svd_converges

def test_issue_5_svd_converges():
    """Source: https://github.com/miso-belica/sumy/issues/5"""
    pytest.skip("Can't reproduce the issue.")
    parser = PlaintextParser.from_string(
        load_resource("articles/svd_converges.txt"),
        Tokenizer("english")
    )
    summarizer = LsaSummarizer(Stemmer("english"))
    summarizer.stop_words = get_stop_words("english")
    sentences = summarizer(parser.document, 20)
    assert len(sentences) == 20

Developer: miso-belica, Project: sumy, Lines of code: 13, Source: test_lsa.py
Example 12: test_dictionary_without_stop_words

def test_dictionary_without_stop_words(self):
    summarizer = LsaSummarizer()
    summarizer.stop_words = ["stop", "Halt", "SHUT", "HmMm"]
    document = build_document(
        ("stop halt shut hmmm", "Stop Halt Shut Hmmm",),
        ("StOp HaLt ShUt HmMm", "STOP HALT SHUT HMMM",),
        ("Some relevant sentence", "Some moRe releVant sentEnce",),
    )
    expected = frozenset(["some", "more", "relevant", "sentence"])
    dictionary = summarizer._create_dictionary(document)
    self.assertEqual(expected, frozenset(dictionary.keys()))

Developer: jgomezdans, Project: sumy, Lines of code: 13, Source: test_lsa.py
Example 13: lsaReferenceSummary

def lsaReferenceSummary(path):
    sentencesList = []
    parser = PlaintextParser.from_file(path, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = LsaSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        #print(sentence._text)
        sentencesList.append(sentence._text)
    return sentencesList

Developer: ab93, Project: Text-Summarization, Lines of code: 13, Source: api.py
Example 14: summarize_text

def summarize_text(textbody):
    parser = PlaintextParser.from_string(textbody, Tokenizer(LANG))
    stemmer = Stemmer(LANG)
    summarizer = LsaSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANG)
    summary = summarizer(parser.document, SENTENCE_COUNT)
    summarized_text = ''
    for sentence in summary:
        summarized_text += str(sentence) + ' '
    return summarized_text

Developer: mihir3535, Project: SummarizerBot, Lines of code: 14, Source: Summarizer.py
Example 15: main

def main(url, num_sentences=10, language='english'):
    parser = HtmlParser.from_url(url, Tokenizer(language))
    stemmer = Stemmer(language)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)
    for sentence in summarizer(parser.document, num_sentences):
        print(sentence)

Developer: 493238731, Project: text-learning, Lines of code: 7, Source: summarize.py
Example 16: summarize

def summarize(text, n_sentences, sep='\n'):
    '''
    Args:
        text (str or file): text itself or file in memory of text
        n_sentences (int): number of sentences to include in summary

    Kwargs:
        sep (str): separator to join summary sentences

    Returns:
        (str) n_sentences-long, automatically-produced summary of text
    '''
    if isinstance(text, str):
        parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    elif isinstance(text, file):
        parser = PlaintextParser.from_file(text, Tokenizer(LANGUAGE))
    else:
        raise TypeError('text must be either str or file')

    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    return '\n'.join(str(s) for s in summarizer(parser.document, n_sentences))

Developer: mtpain, Project: iatv, Lines of code: 26, Source: iatv.py
Example 17: summarize

def summarize(string, summary_length=1, language="english"):
    string = string.lower() if string.isupper() else string
    parser = PlaintextParser.from_string(string, Tokenizer(language))
    stemmer = Stemmer(language)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)
    return ". ".join([str(sentence) for sentence in summarizer(parser.document, summary_length)])

Developer: jwmueller, Project: SemanticTextDB, Lines of code: 8, Source: summarizer.py
Example 18: summarize

def summarize(text):
    total = ""
    parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        total += str(sentence)
    return total

Developer: notexactlyawe, Project: paper-reader, Lines of code: 9, Source: summarize.py
Example 19: summarizeText

def summarizeText(self, body, numSentences=10):
    """Summarizes body of text to numSentences
    """
    parser = PlaintextParser.from_string(body, Tokenizer(self.LANG))
    stemmer = Stemmer(self.LANG)
    summarizer = SumySummarizer(stemmer)
    summarizer.stop_words = get_stop_words(self.LANG)
    summary = ' '.join([str(sentence).decode('utf-8') for sentence in summarizer(parser.document, numSentences)])
    return summary

Developer: mchenchen, Project: HouseDoctor, Lines of code: 10, Source: Summarizer.py
Example 20: summarize

def summarize(content):
    parser = PlaintextParser.from_string(content.body, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    text = '\n'.join(
        [str(sentence) for sentence in summarizer(parser.document, COUNT)]
    )
    summary = Summary(content=content, summary=text)
    summary.save()

Developer: tishmen, Project: keyword_research, Lines of code: 10, Source: contents_helpers.py
Note: The sumy.summarizers.lsa.LsaSummarizer class examples in this article were compiled by 纯净天空 from source code and documentation hosted on platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright remains with the original authors. Please consult the license of the corresponding project before redistributing or reusing the code; do not republish this article without permission.