This article collects typical usage examples of the Python function sumy.utils.get_stop_words. If you are unsure what get_stop_words does or how to call it, the curated examples below should help.
The following section presents 20 code examples of get_stop_words, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python examples.
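Before the project examples, here is a minimal self-contained sketch of the typical pattern (the sample text and sentence count are placeholders, not taken from any project below). get_stop_words(language) returns a frozenset of stop-word strings for the given language, which is assigned to a summarizer's stop_words attribute before summarizing:

# Minimal usage sketch (placeholder text and parameters; not from the projects below)
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.utils import get_stop_words

text = "Sumy is a library for automatic text summarization. It implements several extraction algorithms."
parser = PlaintextParser.from_string(text, Tokenizer("english"))
summarizer = LsaSummarizer(Stemmer("english"))
summarizer.stop_words = get_stop_words("english")  # frozenset of English stop words
for sentence in summarizer(parser.document, 1):
    print(sentence)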
Example 1: main

def main(url, num_sentences=10, language='english'):
    parser = HtmlParser.from_url(url, Tokenizer(language))
    stemmer = Stemmer(language)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)
    for sentence in summarizer(parser.document, num_sentences):
        print(sentence)

Developer: 493238731, Project: text-learning, Lines: 7, Source: summarize.py
Example 2: summarize_url

def summarize_url(url, summarizer):
    # E.G. url = "http://www.cnn.com/2016/06/12/politics/hillary-clinton-bernie-sanders-meeting-tuesday/index.html"
    print('Summarizing', url)
    parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
    # or for plain text files
    # parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    if summarizer == 'luhn':
        summarizer = LuhnSummarizer(stemmer)
    elif summarizer == 'edmundson':
        summarizer = ESummarizer(stemmer)
    elif summarizer == 'lsa':
        summarizer = LsaSummarizer(stemmer)
    elif summarizer == 'lex':
        summarizer = LexSummarizer(stemmer)
    elif summarizer == 'text':
        summarizer = TextSummarizer(stemmer)
    elif summarizer == 'sb':
        summarizer = SumBasicSummarizer(stemmer)
    else:
        summarizer = KLSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    print(summarizer)
    sentences = []
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)
        sentences.append(str(sentence))
    return sentences

Developer: scottstanie, Project: summarizer, Lines: 32, Source: app.py
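A small design note on the example above: the if/elif chain maps a name to a summarizer class and then assigns get_stop_words(LANGUAGE) once. The same dispatch can be expressed with a lookup table; the sketch below is a hypothetical refactor that reuses the aliases imported by the original project (ESummarizer, LexSummarizer, TextSummarizer) and is not part of its source:

# Hypothetical table-driven dispatch (same behavior as the if/elif chain above)
SUMMARIZERS = {
    'luhn': LuhnSummarizer,
    'edmundson': ESummarizer,
    'lsa': LsaSummarizer,
    'lex': LexSummarizer,
    'text': TextSummarizer,
    'sb': SumBasicSummarizer,
}

def build_summarizer(name, language=LANGUAGE):
    summarizer_class = SUMMARIZERS.get(name, KLSummarizer)  # KL is the fallback, as above
    summarizer = summarizer_class(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    return summarizer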
Example 3: summarize

def summarize(corpus, length, algorithm):
    summarizer = None
    summary = "No compatible summarizer was selected, please use one of these : textrank, lexrank, luhn, edmonson*, kl, lsa, sumbasic, random (* doesn't work yet)"
    algorithm = algorithm.lower()
    try:
        parser = PlaintextParser.from_string(corpus, Tokenizer(LANGUAGE))
        if algorithm == "textrank":
            summarizer = TextRankSummarizer(Stemmer(LANGUAGE))
        elif algorithm == "lexrank":
            summarizer = LexRankSummarizer(Stemmer(LANGUAGE))
        elif algorithm == "luhn":
            summarizer = LuhnSummarizer(Stemmer(LANGUAGE))
        elif algorithm == "edmundson":
            summarizer = EdmundsonSummarizer(Stemmer(LANGUAGE))
        elif algorithm == "kl":
            summarizer = KLSummarizer(Stemmer(LANGUAGE))
        elif algorithm == "lsa":
            summarizer = LsaSummarizer(Stemmer(LANGUAGE))
        elif algorithm == "sumbasic":
            summarizer = SumBasicSummarizer(Stemmer(LANGUAGE))
        elif algorithm == "random":
            summarizer = RandomSummarizer(Stemmer(LANGUAGE))
        if summarizer:
            summarizer.stop_words = get_stop_words(LANGUAGE)
            summary = " ".join([obj._text for obj in summarizer(parser.document, length)])
        return summary
    except Exception as e:
        return str(e)

Developer: ferryxo, Project: IUSE, Lines: 31, Source: AutoSummary.py
Example 4: summarize

def summarize(text, n_sentences, sep='\n'):
    '''
    Args:
        text (str or file): text itself or file in memory of text
        n_sentences (int): number of sentences to include in summary
    Kwargs:
        sep (str): separator to join summary sentences
    Returns:
        (str) n_sentences-long, automatically-produced summary of text
    '''
    if isinstance(text, str):
        parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    elif isinstance(text, file):
        parser = PlaintextParser.from_file(text, Tokenizer(LANGUAGE))
    else:
        raise TypeError('text must be either str or file')
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    return '\n'.join(str(s) for s in summarizer(parser.document, n_sentences))

Developer: mtpain, Project: iatv, Lines: 26, Source: iatv.py
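One caveat about Example 4: file is a built-in type only in Python 2, so the isinstance check raises a NameError on Python 3. A Python 3 variant of the parsing branch (a sketch assuming the same LANGUAGE constant and imports as the original, not taken from its source) could read the file object and reuse from_string:

import io

# Python 3 variant of the branch above (sketch; not from the original project)
if isinstance(text, str):
    parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
elif isinstance(text, io.IOBase):  # an open file object
    parser = PlaintextParser.from_string(text.read(), Tokenizer(LANGUAGE))
else:
    raise TypeError('text must be either str or an open file object')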
Example 5: summarize_with_info

def summarize_with_info(self, corpus, length, algorithm):
    parser = PlaintextParser.from_string(corpus, Tokenizer(self.LANGUAGE))
    if algorithm == "textrank":
        summarizer = TextRankSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "lexrank":
        summarizer = LexRankSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "luhn":
        summarizer = LuhnSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "edmundson":
        summarizer = EdmundsonSummarizer(Stemmer(self.LANGUAGE))
        summarizer.bonus_words = parser.significant_words
        summarizer.stigma_words = parser.stigma_words
    elif algorithm == "kl":
        summarizer = KLSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "lsa":
        summarizer = LsaSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "sumbasic":
        summarizer = SumBasicSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "random":
        summarizer = RandomSummarizer(Stemmer(self.LANGUAGE))
    else:
        raise NotImplementedError("Summary algorithm is not available")
    summarizer.stop_words = get_stop_words(self.LANGUAGE)
    return summarizer(parser.document, length)

Developer: ferryxo, Project: AutoSummaryV1, Lines: 27, Source: AutoSummary.py
Example 6: summarize

def summarize(self, corpus, length, algorithm):
    parser = PlaintextParser.from_string(corpus, Tokenizer(self.LANGUAGE))
    if algorithm == "textrank":
        summarizer = TextRankSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "lexrank":
        summarizer = LexRankSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "luhn":
        summarizer = LuhnSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "edmundson":
        summarizer = EdmundsonSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "kl":
        summarizer = KLSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "lsa":
        summarizer = LsaSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "sumbasic":
        summarizer = SumBasicSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "random":
        summarizer = RandomSummarizer(Stemmer(self.LANGUAGE))
    else:
        raise NotImplementedError("Summary algorithm is not available")
    summarizer.stop_words = get_stop_words(self.LANGUAGE)
    summary = " ".join([obj._text for obj in summarizer(parser.document, length)])
    return summary

Developer: peerlogic, Project: AutoSummaryV1, Lines: 26, Source: AutoSummary.py
Example 7: summarizeFile

def summarizeFile(inputFile):
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    url = findURLS(inputFile)
    if url != None:
        if url[-1] == '.':
            url = url[0:-1]
        #print (url)
        #urlContent = 'Summary from URL ['+url+']: \n'
        urlContent = ''
        try:
            parser = HtmlParser.from_url(url, Tokenizer("english"))
            for sentence in summarizer(parser.document, 3):
                urlContent = urlContent + str(sentence) + '\n'
        except:
            #print (sys.exc_info()[0])
            urlContent = ''
    content = inputFile.read()
    parser = PlaintextParser.from_string(content, Tokenizer(LANGUAGE))
    #summarizer = LsaSummarizer(stem_word)
    #summarizer.stop_words = get_stop_words(LANGUAGE)
    #summary = 'Event Summary: \n'
    summary = ''
    try:
        for sentence in summarizer(parser.document, SENTENCES_COUNT_1):
            summary = summary + str(sentence) + '\n'
    except AssertionError:
        return None
    if url != None:
        return summary + urlContent
    return summary

Developer: kansal, Project: Sub-Event-Detection, Lines: 31, Source: content_url.py
Example 8: summarize

def summarize(string, summary_length=1, language="english"):
    string = string.lower() if string.isupper() else string
    parser = PlaintextParser.from_string(string, Tokenizer(language))
    stemmer = Stemmer(language)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)
    return ". ".join([str(sentence) for sentence in summarizer(parser.document, summary_length)])

Developer: jwmueller, Project: SemanticTextDB, Lines: 8, Source: summarizer.py
Example 9: luhn

def luhn(self, text_parser):
    assert isinstance(text_parser, plaintext.PlaintextParser)
    summarizer = Luhn()
    #EnglishStemmer())
    #summarizer.stop_words=stopwords.words("english")
    summarizer.stop_words = get_stop_words(settings.SUMMARIZER_LANGUAGE)
    return summarizer(text_parser.document, settings.SUMMARIZER_TOP_X_SENTENCES)

Developer: wangk1, Project: research, Lines: 9, Source: summarizer.py
Example 10: summarize

def summarize(url):
    summary = []
    parser = HtmlParser.from_url(url, Tokenizer(lang))
    stemmer = Stemmer(lang)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(lang)
    for sentence in summarizer(parser.document, sent):
        summary.append(sentence._text)
    return ' '.join(summary)

Developer: karthiknrao, Project: scarface, Lines: 9, Source: ddgsrchsum.py
Example 11: summarize

def summarize(text):
    total = ""
    parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        total += str(sentence)
    return total

Developer: notexactlyawe, Project: paper-reader, Lines: 9, Source: summarize.py
Example 12: lsa

def lsa(comment, parser, num):
    summarizer = LsaSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    LSAstr = ''
    for sentence in summarizer(parser.document, num):
        LSAstr += str(sentence)
    return LSAstr

Developer: WangWenjun559, Project: Weiss, Lines: 9, Source: sumy.py
Example 13: summarizeText

def summarizeText(self, body, numSentences=10):
    """Summarizes body of text to numSentences
    """
    #parser = PlaintextParser.from_string(body, Tokenizer(self.LANG))
    parser = PlaintextParser.from_string(body, Tokenizer(self.LANG))
    stemmer = Stemmer(self.LANG)
    summarizer = SumySummarizer(stemmer)
    summarizer.stop_words = get_stop_words(self.LANG)
    summary = ' '.join([str(sentence).decode('utf-8') for sentence in summarizer(parser.document, numSentences)])
    return summary

Developer: mchenchen, Project: HouseDoctor, Lines: 10, Source: Summarizer.py
Example 14: summarize

def summarize(content):
    parser = PlaintextParser.from_string(content.body, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    text = '\n'.join(
        [str(sentence) for sentence in summarizer(parser.document, COUNT)]
    )
    summary = Summary(content=content, summary=text)
    summary.save()

Developer: tishmen, Project: keyword_research, Lines: 10, Source: contents_helpers.py
Example 15: retreive_sumy

def retreive_sumy(url):
    # "http://en.wikipedia.org/wiki/Automatic_summarization"
    parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
    # or for plain text files
    # parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    return summarizer(parser.document, SENTENCES_COUNT)

Developer: TauqirA, Project: SumCrossword, Lines: 10, Source: test.py
Example 16: summary

def summary(self, int1, int2):
    # int1, int2 are the places between which to look for
    # the summary to be taken (slicing the corpus as a string)
    parser = PlaintextParser(self.corpus[int1:int2], Tokenizer("english"))
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    self.summary_text = " ".join(
        map(lambda x: x._text,
            summarizer(parser.document, 20)))
    return self.summary_text

Developer: nalourie, Project: digital-humanities, Lines: 10, Source: analysis_script.py
Example 17: summary

def summary(text, summarizer_class):
    parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = summarizer_class(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)
        yield sentence

Developer: mdamien, Project: mdamien.github.io, Lines: 10, Source: render.py
Example 18: summarize

def summarize(filename, num_sentences):
    with open(filename, "r") as myfile:
        data = myfile.read()
    parser = PlaintextParser.from_string(data, Tokenizer('english'))
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    summary = ""
    for sentence in summarizer(parser.document, num_sentences):
        summary += sentence.__unicode__().encode('ascii', 'ignore').replace('\"', '').replace('\'', '').strip() + " "
    return summary

Developer: cestella, Project: senate_speech_investigation, Lines: 10, Source: generate_summaries.py
Example 19: summary

def summary(text):
    stemmer = Stemmer(LANGUAGE)
    parser = PlaintextParser(text, Tokenizer(LANGUAGE))
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    short = ""
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        short = short + ">" + "* " + str(sentence).decode('ascii', 'ignore') + "\n\n"
        #print(sentence)
    return short

Developer: HunkDivine, Project: samacharbot2, Lines: 11, Source: altsummary.py
Example 20: test_article_example

def test_article_example():
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    parser = PlaintextParser.from_string(
        load_resource("articles/prevko_cz_1.txt"),
        Tokenizer("czech")
    )
    summarizer = LsaSummarizer(Stemmer("czech"))
    summarizer.stop_words = get_stop_words("czech")
    sentences = summarizer(parser.document, 20)
    assert len(sentences) == 20

Developer: miso-belica, Project: sumy, Lines: 11, Source: test_lsa.py
Note: The sumy.utils.get_stop_words examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The code snippets are taken from open-source projects contributed by various developers, and copyright remains with the original authors. For distribution and use, please refer to each project's license; do not republish without permission.