本文整理汇总了Python中wordcloud.STOPWORDS类的典型用法代码示例。如果您正苦于以下问题：Python STOPWORDS类的具体用法？Python STOPWORDS怎么用？Python STOPWORDS使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了STOPWORDS类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: wordcloud
def wordcloud(datafile):
    """Render one word-cloud JPEG per category found in *datafile*.

    Args:
        datafile: DataFrame-like table (presumably pandas) supporting
            ``iterrows()``; every row must provide a "Category" label and a
            "Content" value.

    For each distinct category the contents of its rows are concatenated and
    drawn, masked by ``../pinkyB.jpg``, into
    ``../wordcloud/wordcloud_<category>.jpg``.
    """
    # Build a private stop-word set. Adding to the library-wide STOPWORDS
    # constant would silently change every other word cloud in the process.
    vectorizer = CountVectorizer(stop_words='english')
    stopwords = STOPWORDS.union(vectorizer.get_stop_words(), ["said"])

    pony_mask = np.array(Image.open("../pinkyB.jpg"))
    wc = WordCloud(background_color="black", max_words=2000, mask=pony_mask, stopwords=stopwords)

    # Gather the pieces per category and join once at the end; repeated
    # `+=` on a growing string is quadratic in the worst case.
    pieces_by_category = {}
    for _, row in datafile.iterrows():
        pieces_by_category.setdefault(row["Category"], []).append(str(row["Content"]))

    # .items() works on Python 2 and 3 alike (iteritems() is Python 2 only).
    for category, pieces in pieces_by_category.items():
        wc.generate("".join(pieces))
        wc.to_image().save("../wordcloud/wordcloud_" + category + ".jpg")
开发者ID:s-kypr,项目名称:DataMining,代码行数:29,代码来源:wordcloud_csv.py
示例2: wordCloud
def wordCloud(text_array,name,keyword=""):
    """Build a masked word-cloud PNG plus an HTML list of its words.

    Args:
        text_array: iterable of strings the cloud is drawn from.
        name: base name of the PNG written under ``static/tool/img/``; it is
            also embedded in the CSS class of every list entry.
        keyword: optional filter. When non-empty, its second space-separated
            token is extracted and only texts containing that token are kept.

    Returns:
        Tuple ``(img_tag, list_html)``: an ``<img>`` tag embedding the PNG as
        a base64 data URI, and ``<li>`` markup listing up to 50 laid-out
        words, each styled with its colour in the cloud.

    Raises:
        IndexError: if a non-empty *keyword* contains no space.
    """
    import base64

    # Compare by value; `is not ""` tests object identity and only works by
    # accident of string interning.
    if keyword != "":
        keyword = keyword.split(" ")[1]
        text_array = [text for text in text_array if keyword in text]

    cloud_text = "".join(text + " " for text in text_array)

    # Private stop-word set: never mutate the shared STOPWORDS constant.
    stopwords = STOPWORDS.union(['police', 'traffic', 'sir'])

    image_mask = os.path.join(BASE_DIR, 'static/tool/img/nebula.png')
    coloring = imread(image_mask)
    wordcloud = WordCloud(stopwords=stopwords,background_color="white",mask=coloring,ranks_only=True,max_words=50).generate(cloud_text)
    filename = os.path.join(BASE_DIR, 'static/tool/img/'+name+'.png')
    image_colors = ImageColorGenerator(coloring)
    wordcloud.recolor(color_func=image_colors)
    wordcloud.to_file(filename)

    # b64encode works on Python 2 and 3 (str.encode('base64') is Python 2
    # only) and the `with` block guarantees the file handle is released.
    with open(filename, 'rb') as png_file:
        data_uri = base64.b64encode(png_file.read()).decode('ascii')
    img_tag = '<img src="data:image/png;base64,{0}" style="height:400px;">'.format(data_uri)

    entries = []
    for item in wordcloud.layout_:
        # The last field of a layout entry is the colour as text, presumably
        # "rgb(r, g, b)": drop the 4-character prefix and the closing
        # character, then parse the comma-separated channels.
        channels = [int(part) for part in item[-1][4:-1].split(',')]
        entries.append((item[0][0], '#%02x%02x%02x' % tuple(channels)))

    # List at most 50 words, ranked from 1. The previous `range(1, cap)` with
    # cap clamped to the entry count skipped the final word whenever fewer
    # than 51 entries existed.
    html_parts = []
    for rank, (word, colour) in enumerate(entries[:50], 1):
        html_parts.append('<li class="list-group-item" ><a class="cloud-key-'+name+'" href="#" style="color:'+colour+'">')
        html_parts.append("#"+str(rank)+" "+word+'</a></li>')
    list_html = "".join(html_parts)
    return (img_tag,list_html)
开发者ID:sbagroy986,项目名称:PoliceOSMDashboard,代码行数:60,代码来源:graphing.py
示例3: generateWordCloud
def generateWordCloud(text, stop):
    """Write a word cloud of *text* to 'diabetes-wordcloud.png'.

    Args:
        text: the text to visualise.
        stop: iterable of extra words to exclude in addition to STOPWORDS.

    The PNG is written into the directory part of the module-level
    ``outputdir`` path.
    """
    d = path.dirname(outputdir)
    # union() returns a new set, so the shared STOPWORDS constant is left
    # untouched for other callers.
    stopwords = STOPWORDS.union(stop)
    # Generate the wordcloud without the stop words
    wordcloud = WordCloud(stopwords=stopwords).generate(text)
    # Draw the positioned words to a PNG file.
    wordcloud.to_file(path.join(d, 'diabetes-wordcloud.png'))
开发者ID:skazzaks,项目名称:blog-text-analyzer,代码行数:11,代码来源:analyze_data.py
示例4: word_cloud
def word_cloud(csv_file, stopwords_path, pic_path):
    """Draw, display and save a masked word cloud from a CSV 'content' column.

    Args:
        csv_file: CSV base name without the ".csv" extension, resolved
            against the current working directory.
        stopwords_path: UTF-8 text file with one extra stop word per line.
        pic_path: image used both as the mask and as the colour source.

    The cloud is shown with matplotlib and written to
    ``<csv_file>_词云图.png``.
    """
    pic_name = csv_file+"_词云图.png"
    # os.path.join is portable; the previous manual "\\" concatenation with
    # doubled backslashes only produced a usable path on Windows.
    csv_path = os.path.join(os.path.abspath(os.curdir), csv_file + ".csv")
    d = pd.read_csv(csv_path, engine='python', encoding='utf-8')

    content = []
    for raw in d['content']:
        try:
            translated = translate(raw)
        except AttributeError:
            # Entries translate() cannot handle are skipped.
            continue
        content.append(translated)

    # Join the entries themselves. str(list) would hand the tokenizer the
    # list repr, i.e. brackets, quotes and escape sequences such as "\u3000"
    # that then show up as bogus tokens.
    joined_content = " ".join(str(item) for item in content)
    comment_after_split = jieba.cut(joined_content, cut_all=False)
    wl_space_split = " ".join(comment_after_split)

    backgroud_Image = plt.imread(pic_path)
    stopwords = STOPWORDS.copy()
    # The with-block closes the file; no explicit close() is needed.
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        stopwords.update(line.strip('\n') for line in f)

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    wc = WordCloud(width=1024, height=768, background_color='white',
                   mask=backgroud_Image, font_path=r"C:\simhei.ttf",
                   stopwords=stopwords, max_font_size=400,
                   random_state=50)
    wc.generate_from_text(wl_space_split)
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    wc.to_file(pic_name)
开发者ID:miaomao1989,项目名称:DA_projects,代码行数:34,代码来源:visualization_analysis.py
示例5: make_cloud
def make_cloud(words, image, size=10, filename='figures/cloud.png', max_words=200, horizontal=0.8):
    """Save a logo-shaped, logo-coloured word cloud figure.

    Args:
        words: whitespace-separated text to visualise.
        image: path of the image used as mask and colour source.
        size: side length passed to ``plt.figure(figsize=...)``.
        filename: where the figure is saved.
        max_words: upper bound on the number of words drawn.
        horizontal: value forwarded as ``prefer_horizontal``.
    """
    # Tokens dropped from the text before the cloud is generated.
    my_stopwords = ['RT', 'amp', 'lt']
    kept_tokens = []
    for token in words.split():
        if token not in my_stopwords:
            kept_tokens.append(token)
    words_no_urls = ' '.join(kept_tokens)

    # Private stop-word set extended with the same three tokens.
    stopwords = set(STOPWORDS)
    stopwords.update(("RT", 'amp', 'lt'))

    # The logo serves as both the mask and the source of colours.
    logo = imread(image)
    image_colors = ImageColorGenerator(logo)

    wc = WordCloud(
        stopwords=stopwords,
        mask=logo,
        color_func=image_colors,
        scale=0.8,
        max_words=max_words,
        background_color='white',
        random_state=42,
        prefer_horizontal=horizontal,
    )
    wc.generate(words_no_urls)

    plt.figure(figsize=(size, size))
    plt.imshow(wc)
    plt.axis("off")
    plt.savefig(filename)
开发者ID:dr-rodriguez,项目名称:conventions_2016,代码行数:29,代码来源:wordcloud.py
示例6: main
def main(save_files = False, db_filename = '../output/database.sqlite'):
    """Show a word cloud built from the Papers table of a SQLite database.

    Args:
        save_files: when true, also write the cloud to
            ``../files/title_cloud.png``.
        db_filename: path of the SQLite database to read.
    """
    conn = sqlite3.connect(db_filename)
    try:
        c = conn.cursor()
        # Retrieve papers
        c.execute('''SELECT *
                     FROM Papers''')
        paper_content = c.fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Column 1 is used as the text (presumably the paper title). Join with a
    # space: plain concatenation glued the last word of one row to the first
    # word of the next, producing bogus tokens.
    titles = ' '.join(pc[1] for pc in paper_content)

    # A Marvin Minsky mask
    mask = np.array(Image.open("../files/minsky_mask.png"))
    wc = WordCloud(background_color="white", max_words=2000, mask=mask, stopwords=STOPWORDS.copy())
    # Generate word cloud
    wc.generate(titles)
    if save_files:
        # Store to file
        wc.to_file("../files/title_cloud.png")
    # Show word cloud
    plt.imshow(wc)
    plt.axis("off")
    plt.show()
开发者ID:edintelligence,项目名称:nips-2015-papers,代码行数:35,代码来源:exploration.py
示例7: cloudplot
def cloudplot(person):
    """Return a PNG response with a logo-coloured word cloud for *person*.

    Args:
        person: name whose '+' characters are replaced by spaces before the
            text lookup via ``GetTextRange(Emails, person)``.

    Returns:
        The result of ``send_file`` for the rendered figure, served with
        mimetype 'image/png'.
    """
    person = re.sub(r'\+', ' ', person)
    text = GetTextRange(Emails, person)
    text = rmBoring(rmNonAlpha(text)).decode('ascii', 'ignore')
    plt.clf()
    d = path.dirname(path.abspath(__file__))
    hilcolor = np.array(Image.open(path.join(d, "static/img/hillarylogo.jpg")))
    # set.add() returns None, so `stopwords=STOPWORDS.add("said")` passed
    # None and mutated the shared constant as a side effect. Build an
    # explicit private set instead.
    stopwords = STOPWORDS.union(["said"])
    wc = WordCloud(background_color="white", max_words=150, mask=hilcolor,
                   stopwords=stopwords,
                   max_font_size=80, random_state=42,
                   relative_scaling = 0.5)
    wc.generate(text)
    image_colors = ImageColorGenerator(hilcolor)
    plt.imshow(wc.recolor(color_func=image_colors))
    plt.axis("off")
    fig = plt.gcf()
    img = StringIO.StringIO()
    fig.savefig(img)
    # Rewind so send_file reads the image from the start of the buffer.
    img.seek(0)
    return send_file(img, mimetype='image/png')
开发者ID:abhik1368,项目名称:teamhrc,代码行数:31,代码来源:__init__.py
示例8: config_stopwords
def config_stopwords(self, more_stopwords=None):
    """Optionally extend the stop words stored on this object.

    Args:
        more_stopwords: iterable of additional words, or None to leave
            ``self.STOPWORDS`` unchanged.

    When extra words are given, ``self.STOPWORDS`` becomes the union of the
    library STOPWORDS and those words.
    """
    if more_stopwords is None:
        return
    self.STOPWORDS = STOPWORDS.union(more_stopwords)
开发者ID:drewmccalmont,项目名称:services-to-wordcloud,代码行数:9,代码来源:helper_wordcloud.py
示例9: makeWordCloud
def makeWordCloud(text):
    """Display a 1600x800 word cloud of *text* using the default stop words."""
    cloud_options = dict(
        max_words=2000,
        stopwords=STOPWORDS.copy(),
        margin=5,
        random_state=1,
        width=1600,
        height=800,
    )
    cloud = WordCloud(**cloud_options)
    cloud.generate(text)
    plt.imshow(cloud)
    plt.show()
开发者ID:kshabahang,项目名称:fbminer,代码行数:10,代码来源:mine.py
示例10: create_wordcloud
def create_wordcloud(posts):
    """Show and save a masked word cloud built from post messages.

    Args:
        posts: iterable of mappings, each providing a 'message' string.

    The cloud is masked by 'aces.png', displayed with matplotlib and written
    to 'aces_wordcloud.png'.
    """
    wordcloud_str = ' '.join(post['message'] for post in posts)  # join all posts together
    aces_mask = imread("aces.png")  # add aces mask
    # Exclude "will": not an interesting word, and it took up a large chunk
    # of the cloud. set.add() returns None, so passing STOPWORDS.add("will")
    # handed None to WordCloud and mutated the shared constant; build a
    # private set instead.
    stopwords = STOPWORDS.union(["will"])
    wc = WordCloud(background_color="BLACK", mask=aces_mask, stopwords=stopwords)
    wc.generate(wordcloud_str)
    plt.axis("off")
    plt.imshow(wc)
    plt.show()
    wc.to_file("aces_wordcloud.png")
开发者ID:Statistica,项目名称:Aces,代码行数:10,代码来源:aces.py
示例11: generate_wc
def generate_wc(content):
    """Write a masked, image-coloured word cloud of *content* to 'result.jpg'.

    Args:
        content: the text to visualise.

    The output goes to ``c.outputs_pictures_path + 'result.jpg'``.
    """
    path = r'fzzqhj.TTF'
    bg_pic = imread('mo.png')  # read the background image file
    image_colors = ImageColorGenerator(bg_pic)  # derive colours from the background image
    # set.add() returns None, so `stopwords=STOPWORDS.add("said")` passed
    # None and mutated the shared constant; use an explicit private set.
    stopwords = STOPWORDS.union(["said"])
    wc = WordCloud(font_path=path, background_color="white",
                   mask=bg_pic,
                   stopwords=stopwords,
                   max_font_size=40,
                   color_func=image_colors,
                   random_state=42)
    wc = wc.generate(content)
    wc.to_file(c.outputs_pictures_path + 'result.jpg')
开发者ID:hzhenpeng,项目名称:ReptileSomething,代码行数:12,代码来源:CatchWorkingReport.py
示例12: plotTwiiterWordCloud
def plotTwiiterWordCloud():
    """Plot a word cloud of the terms occurring more than once in a trace file.

    The trace file is named by ``sys.argv[2]``. Each line is split on ', '
    and field 6 is treated as the tweet text; lines with fewer fields are
    reported with 'IndexError' and skipped. Terms of a single character,
    terms containing 'http' and stop words are ignored.

    The figure is saved as 'csgo-icon-wordcloud.png' and then shown.
    """
    args = sys.argv

    blacklist = STOPWORDS.copy()
    blacklist.add('rt')
    # Strip all punctuation except '@' and '&'.
    punctuation = set(string.punctuation)
    punctuation.remove('@')
    punctuation.remove('&')

    dictTerms = dict()
    # The with-block closes the trace file, which was previously leaked.
    with open(args[2], 'r') as tracefile:
        # First pass counts the lines so tqdm can show progress; rewind after.
        nLines = sum(1 for line in tracefile)
        tracefile.seek(0)
        for line in tqdm(tracefile, total=nLines):
            # Only the field lookup can raise IndexError; keep the try tight.
            try:
                tweet = line.split(', ')[6].lower()
            except IndexError:
                print('IndexError')
                continue
            for p in punctuation:
                tweet = tweet.replace(p, '')
            for t in tweet.split(' '):
                # Blacklisted terms are rejected here, so no later cleanup
                # pass over the dictionary is required.
                if (len(t) > 1) and 'http' not in t and (t not in blacklist):
                    dictTerms[t] = dictTerms.get(t, 0) + 1

    popularTerms = sorted(dictTerms.keys(), key=lambda w: dictTerms[w], reverse=True)
    popularTerms = [p for p in popularTerms if dictTerms[p] > 1]
    print(len(popularTerms))

    # Repeat each term as often as it occurred so WordCloud recovers the
    # frequencies from plain text; a single join avoids quadratic `+=`.
    terms = ''.join((' ' + p) * dictTerms[p] for p in popularTerms)

    maskfile = 'csgo-icon'
    mask = imread(maskfile + '.jpg')
    wc = WordCloud(mask=mask, background_color='white', width=1280, height=720).generate(terms)
    default_colors = wc.to_array()
    plt.figure()
    plt.imshow(default_colors)
    plt.axis('off')
    plt.savefig(maskfile + '-wordcloud.png', dpi=500, bbox_inches='tight', pad_inches=0)
    plt.show()
开发者ID:leokassio,项目名称:csgo-analysis,代码行数:52,代码来源:csgo-analysis.py
示例13: makeCloud
def makeCloud(text, imgFile, words):
    """
    Build a word cloud of *text*, display it and save it as a JPEG.

    *words* lists extra terms to exclude on top of STOPWORDS; the image is
    written to ``imgFile + '.jpeg'``.
    """
    excludewords = set(STOPWORDS)
    excludewords.update(words)

    cloud = WordCloud(
        max_words=NUM_OF_WORDS,
        width=WIDTH,
        height=HEIGHT,
        stopwords=excludewords,
    )
    cloud.generate(text)

    image = cloud.to_image()
    image.show()
    image.save(imgFile + '.jpeg')
开发者ID:KaushikR,项目名称:SubredditStats,代码行数:13,代码来源:RandiaAnalysis.py
示例14: make_word_cloud
def make_word_cloud(data):
    """Display a word cloud built from the first field of every record.

    Args:
        data: iterable of indexable records whose element 0 is a string.
    """
    # Display the generated image the matplotlib way.
    import matplotlib.pyplot as plt

    text = ''.join(d[0] + ' ' for d in data)

    # set.add() returns None, so `stopwords=STOPWORDS.add('watson')` passed
    # None and mutated the shared constant; build a private set instead.
    stopwords = STOPWORDS.union(['watson'])
    # Generate a word cloud image
    wordcloud = WordCloud(stopwords=stopwords).generate(text)

    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
开发者ID:boxcarton,项目名称:candy-log,代码行数:14,代码来源:analyze.py
示例15: writeFreq
def writeFreq(text, outFile, words):
    """
    Write the word frequencies of *text* to *outFile* as 'word,frequency' lines.

    Args:
        text: the text to analyse.
        outFile: writable file-like object.
        words: iterable of extra terms to exclude on top of STOPWORDS.
    """
    excludewords = STOPWORDS.union(words)
    wordcloud = WordCloud(max_words=NUM_OF_WORDS, stopwords=excludewords)
    freqList = wordcloud.process_text(text)
    # process_text() returns a word -> count mapping in current wordcloud
    # releases (older ones appear to have returned (word, frequency) pairs).
    # Iterating a mapping directly yields only its keys, which would write
    # single characters, so normalise to pairs first.
    pairs = freqList.items() if hasattr(freqList, 'items') else freqList
    for item in pairs:
        outFile.write(item[0] + ',' + str(item[1]) + '\n')
开发者ID:KaushikR,项目名称:SubredditStats,代码行数:15,代码来源:RandiaAnalysis.py
示例16: mainProcess
def mainProcess(usernames):
    """Collect the words of every user and save their word cloud as 'words.png'.

    Args:
        usernames: sequence of user name strings; blank entries are skipped.

    For each user, ``produce3`` supplies a count and the words, and the count
    is stored through ``module.Database.edit2``. Afterwards the module-level
    ``driver`` is shut down and ``conn`` is committed and closed.
    """
    print("Processing "+str(len(usernames)-1)+" usernames")

    # time.clock() was removed in Python 3.8; prefer perf_counter when it
    # exists and fall back to clock on interpreters that lack it.
    timer = getattr(time, "perf_counter", None) or time.clock

    collected = []
    loginFacebook(driver)
    timeread = time.time()
    time0 = timer()
    # Pre-set so the report below also works when nothing is processed.
    time3 = time0
    for username in usernames:
        # Strip first so whitespace-only entries are skipped as well.
        username = username.strip()
        if not username:
            continue
        count, words3 = produce3(username)
        module.Database.edit2(username, count, conn)
        collected.append(words3)
        time3 = timer()
    words4 = "".join(" " + words for words in collected)
    timeread = time.time()-timeread
    print("TOTAL TIME")
    print(time3-time0)
    print(timeread)

    more_stopwords =["ja", "aga", "kui", "siis", "tongue", "nii", "ka", "et", "see", "ma","oma","oli", "emoticon", "ei","ning", "seda", "või", "smile", "grin", "Kas", "kes", "veel"]
    # Private set: do not mutate the shared STOPWORDS constant.
    stopwords = STOPWORDS.union(more_stopwords)

    # These substrings are removed from the text outright before the cloud
    # is generated.
    utf=["Translation", "nüüd", "või", "ära", "Kas"]
    for u in utf:
        words4=words4.replace(u, "")

    wordcloud = WordCloud(stopwords=stopwords).generate(words4)
    image = wordcloud.to_image()
    image.save("words.png","PNG")

    driver.close()
    # quit must be called; the bare attribute access `driver.quit` did nothing.
    driver.quit()
    conn.commit()
    conn.close()
    print("Done")
开发者ID:tammet,项目名称:fbstalk,代码行数:36,代码来源:fbposts.py
示例17: title_wordcloud
def title_wordcloud(dataFrame):
    """Plot and save a logo-masked word cloud of the 'track_name' column.

    Args:
        dataFrame: table whose 'track_name' column holds the strings to
            visualise.

    The figure is written to 'project3_wordcloud.png' and then shown.
    """
    from PIL import Image
    from wordcloud import STOPWORDS, WordCloud

    # All track names as one space-separated text.
    text = " ".join(dataFrame['track_name'])

    # Terms that would otherwise dominate the cloud.
    extra_terms = ["feat","Remix","Edit","Radio","Version","Mix","Remastered"]
    stop_words = STOPWORDS.union(extra_terms)

    spotify_mask = np.array(Image.open(path.join( "spotify-logo.jpg")))
    cloud = WordCloud(
        width=2880,
        height=1800,
        background_color="white",
        stopwords=stop_words,
        mask=spotify_mask,
    )
    cloud.generate(text)

    # Open a plot of the generated image.
    plt.figure(figsize=(10, 6))
    plt.imshow(cloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.savefig("project3_wordcloud.png")
    plt.show()
开发者ID:tyty233,项目名称:Music-Classification-and-Ranking-Analysis,代码行数:16,代码来源:Project3_visualization.py
示例18: main
def main():
    """Render, save and show a word cloud of 'cvpr2015papers.txt'.

    The text file is read from, and 'cvpr2015wordcloud.png' written to, the
    directory containing this script.
    """
    d = os.path.dirname(__file__)
    DOC_NAME = "cvpr2015papers.txt"
    # Use a context manager so the file handle is closed promptly.
    with open(os.path.join(d, DOC_NAME)) as doc:
        text = doc.read()
    # adding computer vision specific stopwords
    stopwords = STOPWORDS.copy()
    stopwords.add("image")
    wc = WordCloud(max_words=300, stopwords=stopwords, width=800, height=400)
    wc.generate(text)
    wc.to_file(os.path.join(d, "cvpr2015wordcloud.png"))
    plt.imshow(wc)
    plt.axis("off")
    plt.show()
开发者ID:satojkovic,项目名称:cvpr2015wordcloud,代码行数:16,代码来源:cvpr2015wordcloud.py
示例19: generate_word_cloud
def generate_word_cloud(text, mask_filename):
    """Write a masked word cloud of *text* to a temporary PNG file.

    Args:
        text: the text to visualise.
        mask_filename: mask image name, resolved relative to this file's
            directory.

    Returns:
        Path of the temporary file; its name ends in '-wordcloud.png'. The
        caller is responsible for deleting it.
    """
    import os

    d = path.dirname(__file__)
    mask = imread(path.join(d, mask_filename))
    # Extra stop words added on top of the defaults.
    stopwords = STOPWORDS.copy()
    stopwords.add("info")
    stopwords.add("meetbot")
    stopwords.add("supybot")
    wc = WordCloud(max_words=1000, mask=mask, stopwords=stopwords, margin=10,
                   random_state=1).generate(text)
    # mkstemp returns an already-open OS-level descriptor along with the
    # path. Close it, otherwise every call leaks one file descriptor.
    fd, tmpfilename = tempfile.mkstemp('-wordcloud.png')
    os.close(fd)
    wc.to_file(tmpfilename)
    return tmpfilename
开发者ID:decause,项目名称:word_cloud,代码行数:16,代码来源:mailcloud.py
示例20: create_cloud
def create_cloud(word, img, out_path):
    """Write a masked word cloud of a UTF-8 text stream to *out_path*.

    Args:
        word: binary file-like object; its whole content is read and decoded
            as UTF-8.
        img: mask image, passed to ``Image.open``.
        out_path: destination of the rendered cloud.
    """
    # Read the whole text.
    text = word.read().decode('utf-8')
    # read the mask image
    alice_mask = np.array(Image.open(img))
    # set.add() returns None, so `stopwords=STOPWORDS.add("said")` passed
    # None and mutated the shared constant; build a private set instead.
    stopwords = STOPWORDS.union(["said"])
    wc = WordCloud(font_path = '华文黑体.ttf' ,background_color="white", max_words=2000, mask=alice_mask,
                   stopwords=stopwords, width=1000, height=2300, ranks_only=True, mode='RGBA')
    # generate word cloud
    wc.generate(text)
    # store to file
    wc.to_file(out_path)
开发者ID:cangfengzhe,项目名称:xiaobaifinder,代码行数:17,代码来源:word_cloud.py
注:本文中的wordcloud.STOPWORDS类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论