This article collects typical usage examples of the Python sentimentfinding.IOtools class. If you are wondering what the IOtools class does, how to use it, or what real-world usage looks like, the curated class examples below may help.
The following presents 20 code examples of the IOtools class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
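The sentimentfinding.IOtools module itself is not reproduced on this page. For orientation, here is a minimal sketch of what the helpers used in the examples below might look like; every name and signature is inferred purely from the call sites, not from the actual sentimentfinding source:

    # iotools_sketch.py -- an assumed reconstruction, inferred from call sites only
    import os
    import json
    import pandas as pd

    def readcsv(csvpath, keepindex=False):
        # load a CSV into a DataFrame, optionally using the first column as the index
        return pd.read_csv(csvpath, index_col=0 if keepindex else None)

    def tocsv(df, csvpath, keepindex=False):
        # write a DataFrame to disk
        df.to_csv(csvpath, index=keepindex)

    def todisc_txt(txt, filepath):
        # dump a string to a text file
        with open(filepath, "w") as f:
            f.write(txt)

    def todisc_list(filepath, items):
        # write one item per line
        with open(filepath, "w") as f:
            f.write("\n".join(map(str, items)))

    def todisc_json(filepath, obj, ind=4):
        with open(filepath, "w") as f:
            json.dump(obj, f, indent=ind)

    def getfoldernames_of_dir(path):
        return sorted(d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d)))

    def getfilenames_of_dir(path, removeextension=True):
        names = sorted(f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f)))
        return [os.path.splitext(n)[0] for n in names] if removeextension else names

    def ensure_dir(path):
        # create the directory if it does not exist, then return the path
        if not os.path.exists(path):
            os.makedirs(path)
        return path

The examples also call todisc_matrix, todisc_freqdist and readtextlines and read a results_rootpath attribute; these follow the same pattern and are omitted from the sketch.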
Example 1: csv2latextable_algorithm

def csv2latextable_algorithm(inpath, outpath, filename, metricname):
    header = "\\begin{table}[h] \n \
              \\begin{center} \n \
              \\begin{tabular}{|p{9cm}|p{2cm}|p{2cm}|p{2cm}|} \n \
              \\hline \\bf algorithm \& parameters & \\bf mean "+ metricname +" & \\bf minimum "+ metricname +" & \\bf maximum "+ metricname +" \\\ \\hline"
    footer = "\\end{tabular} \n \
              \\end{center} \n \
              \\caption{\\label{alg-"+metricname[:4]+"-stats} Mean, maximum and minimum "+metricname+" results for 27 learning models } \n \
              \\end{table}"

    ip1 = os.path.join(inpath, filename+".csv")
    df = IOtools.readcsv(ip1, keepindex=True)
    nrows, ncols = df.shape
    rowids = df.index.values.tolist()

    # emit one table row per model, with mean/min/max rounded to 5 decimals
    out = header+"\n"
    for rowid in rowids:
        featset = rowid[4:]
        featset = "\\verb|"+featset+"|"
        out += featset + " & "
        #np.round(a, decimals, out)
        mean = df.loc[rowid, "mean"]
        min = df.loc[rowid, "min"]
        max = df.loc[rowid, "max"]
        stats = map(lambda x : str(round(x, 5)), [mean, min, max])
        statsstr = " & ".join(stats)
        out += statsstr + " \\\ \hline " + "\n"
    out += footer

    IOtools.todisc_txt(out, os.path.join(outpath, filename+".txt"))

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 33, Source file: latexhelpers.py
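A call to this helper might look like the following; the directory paths and metric name are purely illustrative, and the function expects a file <filename>.csv under inpath whose index holds model ids and whose columns include "mean", "min" and "max":

    # hypothetical invocation: reads /tmp/scores/algstats.csv,
    # writes the LaTeX table to /tmp/tables/algstats.txt
    csv2latextable_algorithm(inpath="/tmp/scores", outpath="/tmp/tables",
                             filename="algstats", metricname="accuracy")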
Example 2: csv2latextable_featset

def csv2latextable_featset(inpath, outpath, filename, metricname):
    header = "\\begin{table}[h] \n \
              \\begin{center} \n \
              \\begin{tabular}{|p{5cm}|p{2cm}|p{2cm}|p{2cm}|} \n \
              \\hline \\bf feature-combined dataset name & \\bf mean "+ metricname +" & \\bf minimum "+ metricname +" & \\bf maximum "+ metricname +" \\\ \\hline"
    footer = "\\end{tabular} \n \
              \\end{center} \n \
              \\caption{\\label{featset-"+metricname[:4]+"-stats} Mean, maximum and minimum "+metricname+" results for 8 feature-measure-combined datasets } \n \
              \\end{table}"

    ip1 = os.path.join(inpath, filename+".csv")
    df = IOtools.readcsv(ip1, keepindex=True)
    nrows, ncols = df.shape
    rowids = df.index.values.tolist()

    # same row loop as Example 1, but the row label is the feature-set name
    out = header+"\n"
    for rowid in rowids:
        featset = rowid.split("**")[0].strip()
        featset = "\\verb|"+featset+"|"
        out += featset + " & "
        #np.round(a, decimals, out)
        mean = df.loc[rowid, "mean"]
        min = df.loc[rowid, "min"]
        max = df.loc[rowid, "max"]
        stats = map(lambda x : str(round(x, 5)), [mean, min, max])
        statsstr = " & ".join(stats)
        out += statsstr + " \\\ \hline " + "\n"
    out += footer

    IOtools.todisc_txt(out, os.path.join(outpath, filename+".txt"))

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 32, Source file: latexhelpers.py
Example 3: buildcorpus

def buildcorpus(nfile, ncat, resourcename, path):
    resourcepath = path + os.sep + resourcename
    catnames = IOtools.getfoldernames_of_dir(resourcepath)[:ncat]

    featurematrix = []
    doctermmatrix = []
    cfdTermDoc = nltk.ConditionalFreqDist()

    for catname in catnames:
        fileids = []
        p = resourcepath + os.sep + catname + os.sep
        fileids.extend(IOtools.getfilenames_of_dir(p, removeextension=False)[:nfile])

        corpus = CorpusFeatures(fileids, resourcename+os.sep+catname, p)
        corpus.getfeatures()
        datapoints = corpus.build_featurematrix()
        for k,v in datapoints.iteritems():
            featurematrix.append([k]+v+[resourcename])
        corpus.plot_features()

        # doc-term matrix
        cfd = corpus.build_termmatrix()
        for fileid in cfd.conditions():
            for term in list(cfd[fileid]):
                cfdTermDoc[fileid].inc(term)

    IOtools.todisc_matrix(featurematrix, IOtools.results_rootpath+os.sep+"MATRIX"+str(nfile*ncat)+"texts.txt", mode="a")

Developer ID: dicleoztur, Project: tez0.1v, Lines of code: 27, Source file: dataspace.py
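Note that cfdTermDoc[fileid].inc(term) uses the old NLTK 2 FreqDist API. FreqDist.inc() was removed in NLTK 3, where FreqDist is a Counter subclass and the increment is written as a plain assignment:

    # NLTK 3 equivalent of the NLTK 2 call cfdTermDoc[fileid].inc(term)
    cfdTermDoc[fileid][term] += 1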
Example 4: run_copy_from_gold

def run_copy_from_gold():
    maincsvpath = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_worker.csv"
    indf = IOtools.readcsv(maincsvpath)

    sourcecsvpath = "/home/dicle/Dropbox/ukp/fallacy_detection/expertandgoldannotations/gold-labels3.csv"
    sourcedf = IOtools.readcsv(sourcecsvpath)

    outfilepath = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_wtexts_wmajority_worker.csv"
    insert_texts(indf, sourcedf, outfilepath)

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 7, Source file: analyse_mturk.py
Example 5: get_allfolds_bigdf

def get_allfolds_bigdf(foldrootpath, annottype, featset, labelunion):
    bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
    folds = IOtools.getfoldernames_of_dir(foldrootpath)

    # collect every fold's score file into one big dataframe, tagging each row
    # with its label union, feature set, annotation type and fold number
    for foldno in folds:
        p1 = os.path.join(foldrootpath, foldno)
        scorecsvfilepath = p1 + os.sep + metaexperimentation.scorefilename+".csv"
        scorecsvfile = IOtools.readcsv(scorecsvfilepath)
        print " scorefile ",scorecsvfilepath," ",scorecsvfile.shape
        #rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(N / 2), metricnames, ascend=takeworst)
        rankdf = scorecsvfile.copy()
        rankdf["labelunion"] = labelunion
        rankdf["featureset"] = featset
        rankdf["annottype"] = annottype
        rankdf["fold"] = foldno
        bigdf = bigdf.append(rankdf)
        #dflist.append(rankdf)

    print "FOLDROOTPATH ",foldrootpath
    outcsvpath = os.path.join(foldrootpath, "bigdf.csv")
    IOtools.tocsv(bigdf, outcsvpath, False)

Developer ID: dicleoztur, Project: tez0.1v, Lines of code: 27, Source file: performance_evaluation_crossval.py
Example 6: prepare_experiment

def prepare_experiment(self, Xpath, ypath, erootpath, labelnames=None):
    self.datapath = Xpath
    self.labelpath = ypath
    #if erootpath:
    self.set_score_folder(erootpath)

    yvector = IOtools.readcsv(ypath, True)
    self.ylabels = yvector.answer.values
    yvals = self.ylabels.copy().tolist()
    #print "y vals ",yvals
    #print "vect ", self.ylabels

    if labelnames is None:
        labelnames = ["class "+str(i) for i in list(set(yvals))]

    instanceids = yvector.index.values.tolist()
    datadf = IOtools.readcsv(Xpath, keepindex=True)
    datadf = datadf.loc[instanceids, :]

    # zero out NaN and inf entries so downstream models get a clean matrix
    self.X = datadf.values
    self.X[np.isnan(self.X)] = 0
    self.X[np.isinf(self.X)] = 0
    ''' do it inside models
    if normalize:
        self.X = preprocessing.normalize(self.X, axis=0)
    '''
    # can't apply standardization as it results in negative entries in the matrix,

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 30, Source file: learner.py
Example 7: getwordsandlemmasfromfile

def getwordsandlemmasfromfile():
    rootpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/"
    corpuspath = rootpath + os.sep + "texts/"
    outwordspath = rootpath + os.sep + "weightedwords/"
    fileids = IOtools.getfilenames_of_dir(corpuspath, removeextension=False)

    for fileid in fileids:
        txt = texter.readtxtfile(corpuspath+os.sep+fileid)
        marker = "Haziran 2013"
        mark = txt.find(marker)   # skip metadata
        txt = txt[mark+len(marker):]

        words = texter.getwords(txt)
        lemmatuples = SAKsParser.findrootsinlexicon(words)
        roots = [root for _,root,_ in lemmatuples]

        # frequency-weight both surface words and their roots
        fdwords = nltk.FreqDist(words)
        fdroots = nltk.FreqDist(roots)
        weightedwords = [word+"\t"+str(fdwords[word]) for word in list(fdwords)]
        weightedroots = [root+"\t"+str(fdroots[root]) for root in list(fdroots)]

        IOtools.todisc_list(outwordspath+os.sep+"lemma"+os.sep+fileid, weightedwords)
        IOtools.todisc_list(outwordspath+os.sep+"root"+os.sep+fileid, weightedroots)

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 30, Source file: articleanalysis.py
Example 8: report_results

def report_results(self):
    self.compute_precision()
    self.compute_recall()
    self.compute_fmeasure()
    self.compute_accuracy()

    IOtools.todisc_matrix(self.confusionmatrix, self.folder+os.sep+self.experimentname+".confmat")

    f = codecs.open(self.folder+os.sep+self.experimentname+".results", "a", encoding='utf8')
    # write the report line by line, not to keep the whole string in memory
    header = "\t" + "\t".join(self.catmetrics.keys()) +"\n"
    f.write(header)

    labelencoding, _ = classfhelpers.classlabelindicing(self.classes)   # labeldecoding contains indices
    for c in self.classes:
        i = labelencoding[c]
        line = []
        line.append(c)
        for metricname in self.catmetrics.keys():
            line.append(self.catmetrics[metricname][i])
        line = map(lambda x : str(x), line)
        outstr = "\t".join(line) + "\n"
        f.write(outstr)

    f.write("\nAccuracy: "+str(self.accuracy))
    f.close()

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 27, Source file: clsshell.py
Example 9: get_user_text_distributions

def get_user_text_distributions(self):
    #users = range(1, self.ncoders+1)
    # 1- get single-annotation list
    # 2- get double-annotation list
    usertextassignment = {}
    singleannot_distribution = Selection()
    singleannot_distribution.initialize(self.months, self.resources, self.cats)

    for i,user in enumerate(self.coders):
        oneuser_distribution, assignment = self.justice_selection(self.nsingle)   # returns textids as (newsid-res-cat); handle selected_texts here
        usertextassignment[i] = assignment
        singleannot_distribution.update_selection(oneuser_distribution)

    # record user assignments and the distribution
    #self.singles_jsonpath = os.path.join(self.outfolder, "singleannotation_assignments.txt")
    IOtools.todisc_json(self.singles_jsonpath, usertextassignment, ind=5)
    singleannot_distribution.todisc(os.path.join(self.outfolder, "singleannotation_distribution.txt"))

    textassignments = {}
    # NOTE: there is a bug in the number of double-annotatable texts here; it should be (self.ncoders/2)*self.noverlaps.
    #doubleannot_distribution, textassignments = self.justice_selection(self.ncoders * self.noverlaps)
    doubleannot_distribution, textassignments = self.justice_selection(int(self.ncoders / 2.0) * self.noverlaps)
    #self.doubles_jsonpath = os.path.join(self.outfolder, "doubleannotation_assignments.txt")
    IOtools.todisc_json(self.doubles_jsonpath, textassignments)
    doubleannot_distribution.todisc(os.path.join(self.outfolder, "doubleannotation_distribution.txt"))

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 28, Source file: annotationbuilder_old.py
Example 10: diff_word_lists

def diff_word_lists(list1, list2, outdir, outfilename):
    # words in list1 that do not occur in list2
    l = list(set(list1) - set(list2))
    IOtools.todisc_list(outdir+os.sep+outfilename+".txt", l)
    fdist = nltk.FreqDist(l)
    IOtools.todisc_freqdist(outdir+os.sep+"weighted-"+outfilename+".txt", fdist)
    return l

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 7, Source file: articleanalysis.py
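For instance (file paths are illustrative), the helper returns the words unique to the first list and also writes them to disk. Note that because l is built from a set difference it contains no duplicates, so every count in the FreqDist-weighted output file is 1:

    # hypothetical usage: words in document A that never occur in document B
    only_in_a = diff_word_lists(["kedi", "ev", "su"], ["ev"],
                                outdir="/tmp/diffs", outfilename="a_minus_b")
    # only_in_a contains "kedi" and "su" (order not guaranteed; sets are unordered)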
Example 11: evaluate_crosscorpus

def evaluate_crosscorpus(scoresroot):
    # walk the scores tree: featureclass / labelunion / testcase / traincase / folds
    featclasses = IOtools.getfoldernames_of_dir(scoresroot)
    for featureclass in featclasses:
        p1 = os.path.join(scoresroot, featureclass)
        lunions = IOtools.getfoldernames_of_dir(p1)
        for labelunion in lunions:
            p2 = os.path.join(p1, labelunion)
            testcases = IOtools.getfoldernames_of_dir(p2)
            for testcase in testcases:
                p3 = os.path.join(p2, testcase)
                traincases = IOtools.getfoldernames_of_dir(p3)
                for traincase in traincases:
                    p4 = os.path.join(p3, traincase)   # folds path
                    get_allfolds_bigdf(foldrootpath=p4,
                                       annottype=testcase + " ** "+traincase,
                                       featset=featureclass,
                                       labelunion=labelunion)
                    get_fold_averages(p4)

Developer ID: dicleoztur, Project: tez0.1v, Lines of code: 29, Source file: performance_evaluation_crossval.py
Example 12: get_randomly_annotated_set

def get_randomly_annotated_set(incsvfilename, outcsvfilename,
                               incsvfolder=metacorpus.userannotatedpath, outcsvfolder=metacorpus.randomannotatedpath,
                               randomchoicevalues=metacorpus.subjectivity_label_values.keys()):
    df = IOtools.readcsv(os.path.join(incsvfolder, incsvfilename))   # df cols: questionname,userid,answer
    randomdf = df.copy()
    numofrows, _ = randomdf.values.shape

    # overwrite every answer with a uniformly random label
    subjvalues = randomchoicevalues
    randomanswers = [random.choice(subjvalues) for _ in range(numofrows)]
    randomdf.loc[:, "answer"] = randomanswers

    # extra: assign 5 of the rows the value 5 for the answer 'no idea, ambiguous'
    notknowingrows = random.sample(range(numofrows), 5)
    '''
    for _ in range(5):
        randindex = random.randint(0, numofrows-1)
        while randindex in notknowingrows:
            randindex = random.randint(0, numofrows-1)
        notknowingrows.append(randindex)
    '''
    #notknowingrows = [random.randint(0, numofrows-1) for _ in range(5)]   # be careful with this 5; it is subject to change for the sake of statistical validity
    randomdf.loc[notknowingrows, "answer"] = 5
    IOtools.tocsv(randomdf, os.path.join(outcsvfolder, outcsvfilename))

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 25, Source file: goldsetbuilder.py
Example 13: metadata_tabular

def metadata_tabular():
    rpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/corpus2/ldatests22Temmuz/wordletest/words/temp/"
    metadf = pd.read_csv(rpath+"/metadocs.csv", index_col=None, sep="\t")
    print metadf.loc[0,"Author"]

    metadf = metadf.sort(["Polarity", "Date", "Author"], ascending=[False, True, True])
    v = metadf.iloc[0,:]
    print v.loc["Author"],v.loc["Resource"]

    header = "\\begin{tabular}{l | c | c | c | c } \n \
              kategori & yazar & başlık & tarih & yayın \\\\ \n \
              \\hline \\hline \n"
    end = "\\end{tabular}"

    # build one LaTeX table row per document from its metadata
    outltx = ""
    numofdocs, fields = metadf.shape
    for i in range(numofdocs):
        row = metadf.iloc[i,:]
        cat = row.loc["Polarity"]
        cat = "\\textbf{"+cat+"}"
        author = row.loc["Author"]
        title = row.loc["Title"]
        link = row.loc["Link"]
        date = row.loc["Date"]
        resource = row.loc["Resource"]
        title = "\\href{"+link+"}{"+title+"}"
        date = "\\textit{"+date+"}"
        resource = "@"+resource
        s = " & ".join([cat, author, title, date, resource])
        outltx = outltx + s + "\\\\ \n \\hline \n"
    outltx = header + outltx + end

    IOtools.todisc_txt(outltx, rpath+"docswordle_tableLaTeX.txt")

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 35, Source file: preparelatex.py
Example 14: add_resource_label

def add_resource_label(matrixpath, datasetname, replacelabel=False, headers=True):
    matrixlines = IOtools.readtextlines(matrixpath)   # 1st item = fileid, last item = file category
    newmatrix = []
    if headers:
        matrixlines = matrixlines[2:]

    for instance in matrixlines:
        items = instance.split()
        fileid = items[0]
        print instance,
        path = datapath+os.sep+datasetname
        foldernames = IOtools.getfoldernames_of_dir(datapath+os.sep+datasetname)
        #print foldernames
        # find the folder containing this fileid and read its resource name
        for folder in foldernames:
            allfileids = IOtools.getfilenames_of_dir(path+os.sep+folder, removeextension=False)
            #print allfileids
            if fileid in allfileids:
                newspath = path+os.sep+folder+os.sep+fileid
                resourcename = texter.getnewsmetadata(newspath, ["resource"])["resource"]
                #print "## ",resourcename," ",type(instance)," ~~ ",instance
                if replacelabel: items = items[:-1]
                newmatrix.append(items + [resourcename])
                break
    return newmatrix

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 28, Source file: matrixhandler.py
Example 15: get_AllObj_AllSubj_class

def get_AllObj_AllSubj_class(originallabelspath, outfolder, in_NC=5):
    out_NC = 2
    if in_NC <= out_NC:
        return

    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    outpath = os.path.join(ensure_unionclass_dir(outfolder, "ALLobj-ALLsubj", out_NC), metacorpus.labelsfilename + ".csv")

    labelvector = labeldf.values
    labelvector = np.array(labelvector, dtype=object)

    # merge labels: 1,2 -> 12 (subjective); 3,4 -> 34 (objective)
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34
    # break the 'no idea' label 5 randomly between the two classes
    for i,_ in enumerate(labelvector):
        if labelvector[i] == 5:
            labelvector[i] = random.choice([12, 34])

    twolabeldf = pd.DataFrame(labelvector, columns=labeldf.columns.values.tolist(), index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outpath, keepindex=True)

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 25, Source file: arrange_class_unions.py
Example 16: get_2_classes

def get_2_classes(labelrootpath, taggertype, in_NC=5):
    out_NC = 2
    if in_NC <= out_NC:
        return

    originallabelspath = os.path.join(labelrootpath, "NC"+metaexperimentation.intrafeatsep+str(in_NC), taggertype+".csv")
    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    outlabelspath = os.path.join(ensure_nclass_dir(labelrootpath, out_NC), taggertype+".csv")

    labelvector = labeldf.values
    labelvector = np.array(labelvector, dtype=object)

    # merge labels: 1,2 -> 12 (subjective); 3,4 -> 34 (objective)
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34
    # break the 'no idea' label 5 randomly between the two classes
    for i,_ in enumerate(labelvector):
        if labelvector[i] == 5:
            labelvector[i] = random.choice([12, 34])

    twolabeldf = pd.DataFrame(labelvector, columns=labeldf.columns.values.tolist(), index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outlabelspath, keepindex=True)

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 26, Source file: arrange_N_classes.py
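Examples 15 and 16 share the same relabeling core: answers 1 and 2 collapse to class 12 (subjective), 3 and 4 to class 34 (objective), and the "no idea" answer 5 is broken randomly. A hypothetical shared helper (not part of the project) that factors this out could look like:

    import random

    def merge_to_two_classes(labeldf):
        # collapse the 5-valued annotation scheme to two classes, mirroring
        # the loops in Examples 15 and 16: 1,2 -> 12; 3,4 -> 34; 5 -> random
        mapping = {1: 12, 2: 12, 3: 34, 4: 34}
        relabel = lambda v: mapping.get(v, random.choice([12, 34]) if v == 5 else v)
        return labeldf.applymap(relabel)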
Example 17: exclude_one_feature

def exclude_one_feature(self):
    exclusionmap = utils.get_excluded_features_map()

    for exclusionname, featuremap in exclusionmap.iteritems():
        p1 = IOtools.ensure_dir(os.path.join(self.combinedfeaturesfolder, exclusionname))
        for featuregroup, combcodemap in featuremap.iteritems():
            p2 = IOtools.ensure_dir(os.path.join(p1, featuregroup))
            for combcode, row in combcodemap.iteritems():
                featuredflist = []
                for j,featno in enumerate(row):
                    print combcode[:8]," ",row, " featno= ",featno
                    if featno >= 0:
                        groupname = sorted(self.featuremap.keys())[j]
                        print " -> ",groupname
                        extractorinstance = self.featuremap[groupname][featno]
                        featurematrixpath = extractorinstance.getfeaturematrixpath
                        featurematrix = IOtools.readcsv(featurematrixpath, keepindex=True)
                        featuredflist.append(featurematrix)
                # concatenate the selected feature matrices column-wise
                datamatrix = pd.concat(featuredflist, axis=1)   #, verify_integrity=True)  # disabled due to the overlapping words in abs and subj lists
                datamatrixpath = os.path.join(p2, combcode+".csv")
                IOtools.tocsv(datamatrix, datamatrixpath, keepindex=True)

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 27, Source file: features_combiner.py
Example 18: assign_annotator_aggreement

def assign_annotator_aggreement(doubleannotated_path, doubleannot_filename):
    csvpath = os.path.join(doubleannotated_path, doubleannot_filename)
    doubleannotatedcsv = IOtools.readcsv(csvpath)
    nrows, _ = doubleannotatedcsv.shape

    doubleannotated_full4class = doubleannotatedcsv.loc[:, ["questionname", "answer"]].copy()
    doubleannotated_half2class = doubleannotatedcsv.loc[:, ["questionname", "answer"]].copy()

    # get fully agreed and half agreed annotations:
    for i in range(nrows):
        answer1 = doubleannotatedcsv.loc[i, "answer1"]
        answer2 = doubleannotatedcsv.loc[i, "answer2"]

        if answer1 == answer2:
            doubleannotated_full4class.loc[i, "answer"] = answer1

        if answer1 in [1,2] and answer2 in [1,2]:   # elif?
            doubleannotated_half2class.loc[i, "answer"] = 12
        elif answer1 in [3,4] and answer2 in [3,4]:
            doubleannotated_half2class.loc[i, "answer"] = 34

    # filter out non-agreeing rows:
    doubleannotated_full4class = doubleannotated_full4class[doubleannotated_full4class["answer"] > 0]
    csvpath1 = os.path.join(doubleannotated_path, "doubleannotated_fullagr4class.csv")
    IOtools.tocsv(doubleannotated_full4class, csvpath1)

    doubleannotated_half2class = doubleannotated_half2class[doubleannotated_half2class["answer"] > 0]
    csvpath2 = os.path.join(doubleannotated_path, "doubleannotated_halfagr2class.csv")
    IOtools.tocsv(doubleannotated_half2class, csvpath2)

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 29, Source file: goldsetbuilder.py
Example 19: img_ref_captions

def img_ref_captions():
    rpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/corpus2/ldatests22Temmuz/wordletest/words/temp/"
    metadf = pd.read_csv(rpath+"/metadocs.csv", index_col=None, sep="\t")

    header = '\\begin{figure}[ht] \n \
              \subfigure[frequency weighted word cloud]{ \n \
              \includegraphics[width=3.5in]{pics_docs/freq_'
    middle1 = '.png}} \n \
              \quad \
              \subfigure[tfidf weighted word cloud]{ \n \
              \includegraphics[width=3in]{pics_docs/tfidf_'
    middle2 = ".png}} \n \
              \caption{ "
    end = "\end{figure}"

    # build one LaTeX figure (freq + tfidf word clouds) per document
    outltx = ""
    numofdocs, fields = metadf.shape
    for i in range(numofdocs):
        filename = metadf.loc[i, "filename"]
        author = metadf.loc[i, "Author"]
        title = metadf.loc[i, "Title"]
        link = metadf.loc[i, "Link"]
        date = metadf.loc[i, "Date"]
        resource = metadf.loc[i, "Resource"]

        caps_link = "\href{" + link + "}"
        caps_a = "{\\textit{" + author + "}, " + title + ", \\textit{" + date + "} - @" + resource + "} }\n"

        figtxt = header + filename + middle1 + filename + middle2 + caps_link + caps_a + end
        outltx = outltx + figtxt + "\n\n"

    IOtools.todisc_txt(outltx, rpath+"docswordle_figLaTeX.txt")

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 35, Source file: preparelatex.py
Example 20: visualize_monthly_news_stats2

def visualize_monthly_news_stats2(csvfolder=metacorpus.statspath, csvname=metacorpus.prunedmetafilename,
                                  imgoutpath=metacorpus.imgfolder,
                                  rescatmap=metacorpus.resourcecategorymap2):
    colldf = IOtools.readcsv(csvfolder+os.sep+csvname)
    numoftexts, _ = colldf.values.shape

    # daily news counts for resources
    cfddailyresourcecount = ConditionalFreqDist((colldf.loc[i,"date"], colldf.loc[i,"resource"].strip()) for i in range(numoftexts))
    CFDhelpers.cfd2csv(cfddailyresourcecount, csvfolder+os.sep+"cfddailyresourcecount2.csv", colnames=['date','resource','count'])
    #cfdresourcesdaycount = ConditionalFreqDist((resource, day) for day in cfddailyresourcecount.conditions() for resource in list(cfddailyresourcecount[day]))

    # daily news counts for categories
    cfddailycategorycount = ConditionalFreqDist((colldf.loc[i,"date"],
                                                 "_".join(map(lambda x : str(x).strip(), [colldf.loc[i, "resource"], colldf.loc[i, "category"]]))) for i in range(numoftexts))
    CFDhelpers.cfd2csv(cfddailycategorycount, csvfolder+os.sep+"cfddailycategorycount2.csv", ["date", "category", 'count'])
    #cfdcatsdaycount = ConditionalFreqDist((category, date) for date in cfddailycategorycount.conditions() for category in list(cfddailycategorycount[date]))

    # visualize monthly --- assuming dates are of the form yyyy-mm-dd (they were recorded that way)
    cfdmonthlyresourcecount = ConditionalFreqDist((colldf.loc[i,"date"][:-3], colldf.loc[i,"resource"].strip()) for i in range(numoftexts))
    CFDhelpers.cfd2csv(cfdmonthlyresourcecount, csvfolder+os.sep+"cfdmonthlyresourcecount.csv", colnames=['month','resource','count'])
    #cfdresourcesmonthcount = ConditionalFreqDist((resource, month) for month in cfdmonthlyresourcecount.conditions() for resource in list(cfdmonthlyresourcecount[month]))

    imgpath = IOtools.ensure_dir(os.path.join(imgoutpath, "resourcebasednewscount"))
    visualize_monthly_cfd(cfd=cfdmonthlyresourcecount, figuretitle="Monthly news count for each resource", ylabel="news published", imgoutpath=imgpath)

    cfdmonthlycategorycount = ConditionalFreqDist((colldf.loc[i,"date"][:-3],
                                                   "-".join(map(lambda x : str(x).strip(), [colldf.loc[i, "resource"], colldf.loc[i, "category"]])))
                                                  for i in range(numoftexts))
    CFDhelpers.cfd2csv(cfdmonthlycategorycount, csvfolder+os.sep+"cfdmonthlycategorycount.csv", ["month", "category", 'count'])
    #cfdcatsmonthcount = ConditionalFreqDist((category, month) for month in cfdmonthlycategorycount.conditions() for category in list(cfdmonthlycategorycount[month]))

    imgpath = IOtools.ensure_dir(os.path.join(imgoutpath, "categorybasednewscount"))
    for canoniccatname, rescatnamedct in rescatmap.iteritems():
        # expand each (month, resource-category) count into (month, resource) pairs
        monthresourcepairs = []
        for resourcename, origcats in rescatnamedct.iteritems():
            for origcatname in origcats:
                #resourcename = rescat.split("-")[0]
                rescat = "-".join([resourcename, origcatname])
                for month in cfdmonthlycategorycount.conditions():
                    numofoccurrences = cfdmonthlycategorycount[month][rescat]
                    #print resourcename," had ",numofoccurrences," times texts in :",rescat," during ",month
                    for i in range(numofoccurrences):
                        monthresourcepairs.append((month, resourcename))

        cfdmonthlyresourcecount_percat = ConditionalFreqDist(monthresourcepairs)
        print canoniccatname,resourcename," * ",rescat," : ",len(cfdmonthlyresourcecount_percat.conditions())," ",cfdmonthlyresourcecount_percat.N()
        figuretitle = "Monthly news count of each resource over category "+canoniccatname.upper()
        visualize_monthly_cfd(cfdmonthlyresourcecount_percat, figuretitle, ylabel="news published", imgoutpath=imgpath)

Developer ID: dicleoztur, Project: subjectivity_detection, Lines of code: 59, Source file: collectionstats.py
Note: The sentimentfinding.IOtools class examples in this article were collected from public source-code and documentation platforms such as GitHub. The snippets are drawn from open-source projects contributed by their authors; copyright of the source code remains with the original authors, and redistribution and use should follow each project's license. Do not reproduce without permission.