
Python sentimentfinding.IOtools Class Code Examples


This article collects typical usage examples of the Python class sentimentfinding.IOtools. If you have been wondering what the IOtools class does, how to use it, or what working examples look like, the curated class code examples below may help.



The following presents 20 code examples of the IOtools class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
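
IOtools itself is a project-internal helper module from the author's repositories rather than a published library, so its implementation is not reproduced on this page. As a reading aid, here is a minimal, hypothetical sketch of the helpers used throughout the examples; every name and parameter below is inferred from the call sites in the 20 examples, and the bodies are assumptions, not the project's actual code.

# Minimal, hypothetical reconstruction of the IOtools helpers used in the
# examples on this page. Signatures are inferred from the call sites below;
# the bodies are assumptions, not the project's actual code.
import os
import json
import codecs

import pandas as pd

# Module-level constant referenced in Example 3 (path is assumed).
results_rootpath = "/path/to/results"

def readcsv(path, keepindex=False):
    # Load a CSV into a DataFrame; keepindex=True keeps the first column
    # as the index (cf. readcsv(ip1, keepindex=True) in Example 1).
    return pd.read_csv(path, index_col=0 if keepindex else None)

def tocsv(df, path, keepindex=False):
    # Write a DataFrame to CSV (cf. tocsv(bigdf, outcsvpath, False)).
    df.to_csv(path, index=keepindex)

def todisc_txt(txt, path):
    # Dump one string to disk; used for the generated LaTeX tables.
    with codecs.open(path, "w", encoding="utf8") as f:
        f.write(txt)

def todisc_list(path, items):
    # Write a list of strings, one per line.
    todisc_txt("\n".join(items), path)

def todisc_matrix(matrix, path, mode="w"):
    # Write (or append, with mode="a") a matrix as tab-joined rows.
    with codecs.open(path, mode, encoding="utf8") as f:
        for row in matrix:
            f.write("\t".join(map(str, row)) + "\n")

def todisc_json(path, obj, ind=2):
    # Serialize a dict to pretty-printed JSON.
    with codecs.open(path, "w", encoding="utf8") as f:
        json.dump(obj, f, indent=ind)

def todisc_freqdist(path, fdist):
    # Write "item<TAB>count" lines from a frequency distribution.
    todisc_list(path, ["%s\t%d" % (w, fdist[w]) for w in fdist])

def readtextlines(path):
    # Read a text file into a list of stripped, non-empty lines.
    with codecs.open(path, encoding="utf8") as f:
        return [line.strip() for line in f if line.strip()]

def getfoldernames_of_dir(path):
    # Names of the immediate sub-directories of path.
    return sorted(d for d in os.listdir(path)
                  if os.path.isdir(os.path.join(path, d)))

def getfilenames_of_dir(path, removeextension=True):
    # Names of the files in path, optionally without their extensions.
    names = sorted(f for f in os.listdir(path)
                   if os.path.isfile(os.path.join(path, f)))
    if removeextension:
        return [os.path.splitext(f)[0] for f in names]
    return names

def ensure_dir(path):
    # Create the directory if it does not exist, then return the path.
    if not os.path.exists(path):
        os.makedirs(path)
    return path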

Example 1: csv2latextable_algorithm

def csv2latextable_algorithm(inpath, outpath, filename, metricname):
    
    header = "\\begin{table}[h] \n \
\\begin{center} \n \
\\begin{tabular}{|p{9cm}|p{2cm}|p{2cm}|p{2cm}|} \n \
\\hline  \\bf algorithm \& parameters & \\bf mean "+ metricname +" & \\bf minimum "+ metricname +" & \\bf maximum "+ metricname +"   \\\ \\hline"
    
    footer = "\\end{tabular} \n \
\\end{center} \n \
\\caption{\\label{alg-"+metricname[:4]+"-stats} Mean, maximum and minimum "+metricname+" results for 27 learning models } \n \
\\end{table}"
    
    ip1 = os.path.join(inpath, filename+".csv")
    df = IOtools.readcsv(ip1, keepindex=True)
    nrows, ncols = df.shape
    rowids = df.index.values.tolist()
    
    out = header+"\n"
    for rowid in rowids:
        featset = rowid[4:]
        featset = "\\verb|"+featset+"|"
        
        out += featset + " & "
        #np.round(a, decimals, out)
        mean = df.loc[rowid, "mean"]
        min = df.loc[rowid, "min"]
        max = df.loc[rowid, "max"]
        stats = map(lambda x : str(round(x, 5)), [mean, min, max])
        statsstr = " & ".join(stats)
        out += statsstr + " \\\ \hline " + "\n"
    
    out += footer
    IOtools.todisc_txt(out, os.path.join(outpath, filename+".txt"))
Developer: dicleoztur | Project: subjectivity_detection | Lines: 33 | Source: latexhelpers.py
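
For orientation, a call to this function might look like the sketch below. The directory paths and file name are hypothetical; the only real requirement visible from the body above is that <filename>.csv carries row ids in its index and the columns mean, min and max.

# Hypothetical invocation; the paths and file name are made up for
# illustration. The input CSV must provide an index of model ids and
# the columns "mean", "min" and "max", which the function reads.
csv2latextable_algorithm(inpath="/tmp/scores",
                         outpath="/tmp/latex",
                         filename="algorithm-accuracy-stats",
                         metricname="accuracy")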


Example 2: csv2latextable_featset

def csv2latextable_featset(inpath, outpath, filename, metricname):
    
    header = "\\begin{table}[h] \n \
\\begin{center} \n \
\\begin{tabular}{|p{5cm}|p{2cm}|p{2cm}|p{2cm}|} \n \
\\hline  \\bf feature-combined dataset name & \\bf mean "+ metricname +" & \\bf minimum "+ metricname +" & \\bf maximum "+ metricname +"   \\\ \\hline"
    
    footer = "\\end{tabular} \n \
\\end{center} \n \
\\caption{\\label{featset-"+metricname[:4]+"-stats} Mean, maximum and minimum "+metricname+" results for 8 feature-measure-combined datasets } \n \
\\end{table}"
    
    ip1 = os.path.join(inpath, filename+".csv")
    df = IOtools.readcsv(ip1, keepindex=True)
    nrows, ncols = df.shape
    rowids = df.index.values.tolist()
    
    out = header+"\n"
    for rowid in rowids:
        featset = rowid.split("**")[0].strip()
        featset = "\\verb|"+featset+"|"
        out += featset + " & "
        #np.round(a, decimals, out)
        mean = df.loc[rowid, "mean"]
        min = df.loc[rowid, "min"]
        max = df.loc[rowid, "max"]
        stats = map(lambda x : str(round(x, 5)), [mean, min, max])
        statsstr = " & ".join(stats)
        out += statsstr + " \\\ \hline " + "\n"
    
    out += footer
    IOtools.todisc_txt(out, os.path.join(outpath, filename+".txt"))
Developer: dicleoztur | Project: subjectivity_detection | Lines: 32 | Source: latexhelpers.py


Example 3: buildcorpus

def buildcorpus(nfile, ncat, resourcename, path):
    resourcepath = path + os.sep + resourcename
    catnames = IOtools.getfoldernames_of_dir(resourcepath)[:ncat]
    
    featurematrix = []
    doctermmatrix = []
    cfdTermDoc = nltk.ConditionalFreqDist()
    
    for catname in catnames:
        fileids = []
        p = resourcepath + os.sep + catname + os.sep
        fileids.extend(IOtools.getfilenames_of_dir(p, removeextension=False)[:nfile])
        corpus = CorpusFeatures(fileids, resourcename+os.sep+catname, p)
        corpus.getfeatures()
        datapoints = corpus.build_featurematrix()
        for k,v in datapoints.iteritems():
            featurematrix.append([k]+v+[resourcename])
            
        corpus.plot_features()
        
        #doc term matrix
        cfd = corpus.build_termmatrix()
        for fileid in cfd.conditions():
            for term in list(cfd[fileid]):
                cfdTermDoc[fileid].inc(term)
    
    IOtools.todisc_matrix(featurematrix, IOtools.results_rootpath+os.sep+"MATRIX"+str(nfile*ncat)+"texts.txt", mode="a")
Developer: dicleoztur | Project: tez0.1v | Lines: 27 | Source: dataspace.py
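
One portability note: cfdTermDoc[fileid].inc(term) uses the NLTK 2.x counting API. In NLTK 3, FreqDist subclasses collections.Counter and the .inc method is gone, so the same increment is written as a plain item update:

# NLTK 3 equivalent of the NLTK 2.x call cfdTermDoc[fileid].inc(term):
# FreqDist is now a collections.Counter subclass, so add to the count directly.
cfdTermDoc[fileid][term] += 1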


Example 4: run_copy_from_gold

def run_copy_from_gold():
    maincsvpath = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_worker.csv"
    indf = IOtools.readcsv(maincsvpath)
    sourcecsvpath = "/home/dicle/Dropbox/ukp/fallacy_detection/expertandgoldannotations/gold-labels3.csv"
    sourcedf = IOtools.readcsv(sourcecsvpath)
    outfilepath = "/home/dicle/Dropbox/ukp/fallacy_detection/mturk_annotations/annotationdf_wtexts_wmajority_worker.csv"
    insert_texts(indf, sourcedf, outfilepath)    
Developer: dicleoztur | Project: subjectivity_detection | Lines: 7 | Source: analyse_mturk.py


Example 5: get_allfolds_bigdf

def get_allfolds_bigdf(foldrootpath, annottype, featset, labelunion):
    
    bigdf = pd.DataFrame(columns=metaexperimentation.performanceheader)
    
    folds = IOtools.getfoldernames_of_dir(foldrootpath)
                        
    for foldno in folds:
        p1 = os.path.join(foldrootpath, foldno)
                                    
        scorecsvfilepath = p1 + os.sep + metaexperimentation.scorefilename+".csv"
        scorecsvfile = IOtools.readcsv(scorecsvfilepath)
        
        print " scorefile ",scorecsvfilepath,"  ",scorecsvfile.shape
        
        #rankdf = matrixhelpers.get_first_N_rows(scorecsvfile, int(N / 2), metricnames, ascend=takeworst)
        rankdf = scorecsvfile.copy()
        rankdf["labelunion"] = labelunion
        rankdf["featureset"] = featset 
        rankdf["annottype"] = annottype
        rankdf["fold"] = foldno
        bigdf = bigdf.append(rankdf)
        #dflist.append(rankdf)
    
    
    print "FOLDROOTPATH ",foldrootpath
    outcsvpath = os.path.join(foldrootpath, "bigdf.csv")
    IOtools.tocsv(bigdf, outcsvpath, False)
Developer: dicleoztur | Project: tez0.1v | Lines: 27 | Source: performance_evaluation_crossval.py
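
bigdf = bigdf.append(rankdf) relies on DataFrame.append, which pandas deprecated in 1.4 and removed in 2.0. On current pandas the usual pattern is to collect the per-fold frames in a list and concatenate once; a sketch of the same loop under that assumption:

# pandas >= 2.0 variant of the per-fold accumulation above: gather the
# frames in a list and call pd.concat once instead of DataFrame.append.
frames = []
for foldno in folds:
    scorecsvfilepath = os.path.join(foldrootpath, foldno,
                                    metaexperimentation.scorefilename + ".csv")
    rankdf = IOtools.readcsv(scorecsvfilepath).copy()
    rankdf["labelunion"] = labelunion
    rankdf["featureset"] = featset
    rankdf["annottype"] = annottype
    rankdf["fold"] = foldno
    frames.append(rankdf)
bigdf = pd.concat(frames, ignore_index=True)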


Example 6: prepare_experiment

    def prepare_experiment(self, Xpath, ypath, erootpath, labelnames=None):
        
        self.datapath = Xpath
        self.labelpath = ypath
        
        #if erootpath:
        self.set_score_folder(erootpath)
        
        yvector = IOtools.readcsv(ypath, True)
        self.ylabels = yvector.answer.values
        yvals = self.ylabels.copy().tolist()
        #print "y vals ",yvals
        #print "vect ", self.ylabels
        if labelnames is None:
            labelnames = ["class "+str(i) for i in list(set(yvals))]

        
        instanceids = yvector.index.values.tolist()
        datadf = IOtools.readcsv(Xpath, keepindex=True)
        datadf = datadf.loc[instanceids, :]
              
        self.X = datadf.values   
        self.X[np.isnan(self.X)] = 0
        self.X[np.isinf(self.X)] = 0
        
        '''  do it inside models
        if normalize:
            self.X = preprocessing.normalize(self.X, axis=0)
        '''
        '''  can't apply standardization as it results in negative entries in the matrix  '''
Developer: dicleoztur | Project: subjectivity_detection | Lines: 30 | Source: learner.py
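
The NaN/inf scrub on self.X (the np.isnan and np.isinf masks zeroed in place) is a common guard before feeding a matrix to scikit-learn estimators. On NumPy 1.17 or later the same cleanup can be written in one call; a sketch:

# One-call equivalent of the NaN/inf zeroing above (NumPy >= 1.17).
import numpy as np
self.X = np.nan_to_num(datadf.values, nan=0.0, posinf=0.0, neginf=0.0)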


Example 7: getwordsandlemmasfromfile

def getwordsandlemmasfromfile():
    rootpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/"
    
    corpuspath = rootpath + os.sep + "texts/"
    outwordspath = rootpath + os.sep + "weightedwords/"
    
    fileids = IOtools.getfilenames_of_dir(corpuspath, removeextension=False)
    
    
    
    for fileid in fileids:
        txt = texter.readtxtfile(corpuspath+os.sep+fileid)
        
        marker = "Haziran 2013"
        mark = txt.find(marker)    # skip metadata
        txt = txt[mark+len(marker):]
        
        words = texter.getwords(txt)
        lemmatuples = SAKsParser.findrootsinlexicon(words)
        roots = [root for _,root,_ in lemmatuples]
        
        fdwords = nltk.FreqDist(words)
        
        fdroots = nltk.FreqDist(roots)
        
        weightedwords = [word+"\t"+str(fdwords[word]) for word in list(fdwords)]
        weightedroots = [root+"\t"+str(fdroots[root]) for root in list(fdroots)]
               
        IOtools.todisc_list(outwordspath+os.sep+"lemma"+os.sep+fileid, weightedwords)
        IOtools.todisc_list(outwordspath+os.sep+"root"+os.sep+fileid, weightedroots)
Developer: dicleoztur | Project: subjectivity_detection | Lines: 30 | Source: articleanalysis.py


Example 8: report_results

 def report_results(self):
     self.compute_precision()
     self.compute_recall()
     self.compute_fmeasure()
     self.compute_accuracy()
     
     IOtools.todisc_matrix(self.confusionmatrix, self.folder+os.sep+self.experimentname+".confmat")
     
     
     
     f = codecs.open(self.folder+os.sep+self.experimentname+".results", "a", encoding='utf8')
     # write report as list not to keep the whole string in memory
     header = "\t" + "\t".join(self.catmetrics.keys()) +"\n"
     f.write(header)
     
     labelencoding, _ = classfhelpers.classlabelindicing(self.classes)    # labeldecoding contains indices
     for c in self.classes:
         i = labelencoding[c]
         line = []
         line.append(c)
         for metricname in self.catmetrics.keys():
             line.append(self.catmetrics[metricname][i])
         line = map(lambda x : str(x), line)
         outstr = "\t".join(line) + "\n"
         f.write(outstr)
     f.write("\nAccuracy: "+str(self.accuracy))
     f.close()
Developer: dicleoztur | Project: subjectivity_detection | Lines: 27 | Source: clsshell.py


Example 9: get_user_text_distributions

 def get_user_text_distributions(self):
     #users = range(1, self.ncoders+1)
     
     # 1- get single-annotation list
     # 2- get double-annotation list
     
     usertextassignment = {}
     singleannot_distribution = Selection()
     singleannot_distribution.initialize(self.months, self.resources, self.cats)
     for i,user in enumerate(self.coders):
         oneuser_distribution, assignment = self.justice_selection(self.nsingle)   # will return textids as (newsid-res-cat) # handle selected_texts here
         usertextassignment[i] = assignment
         singleannot_distribution.update_selection(oneuser_distribution)
     
     # record userassign. and distribution
     #self.singles_jsonpath = os.path.join(self.outfolder, "singleannotation_assignments.txt")
     IOtools.todisc_json(self.singles_jsonpath, usertextassignment, ind=5)
     singleannot_distribution.todisc(os.path.join(self.outfolder, "singleannotation_distribution.txt"))
     
     
     textassignments = {}
      # BUG here in the number of double-annotatable texts: it should be (self.ncoders/2)*self.noverlaps.
     #doubleannot_distribution, textassignments = self.justice_selection(self.ncoders * self.noverlaps)
     doubleannot_distribution, textassignments = self.justice_selection(int(self.ncoders / 2.0) * self.noverlaps)
     
     #self.doubles_jsonpath = os.path.join(self.outfolder, "doubleannotation_assignments.txt")
     IOtools.todisc_json(self.doubles_jsonpath, textassignments)
     doubleannot_distribution.todisc(os.path.join(self.outfolder, "doubleannotation_distribution.txt"))
Developer: dicleoztur | Project: subjectivity_detection | Lines: 28 | Source: annotationbuilder_old.py


Example 10: diff_word_lists

def diff_word_lists(list1, list2, outdir, outfilename):
    l = list(set(list1) - set(list2))
    IOtools.todisc_list(outdir+os.sep+outfilename+".txt", l)
    
    fdist = nltk.FreqDist(l)
    IOtools.todisc_freqdist(outdir+os.sep+"weighted-"+outfilename+".txt", fdist)
    return l
Developer: dicleoztur | Project: subjectivity_detection | Lines: 7 | Source: articleanalysis.py


Example 11: evaluate_crosscorpus

def evaluate_crosscorpus(scoresroot):
    
    featclasses = IOtools.getfoldernames_of_dir(scoresroot)
    
    for featureclass in featclasses:
        
        p1 = os.path.join(scoresroot, featureclass)
        lunions = IOtools.getfoldernames_of_dir(p1)
        
        for labelunion in lunions:
            
            p2 = os.path.join(p1, labelunion)

            testcases = IOtools.getfoldernames_of_dir(p2)
            
            for testcase in testcases:
                
                p3 = os.path.join(p2, testcase)
                traincases = IOtools.getfoldernames_of_dir(p3)
                
                for traincase in traincases:
                    
                    p4 = os.path.join(p3, traincase)   # foldspath
                    get_allfolds_bigdf(foldrootpath=p4, 
                                       annottype=testcase + " ** "+traincase, 
                                       featset=featureclass, 
                                       labelunion=labelunion)
                    
                    get_fold_averages(p4)
Developer: dicleoztur | Project: tez0.1v | Lines: 29 | Source: performance_evaluation_crossval.py


Example 12: get_randomly_annotated_set

def get_randomly_annotated_set(incsvfilename, outcsvfilename,
                               incsvfolder=metacorpus.userannotatedpath, outcsvfolder=metacorpus.randomannotatedpath, 
                               randomchoicevalues=metacorpus.subjectivity_label_values.keys()):
    df = IOtools.readcsv(os.path.join(incsvfolder, incsvfilename))  # df cols: questionname,userid,answer
    randomdf= df.copy()
    numofrows, _ = randomdf.values.shape
    subjvalues = randomchoicevalues
    
    randomanswers = [random.choice(subjvalues) for _ in range(numofrows)]
    randomdf.loc[:, "answer"] = randomanswers
    
    # extra: assign 5 of the rows the value 5 for the answer 'no idea, ambiguous'
    notknowingrows = random.sample(range(numofrows), 5)
    
    '''
    for _ in range(5):
        randindex = random.randint(0, numofrows-1)
        while randindex in notknowingrows:
            randindex = random.randint(0, numofrows-1)
        notknowingrows.append(randindex)
    '''        
    #notknowingrows = [random.randint(0, numofrows-1) for _ in range(5)]  # be careful with this 5 number it is subject to change for the sake of statistical validity
    randomdf.loc[notknowingrows, "answer"] = 5
    
    IOtools.tocsv(randomdf, os.path.join(outcsvfolder, outcsvfilename))
Developer: dicleoztur | Project: subjectivity_detection | Lines: 25 | Source: goldsetbuilder.py
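
One subtlety worth flagging: notknowingrows holds positional row numbers from random.sample, while .loc indexes by label. The two only coincide because readcsv leaves the default RangeIndex in place; on a frame with any other index, a label-safe form would be:

# Label-safe variant: address the rows positionally with .iloc instead of
# relying on the default RangeIndex making positions equal to labels.
randomdf.iloc[notknowingrows, randomdf.columns.get_loc("answer")] = 5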


Example 13: metadata_tabular

def metadata_tabular():
    rpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/corpus2/ldatests22Temmuz/wordletest/words/temp/"
    metadf = pd.read_csv(rpath+"/metadocs.csv", index_col=None, sep="\t")
    
    print metadf.loc[0,"Author"]
    metadf = metadf.sort(["Polarity", "Date", "Author"], ascending=[False, True, True])
    v = metadf.iloc[0,:]
    print v.loc["Author"],v.loc["Resource"]
    
    header = "\\begin{tabular}{l  | c | c | c | c } \n \
kategori & yazar & başlık & tarih & yayın \\\\ \n \
\\hline \\hline \n"

    end = "\\end{tabular}"
    outltx = ""
    numofdocs, fields = metadf.shape
    for i in range(numofdocs):
        row = metadf.iloc[i,:]
        cat = row.loc["Polarity"]
        cat = "\\textbf{"+cat+"}"
        author = row.loc["Author"]
        title = row.loc["Title"]
        link = row.loc["Link"]
        date = row.loc["Date"]
        resource = row.loc["Resource"]
        
        title = "\\href{"+link+"}{"+title+"}"
        date = "\\textit{"+date+"}"
        resource = "@"+resource
        
        s = " & ".join([cat, author, title, date, resource])
        outltx = outltx + s + "\\\\ \n \\hline \n"
    
    outltx = header + outltx + end
    IOtools.todisc_txt(outltx, rpath+"docswordle_tableLaTeX.txt")
Developer: dicleoztur | Project: subjectivity_detection | Lines: 35 | Source: preparelatex.py
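
Note that metadf.sort(...) is the pre-0.20 pandas API; DataFrame.sort was removed in pandas 0.20 in favor of sort_values, so on a current install the equivalent line would be:

# Modern pandas equivalent of the metadf.sort(...) call above:
metadf = metadf.sort_values(["Polarity", "Date", "Author"],
                            ascending=[False, True, True])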


Example 14: add_resource_label

def add_resource_label(matrixpath, datasetname, replacelabel=False, headers=True):
    matrixlines = IOtools.readtextlines(matrixpath)  # 1st item=fileid, lastitem=filecat.
    
    newmatrix = []
    
    if headers:
        matrixlines = matrixlines[2:]
    
    for instance in matrixlines:
        items = instance.split()
        fileid = items[0]
        print instance,
        path = datapath+os.sep+datasetname
        foldernames = IOtools.getfoldernames_of_dir(datapath+os.sep+datasetname)
        #print foldernames
        for folder in foldernames:
            allfileids = IOtools.getfilenames_of_dir(path+os.sep+folder, removeextension=False)
            #print allfileids
            if fileid in allfileids:
                newspath = path+os.sep+folder+os.sep+fileid
                resourcename = texter.getnewsmetadata(newspath, ["resource"])["resource"]
                #print "## ",resourcename,"  ",type(instance),"  ~~ ",instance
                
                if replacelabel: items = items[:-1]
                newmatrix.append(items +[resourcename])
                break
    
    return newmatrix
Developer: dicleoztur | Project: subjectivity_detection | Lines: 28 | Source: matrixhandler.py


Example 15: get_AllObj_AllSubj_class

def get_AllObj_AllSubj_class(originallabelspath, outfolder, in_NC=5):
    
    out_NC = 2
    if in_NC <= out_NC:
        return

    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    
    outpath = os.path.join(ensure_unionclass_dir(outfolder, "ALLobj-ALLsubj", out_NC), metacorpus.labelsfilename + ".csv")
    
    labelvector = labeldf.values
    labelvector = np.array(labelvector, dtype=object)
    
    # replace values  12->"sub"; 34->"obj"
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34
    
    for i,_ in enumerate(labelvector):
        if labelvector[i] == 5:
            labelvector[i] = random.choice([12, 34])
    
    twolabeldf = pd.DataFrame(labelvector, columns=labeldf.columns.values.tolist(), index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outpath, keepindex=True)
Developer: dicleoztur | Project: subjectivity_detection | Lines: 25 | Source: arrange_class_unions.py


Example 16: get_2_classes

def get_2_classes(labelrootpath, taggertype, in_NC=5):
    
    out_NC = 2
    if in_NC <= out_NC:
        return
    
    originallabelspath = os.path.join(labelrootpath, "NC"+metaexperimentation.intrafeatsep+str(in_NC), taggertype+".csv")
    labeldf = IOtools.readcsv(originallabelspath, keepindex=True)
    
    outlabelspath = os.path.join(ensure_nclass_dir(labelrootpath, out_NC), taggertype+".csv")
    
    labelvector = labeldf.values
    labelvector = np.array(labelvector, dtype=object)
    
    # replace values  12->"sub"; 34->"obj"
    labelvector[labelvector == 1] = 12
    labelvector[labelvector == 2] = 12
    labelvector[labelvector == 3] = 34
    labelvector[labelvector == 4] = 34
    
    for i,_ in enumerate(labelvector):
        if labelvector[i] == 5:
            labelvector[i] = random.choice([12, 34])
    
    twolabeldf = pd.DataFrame(labelvector, columns=labeldf.columns.values.tolist(), index=labeldf.index.values.tolist())
    IOtools.tocsv(twolabeldf, outlabelspath, keepindex=True)
Developer: dicleoztur | Project: subjectivity_detection | Lines: 26 | Source: arrange_N_classes.py
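
Examples 15 and 16 apply the same relabeling rule: answers 1 and 2 collapse into class 12 (subjective), 3 and 4 into class 34 (objective), and the undecided answer 5 is resolved by a coin flip. The element-wise loop over labelvector can also be expressed in vectorized form; a sketch reusing labeldf from the example above:

# Vectorized sketch of the 5-class -> 2-class mapping: 1,2 -> 12 and
# 3,4 -> 34, with each remaining 5 assigned to a random side.
import numpy as np
import pandas as pd

vals = labeldf.replace({1: 12, 2: 12, 3: 34, 4: 34}).values
undecided = (vals == 5)
vals[undecided] = np.random.choice([12, 34], size=undecided.sum())
twolabeldf = pd.DataFrame(vals, columns=labeldf.columns, index=labeldf.index)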


Example 17: exclude_one_feature

   def exclude_one_feature(self):
 
       exclusionmap = utils.get_excluded_features_map()
       
       for exclusionname, featuremap in exclusionmap.iteritems():
           p1 = IOtools.ensure_dir(os.path.join(self.combinedfeaturesfolder, exclusionname))
           
           for featuregroup, combcodemap in featuremap.iteritems():
               p2 = IOtools.ensure_dir(os.path.join(p1, featuregroup))
               
               for combcode, row in combcodemap.iteritems():
                   featuredflist = []
                   
                   for j,featno in enumerate(row):
                       print combcode[:8],"  ",row, " featno= ",featno
                       if featno >= 0:
                           groupname = sorted(self.featuremap.keys())[j]
                           print " -> ",groupname           
                           extractorinstance = self.featuremap[groupname][featno]
                           featurematrixpath = extractorinstance.getfeaturematrixpath
                           featurematrix = IOtools.readcsv(featurematrixpath, keepindex=True)
                           featuredflist.append(featurematrix)
                    
                   datamatrix = pd.concat(featuredflist, axis=1) #, verify_integrity=True) # CLOSED DUE TO THE OVERLAPPING WORDS IN ABS AND SUBJ LISTS
                   
                   datamatrixpath = os.path.join(p2, combcode+".csv")  
                   IOtools.tocsv(datamatrix, datamatrixpath, keepindex=True)          
Developer: dicleoztur | Project: subjectivity_detection | Lines: 27 | Source: features_combiner.py


Example 18: assign_annotator_aggreement

def assign_annotator_aggreement(doubleannotated_path, doubleannot_filename):
    csvpath = os.path.join(doubleannotated_path, doubleannot_filename)
    doubleannotatedcsv = IOtools.readcsv(csvpath)
    nrows, _ = doubleannotatedcsv.shape
    
    doubleannotated_full4class = doubleannotatedcsv.loc[:, ["questionname", "answer"]].copy()
    doubleannotated_half2class = doubleannotatedcsv.loc[:, ["questionname", "answer"]].copy()
    
    # get full agreed and half agreed annotations:
    for i in range(nrows):
        answer1 = doubleannotatedcsv.loc[i, "answer1"]
        answer2 = doubleannotatedcsv.loc[i, "answer2"]
    
        if answer1 == answer2:
            doubleannotated_full4class.loc[i, "answer"] = answer1
        if answer1 in [1,2] and answer2 in [1,2]:   # elif?
            doubleannotated_half2class.loc[i, "answer"] = 12
        elif answer1 in [3,4] and answer2 in [3,4]:
            doubleannotated_half2class.loc[i, "answer"] = 34
    
    
    # filter out non-agreeing rows:
    doubleannotated_full4class = doubleannotated_full4class[doubleannotated_full4class["answer"] > 0]
    csvpath1 = os.path.join(doubleannotated_path, "doubleannotated_fullagr4class.csv")
    IOtools.tocsv(doubleannotated_full4class, csvpath1)
    
    doubleannotated_half2class = doubleannotated_half2class[doubleannotated_half2class["answer"] > 0]
    csvpath2 = os.path.join(doubleannotated_path, "doubleannotated_halfagr2class.csv")
    IOtools.tocsv(doubleannotated_half2class, csvpath2)
Developer: dicleoztur | Project: subjectivity_detection | Lines: 29 | Source: goldsetbuilder.py


Example 19: img_ref_captions

def img_ref_captions():
    rpath = "/home/dicle/Dicle/Tez/geziyakurdiproject/corpus2/ldatests22Temmuz/wordletest/words/temp/"
    metadf = pd.read_csv(rpath+"/metadocs.csv", index_col=None, sep="\t")
    
    header = '\\begin{figure}[ht] \n \
\subfigure[frequency weighted word cloud]{ \n \
 \includegraphics[width=3.5in]{pics_docs/freq_'
 
    middle1 = '.png}} \n \
\quad \
\subfigure[tfidf weighted word cloud]{ \n \
 \includegraphics[width=3in]{pics_docs/tfidf_'
 
    middle2 = ".png}} \n \
                \caption{ " 

    end = "\end{figure}"
           
    outltx = ""
    numofdocs, fields = metadf.shape
    for i in range(numofdocs):
        filename = metadf.loc[i, "filename"]
        author = metadf.loc[i, "Author"]
        title = metadf.loc[i, "Title"]
        link = metadf.loc[i, "Link"]
        date = metadf.loc[i, "Date"]
        resource = metadf.loc[i, "Resource"]
        
        caps_link = "\href{" + link + "}"
        caps_a = "{\\textit{" + author + "}, " + title + ", \\textit{" + date + "} - @" + resource + "} }\n"
        figtxt = header + filename + middle1 + filename + middle2 + caps_link + caps_a + end
        
        outltx = outltx + figtxt + "\n\n"
    
    IOtools.todisc_txt(outltx, rpath+"docswordle_figLaTeX.txt")
Developer: dicleoztur | Project: subjectivity_detection | Lines: 35 | Source: preparelatex.py


Example 20: visualize_monthly_news_stats2

def visualize_monthly_news_stats2(csvfolder=metacorpus.statspath, csvname=metacorpus.prunedmetafilename,
                                 imgoutpath=metacorpus.imgfolder,
                                 rescatmap=metacorpus.resourcecategorymap2):
    colldf = IOtools.readcsv(csvfolder+os.sep+csvname)
    
    numoftexts, _ = colldf.values.shape
    
    
    # daily news counts for resources
    cfddailyresourcecount = ConditionalFreqDist((colldf.loc[i,"date"], colldf.loc[i,"resource"].strip()) for i in range(numoftexts))
    CFDhelpers.cfd2csv(cfddailyresourcecount, csvfolder+os.sep+"cfddailyresourcecount2.csv", colnames=['date','resource','count'])
    #cfdresourcesdaycount = ConditionalFreqDist((resource, day) for day in cfddailyresourcecount.conditions() for resource in list(cfddailyresourcecount[day]))
    
    
    # daily news counts for categories
    cfddailycategorycount = ConditionalFreqDist((colldf.loc[i,"date"], 
                                                 "_".join(map(lambda x : str(x).strip(), [colldf.loc[i, "resource"], colldf.loc[i, "category"]]))) for i in range(numoftexts)) 
    CFDhelpers.cfd2csv(cfddailycategorycount, csvfolder+os.sep+"cfddailycategorycount2.csv", ["date", "category", 'count'])
    #cfdcatsdaycount = ConditionalFreqDist((category, date) for date in cfddailycategorycount.conditions() for category in list(cfddailycategorycount[date]))

    
    
    # visualize monthly   --- assuming the dates are of the form yyyy-mm-dd -we did it so while recording
    
    cfdmonthlyresourcecount = ConditionalFreqDist((colldf.loc[i,"date"][:-3], colldf.loc[i,"resource"].strip()) for i in range(numoftexts))
    CFDhelpers.cfd2csv(cfdmonthlyresourcecount, csvfolder+os.sep+"cfdmonthlyresourcecount.csv", colnames=['month','resource','count'])
    #cfdresourcesmonthcount = ConditionalFreqDist((resource, month) for month in cfdmonthlyresourcecount.conditions() for resource in list(cfdmonthlyresourcecount[month]))
    imgpath = IOtools.ensure_dir(os.path.join(imgoutpath, "resourcebasednewscount"))
    visualize_monthly_cfd(cfd=cfdmonthlyresourcecount, figuretitle="Monthly news count for each resource", ylabel="news published", imgoutpath=imgpath)



    
    cfdmonthlycategorycount = ConditionalFreqDist((colldf.loc[i,"date"][:-3], 
                                                   "-".join(map(lambda x : str(x).strip(), [colldf.loc[i, "resource"], colldf.loc[i, "category"]]))) 
                                                  for i in range(numoftexts)) 
    CFDhelpers.cfd2csv(cfdmonthlycategorycount, csvfolder+os.sep+"cfdmonthlycategorycount.csv", ["month", "category", 'count'])
    #cfdcatsmonthcount = ConditionalFreqDist((category, month) for month in cfdmonthlycategorycount.conditions() for category in list(cfdmonthlycategorycount[month]))
    
    imgpath = IOtools.ensure_dir(os.path.join(imgoutpath, "categorybasednewscount"))
    for canoniccatname, rescatnamedct in rescatmap.iteritems():
        monthresourcepairs = []
        
        for resourcename, origcats in rescatnamedct.iteritems(): 
        
            for origcatname in origcats:
                #resourcename = rescat.split("-")[0]
                rescat = "-".join([resourcename, origcatname])
                for month in cfdmonthlycategorycount.conditions():
                    numofoccurrences = cfdmonthlycategorycount[month][rescat]
                    #print resourcename," had ",numofoccurrences," times texts in :",rescat," during ",month
                    for i in range(numofoccurrences):
                        monthresourcepairs.append((month, resourcename))
                        
        cfdmonthlyresourcecount_percat = ConditionalFreqDist(monthresourcepairs) 
            
        print canoniccatname,resourcename," * ",rescat," : ",len(cfdmonthlyresourcecount_percat.conditions()),"  ",cfdmonthlyresourcecount_percat.N()
        figuretitle = "Monthly news count of each resource over category "+canoniccatname.upper()
        visualize_monthly_cfd(cfdmonthlyresourcecount_percat, figuretitle, ylabel="news published", imgoutpath=imgpath)
Developer: dicleoztur | Project: subjectivity_detection | Lines: 59 | Source: collectionstats.py
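
CFDhelpers.cfd2csv is another project-internal helper. From the calls above it plausibly flattens a ConditionalFreqDist into long-format (condition, item, count) rows under the given column names; a hypothetical reconstruction:

# Hypothetical cfd2csv: flatten an nltk ConditionalFreqDist into a
# long-format CSV with one (condition, item, count) row per pair.
import csv

def cfd2csv(cfd, csvpath, colnames):
    with open(csvpath, "w") as f:
        writer = csv.writer(f)
        writer.writerow(colnames)
        for cond in cfd.conditions():
            for item, count in cfd[cond].items():
                writer.writerow([cond, item, count])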



Note: The sentimentfinding.IOtools class examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; the copyright of the source code belongs to the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.

