• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python utility.load_data函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中utility.load_data函数的典型用法代码示例。如果您正苦于以下问题:Python load_data函数的具体用法?Python load_data怎么用?Python load_data使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了load_data函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: main

def main():
    revision = 1

    print("Loading the classifier")
    classifier = utility.load_model("train_rtext_rev{}".format(revision))
    
    print("Reading in the training data")
    train = utility.load_data("training", "rtext")

    print("Predicting the rest of the training data")
    pred = np.ravel(classifier.predict(list(train['rtext_bcat'])))
    score = utility.rmsle_log(pred, train['votes_useful_log'])
    print "Score:", score

    print("Writing out new training data")
    del train['rtext_bcat']
    train['votes_useful_log_rtextpred_sgd'] = pd.Series(pred, index=train.index)
    utility.save_data(train, "training", "rtext_sgd_rev{}".format(revision))
    
    print("Reading in the test data")
    test = utility.load_data("test", "rtext")
    tepred = np.ravel(classifier.predict(list(test['rtext_bcat'])))

    print("Writing out new test data")
    del test['rtext_bcat']
    test['votes_useful_log_rtextpred_sgd'] = pd.Series(tepred, index=test.index)
    utility.save_data(test, "test", "rtext_sgd_rev{}".format(revision))
    test['votes'] = pd.Series(np.exp(tepred) + 1, index=test.index)

    print("Writing out a new submission file")
    utility.write_submission(test, "rtextsgd_sub_rev{}".format(revision))
开发者ID:mrphilroth,项目名称:kaggle-yelp,代码行数:31,代码来源:predict_rtext_sgd.py


示例2: main

def main():
    print("Loading the classifier")
    classifier = utility.load_model("fullsgd_model_rev{}".format(revision))

    print("Reading in the training data")
    train = utility.load_data("training", "finalinput")
    truth = train['votes_useful_log']
    del train['votes_useful_log']

    print("Predicting the training data")
    logpred = np.ravel(classifier.predict(train.values[:,1:]))
    score = utility.rmsle_log(logpred, truth)
    print "Score:", score

    print("Reading in the test data")
    test = utility.load_data("test", "finalinput")
    del test['votes_useful_log']

    print("Predicting the test data")
    logpred = np.ravel(classifier.predict(test.values[:,1:]))
    pred = np.exp(np.array(logpred, dtype=np.float64)) - 1
    test['votes'] = pred
    
    print("Writing out a new submission file")
    utility.write_submission(test, "fullsgd_sub_rev{}.csv".format(revision))
开发者ID:mrphilroth,项目名称:kaggle-yelp,代码行数:25,代码来源:predict.py


示例3: main

def main():
    """Run questions 4 to 6 of the assignment on the restaurant data file.

    Prints the grade sums for NYC and per borough, draws the grade-over-time
    plots for NYC and each borough, then plots the scores of the top 10
    restaurant types over time together with their colour map.
    """
    csv_path = '../../assignment10_data/restaurants.csv'
    boroughs = ['BRONX', 'BROOKLYN', 'MANHATTAN', 'QUEENS', 'STATEN ISLAND']

    # Load and clean the grade data.
    grades = clean_data(load_data(csv_path, ['CAMIS','BORO','GRADE','GRADE DATE']))

    # Question 4: sum of test_grade in NYC and in each borough.
    nyc_total, borough_totals = grade_sum(grades)
    print('The sum of test_grade in NYC is: {} \n'.format(nyc_total))
    print('The sum of test_grade in each boroughs is: \n {}'.format(borough_totals))

    # Question 5: grade over time for the whole city, then per borough.
    grade_overtime_plot(grades, 'nyc')
    for name in boroughs:
        borough_grades = grades[grades['BORO'] == name]
        grade_overtime_plot(borough_grades, name.lower())

    # Question 6: restrict to the top 10 restaurant types and plot them.
    cuisines = load_data(csv_path, ['CAMIS','CUISINE DESCRIPTION'])
    top_types = get_top_10_nyc(cuisines)
    cuisine_grades = load_data(csv_path, ['CAMIS','CUISINE DESCRIPTION', 'GRADE', 'GRADE DATE'])
    cuisine_grades = clean_data(cuisine_grades)
    in_top_types = cuisine_grades['CUISINE DESCRIPTION'].isin(top_types)
    cuisine_grades = cuisine_grades[in_top_types]
    # Score over time for each restaurant type.
    scores = top_10_grade_overtime(cuisine_grades, top_types)
    top_10_plot(scores)
    # Correlation between any two restaurant types, drawn as a colour map.
    top_10_colormap(scores)
开发者ID:ariesyi329,项目名称:assignment10,代码行数:27,代码来源:assignment10.py


示例4: main

def main():
    """
    Interactive entry point that presents the results of this assignment.

    The user is asked, repeatedly, whether to see:
    1) Income distribution across all countries for a given year:
       the user inputs a year from 1800 to 2012.
       Results will be saved as a .png file.
    2) Income distribution by region in recent years:
       the user inputs the first year, last year and year gap of a year
       range and selects a plot type, boxplots or histograms.
       Results will be saved as a .pdf file under results/.

    Pressing Ctrl-C at any point prints 'Bye!' and exits.
    """

    #load countries and income data
    # NOTE(review): countries is not used in this function; the load is kept
    # because its purpose is not visible from here.
    countries = load_data('countries.csv')
    income = load_data('indicator gapminder gdp_per_capita_ppp.csv')
    #transform income data set
    income = trans_data(income)

    try:
        while raw_input('To see income distribution across all countries? (y/n) ') == 'y':
            try:
                year = raw_input('Which year? ') #select a year
                income_distr(income, year)
            # Exception, not a bare except: Ctrl-C must reach the outer handler.
            except Exception:
                print('Please input a year from 1800 to 2012')

        while raw_input('To see income distribution by region in recent years? (y/n) ') == 'y':
            try:
                from_year = int(raw_input('From which year? ')) #input the first year
                to_year = int(raw_input('To which year? ')) #input the last year
                year_gap = int(raw_input('Year gap? ')) #input a year gap
                pltype = raw_input('Plot type: boxplots or histograms? (b/h) ') #select a plot type
                if pltype == 'b':
                    pdf_name = 'results/Income by region from {0} to {1}_boxplot.pdf'
                elif pltype == 'h':
                    pdf_name = 'results/Income by region from {0} to {1}_hist.pdf'
                else:
                    # Previously fell through to pp.close() on an undefined
                    # (or already closed) pp and reported a misleading error.
                    print('Please choose a plot type: b or h')
                    continue

                pp = PdfPages(pdf_name.format(from_year, to_year)) #create a pdf file to save plots
                try:
                    for i in xrange(from_year, to_year+1, year_gap):
                        if pltype == 'b':
                            fig = income_region(1, str(i))
                        else:
                            fig = income_region(0, str(i))
                            plt.suptitle('{}'.format(i))
                        pp.savefig(fig)
                finally:
                    pp.close() #close the pdf file even if a plot fails
            except Exception:
                print('please input years from 1800 to 2012 and try again!')

    except KeyboardInterrupt:
        print('Bye!')
        sys.exit()
开发者ID:ariesyi329,项目名称:assignment9,代码行数:50,代码来源:assignment9.py


示例5: optimal_svm

def optimal_svm(optimal_c):
    """
    Fit a linear SVM with C=optimal_c on the training files and print the
    AUC of its decision values on the test files.

    optimal_c is the C value chosen during model selection.
    """
    # Load the feature/label files.
    features_train, labels_train = load_data('train_X.csv', 'train_y.csv')
    features_test, labels_test = load_data('test_X.csv', 'test_y.csv')

    # Both calls pass the training features as the second argument
    # (presumably the data the PCA is fitted on -- confirm in data_pca).
    reduced_train = data_pca(0.95, features_train, features_train)
    reduced_test = data_pca(0.95, features_train, features_test)

    # Flatten the labels to 1-D arrays.
    labels_train = np.array(labels_train).ravel()
    labels_test = np.array(labels_test).ravel()

    # Linear SVM with the selected C.
    classifier = svm.SVC(kernel='linear', C=optimal_c, class_weight='auto')
    fitted = classifier.fit(reduced_train, labels_train)
    decision_values = fitted.decision_function(reduced_test)
    false_pos, true_pos, _ = roc_curve(labels_test, decision_values)

    print(auc(false_pos, true_pos))
开发者ID:LEONOB2014,项目名称:DS-GA1001-Project,代码行数:18,代码来源:svm_testing.py


示例6: run

def run(train_file, test_file, output_file):
    """Train the XGBoost model and save its predicted probabilities.

    The data is loaded from train_file and test_file via utils.load_data,
    the model is fitted on the training rows and labels, and the result of
    predict_proba on the test rows is passed to utils.save_prediction
    together with output_file.
    """
    train, labels, test = utils.load_data(train_file, test_file)

    # Fixed hyper-parameters for this run.
    hyperparams = dict(
        max_iterations=500,
        max_depth=12,
        min_child_weight=4.9208250938262745,
        row_subsample=.9134478530382129,
        min_loss_reduction=.5132278416508804,
        column_subsample=.730128689911957,
        step_size=.1,
    )
    model = XGBoost(**hyperparams)
    model.fit(train, labels)

    probabilities = model.predict_proba(test)
    utils.save_prediction(output_file, probabilities)
开发者ID:shqyking,项目名称:BigDataProject,代码行数:9,代码来源:xgboost.py


示例7: main

def main():
    revision = 4

    print("Loading the classifier")
    classifier = utility.load_model("train_rtext_rev{}".format(revision))
    
    print("Reading in the training data")
    train = utility.load_data("training", "rtext")

    print("Predicting the rest of the training data")
    bunch = 50000
    pred = np.zeros(len(train))
    for ibunch in range(int(len(train) / bunch)) :
        beg = ibunch * bunch
        end = (ibunch + 1) * 50000
        mtrain = train.ix[beg:end - 1]
        mpred = np.ravel(classifier.predict(list(mtrain['rtext_bcat'])))
        pred[beg:end] = mpred

    beg = int(len(train) / bunch) * bunch
    mtrain = train.ix[beg:]
    mpred = np.ravel(classifier.predict(list(mtrain['rtext_bcat'])))
    pred[beg:] = mpred

    score = utility.rmsle_log(pred, train['votes_useful_log'])
    print "Score:", score

    print("Writing out new training data")
    del train['rtext_bcat']
    train['votes_useful_log_rtextpred'] = pd.Series(pred, index=train.index)
    utility.save_data(train, "training", "rtext_rev{}".format(revision))
    
    print("Reading in the test data")
    test = utility.load_data("test", "rtext")
    tepred = np.ravel(classifier.predict(list(test['rtext_bcat'])))

    print("Writing out new test data")
    del test['rtext_bcat']
    test['votes_useful_log_rtextpred'] = pd.Series(tepred, index=test.index)
    utility.save_data(test, "test", "rtext_rev{}".format(revision))
    test['votes'] = pd.Series(np.exp(tepred) + 1, index=test.index)

    print("Writing out a new submission file")
    utility.write_submission(test, "rtextrf_sub_rev{}.csv".format(revision))
开发者ID:mrphilroth,项目名称:kaggle-yelp,代码行数:44,代码来源:predict_rtext.py


示例8: main

def main():
    """Fit the pipeline on the "finalinput" training data and save it.

    ``revision`` is not defined in this function; it is read from the
    enclosing module.  The ``votes_useful_log`` column is the target and is
    removed from the frame before fitting; the first column of ``.values`` is
    skipped (presumably an identifier column -- confirm).
    """
    print("Reading in the training data")
    frame = utility.load_data("training", "finalinput")
    target = np.ravel(np.array(frame['votes_useful_log']))
    del frame['votes_useful_log']

    print("Extracting features and training review text model")
    model = get_pipeline()
    features = frame.values[:, 1:]
    model.fit(features, np.array(target))

    print("Saving the classifier")
    utility.save_model(model, "fullsgd_model_rev{}".format(revision))
开发者ID:mrphilroth,项目名称:kaggle-yelp,代码行数:12,代码来源:train.py


示例9: main

def main():
    """Cross-validate a range of SVM C values on the training data and plot the result."""

    # Load the training files and reduce the features with data_pca.
    features, labels = load_data('train_X.csv', 'train_y.csv')
    frame = data_pca(0.95, features, features)
    # The label column is added to the reduced frame itself (no copy is made).
    frame['Y'] = labels

    # Candidate values of the hyperparameter C: 10**-9 up to 10**1.
    candidates = [10**power for power in range(-9,2)]

    # Cross validation (third argument 5); AUCs per sample for each C.
    fold_aucs = xValSVM(frame, 'Y', 5, candidates)

    # Average and standard error of the AUC for each C, then plot them.
    mean_auc, err_auc = avg_stderr(fold_aucs, candidates)
    plotxValSVM(mean_auc, err_auc, candidates)
开发者ID:LEONOB2014,项目名称:DS-GA1001-Project,代码行数:15,代码来源:svm_val.py


示例10: main

def main():
    """Train the review-text model on a random sample of the training data.

    100000 row positions are drawn without replacement, the pipeline from
    get_pipeline() is fitted on the sampled ``rtext_bcat`` values against
    ``votes_useful_log``, and the result is saved as
    ``train_rtext_rev<revision>``.
    """
    revision = 4

    print("Reading in the training data")
    frame = utility.load_data("training", "rtext")
    # random.sample raises ValueError if the frame has fewer than 100000 rows.
    sampled_rows = random.sample(range(len(frame)), 100000)
    subset = frame.ix[sampled_rows]

    print("Extracting features and training review text model")
    model = get_pipeline()
    texts = list(subset['rtext_bcat'])
    targets = list(subset['votes_useful_log'])
    model.fit(texts, targets)

    print("Saving the classifier")
    utility.save_model(model, "train_rtext_rev{}".format(revision))
开发者ID:mrphilroth,项目名称:kaggle-yelp,代码行数:15,代码来源:train_rtext.py


示例11: main

def main():
    """Build the "rtext" data sets from the business and review data.

    The training and test business frames are concatenated, the columns named
    in ``delbuscats`` are dropped, and ``categories`` is replaced by
    ``procbcat`` (``process_bcat`` applied to each value).  For both
    "training" and "test", the review frame loses the columns named in
    ``delrevcats``, gains ``votes_useful_log = log(votes_useful + 1)`` when
    it has a ``votes_useful`` column, is inner-merged with the business
    frame, and gets ``rtext_bcat = text + procbcat`` in place of those two
    columns before being saved as 'rtext'.

    ``delbuscats``, ``delrevcats`` and ``process_bcat`` come from the
    enclosing module.
    """
    trabus = utility.load_data("training", "business")
    tesbus = utility.load_data("test", "business")
    bus = pd.concat((trabus, tesbus))
    for cat in delbuscats:
        # Test column membership rather than hasattr(): hasattr is also true
        # for DataFrame methods/attributes, and del would then raise KeyError.
        if cat in bus.columns:
            del bus[cat]
    bus['procbcat'] = pd.Series(map(process_bcat, bus['categories']), bus.index)
    del bus['categories']

    for s in ["training", "test"]:

        rev = utility.load_data(s, "review")
        for cat in delrevcats:
            if cat in rev.columns:
                del rev[cat]
        # Only add the log target when the frame has a votes_useful column.
        if 'votes_useful' in rev.columns:
            rev['votes_useful_log'] = np.log(rev['votes_useful'] + 1)
        rev = pd.merge(rev, bus, 'inner')

        rev['rtext_bcat'] = rev['text'] + rev['procbcat']
        del rev['procbcat']
        del rev['text']

        utility.save_data(rev, s, 'rtext')
开发者ID:mrphilroth,项目名称:kaggle-yelp,代码行数:24,代码来源:preprocess_rtext.py


示例12: printPOS

def printPOS(pos_words):
    """Print a tagged sentence as two aligned rows: the words, then the tags.

    pos_words is a list of (word, tag) items.  Each word and its tag are
    right-justified to the longer of the two and followed by one space, so
    every tag sits directly under its word.  The rows are preceded by a line
    of dashes and followed by an empty line.
    """
    word_cells = []
    tag_cells = []
    for p in pos_words:
        # Column width is the longer of the word and its tag.
        width = max(len(p[0]), len(p[1]))
        word_cells.append(p[0].rjust(width) + ' ')
        tag_cells.append(p[1].rjust(width) + ' ')

    # Single-argument print(...) gives the same output under Python 2's
    # print statement and Python 3's print function.
    print('-----------')
    print(''.join(word_cells))
    print(''.join(tag_cells))
    print("")





if __name__ == '__main__':
    # Exactly one command-line argument is expected: the input file.
    if len(sys.argv) == 2:
        qaTests = load_data(sys.argv[1])
        showAllTaggedSentences(qaTests)
    else:
        print("Usage: python showTaggedSentences.py <input file>")
        sys.exit(0)



开发者ID:wangxu724,项目名称:NLPproject,代码行数:26,代码来源:showTaggedSentences.py


示例13: mask_load

	def mask_load(self):
		"""Set ``self.url_masks`` from the data stored under "urlmasks"."""
		# {} is passed as load_data's second argument
		# (presumably the fallback when nothing is stored -- confirm).
		stored_masks = utility.load_data("urlmasks", {})
		self.url_masks = stored_masks
开发者ID:Merola,项目名称:pynik,代码行数:2,代码来源:title_reader.py


示例14: load_urls

	def load_urls(self):
		"""Set ``self.url_lists`` from the data stored under "urls"."""
		# {} is passed as load_data's second argument
		# (presumably the fallback when nothing is stored -- confirm).
		stored_urls = utility.load_data("urls", {})
		self.url_lists = stored_urls
开发者ID:Merola,项目名称:pynik,代码行数:2,代码来源:title_reader.py


示例15: on_load

	def on_load(self):
		"""Set ``self.id_directory`` and ``self.id_presets`` from stored data."""
		# {} is passed as load_data's second argument in both calls
		# (presumably the fallback when nothing is stored -- confirm).
		stored_directory = utility.load_data('schema_id', {})
		self.id_directory = stored_directory
		stored_presets = utility.load_data('schema_fav', {})
		self.id_presets = stored_presets
开发者ID:IcEBnd,项目名称:pyirkbot,代码行数:3,代码来源:ical_parser.py


示例16: on_load

	def on_load(self):
		"""Set ``self.favorites`` from the data stored under "favorites"."""
		# {} is passed as load_data's second argument
		# (presumably the fallback when nothing is stored -- confirm).
		stored_favorites = utility.load_data("favorites", {})
		self.favorites = stored_favorites
开发者ID:raek,项目名称:pynik,代码行数:2,代码来源:favorites.py


示例17: train_weakgbm

import utility
import numpy as np
import pandas as pd
import multiprocessing
from sklearn.grid_search import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import GradientBoostingRegressor

# NOTE(review): presumably the number of weak GBMs trained further down;
# confirm against the code that reads it.
nweakgbms = 18

# Load both data sets through the project's utility module.
dftest = utility.load_data('test')
dftrain = utility.load_data('train')

# Frames that start from the ids: the test one from dftest.id, the training
# one from a 0..len(dftrain)-1 id paired with the ACTION value.
dftestpreds = pd.DataFrame(dftest.id)
dftrainpreds = pd.DataFrame({'id':np.arange(len(dftrain)),
                             'ACTION':dftrain.ACTION})

# ACTION becomes the target vector y and is removed from dftrain before the
# feature matrix X is built, so X does not contain it.
y = np.array(dftrain.ACTION)
del dftrain['ACTION']
X = np.array(dftrain)

# Every test column except the first.
# NOTE(review): presumably column 0 is the id column -- confirm.
Xtest = np.array(dftest)[:,1:]

def train_weakgbm(i) :

    cols = np.ones(9)
    cols[i % X.shape[1]] = 0
    smallX = np.compress(cols, X, axis=1)

    X_cvtrain, X_cvtest, y_cvtrain, y_cvtest = train_test_split(
开发者ID:jamesjohnson92,项目名称:kaggle-amazonaccess,代码行数:31,代码来源:weakgbms.py


示例18: on_load

	def on_load(self):
		"""Set ``self.location`` from the data stored under 'festern_bbq'."""
		# The string literal is passed as load_data's second argument
		# (presumably the fallback when nothing is stored -- confirm).
		stored_location = utility.load_data('festern_bbq', "okänt")
		self.location = stored_location
开发者ID:IcEBnd,项目名称:pyirkbot,代码行数:2,代码来源:festern_bbq.py


示例19: on_load

 def on_load(self):
     """Set the private ``__aliases`` attribute from the data stored under "stockaliases"."""
     # {} is passed as load_data's second argument
     # (presumably the fallback when nothing is stored -- confirm).
     stored_aliases = utility.load_data("stockaliases", {})
     self.__aliases = stored_aliases
开发者ID:osund,项目名称:pynik,代码行数:2,代码来源:stock.py


示例20: on_load

	def on_load(self):
		"""Set ``self.places`` from the data stored under "postnr_addresses"."""
		# {} is passed as load_data's second argument
		# (presumably the fallback when nothing is stored -- confirm).
		stored_places = utility.load_data("postnr_addresses", {})
		self.places = stored_places
开发者ID:IcEBnd,项目名称:pyirkbot,代码行数:2,代码来源:postnr.py



注:本文中的utility.load_data函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python utility.read_url函数代码示例发布时间:2022-05-26
下一篇:
Python utility.flatten函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap