Python joblib.dump Function Code Examples


This article collects typical usage examples of the sklearn.externals.joblib.dump function in Python. If you are wondering what dump does, how to call it, or what real-world uses look like, the curated code examples below should help.



Below are 20 code examples of the dump function, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
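Before turning to the collected examples, here is a minimal, self-contained sketch of the basic dump/load round trip. Note that sklearn.externals.joblib has been deprecated and removed in newer scikit-learn releases, so recent code imports the standalone joblib package directly; the file name svm_iris.pkl below is purely illustrative.

from sklearn.datasets import load_iris
from sklearn.svm import SVC
import joblib  # legacy code uses: from sklearn.externals import joblib

# Fit a small classifier on the bundled iris dataset
X, y = load_iris(return_X_y=True)
clf = SVC(kernel='rbf', gamma='scale')
clf.fit(X, y)

# Persist the fitted estimator; higher compress values give smaller files at some CPU cost
joblib.dump(clf, 'svm_iris.pkl', compress=3)

# Later, or in another process, restore the estimator and use it
restored = joblib.load('svm_iris.pkl')
print(restored.predict(X[:5]))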

Example 1: main

def main():
	pos_features_path = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/featuresPos160_60.npy'
	neg_features_path = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/featuresNeg160_60.npy'

	saving_loc = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/'

	pos_features = np.load(pos_features_path)[:, 0::3]
	neg_features = np.load(neg_features_path) [:, 0::3]
	train, val = prepare_features(pos_features, neg_features, True, saving_loc)
	del pos_features
	del neg_features

	clf = svm.SVC(kernel='rbf')

	logging.info('starts training')
	clf.fit(train[:, 1:], train[:, 0])
	del train
	logging.info('starts predicting')
	predicted = clf.predict(val[:, 1:])
	conf_mat = confusion_matrix(predicted, val[:, 0])
	acc = accuracy_score(val[:, 0], predicted)
	del val
	del predicted
	logging.info('Confusion matrix: %s' %conf_mat)
	logging.info('Accuracy: %s' %acc)
	logging.info('saving model')
	joblib.dump(clf, join(saving_loc, 'svm_rbf_scaled.pkl'))
Developer: Sh-imaa, Project: Kaggle_whales, Lines: 27, Source: svm_model.py


Example 2: predict_test

 def predict_test(self,clf, tag):
     np.random.seed(1919) 
     if os.path.isdir('../model/'+tag) == False: 
         os.mkdir('../model/'+tag)   
     print "Dir made : "+str(datetime.datetime.now())
     
     print "Fit Started : "+str(datetime.datetime.now())
     clf.fit(self.X, self.y)    
     
     print "Dump Started : "+str(datetime.datetime.now())    
     joblib.dump(clf, '../model/'+tag+'/'+tag+'.pkl')
     
     print "Prediction Started : "+str(datetime.datetime.now())
     output_arr = clf.predict_proba(self.x_test)
     
     f = open("../data/output_"+str(tag), "w")
     f.write("id,Class_1,Class_2,Class_3,Class_4,Class_5,Class_6,Class_7,Class_8,Class_9\n")
     i=1
     for row in output_arr:
         row = map(str, row)
         f.write(str(i)+","+str(",".join(row))+"\n")
         i += 1
     f.close()
     
     print "ALL DONE : "+str(datetime.datetime.now())
Developer: raman-sharma, Project: ML-Learn, Lines: 25, Source: svc.py


Example 3: train

 def train(self, seg_corpus, dep_corpus, path=None):
     assert seg_corpus.keys() == dep_corpus.keys()
     features, labels = self.extract_features_from_corpus(
         dep_corpus, seg_corpus=seg_corpus)
     self._train(features, labels)
     if path is not None:
         joblib.dump(self.pipeline, path, compress=1, cache_size=1e9)
Developer: discourse-lab, Project: DiscourseSegmenter, Lines: 7, Source: matesegmenter.py


Example 4: train_svm

def train_svm(feedback, classes):

    print "Building n-grams"

    X_train_counts = count_vect.fit_transform(feedback) # converting string to the bag - of - words form, using bi-grams

    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts) # weighting the words from bag-of-words form

    '''
    The vocabulary used here on the training set needs to be saved for classification matters,
    what this means is that the number of words during training is going to be different of
    the number of words in classification but the count still needs to be the same, in other
    words if the word "animal" is associated with the integer 3 during training, it has to be
    associated again with number 3 during classification
    '''

    pickle.dump(count_vect.vocabulary_,open("feature.pkl","wb")) # Saving vocab

    print "Saving words features"

    c = svm.SVC(kernel = 'rbf',gamma = 0.001, C = 100)

    print "training SVM"

    c.fit(X_train_tfidf, classes) # Training the SVM

    print "Training completed..."

    joblib.dump(c, 'filename.pkl', compress= 9) # Saving the Support vectors
Developer: Guaramy, Project: WeWillRuleML, Lines: 29, Source: train_svm.py


Example 5: train

    def train(self):
        with gzip.open(constants.TRAIN_EXPANDED, 'r') as source:
            reader = csv.reader(source)
            next(reader, None)

            n_sample = 0
            labels = []
            features = []
            for feature_vector in reader:
                s_features = feature_vector[2:6] + feature_vector[7:]
                s_label = int(feature_vector[1])
                features.append(s_features)
                labels.append(s_label)

                # print 'features', s_features
                # print 'labels', s_label
                # print 'norm features', normalized_features

                n_sample += 1
                if n_sample % 500000 == 0:
                    self.clf.partial_fit(features, labels)
                    features = []
                    labels = []
                    print 'Processing sample [%s]' % n_sample

        print 'Finished training'
        print 'Estimated parameters [%s]' % self.clf.get_params()

        # saving model into file
        joblib.dump(self.clf, constants.MODEL_FILENAME, compress=9)
Developer: trein, Project: criteo-challenge, Lines: 30, Source: training.py


Example 6: fetch_vgg_architecture

def fetch_vgg_architecture(caffemodel_parsed=None, caffemodel_protobuffer=None):
    """Fetch a pickled version of the caffe model, represented as list of
    dictionaries."""

    default_filename = os.path.join(VGG_PATH, 'vgg.pickle')
    if caffemodel_parsed is not None:
        if os.path.exists(caffemodel_parsed):
            return joblib.load(caffemodel_parsed)
        else:
            if os.path.exists(default_filename):
                import warnings
                warnings.warn('Did not find %s, but found %s. Loading it.' %
                              (caffemodel_parsed, default_filename))
                return joblib.load(default_filename)
    else:
        if os.path.exists(default_filename):
            return joblib.load(default_filename)

    # We didn't find the file: let's create it by parsing the protobuffer
    protobuf_file = fetch_vgg_protobuffer_file(caffemodel_protobuffer)
    model = _parse_caffe_model(protobuf_file)

    if caffemodel_parsed is not None:
        joblib.dump(model, caffemodel_parsed)
    else:
        joblib.dump(model, default_filename)

    return model
Developer: Faruk-Ahmed, Project: sklearn-theano, Lines: 28, Source: vgg.py


Example 7: train_model

def train_model(feats_csv):

	df = pd.DataFrame()
	df = pd.read_csv(feats_csv).iloc[:,1:]

	y = np.ravel(df.iloc[:,-1:])
	X = np.array(df.iloc[:,:-1])

	############ 15 Best selected features using ANOVA F-value score function ###############
	X_new = SelectKBest(f_classif, k=15).fit_transform(X, y)
	selected_features = SelectKBest(f_classif, k=15).fit(X, y).get_support(indices = True)

	############ KNN manhattan ###############
	##### preprocessing: data scaling######## 
	min_max_scaler = MinMaxScaler()
	X_new = min_max_scaler.fit_transform(X_new)

	model = KNeighborsClassifier(n_neighbors = 1,algorithm = 'brute',metric = 'manhattan',weights = 'uniform')
	model.fit(X_new,y)

	newdir = '../kNN_clfr'
	os.mkdir(newdir)

	joblib.dump(model, os.path.join(newdir,'kNN.pkl')) 

	return
Developer: LefterisStamellos, Project: swoice_assignment_gender, Lines: 26, Source: train_model.py


Example 8: train_classifier

def train_classifier():
    pos_feat_path = positive_features_path
    neg_feat_path = negative_features_path

    model_path = classifier_model_path

    feature_vectors = []
    labels = []

    for feat_path in glob.glob(os.path.join(pos_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(1)

    for feat_path in glob.glob(os.path.join(neg_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(0)

    classifier = LinearSVC()
    print "Training classifier"
    classifier.fit(feature_vectors, labels)
    print "Classifier successfully trained"
    if not os.path.isdir(os.path.split(model_path)[0]):
        os.makedirs(os.path.split(model_path)[0])
    joblib.dump(classifier, model_path)
Developer: ranveeraggarwal, Project: traffic-light-detection, Lines: 32, Source: train_classifier.py


Example 9: trainClassifier

def trainClassifier(clf,
      dir,model_file='adaptive',
      data_file='train',
      seed=1234,
    ):
  '''
   Train classifier
  '''
  print 'Training classifier'

  data = np.loadtxt('{0}/train_{1}.dat'.format(dir,data_file)) 
  traindata = data[:,:-1]
  targetdata = data[:,-1]
  pdb.set_trace()

  if model_g == 'mlp':
    train_mlp((traindata, targetdata), save_file='{0}/{1}_F0_F1.pkl'.format(dir,model_file))
  else:
    rng = np.random.RandomState(seed)
    indices = rng.permutation(traindata.shape[0])
    traindata = traindata[indices]
    targetdata = targetdata[indices]
    scores = cross_validation.cross_val_score(clf, traindata, targetdata)
    print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2)
    clf.fit(traindata,targetdata)
    #clf.plot_importance_matrix(vars_names)
    joblib.dump(clf, '{0}/{1}_F0_F1.pkl'.format(dir,model_file))
Developer: jgpavez, Project: transfer_learning, Lines: 27, Source: transfer_learning_ratios.py


Example 10: setTestInputforNN

    def setTestInputforNN(self, collection={}, sel_words=[]):
        list_of_strings = []
        list_of_salary = []
        count = 0
        sel_words_set = set(sel_words)
        sel_words_list = list(sel_words_set)
        for document in collection:
            count += 1
            title = document.getTitle()
            description = document.getDescription()
            salary = (int)(document.getSalaryNorm())
            words = re.split(" ", title) + re.split(" ", description)
            # words = [x for x in words if x in sel_words]
            wordsUnique = set(words)
            wordsUnique = wordsUnique & sel_words_set
            words = [x for x in words if x in wordsUnique]
            documentString = " ".join(words)
            list_of_strings.append(documentString)
            list_of_salary.append(salary)

            if not (count % 15000):
                break

        vectorizer = CountVectorizer(vocabulary=sel_words, min_df=1)
        self.inp = vectorizer.fit_transform(list_of_strings)
        from sklearn.externals import joblib

        joblib.dump(self.inp.tocsr(), "test_dataset_in.joblib")

        self.inp_size = len(list_of_strings)
        output = np.array(list_of_salary)
        self.target = output.reshape(len(list_of_strings), 1)
        joblib.dump(self.target, "test_dataset_out.joblib")

        return [self.inp, self.target]
Developer: ananthd88, Project: CSCE-633-670-Project, Lines: 35, Source: neural_lab.py


Example 11: save_classifier

def save_classifier(cl, fn, use_joblib=True, **kwargs):
    """Save a classifier to disk.

    Parameters
    ----------
    cl : classifier object
        Pickleable object or a classify.VigraRandomForest object.
    fn : string
        Writeable path/filename.
    use_joblib : bool, optional
        Whether to prefer joblib persistence to pickle.
    kwargs : keyword arguments
        Keyword arguments to be passed on to either `pck.dump` or 
        `joblib.dump`.

    Returns
    -------
    None

    Notes
    -----
    For joblib persistence, `compress=3` is the default.
    """
    if isinstance(cl, VigraRandomForest):
        cl.save_to_disk(fn)
    elif use_joblib and sklearn_available:
        if not kwargs.has_key('compress'):
            kwargs['compress'] = 3
        joblib.dump(cl, fn, **kwargs)
    else:
        with open(fn, 'w') as f:
            pck.dump(cl, f, protocol=kwargs.get('protocol', -1))
Developer: cmor, Project: gala, Lines: 32, Source: classify.py


Example 12: trainModel

def trainModel():

	# Data preprocessing
	data_train = joblib.load('data/data_train.pkl')
	label_train = joblib.load('data/label_train.pkl')

	print data_train.shape

	clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.1, degree=0.1, gamma=1.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=True)

	#clf.set_params(kernel='rbf')

	print clf

	print data_train.shape
	print label_train.shape

	print 'begin training....'
	clf.fit(data_train,label_train)
	print 'finish training....'
	print clf
	joblib.dump(clf, 'model/svm.pkl')
	
	return None
Developer: BLKStone, Project: EasyPyPR, Lines: 26, Source: SVMtrain.py


Example 13: rf_fit

def rf_fit():

	train_inp,valid_inp,train_target,valid_target = prepare_input()

	rf = RandomForestClassifier(random_state=31,n_jobs=-1,verbose=1,n_estimators=100,min_samples_split=5)
	start = time.time()

	rf.fit(train_inp,train_target)

	end = time.time()
	print "fitting took {:0.4} seconds".format(end-start)

	training_output = rf.predict_proba(train_inp)
	validation_output = rf.predict_proba(valid_inp)

	training_error = log_loss(train_target,training_output)
	validation_error = log_loss(valid_target,validation_output)

	print "Train error: {:02.4f}".format(training_error)
	print "Validation error: {:02.4f}".format(validation_error)


	joblib.dump(rf,rf_filename)


	return rf
Developer: btaborsky, Project: red-hat-kaggle, Lines: 26, Source: red_hat.py


Example 14: xgb_fit

def xgb_fit():

	train_inp,valid_inp,train_target,valid_target = prepare_input()

	dtrain = xgb.DMatrix(train_inp,label=train_target)
	dvalid = xgb.DMatrix(valid_inp)


	param = {'max_depth':10, 'eta':0.02, 'silent':1, 'objective':'binary:logistic' }
	param['nthread'] = 4
	param['eval_metric'] = 'auc'
	param['subsample'] = 0.7
	param['colsample_bytree']= 0.7
	param['min_child_weight'] = 0
	param['booster'] = "gblinear"

	watchlist  = [(dtrain,'train')]
	num_round = 300
	early_stopping_rounds=10
	bst = xgb.train(param, dtrain, num_round, watchlist,early_stopping_rounds=early_stopping_rounds)

	joblib.dump(bst,bst_filename)


	train_pred = bst.predict(xgb.DMatrix(train_inp))
	valid_pred = bst.predict(xgb.DMatrix(valid_inp))
Developer: btaborsky, Project: red-hat-kaggle, Lines: 26, Source: red_hat.py


Example 15: train

def train(trainingData, pklFile):
	# ========================================================================= #
	# =============== STEP 1. DEFINE OUTPUT LEARNT MODEL FILE ================= #
	# ========================================================================= #
	if (pklFile == ''):
		os.system('rm -rf learntModel & mkdir learntModel')
		pklFile = 'learntModel/learntModel.pkl'
	
	# ========================================================================= #
	# ================= STEP 2. PREPARE AND FORMATTING DATA =================== #
	# ========================================================================= #
	NUMBER_OF_FEATURES = len(trainingData[0]) - 1
	NUMBER_OF_TRAINING_POINTS = len(trainingData)

	x = trainingData[:, range(0, NUMBER_OF_FEATURES)]
	y = trainingData[:, NUMBER_OF_FEATURES]
	
	# ========================================================================= #
	# ============== STEP 3. DECLARE PRIMITIVES BEFORE THE PARTY ============== #
	# ========================================================================= #
	minSquareError = np.inf
	targetAlpha = None
	alphas = np.logspace(-10, -2, 500)			
	
	# ========================================================================= #
	# ===== STEP 4. PERFORM FITTING WITH THE BEST ALPHA AND SAVE THE MODEL ==== #
	# ========================================================================= #
	clf = LogisticRegressionCV(Cs=alphas)
	clf.fit(x, y)
	joblib.dump(clf, pklFile)
	
	return {"intercept": clf.intercept_, "coef":clf.coef_, "alpha":clf.C_, "accuracy":clf.score(x,y)}
Developer: ZAZAZakari, Project: ML-Algorithm, Lines: 32, Source: logisticRegression.py


Example 16: perform_cluster_analysis

def perform_cluster_analysis(dataset):

    filename = 'elbow_plot.dat'

    if os.path.exists(cpath + filename):
        data = joblib.load(cpath + filename)
        K = data[0]
        meandistortions = data[1]
    else:
        X = dataset
        print 'X Shape: ', X.shape

        #K = range(1, 50, 5)
        K = [1, 2, 5, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
        #K = [1, 2, 5, 10, 50, 100]
        meandistortions = []
        cluster_centers = []
        for k in K:
            print k
            kmeans = KMeans(n_clusters=k, n_jobs=3)
            kmeans.fit(X)
            #import ipdb; ipdb.set_trace() # debugging code
            #meandistortions.append(sum(np.min(cdist(X, kmeans.cluster_centers_, 'euclidean'), axis=1))/X.shape[0])
            meandistortions.append(kmeans.inertia_)
            cluster_centers.append(kmeans.cluster_centers_)
            #print 'k: ', k, ' Cluster Centers: ', kmeans.cluster_centers_
        data = [K, meandistortions]
        joblib.dump(data, cpath + filename, compress=8)

    plot_name = "elbow_plot.png"
    title = 'Selecting k with the Elbow Method'
    xlabel = 'Number of Clusters (k)'
    ylabel = 'Average Distortion'
    xyplot(K, meandistortions, 0, 0, 0, 0, title, xlabel, ylabel, staticpath + plot_name, line=1, y_log=0)
Developer: tilanukwatta, Project: scicano, Lines: 34, Source: arxiv_analysis_v3.py


Example 17: trainFixed

def trainFixed():
	'''
	train a machine learner based on data from some fixed parameter point.
	save to fixed.pkl
	'''
	print "Entering train fixed"
	trainAndTarget = np.loadtxt('traindata.dat')
	traindata = trainAndTarget[:,0:2]
	targetdata = trainAndTarget[:,2]

	massPoints = np.unique(traindata[:,1])
	chunk = len(traindata)/len(massPoints)/2
	shift = len(traindata)/2


	#plot for fixed mu=0 training
	print "training fixed"
	clf = svm.NuSVR()
	reducedtrain = 	np.concatenate((traindata[4*chunk : 5*chunk,0], 
		traindata[4*chunk+shift : 5*chunk+shift , 0]))
	reducedtarget = np.concatenate((targetdata[4*chunk : 5*chunk], 
		targetdata[4*chunk+shift : 5*chunk+shift]))

	clf.fit(reducedtrain.reshape((len(reducedtrain),1)), reducedtarget)  
	joblib.dump(clf, 'fixed.pkl') 
Developer: cranmer, Project: parametrized-learning, Lines: 25, Source: GausSigOnExpBkg.py


Example 18: compute

def compute(filename):
	fileArray = filename.split("/")
	operator = fileArray[-1].split(".")[0]

	print 'SVM received operator = ' + operator
	ip = open(filename)
	i = 0
	A = []
	B = []
	for line in ip:
		temp = []
		elm = line.rstrip("\n").split(" ");
		temp = [np.exp(float(row)) for row in range(len(elm)-1) ]

		# A.append([temp[0] ,temp[1]])
		A.append(temp)
		B.append(np.float(elm[len(elm)-1]))

	clf = svm.SVR()
	clf.fit(A,B)


	# f.close()
	modelURI = "Models/"+operator+"/"
	if not os.path.exists(modelURI):
		os.makedirs(modelURI)
	
	modelURI += 'm.pkl'
	joblib.dump(clf, modelURI)

	print 'SUCCESS,' + modelURI + ' written to disk.'
Developer: pvam, Project: pvam.github.io, Lines: 31, Source: SVMRegression.py


Example 19: gbm_fit

def gbm_fit(params, cv_folds):
    gbm = GradientBoostingRegressor(**params)
    gbm.fit(x_train, y_train)

    # Check accuracy of model
    # No need for validation data because of cross validation
    # Training data is split up into cv_folds folds:
    # Model trained on (cv_folds - 1) of the folds; last fold is saved as validation set
    cv_scores_mse = cross_validation.cross_val_score(gbm, x_train, y_train, cv=cv_folds, scoring='mean_squared_error')
    print '\nModel Report'
    print ('MSE Score: Mean - %.7g | Std - %.7g | Min - %.7g | Max - %.7g' %
          (np.mean(cv_scores_mse), np.std(cv_scores_mse), np.min(cv_scores_mse), np.max(cv_scores_mse)))
    feat_imp = pd.Series(gbm.feature_importances_, features).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    plt.show()

    # Check actual performance on test data
    final_predictions = gbm.predict(x_test)
    test['health_score_in_week'] = final_predictions
    test.to_csv(output_file, columns=['user_id', 'date', 'steps', 'total_sleep', 'resting_hr',
                                      'step_week_slope', 'sleep_week_slope', 'hr_week_slope',
                                      'curr_health_score', 'health_score_in_week'])

    # Save the model to file 'health_prediction.pkl'
    joblib.dump(gbm, 'health_prediction.pkl', compress=1)
Developer: Fitomo, Project: Prediction-Service, Lines: 26, Source: predicted_health_algorithm.py


Example 20: normalize_one

def normalize_one(name):
  out_name = path(name).splitext()[0] + '.dat'
  a = sio.loadmat(name)
  desc = a['desc']
  frames = a['frames']
  normalize_sift(desc, inplace=True)
  dump(dict(frames=frames, desc=desc), out_name, compress=3)
Developer: yairmov, Project: carUnderstanding, Lines: 7, Source: matlab_dense_sift.py



Note: The sklearn.externals.joblib.dump examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by various developers; copyright remains with the original authors. Consult each project's license before redistributing or using the code, and do not reproduce this article without permission.

