This article collects typical usage examples of the sklearn.externals.joblib.dump function in Python. If you are wondering what dump does, how to call it, or what real-world uses look like, the curated examples below should help.
A total of 20 dump code examples are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
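Before the examples, here is a minimal, self-contained sketch of the basic dump/load round trip. The estimator, file name, and data below are placeholders chosen for illustration; note also that sklearn.externals.joblib is deprecated in newer scikit-learn releases in favor of the standalone joblib package.

from sklearn.externals import joblib   # in newer scikit-learn: import joblib
from sklearn.linear_model import LogisticRegression
import numpy as np

# Train a toy model on random data (placeholder data, for illustration only).
X = np.random.rand(100, 4)
y = (X[:, 0] > 0.5).astype(int)
clf = LogisticRegression().fit(X, y)

# Persist the fitted estimator to disk; compress is optional.
joblib.dump(clf, 'model.pkl', compress=3)

# Later (or in another process), restore it and predict.
clf_restored = joblib.load('model.pkl')
print(clf_restored.predict(X[:5]))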
Example 1: main
def main():
    pos_features_path = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/featuresPos160_60.npy'
    neg_features_path = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/featuresNeg160_60.npy'
    saving_loc = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/'
    pos_features = np.load(pos_features_path)[:, 0::3]
    neg_features = np.load(neg_features_path)[:, 0::3]
    train, val = prepare_features(pos_features, neg_features, True, saving_loc)
    del pos_features
    del neg_features
    clf = svm.SVC(kernel='rbf')
    logging.info('starts training')
    clf.fit(train[:, 1:], train[:, 0])
    del train
    logging.info('starts predicting')
    predicted = clf.predict(val[:, 1:])
    conf_mat = confusion_matrix(predicted, val[:, 0])
    acc = accuracy_score(val[:, 0], predicted)
    del val
    del predicted
    logging.info('Confusion matrix: %s' % conf_mat)
    logging.info('Accuracy: %s' % acc)
    logging.info('saving model')
    joblib.dump(clf, join(saving_loc, 'svm_rbf_scaled.pkl'))
Developer: Sh-imaa | Project: Kaggle_whales | Lines: 27 | Source: svm_model.py
Example 2: predict_test
def predict_test(self, clf, tag):
    np.random.seed(1919)
    if os.path.isdir('../model/'+tag) == False:
        os.mkdir('../model/'+tag)
    print "Dir made : "+str(datetime.datetime.now())
    print "Fit Started : "+str(datetime.datetime.now())
    clf.fit(self.X, self.y)
    print "Dump Started : "+str(datetime.datetime.now())
    joblib.dump(clf, '../model/'+tag+'/'+tag+'.pkl')
    print "Prediction Started : "+str(datetime.datetime.now())
    output_arr = clf.predict_proba(self.x_test)
    f = open("../data/output_"+str(tag), "w")
    f.write("id,Class_1,Class_2,Class_3,Class_4,Class_5,Class_6,Class_7,Class_8,Class_9\n")
    i = 1
    for row in output_arr:
        row = map(str, row)
        f.write(str(i)+","+str(",".join(row))+"\n")
        i += 1
    f.close()
    print "ALL DONE : "+str(datetime.datetime.now())
Developer: raman-sharma | Project: ML-Learn | Lines: 25 | Source: svc.py
Example 3: train
def train(self, seg_corpus, dep_corpus, path=None):
    assert seg_corpus.keys() == dep_corpus.keys()
    features, labels = self.extract_features_from_corpus(
        dep_corpus, seg_corpus=seg_corpus)
    self._train(features, labels)
    if path is not None:
        joblib.dump(self.pipeline, path, compress=1, cache_size=1e9)
Developer: discourse-lab | Project: DiscourseSegmenter | Lines: 7 | Source: matesegmenter.py
Example 4: train_svm
def train_svm(feedback, classes):
    print "Building n-grams"
    X_train_counts = count_vect.fit_transform(feedback)  # convert strings to the bag-of-words form, using bi-grams
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)  # weight the words from the bag-of-words form
    '''
    The vocabulary used here on the training set needs to be saved for classification:
    the number of words seen during training will differ from the number seen at
    classification time, but the word-to-index mapping must stay the same. In other
    words, if the word "animal" is associated with the integer 3 during training, it
    has to be associated with 3 again during classification.
    '''
    pickle.dump(count_vect.vocabulary_, open("feature.pkl", "wb"))  # saving vocab
    print "Saving words features"
    c = svm.SVC(kernel='rbf', gamma=0.001, C=100)
    print "training SVM"
    c.fit(X_train_tfidf, classes)  # training the SVM
    print "Training completed..."
    joblib.dump(c, 'filename.pkl', compress=9)  # saving the support vectors
Developer: Guaramy | Project: WeWillRuleML | Lines: 29 | Source: train_svm.py
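Note on Example 4: the inline comment stresses that the training vocabulary must be reused at classification time. Below is a hedged sketch of what the matching prediction-side code could look like; the file names feature.pkl and filename.pkl come from the example above, while the vectorizer settings and the input string are illustrative assumptions, not part of the original project.

import pickle
from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# Rebuild the vectorizer with the saved training vocabulary (file name taken from the example above).
vocabulary = pickle.load(open("feature.pkl", "rb"))
count_vect = CountVectorizer(vocabulary=vocabulary, ngram_range=(1, 2))  # bi-grams assumed, as in training
tfidf_transformer = TfidfTransformer()

# Restore the trained SVM and classify new feedback strings.
clf = joblib.load('filename.pkl')
new_feedback = ["great product, works as advertised"]  # placeholder input
X_counts = count_vect.transform(new_feedback)
# Simplification: ideally the fitted TfidfTransformer would also be persisted at training time
# and reloaded here, so that the idf weights match those used during training.
X_tfidf = tfidf_transformer.fit_transform(X_counts)
print(clf.predict(X_tfidf))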
Example 5: train
def train(self):
    with gzip.open(constants.TRAIN_EXPANDED, 'r') as source:
        reader = csv.reader(source)
        next(reader, None)
        n_sample = 0
        labels = []
        features = []
        for feature_vector in reader:
            s_features = feature_vector[2:6] + feature_vector[7:]
            s_label = int(feature_vector[1])
            features.append(s_features)
            labels.append(s_label)
            # print 'features', s_features
            # print 'labels', s_label
            # print 'norm features', normalized_features
            n_sample += 1
            if n_sample % 500000 == 0:
                self.clf.partial_fit(features, labels)
                features = []
                labels = []
                print 'Processing sample [%s]' % n_sample
    print 'Finished training'
    print 'Estimated parameters [%s]' % self.clf.get_params()
    # saving model into file
    joblib.dump(self.clf, constants.MODEL_FILENAME, compress=9)
Developer: trein | Project: criteo-challenge | Lines: 30 | Source: training.py
Example 6: fetch_vgg_architecture
def fetch_vgg_architecture(caffemodel_parsed=None, caffemodel_protobuffer=None):
    """Fetch a pickled version of the caffe model, represented as list of
    dictionaries."""
    default_filename = os.path.join(VGG_PATH, 'vgg.pickle')
    if caffemodel_parsed is not None:
        if os.path.exists(caffemodel_parsed):
            return joblib.load(caffemodel_parsed)
        else:
            if os.path.exists(default_filename):
                import warnings
                warnings.warn('Did not find %s, but found %s. Loading it.' %
                              (caffemodel_parsed, default_filename))
                return joblib.load(default_filename)
    else:
        if os.path.exists(default_filename):
            return joblib.load(default_filename)
    # We didn't find the file: let's create it by parsing the protobuffer
    protobuf_file = fetch_vgg_protobuffer_file(caffemodel_protobuffer)
    model = _parse_caffe_model(protobuf_file)
    if caffemodel_parsed is not None:
        joblib.dump(model, caffemodel_parsed)
    else:
        joblib.dump(model, default_filename)
    return model
Developer: Faruk-Ahmed | Project: sklearn-theano | Lines: 28 | Source: vgg.py
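Note on Example 6: the function implements a common joblib caching pattern, namely return a previously dumped artifact if it exists, otherwise compute it, dump it, and return it. A stripped-down sketch of the same idea follows; the helper name, file path, and compute function are hypothetical, not part of sklearn-theano.

import os
from sklearn.externals import joblib

def cached(path, compute, *args, **kwargs):
    # Load a previously dumped result if present; otherwise compute, dump, and return it.
    if os.path.exists(path):
        return joblib.load(path)
    result = compute(*args, **kwargs)
    joblib.dump(result, path)
    return result

# Hypothetical usage: features = cached('features.pkl', extract_features, raw_data)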
Example 7: train_model
def train_model(feats_csv):
    df = pd.DataFrame()
    df = pd.read_csv(feats_csv).iloc[:, 1:]
    y = np.ravel(df.iloc[:, -1:])
    X = np.array(df.iloc[:, :-1])
    ############ 15 best features selected using the ANOVA F-value score function ###############
    X_new = SelectKBest(f_classif, k=15).fit_transform(X, y)
    selected_features = SelectKBest(f_classif, k=15).fit(X, y).get_support(indices=True)
    ############ KNN manhattan ###############
    ##### preprocessing: data scaling ########
    min_max_scaler = MinMaxScaler()
    X_new = min_max_scaler.fit_transform(X_new)
    model = KNeighborsClassifier(n_neighbors=1, algorithm='brute', metric='manhattan', weights='uniform')
    model.fit(X_new, y)
    newdir = '../kNN_clfr'
    os.mkdir(newdir)
    joblib.dump(model, os.path.join(newdir, 'kNN.pkl'))
    return
Developer: LefterisStamellos | Project: swoice_assignment_gender | Lines: 26 | Source: train_model.py
Example 8: train_classifier
def train_classifier():
    pos_feat_path = positive_features_path
    neg_feat_path = negative_features_path
    model_path = classifier_model_path
    feature_vectors = []
    labels = []
    for feat_path in glob.glob(os.path.join(pos_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(1)
    for feat_path in glob.glob(os.path.join(neg_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(0)
    classifier = LinearSVC()
    print "Training classifier"
    classifier.fit(feature_vectors, labels)
    print "Classifier successfully trained"
    if not os.path.isdir(os.path.split(model_path)[0]):
        os.makedirs(os.path.split(model_path)[0])
    joblib.dump(classifier, model_path)
Developer: ranveeraggarwal | Project: traffic-light-detection | Lines: 32 | Source: train_classifier.py
Example 9: trainClassifier
def trainClassifier(clf,
                    dir, model_file='adaptive',
                    data_file='train',
                    seed=1234,
                    ):
    '''
    Train classifier
    '''
    print 'Training classifier'
    data = np.loadtxt('{0}/train_{1}.dat'.format(dir, data_file))
    traindata = data[:, :-1]
    targetdata = data[:, -1]
    pdb.set_trace()
    if model_g == 'mlp':
        train_mlp((traindata, targetdata), save_file='{0}/{1}_F0_F1.pkl'.format(dir, model_file))
    else:
        rng = np.random.RandomState(seed)
        indices = rng.permutation(traindata.shape[0])
        traindata = traindata[indices]
        targetdata = targetdata[indices]
        scores = cross_validation.cross_val_score(clf, traindata, targetdata)
        print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2)
        clf.fit(traindata, targetdata)
        #clf.plot_importance_matrix(vars_names)
        joblib.dump(clf, '{0}/{1}_F0_F1.pkl'.format(dir, model_file))
Developer: jgpavez | Project: transfer_learning | Lines: 27 | Source: transfer_learning_ratios.py
Example 10: setTestInputforNN
def setTestInputforNN(self, collection={}, sel_words=[]):
    list_of_strings = []
    list_of_salary = []
    count = 0
    sel_words_set = set(sel_words)
    sel_words_list = list(sel_words_set)
    for document in collection:
        count += 1
        title = document.getTitle()
        description = document.getDescription()
        salary = (int)(document.getSalaryNorm())
        words = re.split(" ", title) + re.split(" ", description)
        # words = [x for x in words if x in sel_words]
        wordsUnique = set(words)
        wordsUnique = wordsUnique & sel_words_set
        words = [x for x in words if x in wordsUnique]
        documentString = " ".join(words)
        list_of_strings.append(documentString)
        list_of_salary.append(salary)
        if not (count % 15000):
            break
    vectorizer = CountVectorizer(vocabulary=sel_words, min_df=1)
    self.inp = vectorizer.fit_transform(list_of_strings)
    from sklearn.externals import joblib
    joblib.dump(self.inp.tocsr(), "test_dataset_in.joblib")
    self.inp_size = len(list_of_strings)
    output = np.array(list_of_salary)
    self.target = output.reshape(len(list_of_strings), 1)
    joblib.dump(self.target, "test_dataset_out.joblib")
    return [self.inp, self.target]
Developer: ananthd88 | Project: CSCE-633-670-Project | Lines: 35 | Source: neural_lab.py
Example 11: save_classifier
def save_classifier(cl, fn, use_joblib=True, **kwargs):
    """Save a classifier to disk.

    Parameters
    ----------
    cl : classifier object
        Pickleable object or a classify.VigraRandomForest object.
    fn : string
        Writeable path/filename.
    use_joblib : bool, optional
        Whether to prefer joblib persistence to pickle.
    kwargs : keyword arguments
        Keyword arguments to be passed on to either `pck.dump` or
        `joblib.dump`.

    Returns
    -------
    None

    Notes
    -----
    For joblib persistence, `compress=3` is the default.
    """
    if isinstance(cl, VigraRandomForest):
        cl.save_to_disk(fn)
    elif use_joblib and sklearn_available:
        if not kwargs.has_key('compress'):
            kwargs['compress'] = 3
        joblib.dump(cl, fn, **kwargs)
    else:
        with open(fn, 'w') as f:
            pck.dump(cl, f, protocol=kwargs.get('protocol', -1))
Developer: cmor | Project: gala | Lines: 32 | Source: classify.py
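Note on Example 11: a classifier saved this way is typically restored with a matching loader. The sketch below is a hypothetical counterpart, not code from the gala project; it simply tries joblib.load first and falls back to the standard pickle module.

import pickle as pck
from sklearn.externals import joblib

def load_classifier(fn, use_joblib=True):
    # Hypothetical counterpart to save_classifier above: restore with joblib when
    # requested, otherwise (or on failure) fall back to plain pickle.
    if use_joblib:
        try:
            return joblib.load(fn)
        except Exception:
            pass
    with open(fn, 'rb') as f:
        return pck.load(f)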
Example 12: trainModel
def trainModel():
    # data preprocessing
    data_train = joblib.load('data/data_train.pkl')
    label_train = joblib.load('data/label_train.pkl')
    print data_train.shape
    clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.1, degree=0.1, gamma=1.0,
                  kernel='rbf', max_iter=-1, probability=False, random_state=None,
                  shrinking=True, tol=0.001, verbose=True)
    #clf.set_params(kernel='rbf')
    print clf
    print data_train.shape
    print label_train.shape
    print 'begin training....'
    clf.fit(data_train, label_train)
    print 'finish training....'
    print clf
    joblib.dump(clf, 'model/svm.pkl')
    return None
Developer: BLKStone | Project: EasyPyPR | Lines: 26 | Source: SVMtrain.py
Example 13: rf_fit
def rf_fit():
    train_inp, valid_inp, train_target, valid_target = prepare_input()
    rf = RandomForestClassifier(random_state=31, n_jobs=-1, verbose=1, n_estimators=100, min_samples_split=5)
    start = time.time()
    rf.fit(train_inp, train_target)
    end = time.time()
    print "fitting took {:0.4} seconds".format(end-start)
    training_output = rf.predict_proba(train_inp)
    validation_output = rf.predict_proba(valid_inp)
    training_error = log_loss(train_target, training_output)
    validation_error = log_loss(valid_target, validation_output)
    print "Train error: {:02.4f}".format(training_error)
    print "Validation error: {:02.4f}".format(validation_error)
    joblib.dump(rf, rf_filename)
    return rf
Developer: btaborsky | Project: red-hat-kaggle | Lines: 26 | Source: red_hat.py
Example 14: xgb_fit
def xgb_fit():
    train_inp, valid_inp, train_target, valid_target = prepare_input()
    dtrain = xgb.DMatrix(train_inp, label=train_target)
    dvalid = xgb.DMatrix(valid_inp)
    param = {'max_depth': 10, 'eta': 0.02, 'silent': 1, 'objective': 'binary:logistic'}
    param['nthread'] = 4
    param['eval_metric'] = 'auc'
    param['subsample'] = 0.7
    param['colsample_bytree'] = 0.7
    param['min_child_weight'] = 0
    param['booster'] = "gblinear"
    watchlist = [(dtrain, 'train')]
    num_round = 300
    early_stopping_rounds = 10
    bst = xgb.train(param, dtrain, num_round, watchlist, early_stopping_rounds=early_stopping_rounds)
    joblib.dump(bst, bst_filename)
    train_pred = bst.predict(xgb.DMatrix(train_inp))
    valid_pred = bst.predict(xgb.DMatrix(valid_inp))
Developer: btaborsky | Project: red-hat-kaggle | Lines: 26 | Source: red_hat.py
Example 15: train
def train(trainingData, pklFile):
    # ========================================================================= #
    # =============== STEP 1. DEFINE OUTPUT LEARNT MODEL FILE ================ #
    # ========================================================================= #
    if (pklFile == ''):
        os.system('rm -rf learntModel & mkdir learntModel')
        pklFile = 'learntModel/learntModel.pkl'
    # ========================================================================= #
    # ================ STEP 2. PREPARE AND FORMAT THE DATA =================== #
    # ========================================================================= #
    NUMBER_OF_FEATURES = len(trainingData[0]) - 1
    NUMBER_OF_TRAINING_POINTS = len(trainingData)
    x = trainingData[:, range(0, NUMBER_OF_FEATURES)]
    y = trainingData[:, NUMBER_OF_FEATURES]
    # ========================================================================= #
    # ============= STEP 3. DECLARE PRIMITIVES BEFORE THE PARTY ============== #
    # ========================================================================= #
    minSquareError = np.inf
    targetAlpha = None
    alphas = np.logspace(-10, -2, 500)
    # ========================================================================= #
    # ==== STEP 4. PERFORM FITTING WITH THE BEST ALPHA AND SAVE THE MODEL ==== #
    # ========================================================================= #
    clf = LogisticRegressionCV(Cs=alphas)
    clf.fit(x, y)
    joblib.dump(clf, pklFile)
    return {"intercept": clf.intercept_, "coef": clf.coef_, "alpha": clf.C_, "accuracy": clf.score(x, y)}
Developer: ZAZAZakari | Project: ML-Algorithm | Lines: 32 | Source: logisticRegression.py
Example 16: perform_cluster_analysis
def perform_cluster_analysis(dataset):
    filename = 'elbow_plot.dat'
    if os.path.exists(cpath + filename):
        data = joblib.load(cpath + filename)
        K = data[0]
        meandistortions = data[1]
    else:
        X = dataset
        print 'X Shape: ', X.shape
        #K = range(1, 50, 5)
        K = [1, 2, 5, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
        #K = [1, 2, 5, 10, 50, 100]
        meandistortions = []
        cluster_centers = []
        for k in K:
            print k
            kmeans = KMeans(n_clusters=k, n_jobs=3)
            kmeans.fit(X)
            #import ipdb; ipdb.set_trace() # debugging code
            #meandistortions.append(sum(np.min(cdist(X, kmeans.cluster_centers_, 'euclidean'), axis=1))/X.shape[0])
            meandistortions.append(kmeans.inertia_)
            cluster_centers.append(kmeans.cluster_centers_)
            #print 'k: ', k, ' Cluster Centers: ', kmeans.cluster_centers_
        data = [K, meandistortions]
        joblib.dump(data, cpath + filename, compress=8)
    plot_name = "elbow_plot.png"
    title = 'Selecting k with the Elbow Method'
    xlabel = 'Number of Clusters (k)'
    ylabel = 'Average Distortion'
    xyplot(K, meandistortions, 0, 0, 0, 0, title, xlabel, ylabel, staticpath + plot_name, line=1, y_log=0)
Developer: tilanukwatta | Project: scicano | Lines: 34 | Source: arxiv_analysis_v3.py
Example 17: trainFixed
def trainFixed():
    '''
    Train a machine learner based on data from some fixed parameter point.
    Save to fixed.pkl.
    '''
    print "Entering train fixed"
    trainAndTarget = np.loadtxt('traindata.dat')
    traindata = trainAndTarget[:, 0:2]
    targetdata = trainAndTarget[:, 2]
    massPoints = np.unique(traindata[:, 1])
    chunk = len(traindata)/len(massPoints)/2
    shift = len(traindata)/2
    # plot for fixed mu=0 training
    print "training fixed"
    clf = svm.NuSVR()
    reducedtrain = np.concatenate((traindata[4*chunk : 5*chunk, 0],
                                   traindata[4*chunk+shift : 5*chunk+shift, 0]))
    reducedtarget = np.concatenate((targetdata[4*chunk : 5*chunk],
                                    targetdata[4*chunk+shift : 5*chunk+shift]))
    clf.fit(reducedtrain.reshape((len(reducedtrain), 1)), reducedtarget)
    joblib.dump(clf, 'fixed.pkl')
Developer: cranmer | Project: parametrized-learning | Lines: 25 | Source: GausSigOnExpBkg.py
Example 18: compute
def compute(filename):
    fileArray = filename.split("/")
    operator = fileArray[-1].split(".")[0]
    print 'SVM received operator = ' + operator
    ip = open(filename)
    i = 0
    A = []
    B = []
    for line in ip:
        temp = []
        elm = line.rstrip("\n").split(" ")
        temp = [np.exp(float(row)) for row in range(len(elm)-1)]
        # A.append([temp[0], temp[1]])
        A.append(temp)
        B.append(np.float(elm[len(elm)-1]))
    clf = svm.SVR()
    clf.fit(A, B)
    # f.close()
    modelURI = "Models/"+operator+"/"
    if not os.path.exists(modelURI):
        os.makedirs(modelURI)
    modelURI += 'm.pkl'
    joblib.dump(clf, modelURI)
    print 'SUCCESS,' + modelURI + ' written to disk.'
Developer: pvam | Project: pvam.github.io | Lines: 31 | Source: SVMRegression.py
Example 19: gbm_fit
def gbm_fit(params, cv_folds):
    gbm = GradientBoostingRegressor(**params)
    gbm.fit(x_train, y_train)
    # Check accuracy of model
    # No need for validation data because of cross validation
    # Training data is split up into cv_folds folds:
    # model trained on (cv_folds - 1) of the folds; last fold is used as validation set
    cv_scores_mse = cross_validation.cross_val_score(gbm, x_train, y_train, cv=cv_folds, scoring='mean_squared_error')
    print '\nModel Report'
    print ('MSE Score: Mean - %.7g | Std - %.7g | Min - %.7g | Max - %.7g' %
           (np.mean(cv_scores_mse), np.std(cv_scores_mse), np.min(cv_scores_mse), np.max(cv_scores_mse)))
    feat_imp = pd.Series(gbm.feature_importances_, features).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    plt.show()
    # Check actual performance on test data
    final_predictions = gbm.predict(x_test)
    test['health_score_in_week'] = final_predictions
    test.to_csv(output_file, columns=['user_id', 'date', 'steps', 'total_sleep', 'resting_hr',
                                      'step_week_slope', 'sleep_week_slope', 'hr_week_slope',
                                      'curr_health_score', 'health_score_in_week'])
    # Save the model to file 'health_prediction.pkl'
    joblib.dump(gbm, 'health_prediction.pkl', compress=1)
Developer: Fitomo | Project: Prediction-Service | Lines: 26 | Source: predicted_health_algorithm.py
Example 20: normalize_one
def normalize_one(name):
    out_name = path(name).splitext()[0] + '.dat'
    a = sio.loadmat(name)
    desc = a['desc']
    frames = a['frames']
    normalize_sift(desc, inplace=True)
    dump(dict(frames=frames, desc=desc), out_name, compress=3)
Developer: yairmov | Project: carUnderstanding | Lines: 7 | Source: matlab_dense_sift.py
Note: The sklearn.externals.joblib.dump examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation hosting platforms; the snippets are drawn from open-source projects contributed by their authors. Copyright of the source code remains with the original authors; please consult the corresponding project's License before redistributing or using it. Do not reproduce this article without permission.