This article collects typical usage examples of the sklearn.preprocessing.scale function in Python. If you have been wondering how exactly the scale function is used in Python, or looking for real examples of scale in action, the hand-picked code samples below may help.
A total of 20 code examples of the scale function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
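Before diving into the collected examples, here is a minimal, self-contained sketch of what preprocessing.scale does (the toy array and the printed checks are illustrative and not taken from any of the projects below): it standardizes each feature column to zero mean and unit variance and returns a new array, so the result must be assigned to a variable.

import numpy as np
from sklearn import preprocessing

# toy feature matrix: 4 samples, 2 features (hypothetical data)
X = np.array([[1.0, 10.0],
              [2.0, 20.0],
              [3.0, 30.0],
              [4.0, 40.0]])

# scale() standardizes each column to zero mean and unit variance
# and returns a new array; the input X is left untouched by default
X_scaled = preprocessing.scale(X)

print(X_scaled.mean(axis=0))  # approximately [0. 0.]
print(X_scaled.std(axis=0))   # [1. 1.]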
Example 1: classify
def classify():
    # read training data
    lbls1, X, y = readCsv(TRAIN_CSV, True)
    # read test data
    lbls2, Y, z = readTestCsv(TEST_CSV)
    # conversion to numpy arrays
    X = np.array(X)
    X = X.astype(float)
    y = np.array(y)
    Y = np.array(Y)
    Y = Y.astype(float)
    # perform feature scaling for zero mean and unit variance;
    # scale() returns a new array, so the result must be assigned back
    X = scale(X, with_mean=True, with_std=True)
    Y = scale(Y, with_mean=True, with_std=True)
    lin_svc = svm.LinearSVC(C=4.0, dual=False)
    lin_svc.fit(X, y)
    bestmodel = lin_svc
    preds = bestmodel.predict(Y)
    writePredictions(lbls2, preds)
Developer: godofwharf, Project: ImageClassification, Lines: 25, Source: model.py
Example 2: create_data_provider
def create_data_provider(dataset, force_write_cache=False, center_data=True,
                         scale_data=True, add_bias_feature=True, normalize_datapoints=False,
                         center_labels=False, scale_labels=False,
                         transform_labels_to_plus_minus_one=True, test_size=0.0):
    data, labels = dataset.get_data(force_write_cache=force_write_cache)
    copy = False
    if scale_data:
        data = preprocessing.scale(data, copy=copy)
    elif center_data:
        data = preprocessing.scale(data, with_std=False, copy=copy)
    if scale_labels:
        labels = preprocessing.scale(labels, copy=copy)
    elif center_labels:
        labels = preprocessing.scale(labels, with_std=False, copy=copy)
    if add_bias_feature:
        data = np.hstack((data, np.ones((data.shape[0], 1))))
    if normalize_datapoints:
        data /= np.linalg.norm(data, axis=1)[:, np.newaxis]
    if transform_labels_to_plus_minus_one:
        labels = labels * 2.0 - 1.0
    test_provider = None
    if test_size > 0.0:
        data, data_test, labels, labels_test = cross_validation.train_test_split(data, labels, test_size=test_size)
        test_provider = DataProvider(data_test, labels_test)
    return DataProvider(data, labels, test_provider=test_provider)
Developer: yk, Project: mldatasets, Lines: 25, Source: mldatasets.py
Example 3: extractFeatures
def extractFeatures(data, n):
    logging.info('Features: extracting {0}...'.format(n))
    # create DF
    columns = []
    col_names = ['open', 'high', 'low', 'close', 'volume']
    for col_name in col_names:
        for m in xrange(1, n+1):
            columns.append('{0}_{1}'.format(col_name, m))
    # pprint(columns)
    df = pd.DataFrame(dtype=float, columns=columns)
    pb = ProgressBar(maxval=len(data)).start()
    for i in xrange(n, len(data)+1):
        pb.update(i)
        slice = data.ix[i-n:i]
        # print slice
        scale(slice, axis=0, copy=False)  # copy=False scales the slice in place
        # print slice
        cntr = 0
        item = {}
        for slice_index, slice_row in slice.iterrows():
            cntr += 1
            # print slice_index
            # print slice_row
            for col in slice.columns:
                item['{0}_{1}'.format(col, cntr)] = slice_row[col]
        # pprint(item)
        df.loc[i] = item
        # break
    pb.finish()
    logging.info('Features: extracted')
    return df
Developer: vishnuvr, Project: trading, Lines: 34, Source: generator.py
Example 4: split_into_chunks
def split_into_chunks(data, train, predict, step, binary=True, scale=True):
    X, Y = [], []
    for i in range(0, len(data), step):
        try:
            x_i = data[i:i+train]
            y_i = data[i+train+predict]
            # Use it only for daily return time series
            if binary:
                if y_i > 0.:
                    y_i = [1., 0.]
                else:
                    y_i = [0., 1.]
                if scale: x_i = preprocessing.scale(x_i)
            else:
                timeseries = np.array(data[i:i+train+predict])
                if scale: timeseries = preprocessing.scale(timeseries)
                x_i = timeseries[:-1]
                y_i = timeseries[-1]
        except:
            break
        X.append(x_i)
        Y.append(y_i)
    return X, Y
Developer: Rachnog, Project: Deep-Trading, Lines: 29, Source: processing.py
Example 5: standardize
def standardize(self):
    """
    Standardize the training and test sets.
    """
    print('Standardization')
    self.tr = scale(self.tr)
    self.te = scale(self.te)
Developer: Hossein-Noroozpour, Project: PyHDM, Lines: 7, Source: HDataManager.py
Example 6: buildModel
def buildModel(size):
    with open('Sentiment Analysis Dataset.csv', 'rb') as csvfile:
        pos_tweets = []
        neg_tweets = []
        spamreader = csv.reader(csvfile, delimiter=',')
        for row in spamreader:
            if row[1] == '1':
                if not (len(pos_tweets) > size):
                    pos_tweets.append(_cleanTweet(row[3]))
            else:
                if not (len(neg_tweets) > size):
                    neg_tweets.append(_cleanTweet(row[3]))
    y = np.concatenate((np.ones(len(pos_tweets[0:size])), np.zeros(len(neg_tweets[0:size]))))
    x_train, x_test, y_train, y_test = train_test_split(np.concatenate((pos_tweets[0:size], neg_tweets[0:size])), y, test_size=0.2)
    x_train = _cleanText(x_train)
    x_test = _cleanText(x_test)
    n_dim = 100
    # Initialize model and build vocab
    imdb_w2v = Word2Vec(size=n_dim, min_count=10)
    imdb_w2v.build_vocab(x_train)
    imdb_w2v.train(x_train)
    train_vecs = np.concatenate([buildWordVector(z, n_dim, imdb_w2v) for z in x_train])
    train_vecs = scale(train_vecs)
    # Train word2vec on test tweets
    imdb_w2v.train(x_test)
    # Build test tweet vectors then scale
    test_vecs = np.concatenate([buildWordVector(z, n_dim, imdb_w2v) for z in x_test])
    test_vecs = scale(test_vecs)
    lr = SGDClassifier(loss='log', penalty='l1')
    lr.fit(train_vecs, y_train)
    imdb_w2v.save("imdb_w2v")
    f = open("Accuracy.txt", "w")
    f.write(str(lr.score(test_vecs, y_test)) + " " + str(size*2))
    f.close()
Developer: phugiadang, Project: CSCI-4308-Open-Sources-Data-Analytics, Lines: 34, Source: TweetAnalWord2Vec.py
Example 7: trainModel
def trainModel():
    # Model parameters
    W = tf.Variable([.1000], tf.float32)
    b = tf.Variable([-.1000], tf.float32)
    # Model input and output
    x = tf.placeholder(tf.float32, shape=None)
    linear_model = W * x + b
    y = tf.placeholder(tf.float32)
    # loss: sum of the squared errors
    loss = tf.reduce_sum(tf.square(linear_model - y))
    # optimizer
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train = optimizer.minimize(loss)
    # training data, standardized to zero mean and unit variance
    x_train = preprocessing.scale(mouseClickX)
    y_train = preprocessing.scale(mouseClickY)
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)  # reset values to the initial (wrong) guesses
    for i in range(500):
        sess.run([train], {x: x_train, y: y_train})
        if i % 50 == 0:
            # to visualize the result and improvement
            try:
                ax.lines.remove(lines[0])
            except Exception:
                pass
            print(x_train, y_train, i)
            prediction_value = sess.run(linear_model, feed_dict={x: mouseClickX})
            # plot the prediction
            lines = ax.plot(mouseClickX, prediction_value, 'r-', lw=5)
            plt.pause(1)
Developer: allamtb, Project: neural-networks-and-deep-learning, Lines: 32, Source: 2+实时回归用户在界面输入的散点.py
Example 8: scale
def scale(self):
    # FIXME: this cannot work this way; scaling must be done with
    # the joined set.
    if self.X is not None:  # use "is not None": comparing an array with != None is ambiguous
        self.X = preprocessing.scale(self.X)
    if self.X_test is not None:
        self.X_test = preprocessing.scale(self.X_test)
Developer: aydindemircioglu, Project: MixMex, Lines: 7, Source: DataSet.py
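As the FIXME above notes, calling preprocessing.scale on the training and test sets separately standardizes them with different statistics. One common remedy (a minimal sketch assuming consistent train/test scaling is what is wanted, not the original project's code; the toy arrays stand in for self.X and self.X_test) is to fit a StandardScaler on the training data and reuse its statistics for the test data:

import numpy as np
from sklearn.preprocessing import StandardScaler

# hypothetical stand-ins for self.X and self.X_test from the snippet above
X_train = np.array([[1.0, 100.0], [2.0, 200.0], [3.0, 300.0]])
X_test = np.array([[1.5, 150.0], [2.5, 250.0]])

scaler = StandardScaler()
# learn mean and standard deviation on the training set only...
X_train_scaled = scaler.fit_transform(X_train)
# ...and apply exactly those statistics to the test set
X_test_scaled = scaler.transform(X_test)

print(X_train_scaled.mean(axis=0))  # approximately [0. 0.]
print(X_test_scaled)                # scaled with the training statistics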
Example 9: main
def main():
    X, Y, X_test = import_data()
    X_n = preprocessing.scale(X)
    X_t_n = preprocessing.scale(X_test)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X_n, Y, test_size=0.2, random_state=0)
    alpha = np.arange(0.001, 2.0, 0.001, np.float)
    best_alpha = 0
    best_score = 0
    for a in alpha:
        clf = linear_model.Ridge(alpha=a)
        clf.fit(X_train, y_train)
        sc = clf.score(X_test, y_test)
        if sc > best_score:
            best_alpha = a
            best_score = sc
    clf = linear_model.Ridge(alpha=best_alpha)
    clf.fit(X_train, y_train)
    res = clf.predict(X_t_n)
    for var in res:
        print(var[0])
Developer: ravediamond, Project: HackerRank_DataAnalysis, Lines: 30, Source: code.py
Example 10: main
def main():
    """TODO: Docstring for main.
    :returns: TODO
    """
    alpha = 1.
    decay = 0.0006
    iter_num = 600
    finetune_iter = 220
    hyper_params = {
        'hidden_layers_sizes': [196,], 'iter_nums': [400,],
        'alphas': [1.,], 'decays': [0.003,],
        'betas': [3,], 'rhos': [0.1,]
    }
    enc = OneHotEncoder(sparse=False)
    mnist = fetch_mldata('MNIST original', data_home='./')
    x_train, x_test, y_train, y_test = \
        train_test_split(scale(mnist.data.astype(float)).astype('float32'),
                         mnist.target.astype('float32'),
                         test_size=0.5, random_state=0)
    x_unlabeled = scale(mnist.data[mnist.target >= 5, :].astype(float)).astype('float32')
    y_train = enc.fit_transform(y_train.reshape(y_train.shape[0], 1)).astype('float32')
    t_x = T.matrix()
    params, extracted = pretrain_sae(x_unlabeled, hyper_params)
    extracted = function(inputs=[t_x], outputs=[sae_extract(t_x, params)])(x_train)[0]
    params.append(train_softmax(extracted, y_train, iter_num, alpha, decay))
    weights = finetune_sae(x_train, y_train, params, finetune_iter, alpha, decay)
    all_label = np.array(range(0, 10))
    pred = all_label[softmax2class_max(sae_predict(x_test, weights))]
    print accuracy_score(y_test, pred)
    print classification_report(y_test, pred)
    print confusion_matrix(y_test, pred)
Developer: ShiehShieh, Project: UFLDL-Solution, Lines: 35, Source: sae.py
Example 11: get_correlation_data
def get_correlation_data(self, round_number, liste_id, dataset):
    points = []
    # first retrieve the vote percentages for the given list
    poll_data = self.retrieve_total_votes_for_liste(round_number, liste_id)
    # arrange the data into a clean dict
    data_x, data_y = [], []
    for dept_data in poll_data:
        data_x.append(dept_data["vote_percentage"])
        data_y.append(dataset[dept_data["_id"]] / 100)
        points.append({"dept_id": dept_data["_id"],
                       "votes_percentage": dept_data["vote_percentage"],
                       "other_percentage": dataset[dept_data["_id"]] / 100})
    array_x, array_y = array(data_x), array(data_y)
    # normalize the vote data and the dataset
    rescaled_x, rescaled_y = preprocessing.scale(array_x), preprocessing.scale(array_y)
    # compute the colors for each département
    colors, max_val = self._compute_colors(rescaled_x, rescaled_y)
    # on the non-normalized data, compute the coefficients of the regression line
    reg_slope, reg_y_intercept = self._linear_regression(array_x, array_y)
    for i, x in enumerate(rescaled_x):
        points[i]["votes_normalized"] = rescaled_x[i]
        points[i]["other_normalized"] = rescaled_y[i]
        points[i]["color"] = colors[i]
    return {"points": points,
            "graph_metadata": {"max": max_val,
                               "regression": {"slope": reg_slope,
                                              "intercept": reg_y_intercept}}}
Developer: ThomasPoncet, Project: Ocre, Lines: 35, Source: correlations.py
Example 12: permutation_cross_validation
def permutation_cross_validation(estimator, X, y, n_fold=3, isshuffle=True, cvmeth='shufflesplit', score_type='r2', n_perm=1000):
    """
    An easy way to evaluate the significance of a cross-validated score by permutations
    -------------------------------------------------
    Parameters:
        estimator: linear model estimator
        X: IV
        y: DV
        n_fold: fold number for cross-validation
        cvmeth: 'kfold' or 'shufflesplit'.
                shufflesplit is the random permutation cross-validation iterator
        score_type: scoring type, 'r2' by default
        n_perm: number of permutations
    Return:
        score: model score
        permutation_scores: model scores with permuted labels
        pvalues: p value of the permutation scores
    """
    try:
        from sklearn import cross_validation, preprocessing
    except ImportError:
        raise Exception('To call this function, please install sklearn')
    if X.ndim == 1:
        X = np.expand_dims(X, axis=1)
    if y.ndim == 1:
        y = np.expand_dims(y, axis=1)
    X = preprocessing.scale(X)
    y = preprocessing.scale(y)
    if cvmeth == 'kfold':
        cvmethod = cross_validation.KFold(y.shape[0], n_fold, shuffle=isshuffle)
    elif cvmeth == 'shufflesplit':
        testsize = 1.0 / n_fold
        cvmethod = cross_validation.ShuffleSplit(y.shape[0], n_iter=100, test_size=testsize, random_state=0)
    score, permutation_scores, pvalues = cross_validation.permutation_test_score(estimator, X, y, scoring=score_type, cv=cvmethod, n_permutations=n_perm)
    return score, permutation_scores, pvalues
Developer: helloTC, Project: ATT, Lines: 35, Source: tools.py
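The snippet above depends on sklearn.cross_validation, which was removed in scikit-learn 0.20. Below is a minimal sketch of the same permutation-test idea against the current sklearn.model_selection API; the toy data and the Ridge estimator are illustrative assumptions, not part of the original project.

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import ShuffleSplit, permutation_test_score
from sklearn.preprocessing import scale

rng = np.random.RandomState(0)
X = scale(rng.rand(100, 3))                                        # standardized predictors
y = scale(X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(100))   # noisy standardized target

# random-permutation cross-validation iterator, as in the 'shufflesplit' branch above
cv = ShuffleSplit(n_splits=100, test_size=1.0 / 3, random_state=0)
score, permutation_scores, pvalue = permutation_test_score(
    Ridge(), X, y, scoring='r2', cv=cv, n_permutations=100)
print(score, pvalue)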
Example 13: load_all_data
def load_all_data(f_name, scale=True, rnd=False):
    """Get data with labels, split into training, validation and test set."""
    data_file = h5py.File(f_name, 'r')
    x_test = data_file['x_test'][:]
    x_dev = data_file['x_dev'][:]
    x_train = data_file['x_train'][:]
    data_file.close()
    if scale:
        print "scaling..."
        x_test = preprocessing.scale(x_test, with_mean=False)
        x_dev = preprocessing.scale(x_dev, with_mean=False)
        x_train = preprocessing.scale(x_train, with_mean=False)
    print "Total dataset size:"
    print "n train samples: %d" % x_train.shape[0]
    print "n test samples: %d" % x_test.shape[0]
    print "n dev samples: %d" % x_dev.shape[0]
    print "n features: %d" % x_test.shape[1]
    if rnd:
        print "Randomizing training set..."
        np.random.shuffle(x_train)
    return dict(
        x_train=x_train,
        x_test=x_test,
        x_dev=x_dev,
    )
Developer: mikimaus78, Project: groupNMF, Lines: 26, Source: base.py
Example 14: get_feature_importances
def get_feature_importances(data_table, obs_metadata, lines_table, use_con_flux=False):
    feature_importances_list = []
    X_colnames = None
    for line_name, line_wavelength in lines_table['source', 'wavelength_target']:
        subset = data_table[(data_table['source'] == line_name) & (data_table['wavelength_target'] == line_wavelength)]
        X, y, labels = get_X_and_y(subset, obs_metadata, use_con_flux)
        if X_colnames is None:
            X_colnames = X.colnames
        params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
                  'learning_rate': 0.01, 'loss': 'lad'}
        clf = ensemble.GradientBoostingRegressor(**params)
        X = ndarrayidze(X)
        # Scaling is optional, but I think I'm going to do it (for now) for all methods,
        # just so when comparing between values here and with e.g. ICA there are fewer diffs
        X = skpp.scale(X)
        y = skpp.scale(y)
        clf.fit(X, y)
        feature_importances_list.append(clf.feature_importances_)
    fi = np.array(feature_importances_list)
    fi_table = Table(fi, names=X_colnames)
    fi_table.add_column(lines_table['source'])
    fi_table.add_column(lines_table['wavelength_target'])
    return fi_table
Developer: dcunning11235, Project: skyflux, Lines: 28, Source: gradient_boost_peaks.py
Example 15: run
def run(self):
    roi_data = []
    seg_data = []
    provider_roi = self.roi_layer.dataProvider()
    provider_seg = self.seg_layer.dataProvider()
    feat_seg = QgsFeature()
    self.status.emit('building spatial index')
    time.sleep(0.3)
    index = QgsSpatialIndex()
    piter = 0
    feat_count = provider_seg.featureCount()
    for f in provider_seg.getFeatures():
        seg_data.append(f.attributes()[1:])
        index.insertFeature(f)
        piter += 1
        self.progress.emit(piter * 15 / feat_count)
    self.status.emit('extracting attributes')
    self.log.emit('extracting attributes from roi segments intersection')
    time.sleep(0.3)
    # intersect roi with segments and extract attributes
    piter = 0
    feat_count = provider_roi.featureCount()
    for feat_roi in provider_roi.getFeatures():
        geom = feat_roi.geometry()
        attr_roi = feat_roi.attributes()
        intersects = index.intersects(geom.boundingBox())
        for fid in intersects:
            ffilter = QgsFeatureRequest().setFilterFid(int(fid))
            provider_seg.getFeatures(ffilter).nextFeature(feat_seg)
            # filter out geometries that do not intersect
            if geom.intersects(feat_seg.geometry()):
                attr_seg = feat_seg.attributes()
                roi_data.append(attr_seg[1:] + attr_roi)
        # emit progress
        piter += 1
        self.progress.emit(15 + (piter * 55 / feat_count))
    # read train data
    roi_data = np.array(roi_data)
    samples = roi_data[:, :-1]
    labels = roi_data[:, -1].astype(int)
    # svm fit and predict
    self.status.emit('svm: fitting data')
    time.sleep(0.3)
    classifier = svm.SVC(**self.svm_dict)
    classifier.fit(preprocessing.scale(samples), labels)
    self.progress.emit(85)
    self.status.emit('svm: predicting labels')
    time.sleep(0.3)
    seg_data = preprocessing.scale(seg_data)
    predictions = classifier.predict(seg_data).tolist()
    self.progress.emit(100)
    self.output = pickle.dumps(predictions)
Developer: vitorhirota, Project: QgisImageAnalysis, Lines: 60, Source: classifier.py
Example 16: try_lvc_clf
def try_lvc_clf(train_X, train_y, test_X, test_y):
    train_X = scale(train_X)
    lvc = LinearSVC(C=0.1)
    lvc.fit(train_X, train_y)
    dec_y = lvc.decision_function(train_X)
    # keep only the 80% of samples closest to the decision boundary
    num_sel = int(len(dec_y) * 0.8)
    assert len(dec_y) == train_X.shape[0]
    assert num_sel <= train_X.shape[0]
    s_idx = np.argsort(np.abs(dec_y))
    assert len(s_idx) == train_X.shape[0]
    for i in s_idx:
        if np.isnan(train_y[i]) == True:
            print("smoking index:%s" % i)
    n_train_X = train_X[s_idx[0:num_sel], :]
    n_train_y = train_y[s_idx[0:num_sel]]
    n_train_X = scale(n_train_X)
    lvc.fit(n_train_X, n_train_y)
    test_X = scale(test_X)
    pred_y = lvc.predict(test_X)
    return pred_y
Developer: kanhua, Project: Enron-Email-Fraud, Lines: 34, Source: feature+preprocessing+and+selection.py
Example 17: load_dataset
def load_dataset(fname="../data/housing/housing.data", cols=(0,)):
    X = np.genfromtxt(fname, usecols=cols, delimiter=',')
    # X = np.genfromtxt(fname, usecols=cols)
    num_features = X.shape[1]
    num_triplets = int(6 * num_features * (num_features-1) * (num_features-2) / 6)
    triplets = np.zeros((num_triplets, 4 * wx.shape[1]))
    print ':: loading dataset...please wait!'
    l = 0
    for i in range(num_features-2):
        for j in range(i+1, num_features-1):
            for k in range(j+1, num_features):
                permute_idx = itertools.permutations([i, j, k])
                for idx in permute_idx:
                    x = scale(np.array(X[:, idx[0]]))[:, np.newaxis]
                    y = scale(np.array(X[:, idx[1]]))[:, np.newaxis]
                    z = scale(np.array(X[:, idx[2]]))[:, np.newaxis]
                    triplets[l, :] = f3(x, y, z, np.hstack((x, y, z)))
                    l = l + 1
    return (triplets, num_features, num_triplets)
Developer: codeaudit, Project: causation_learning_theory, Lines: 25, Source: experiment_krikamol_triplet.py
Example 18: scaled_logistic_regression
def scaled_logistic_regression(x_train, t_train, x_test, t_test):
    x_train_new = preprocessing.scale(x_train)
    x_test_new = preprocessing.scale(x_test)
    return logistic_regression(x_train_new, t_train, x_test_new, t_test)
Developer: rohanbhatia, Project: Heart-Disease-Dataset, Lines: 7, Source: logistic_regression.py
Example 19: main
def main():
    indata = np.load(inputs)
    training_data = indata['data_training']
    training_scaled = preprocessing.scale(training_data)
    training_labels = indata['label_training']
    validation_data = indata['data_val']
    validation_scaled = preprocessing.scale(validation_data)
    validation_labels = indata['label_val']
    ts = range(-12, 6)
    cs = [pow(10, t) for t in ts]
    accuracy_results = []
    accuracy_results_scaled = []
    for c in cs:
        lin_clf = svm.LinearSVC(C=c)
        lin_clf.fit(training_data, training_labels)
        predictions = lin_clf.predict(validation_data)
        accuracy = metrics.accuracy_score(validation_labels, predictions)
        accuracy_results.append(accuracy)
        lin_clf.fit(training_scaled, training_labels)
        predictions = lin_clf.predict(validation_scaled)
        accuracy_scaled = metrics.accuracy_score(validation_labels, predictions)
        accuracy_results_scaled.append(accuracy_scaled)
    plt.plot(range(len(cs)), accuracy_results, label='un-scaled')
    plt.plot(range(len(cs)), accuracy_results_scaled, label='scaled')
    plt.xticks(range(len(cs)), cs, size='small')
    plt.legend()
    plt.show()
    print accuracy_results
    print accuracy_results_scaled
Developer: Veterun, Project: SparkPythonHanhan, Lines: 32, Source: linear_svm.py
Example 20: normalize_data
def normalize_data(tr_x, ts_x, normz=None, axis=0):
    if normz == 'scale':  # compare strings with == rather than "is"
        tr_x = scale(tr_x, axis=axis)
        ts_x = scale(ts_x, axis=axis)
    elif normz == 'minmax':
        minmax_scaler = MinMaxScaler()
        if axis == 0:
            for c_i in range(tr_x.shape[1]):
                tr_x[:, c_i] = minmax_scaler.fit_transform(tr_x[:, c_i])
                ts_x[:, c_i] = minmax_scaler.fit_transform(ts_x[:, c_i])
        elif axis == 1:
            for r_i in range(tr_x.shape[0]):
                tr_x[r_i, :] = minmax_scaler.fit_transform(tr_x[r_i, :])
                ts_x[r_i, :] = minmax_scaler.fit_transform(ts_x[r_i, :])
    elif normz == 'sigmoid':
        if axis == 0:
            col_max = np.max(tr_x, axis=0)
            cols_non_norm = np.argwhere(col_max > 1).tolist()
            tr_x[:, cols_non_norm] = -0.5 + (1 / (1 + np.exp(-tr_x[:, cols_non_norm])))
            # TODO: implement col_max col_non_norm for test set
            ts_x[:, cols_non_norm] = -0.5 + (1 / (1 + np.exp(-ts_x[:, cols_non_norm])))
        elif axis == 1:
            row_max = np.max(tr_x, axis=1)
            rows_non_norm = np.argwhere(row_max > 1).tolist()
            tr_x[rows_non_norm, :] = -0.5 + (1 / (1 + np.exp(-tr_x[rows_non_norm, :])))
            # TODO: implement row_max row_non_norm for test set
            ts_x[rows_non_norm, :] = -0.5 + (1 / (1 + np.exp(-ts_x[rows_non_norm, :])))
    return tr_x, ts_x
Developer: thushv89, Project: kaggle_tel, Lines: 29, Source: manual_transform.py
Note: the sklearn.preprocessing.scale function examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and any redistribution or use should follow the corresponding project's license. Do not reproduce without permission.