本文整理汇总了Python中sklearn.datasets.dump_svmlight_file函数的典型用法代码示例。如果您正苦于以下问题:Python dump_svmlight_file函数的具体用法?Python dump_svmlight_file怎么用?Python dump_svmlight_file使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了dump_svmlight_file函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_dump_concise
def test_dump_concise():
    # dump_svmlight_file must write every number in its shortest form and
    # the result must still load back to the same data.
    one, two, three = 1, 2.1, 3.01
    exact = 1.000000000000001
    # one digit more than a double can hold: this literal is stored as 1.0
    almost = 1.0000000000000001
    labels = [one, two, three, exact, almost]
    rows = [
        [one, two, three, exact, almost],
        [1e9, 2e18, 3e27, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
    ]
    buf = BytesIO()
    dump_svmlight_file(rows, labels, buf)

    # the dumped lines, in order, using the most concise format possible
    expected_lines = (
        b("1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n"),
        b("2.1 0:1000000000 1:2e+18 2:3e+27\n"),
        b("3.01 \n"),
        b("1.000000000000001 \n"),
        b("1 \n"),
    )
    buf.seek(0)
    for expected in expected_lines:
        assert_equal(buf.readline(), expected)

    # concise is not enough: the values must round-trip as well
    buf.seek(0)
    X2, y2 = load_svmlight_file(buf)
    assert_array_almost_equal(rows, X2.toarray())
    assert_array_equal(labels, y2)
开发者ID:BrenBarn,项目名称:scikit-learn,代码行数:28,代码来源:test_svmlight_format.py
示例2: fit
def fit(self, X, Y):
    """Train a model on (X, Y) by driving the gtsvm command-line tools.

    The training set is written in svmlight format with one-based feature
    indices to ``<base_str>-svmcmd-train.dat``. ``gtsvm_initialize`` and
    ``gtsvm_optimize`` are then run against ``<train file>.model``, and
    ``gtsvm_shrink`` is run only if the optimizer wrote nothing to stderr.

    Sets ``labels``, ``multiclass``, ``train_fname``, ``model_fname`` and
    ``train_fail`` on the instance.

    Returns:
        self
    """
    self.labels = list(set(Y))
    # more than two distinct labels switches gtsvm to multiclass mode (-m 1)
    self.multiclass = len(self.labels) > 2

    self.train_fname = self.base_str + '-svmcmd-train' + '.dat'
    self.model_fname = self.train_fname + '.model'
    dump_svmlight_file(X, Y, self.train_fname, zero_based=False)

    if self.multiclass:
        command_line = path_to_train_program + 'gtsvm_initialize {0} -f {1} -o {2} -m 1 '.format(
            self.param_str, self.train_fname, self.model_fname)
    else:
        command_line = path_to_train_program + 'gtsvm_initialize -f {1} -o {2} {0}'.format(
            self.param_str, self.train_fname, self.model_fname)
    subprocess.Popen(shlex.split(command_line)).wait()

    command_line = path_to_train_program + 'gtsvm_optimize -i {0} -o {1} -e {2} -n {3}'.format(
        self.model_fname, self.model_fname, self.tol, self.max_iter)
    p = subprocess.Popen(shlex.split(command_line), stderr=subprocess.PIPE)
    # communicate() drains stderr while waiting for the child. Calling
    # wait() first and reading afterwards can deadlock once the child has
    # filled the OS pipe buffer.
    _, opt_err_str = p.communicate()

    # gtsvm reports optimizer problems only on stderr, so any output there
    # is treated as a failed training run.
    if len(opt_err_str) < 1:
        command_line = path_to_train_program + 'gtsvm_shrink -i {0} -o {1}'.format(
            self.model_fname, self.model_fname)
        subprocess.Popen(shlex.split(command_line)).wait()
        self.train_fail = False
    else:
        self.train_fail = True
    return self
开发者ID:niitsuma,项目名称:gtsvm,代码行数:35,代码来源:gtsvm.py
示例3: predict
def predict(self, X):
    """Classify X with the trained model by running ``gtsvm_classify``.

    X is dumped in svmlight format (one-based indices) to
    ``<base_str>-svmcmd-test.dat`` and the tool writes its output to
    ``<base_str>-svmcmd-predict.dat``.

    Returns:
        A list with one entry per sample:
        * if training failed (``train_fail``), ``max(self.labels) + 1`` for
          every sample;
        * in multiclass mode, the index of the largest weight in each row
          of the prediction file;
        * otherwise, each predicted weight rounded to an int.
    """
    if isinstance(X, list):
        self.test_n_sample = len(X)
    else:
        self.test_n_sample = X.shape[0]

    # The svmlight writer needs one label per row; the true labels are
    # unknown at prediction time, so a constant placeholder is written.
    placeholder_labels = [1] * self.test_n_sample
    self.test_fname = self.base_str + '-svmcmd-test' + '.dat'
    self.predict_fname = self.base_str + '-svmcmd-predict' + '.dat'
    dump_svmlight_file(X, placeholder_labels, self.test_fname, zero_based=False)

    command_line = path_to_train_program + 'gtsvm_classify -f {0} -i {1} -o {2}'.format(
        self.test_fname, self.model_fname, self.predict_fname)
    subprocess.Popen(shlex.split(command_line)).wait()

    if self.train_fail:
        return [max(self.labels) + 1] * self.test_n_sample

    if self.multiclass:
        # NOTE: 'rb' is the Python 2 convention for csv input; Python 3's
        # csv.reader requires a text-mode file instead.
        with open(self.predict_fname, 'rb') as f:
            self.predicted_weight = [[float(v) for v in row]
                                     for row in csv.reader(f)]
        return [np.argmax(row) for row in self.predicted_weight]

    self.predicted_weight = np.loadtxt(self.predict_fname)
    # loadtxt returns a 0-d array when the file holds a single value;
    # atleast_1d keeps the single-sample case iterable.
    return [int(round(w)) for w in np.atleast_1d(self.predicted_weight)]
开发者ID:niitsuma,项目名称:gtsvm,代码行数:25,代码来源:gtsvm.py
示例4: generate_weekday_newbuyer_exposure
def generate_weekday_newbuyer_exposure(df):
    """Dump the UV features plus exposure and new-buyer counts.

    Writes five feature columns of ``df`` with ``df.uv_0626_0702`` as the
    target, in svmlight format, to
    ``./uv_weekday_weekend_newbuyer_exposure_without_outliers.dat``.
    """
    feature_columns = [
        'uv_0612_0618',
        'uv_weekday',
        'uv_weekend',
        'no_subsidy_exposure',
        'newbuyer_6_18',
    ]
    features = df[feature_columns]
    target = df.uv_0626_0702
    dump_svmlight_file(
        features,
        target,
        './uv_weekday_weekend_newbuyer_exposure_without_outliers.dat',
    )
开发者ID:lujiaying,项目名称:jiayinglu,代码行数:7,代码来源:generate_libsvm_format.py
示例5: generate_week
def generate_week(df):
    """Dump a one-dimensional feature set.

    Writes the single column ``uv_0612_0618`` of ``df`` with
    ``df.uv_0626_0702`` as the target, in svmlight format, to
    ``./uv_week.dat``.
    """
    features = df[['uv_0612_0618']]
    target = df.uv_0626_0702
    dump_svmlight_file(features, target, './uv_week.dat')
开发者ID:lujiaying,项目名称:jiayinglu,代码行数:7,代码来源:generate_libsvm_format.py
示例6: executa_extracao_n
def executa_extracao_n(base_treino, metodo, n=1):
    """Extract PFTAS patch attributes from every PNG of a training base.

    For each patch side ``lado`` in ``range(8, n + 1, 4)`` the attributes
    and labels of all readable images are collected and written in svmlight
    format to ``<base_treino>base_PFTAS_<lado>x<lado>.svm``. With the
    default ``n=1`` the range is empty and no file is written.

    Args:
        base_treino: path prefix of the training base; it is searched for
            ``*.png`` files and also used as the output path prefix.
        metodo: method name, used only in the log message.
        n: largest patch side to process (inclusive).
    """
    inicio = time()

    lista_imagens = arq.busca_arquivos(base_treino, "*.png")
    n_imgs_treino = len(lista_imagens)

    for lado in range(8, n + 1, 4):
        atributos = []
        rotulos = []
        arq_treino = base_treino + "base_PFTAS_" + str(lado) + "x" + str(lado) + ".svm"

        # start of the attribute extraction process
        for arq_imagem in lista_imagens:
            print("Arquivo: " + arq_imagem)
            imagem = mh.imread(arq_imagem)
            # Identity test: `!= None` on an array compares element by
            # element and has no single truth value.
            if imagem is not None:
                classe, _ = ex.classe_arquivo(arq_imagem)

                print("executa_extracao_n - shape imagem:" + str(imagem.shape))
                # extract the attributes of this image's patches
                atrs, rots = extrai_pftas_patches_n(imagem, classe, lado)
                atributos += atrs
                rotulos += rots

        dump_svmlight_file(atributos, rotulos, arq_treino)

    # NOTE(review): the original indentation was lost; the two log calls are
    # assumed to run once after all patch sizes - confirm.
    log("Extraidos atributos da base " + base_treino + " utilizando " + metodo + "\n para " + str(n_imgs_treino) + "imagens")

    # report the elapsed time
    log(str(time() - inicio) + "EXTRAÇÃO")
开发者ID:willianfatec,项目名称:PatchWiser,代码行数:30,代码来源:testes.py
示例7: generate_weekday_weekend
def generate_weekday_weekend(df):
    """Dump a three-dimensional feature set.

    Writes the columns ``uv_0612_0618``, ``uv_weekday`` and ``uv_weekend``
    of ``df`` with ``df.uv_0626_0702`` as the target, in svmlight format,
    to ``./uv_weekday_weekend.dat``.
    """
    feature_columns = ['uv_0612_0618', 'uv_weekday', 'uv_weekend']
    features = df[feature_columns]
    target = df.uv_0626_0702
    dump_svmlight_file(features, target, './uv_weekday_weekend.dat')
开发者ID:lujiaying,项目名称:jiayinglu,代码行数:7,代码来源:generate_libsvm_format.py
示例8: test_dump
def test_dump():
    # Round-trip the reference data file through dump/load for every
    # combination of sparse/dense input, index base and float dtype.
    X_sparse, y = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()

    for X in (X_sparse, X_dense):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                buf = BytesIO()
                # A comment has to be passed to get the version info
                # written; LibSVM doesn't understand comments, so none
                # are emitted by default anymore.
                dump_svmlight_file(X.astype(dtype), y, buf, comment="test",
                                   zero_based=zero_based)
                buf.seek(0)

                version_line = buf.readline()
                assert_in("scikit-learn %s" % sklearn.__version__,
                          version_line)

                base_line = buf.readline()
                base_name = "zero" if zero_based else "one"
                assert_in(base_name + "-based", base_line)

                X2, y2 = load_svmlight_file(buf, dtype=dtype,
                                            zero_based=zero_based)
                assert_equal(X2.dtype, dtype)

                # allow a rounding error at the last decimal place
                decimals = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(
                    X_dense.astype(dtype), X2.toarray(), decimals)
                assert_array_equal(y, y2)
开发者ID:yzhy,项目名称:scikit-learn,代码行数:32,代码来源:test_svmlight_format.py
示例9: save_all_data_in_svmlight_format
def save_all_data_in_svmlight_format(self,
                                     file_path,
                                     extraction_method,
                                     label_type):
    """Extract all data and write it to ``file_path`` in svmlight format.

    ``extraction_method`` and ``label_type`` are passed straight to
    ``self.extract_all_data``, which returns the labels first and the
    feature vectors second.
    """
    extracted = self.extract_all_data(extraction_method, label_type)
    labels, feature_vectors = extracted
    with open(file_path, 'wb') as out:
        datasets.dump_svmlight_file(feature_vectors, labels, out)
开发者ID:t-usui,项目名称:COMES,代码行数:7,代码来源:data_processor.py
示例10: dump_svmlight
def dump_svmlight(X_matrix, Y, feature_names, output_filename, feature_id_offset = 0):
    """Dump (X_matrix, Y) in svmlight format with a feature-name header.

    The file is first written by ``dump_svmlight_file`` and then rewritten
    with a leading ``#`` line that holds the space-separated
    ``feature_names``. ``feature_id_offset`` is accepted but not used.
    """
    dump_svmlight_file(X_matrix, Y, output_filename)

    with open(output_filename) as dumped:
        body = dumped.read()
    header = '#' + ' '.join(feature_names) + '\n'

    with open(output_filename, 'w') as rewritten:
        rewritten.write(header + body)
开发者ID:hczhu,项目名称:script-tools,代码行数:7,代码来源:translate_feature_format.py
示例11: batch_fit
def batch_fit(self, Xs, ys, dump=True):
    """Train on a batch of sequences by calling ``svm_hmm_learn``.

    Args:
        Xs: per-sequence feature arrays; they are concatenated before
            being written.
        ys: per-sequence label arrays, aligned with ``Xs``. Every sample
            of sequence ``i`` gets query id ``i``.
        dump: when True, (re)write ``Xtrain.data`` in svmlight format
            with one-based indices; when False, the existing file is
            reused.

    The learner is run on ``Xtrain.data`` and writes ``svmhmm-model.dat``;
    its exit code is printed.

    Returns:
        self
    """
    if dump:
        # The message is only printed when the dump really happens.
        print("dumping data to Xtrain.data")
        qids = [np.array([i] * len(y)) for i, y in enumerate(ys)]
        dump_svmlight_file(
            np.concatenate(Xs),
            np.concatenate(ys),
            "Xtrain.data",
            zero_based=False,
            query_id=np.concatenate(qids),
        )

    print("now learning")
    # NOTE(review): "%d" truncates a fractional self.C - confirm that C is
    # always integral.
    print(call(
        [
            self.path + "svm_hmm_learn",
            "-c",
            "%d" % self.C,
            "--t",
            "%d" % self.t,
            "--e",
            "%d" % self.e,
            "Xtrain.data",
            "svmhmm-model.dat",
        ]
    ))
    return self
开发者ID:Hanshan1988,项目名称:smartphone-activity-recognition,代码行数:25,代码来源:svmhmm.py
示例12: load_training_data
def load_training_data(file_location=str, load_from_database=False, limit=int(1000), clean_dataset=True):
    """Load the training data, optionally refreshing it from the database.

    If ``load_from_database`` is True, the data is first retrieved from
    the database and stored as ``<file_location>.csv`` (all retrieved
    data) and ``<file_location>.dat`` (term-document matrix plus class
    labels in svmlight format). Both files are then read back.

    Arguments:
        file_location (str): Path + filename, without extension, of the
            files to save/load (e.g. 'training_data'). The default is the
            ``str`` type itself, so a real path must always be passed.
        load_from_database (bool): Should data be retrieved from the
            database first?
        limit (int): Amount of records to retrieve from the database
            (default=1000)
        clean_dataset (bool): Should questions be cleaned (e.g. remove
            code samples, hexadecimals, numbers, etc)?

    Returns:
        (pandas.DataFrame, tuple): the DataFrame read from the CSV file
        and the result of ``load_svmlight_file`` on the svmlight file.

    See:
        | ```MySQLDatabase().retrieve_training_data```
        | ```pandas.DataFrame.to_csv```
        | ```pandas.read_csv```
        | ```sklearn.datasets.dump_svmlight_file```
        | ```sklearn.datasets.load_svmlight_file```
    """
    # Imported here because only DataFrame is known to be imported at
    # module level.
    from pandas import read_csv

    svm_file = file_location + ".dat"
    csv_file = file_location + ".csv"
    if load_from_database:
        comment = u"label: (-1: Bad question, +1: Good question); features: (term_id, frequency)"
        # NOTE(review): the parameters are set on one MySQLDatabase()
        # instance and the data is retrieved from another - presumably the
        # state is shared at class level; confirm.
        MySQLDatabase().set_vote_value_params()
        data = MySQLDatabase().retrieve_training_data(limit, clean_dataset)
        # create a term-document matrix
        vectorizer = CountVectorizer(analyzer='word', min_df=0.01, stop_words="english")
        td_matrix = vectorizer.fit_transform(data.get(QUESTION_TEXT_KEY))
        data.to_csv(csv_file)
        dump_svmlight_file(td_matrix, data[CLASS_LABEL_KEY], f=svm_file, comment=comment)
    # DataFrame.from_csv no longer exists in current pandas; this is its
    # documented read_csv equivalent.
    stored_frame = read_csv(csv_file, index_col=0, parse_dates=True)
    return stored_frame, load_svmlight_file(svm_file)
开发者ID:klAndersen,项目名称:IMT4904_MasterThesis_Code,代码行数:34,代码来源:test_all_algorithms.py
示例13: test_load_with_offsets
def test_load_with_offsets(sparsity, n_samples, n_features):
    # Load one dumped file as three byte-range chunks and check that the
    # stacked chunks reproduce the original data.
    rng = np.random.RandomState(0)
    X = rng.uniform(low=0.0, high=1.0, size=(n_samples, n_features))
    if sparsity:
        X[X < sparsity] = 0.0
    X = sp.csr_matrix(X)
    y = rng.randint(low=0, high=2, size=n_samples)

    buf = BytesIO()
    dump_svmlight_file(X, y, buf)
    buf.seek(0)
    total_bytes = len(buf.getvalue())

    # cut points that are likely to fall anywhere inside a row
    first_cut = total_bytes // 3
    second_cut = 4 * total_bytes // 5
    chunk_ranges = [
        {"offset": 0, "length": first_cut - 0},
        {"offset": first_cut, "length": second_cut - first_cut},
        # the last chunk runs to the end of the file
        {"offset": second_cut},
    ]

    # load the original sparse matrix into 3 independent CSR matrices
    X_parts = []
    y_parts = []
    for byte_range in chunk_ranges:
        X_part, y_part = load_svmlight_file(buf, n_features=n_features,
                                            **byte_range)
        X_parts.append(X_part)
        y_parts.append(y_part)

    y_concat = np.concatenate(y_parts)
    X_concat = sp.vstack(X_parts)
    assert_array_almost_equal(y, y_concat)
    assert_array_almost_equal(X.toarray(), X_concat.toarray())
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:33,代码来源:test_svmlight_format.py
示例14: subsample_to_file
def subsample_to_file(svm_file, out_dir, out_name, multilabel=False,
                      row_ratio=0.5, col_ratio=0.3, random_state=12):
    """Write a random row/column subsample of a libsvm file to a new file.

    ``int(n * row_ratio)`` rows and ``int(m * col_ratio)`` columns are
    drawn without replacement from the ``n x m`` input matrix, using a
    ``RandomState`` seeded with ``random_state``. ``out_dir`` is created
    if needed and the result is written to ``out_dir/out_name``.

    Example::

        # Run in the current directory, this creates a `tmp` folder if it
        # does not exist yet and generates a file called `a9a_sub` from
        # the original file `./data/a9a`. Both files are in libsvm format.
        subsample_to_file("./data/a9a", "./tmp", "a9a_sub")

        # Read the subsampled file and make sure it has half the rows of
        # a9a and roughly a third of its columns (123).
        X, y = load_svmlight_file('./tmp/a9a_sub')
        assert X.shape == (16280, 36)
    """
    assert 0 < row_ratio <= 1, \
        "Row ratio {row_ratio} must be (0, 1]".format(row_ratio=row_ratio)
    assert 0 < col_ratio <= 1, \
        "Col ratio {col_ratio} must be (0, 1]".format(col_ratio=col_ratio)

    X, y = load_svmlight_file(svm_file, multilabel=multilabel)
    n_rows, n_cols = X.shape
    n_keep_rows = int(n_rows * row_ratio)
    n_keep_cols = int(n_cols * col_ratio)

    # rows are drawn before columns so a given seed always yields the same
    # pair of selections
    rng = np.random.RandomState(random_state)
    row_idx = rng.choice(n_rows, n_keep_rows, replace=False)
    col_idx = rng.choice(n_cols, n_keep_cols, replace=False)

    mkdir_p(out_dir)
    out_file = os.path.join(out_dir, out_name)
    X_sub = X[row_idx, :][:, col_idx]
    dump_svmlight_file(X_sub, y[row_idx], out_file, multilabel=multilabel)
开发者ID:mktal,项目名称:peregrine,代码行数:35,代码来源:utils.py
示例15: save_libfm
def save_libfm(X_sprs_mat, y_array, f):
    """Announce the export, then dump (X_sprs_mat, y_array) to ``f``.

    The data is written with ``dump_svmlight_file``. Nothing is returned.
    """
    print("Save LibFM Format")
    dump_svmlight_file(X_sprs_mat, y_array, f)
开发者ID:nancyya,项目名称:Predictors,代码行数:7,代码来源:dataProcessing.py
示例16: test_load_with_long_qid
def test_load_with_long_qid():
    # Query ids at the edges of the signed 64-bit range must survive a
    # load, a dump and a reload.
    raw = b("""
1 qid:0 0:1 1:2 2:3
0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""")

    big_row = [1440446648, 72048431380967004, 236784985]
    expected_X = [[1, 2, 3], big_row, big_row, big_row]
    expected_y = [1, 0, 0, 3]
    expected_qid = [0, 72048431380967004,
                    -9223372036854775807, 9223372036854775807]

    # first load straight from the raw bytes
    X, y, qid = load_svmlight_file(BytesIO(raw), query_id=True)
    assert_array_equal(y, expected_y)
    assert_array_equal(X.toarray(), expected_X)
    assert_array_equal(qid, expected_qid)

    # dump what was loaded and read it back, query ids included
    buf = BytesIO()
    dump_svmlight_file(X, y, buf, query_id=qid, zero_based=True)
    buf.seek(0)
    X, y, qid = load_svmlight_file(buf, query_id=True, zero_based=True)
    assert_array_equal(y, expected_y)
    assert_array_equal(X.toarray(), expected_X)
    assert_array_equal(qid, expected_qid)

    # the same dump must also load when the query ids are ignored
    buf.seek(0)
    X, y = load_svmlight_file(buf, query_id=False, zero_based=True)
    assert_array_equal(y, expected_y)
    assert_array_equal(X.toarray(), expected_X)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:32,代码来源:test_svmlight_format.py
示例17: create_train_test
def create_train_test(n_samples, doc2vec, save_svmlight=True):
    """Build labelled document vectors and split them into train/test sets.

    The first ``n_samples // 2`` rows hold the ``TRAIN_POS_<i>`` vectors
    (label 1) and the following ``n_samples // 2`` rows hold the
    ``TRAIN_NEG_<i>`` vectors (label 0). The data is split 70/30 with a
    fixed random state.

    Args:
        n_samples: total number of rows to allocate.
        doc2vec: object exposing ``size`` (vector length) and
            ``model.docvecs`` (lookup by document tag).
        save_svmlight: when True, also write both splits in svmlight
            format under ``Data/Processed`` in the parent of the current
            working directory.

    Returns:
        (train, test, train_labels, test_labels)
    """
    print("Creating train & test sets...")

    # Create labelled data arrays.
    data = np.zeros((n_samples, doc2vec.size))
    labels = np.zeros(n_samples)

    # Floor division keeps the count an int on Python 3 as well; a float
    # would break both range() and the row index below.
    half = n_samples // 2
    for i in range(half):
        data[i] = doc2vec.model.docvecs['TRAIN_POS_' + str(i)]
        data[half + i] = doc2vec.model.docvecs['TRAIN_NEG_' + str(i)]
        labels[i] = 1

    # Split in train and validation arrays.
    train, test, train_labels, test_labels = train_test_split(
        data, labels, test_size=0.3, random_state=42)

    if save_svmlight:
        # parent directory of the current working directory
        parent_dir = os.path.abspath(
            os.path.join(os.getcwd(), os.pardir))
        dump_svmlight_file(train, train_labels, parent_dir + "/Data/Processed/TrainSet.svm")
        dump_svmlight_file(test, test_labels, parent_dir + "/Data/Processed/TestSet.svm")

    return train, test, train_labels, test_labels
开发者ID:bailingwjw,项目名称:OpinionMining,代码行数:33,代码来源:Classifying.py
示例18: test_dump
def test_dump():
    # Round-trip the reference data file through dump/load for every
    # combination of sparse/dense input, index base and float dtype.
    X_sparse, y = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()

    # same iteration order as three nested loops: input, base, dtype
    cases = [
        (X, zero_based, dtype)
        for X in (X_sparse, X_dense)
        for zero_based in (True, False)
        for dtype in [np.float32, np.float64]
    ]

    for X, zero_based, dtype in cases:
        buf = BytesIO()
        dump_svmlight_file(X.astype(dtype), y, buf, zero_based=zero_based)
        buf.seek(0)

        # the first header line names the scikit-learn version
        header_line = buf.readline()
        assert_in("scikit-learn %s" % sklearn.__version__, header_line)

        # the second header line names the index base
        header_line = buf.readline()
        base_name = "zero" if zero_based else "one"
        assert_in(base_name + "-based", header_line)

        X2, y2 = load_svmlight_file(buf, dtype=dtype, zero_based=zero_based)
        assert_equal(X2.dtype, dtype)

        # allow a rounding error at the last decimal place
        decimals = 4 if dtype == np.float32 else 15
        assert_array_almost_equal(X_dense.astype(dtype), X2.toarray(), decimals)
        assert_array_equal(y, y2)
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:33,代码来源:test_svmlight_format.py
示例19: data_dump
def data_dump(self, f, X_train, X_test, y_train, y_test):
    """Dump the train and test sets in svmlight format with integer labels.

    Labels are replaced by consecutive integers in order of first
    appearance, starting at 0. Both sets share one mapping, so a label
    first seen in the test set continues the numbering. The output goes to
    ``f + ".svmlight.train"`` and ``f + ".svmlight.test"``.
    """
    from sklearn.datasets import dump_svmlight_file

    label_ids = dict()

    def encode(labels):
        # The next free id always equals the number of labels seen so far,
        # so setdefault assigns it on first sight and reuses it afterwards.
        return [label_ids.setdefault(label, len(label_ids))
                for label in labels]

    dump_svmlight_file(X_train, encode(y_train), f + ".svmlight.train")
    dump_svmlight_file(X_test, encode(y_test), f + ".svmlight.test")
开发者ID:osmanbaskaya,项目名称:mapping-impact,代码行数:27,代码来源:classifier_eval.py
示例20: test_dump_comment
def test_dump_comment():
    # Comments given to dump_svmlight_file must not disturb the data, and
    # unusable comments must be rejected.
    X, y = load_svmlight_file(datafile)
    X = X.toarray()

    def check_roundtrip(comment):
        # dump with the given comment, reload and compare with the input
        buf = BytesIO()
        dump_svmlight_file(X, y, buf, comment=comment, zero_based=False)
        buf.seek(0)
        X2, y2 = load_svmlight_file(buf, zero_based=False)
        assert_array_almost_equal(X, X2.toarray())
        assert_array_equal(y, y2)

    check_roundtrip("This is a comment\nspanning multiple lines.")

    # XXX we have to update this to support Python 3.x
    utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc"
    assert_raises(UnicodeDecodeError, dump_svmlight_file, X, y, BytesIO(),
                  comment=utf8_comment)

    # the same text decoded to unicode is accepted
    check_roundtrip(utf8_comment.decode("utf-8"))

    # a NUL character in the comment is rejected
    assert_raises(ValueError, dump_svmlight_file, X, y, BytesIO(),
                  comment="I've got a \0.")
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:29,代码来源:test_svmlight_format.py
注:本文中的sklearn.datasets.dump_svmlight_file函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论