• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python datasets.load_svmlight_file函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.datasets.load_svmlight_file函数的典型用法代码示例。如果您正苦于以下问题:Python load_svmlight_file函数的具体用法?Python load_svmlight_file怎么用?Python load_svmlight_file使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了load_svmlight_file函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: classification_subfeature

def classification_subfeature(train, test, outclss):
    """Train a linear SVM on a subset of LIWC features and pickle predictions.

    train/test: paths to svmlight-format feature files.
    outclss: output path for the pickled predicted labels.
    """
    # field names aligned with the svmlight feature columns (project helper)
    fields = iot.read_fields()
    print len(fields)
    # LIWC categories of interest: pronouns, affect, and health-related
    foi = ['liwc_anal.result.i',
           'liwc_anal.result.we',
           'liwc_anal.result.affect',
           'liwc_anal.result.posemo',
           'liwc_anal.result.negemo',
           'liwc_anal.result.bio',
           'liwc_anal.result.body',
           'liwc_anal.result.health',
           'liwc_anal.result.ingest']
    # column index of each selected feature within the full field list
    indeces = [np.where(fields==f)[0][0] for f in foi]
    print fields[indeces]

    '''Load Training data'''
    X_train, y_train = load_svmlight_file(train)
    # densify and keep only the selected columns
    X_train = X_train.toarray()[:, indeces]
    # scaler is fit on training data only and reused on test (no leakage)
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    print X_train.shape
    '''Load Test data'''
    X_test, y_test = load_svmlight_file(test)
    X_test = X_test.toarray()[:, indeces]
    X_test = scaler.transform(X_test)
    print X_test.shape

    svc_lin = SVC(kernel='linear', class_weight='balanced')
    y_lin = svc_lin.fit(X_train, y_train).predict(X_test)
    # pickle.dump(y_test, open(outid, 'w'))
    # NOTE(review): file handle is never closed; consider a 'with' block
    pickle.dump(y_lin, open(outclss, 'w'))
开发者ID:wtgme,项目名称:ohsn,代码行数:31,代码来源:classification.py


示例2: test_dump

def test_dump():
    """Round-trip dense and sparse data through dump/load_svmlight_file."""
    X_sparse, y = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()

    for X in (X_sparse, X_dense):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                buf = BytesIO()
                # A comment is passed so the writer emits its version
                # header; LibSVM itself doesn't grok comments, so none are
                # written by default anymore.
                dump_svmlight_file(X.astype(dtype), y, buf, comment="test",
                                   zero_based=zero_based)
                buf.seek(0)

                header = buf.readline()
                assert_in("scikit-learn %s" % sklearn.__version__, header)
                header = buf.readline()
                basis = "zero" if zero_based else "one"
                assert_in(basis + "-based", header)

                X_back, y_back = load_svmlight_file(buf, dtype=dtype,
                                                    zero_based=zero_based)
                assert_equal(X_back.dtype, dtype)
                # float32 stores fewer digits, so allow a larger rounding
                # error at the last decimal place
                decimals = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(X_dense.astype(dtype),
                                          X_back.toarray(), decimals)
                assert_array_equal(y, y_back)
开发者ID:yzhy,项目名称:scikit-learn,代码行数:32,代码来源:test_svmlight_format.py


示例3: train

 def train(self, examples, outDir, parameters, classifyExamples=None, dummy=False):
     """Train a Keras classifier on svmlight-format example files.

     examples: training examples (resolved via getExampleFile).
     outDir: output directory for the model; created if missing.
     parameters: parameter set merged with the Keras defaults below.
     classifyExamples: optional development examples used for validation.
     dummy: forwarded to getExampleFile().
     Returns nothing here; the copied classifier tracks the training run.
     """
     outDir = os.path.abspath(outDir)
     
     examples = self.getExampleFile(examples, dummy=dummy)
     classifyExamples = self.getExampleFile(classifyExamples, dummy=dummy)
     
     # Return a new classifier instance for following the training process and using the model
     classifier = copy.copy(self)
     classifier.parameters = parameters
     classifier._filesToRelease = [examples, classifyExamples]
     
     if not os.path.exists(outDir):
         os.makedirs(outDir)
     
     # Devel features are forced to the training feature count so matrices align.
     trainFeatures, trainClasses = datasets.load_svmlight_file(examples)
     if classifyExamples != None:
         develFeatures, develClasses = datasets.load_svmlight_file(classifyExamples, trainFeatures.shape[1])
     # One-hot encode classes with a binarizer fit on the training labels only.
     binarizer = preprocessing.LabelBinarizer()
     binarizer.fit(trainClasses)
     trainClasses = binarizer.transform(trainClasses)
     if classifyExamples != None:
         develClasses = binarizer.transform(develClasses)
     
     # NOTE(review): develFeatures/develClasses are undefined below when
     # classifyExamples is None -- confirm callers always pass a devel set.
     print >> sys.stderr, "Training Keras model with parameters:", parameters
     parameters = Parameters.get(parameters, {"TEES.classifier":"KerasClassifier", "layers":5, "lr":0.001, "epochs":1, "batch_size":64, "patience":10})
     np.random.seed(10)
     classifier.kerasModel = classifier._defineModel(outDir, parameters, trainFeatures, trainClasses, develFeatures, develClasses)
     classifier._fitModel(outDir, parameters, trainFeatures, trainClasses, develFeatures, develClasses)
开发者ID:jbjorne,项目名称:TEES,代码行数:28,代码来源:KerasClassifier.py


示例4: gridSearch

def gridSearch():
	"""Grid-search SVC hyper-parameters on the svmlight train/test pair,
	reporting the best setting for each scoring metric.

	Reads svmPath/trainFile and svmPath/testFile (module-level globals).
	"""
	X_train, y_train = load_svmlight_file(svmPath + "/" + trainFile)
	# pin the test matrix to the training feature count so columns align
	X_test, y_test = load_svmlight_file(svmPath + "/" + testFile, n_features=X_train.shape[1])

	tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]}]#, {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

	scores = ['precision', 'recall']

	for score in scores:
		print("# Tuning hyper-parameters for %s" % score)
		print()

		# FIX: the original mixed tabs and spaces from here on, which is a
		# TabError under Python 3 and silently dedented the grid search out
		# of this loop under Python 2.  The search belongs inside the loop
		# so each scoring metric gets its own run.  The inner unpack also
		# no longer shadows the outer 'scores' list.
		clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=5, scoring=score)
		clf.fit(X_train, y_train)
		print("Best parameters set found on development set:")
		print()
		print(clf.best_estimator_)
		print()
		print("Grid scores on development set:")
		print()
		for params, mean_score, cv_scores in clf.grid_scores_:
			print("%0.3f (+/-%0.03f) for %r" % (mean_score, cv_scores.std() / 2, params))
		print()
		print("Detailed classification report:")
		print()
		print("The model is trained on the full development set.")
		print("The scores are computed on the full evaluation set.")
		print()
开发者ID:debanjanghosh,项目名称:argessay_ACL2016,代码行数:34,代码来源:scikit_expr_embedding.py


示例5: load

	def load(self, dataset = None, data_dir = "/home/drunkeneye/lab/data", verbose = None):
		"""Load self.X / self.y from an svmlight file, trying the raw file
		first and then the '.combined.scaled' variant.

		dataset: dataset name; defaults to self.name.
		data_dir: base directory containing <dataset>/<dataset>[suffix].
		verbose: print progress; defaults to self.verbose.
		Returns silently without setting X/y if no candidate file loads.
		"""
		if verbose == None:
			verbose = self.verbose

		if dataset == None:
			dataset = self.name

		# FIX: the two copy-pasted try blocks are folded into one loop over
		# the candidate paths, and the bare 'except:' (which also swallowed
		# KeyboardInterrupt/SystemExit) is narrowed to Exception.  The
		# deliberate best-effort fallback behaviour is preserved.
		for suffix in ("", ".combined.scaled"):
			filePath = os.path.join(data_dir, dataset, dataset + suffix)
			try:
				if verbose:
					print("  Trying to load data set from {}". format(filePath))
				self.X, self.y = load_svmlight_file(filePath)
				# densify once at load time
				self.X = np.asarray(self.X.todense())
				if verbose:
					print ("    Loaded from {}". format( filePath))
				return
			except Exception:
				# best effort: fall through to the next candidate path
				continue
开发者ID:aydindemircioglu,项目名称:MixMex,代码行数:31,代码来源:DataSet.py


示例6: run

def run(train_fp, test_fp, pred_fp, key_fp):
	"""Train a ridge regression on svmlight data and write keyed predictions.

	train_fp/test_fp: svmlight-format feature files.
	pred_fp: output path; one "key,prediction" line per test row.
	key_fp: file of row keys, read via the module-level load() helper.
	Returns 0 on completion.
	"""
	# load() fills `keys` in place, one entry per test row
	keys = []
	load(key_fp, keys)

	X_train, y_train = load_svmlight_file(train_fp)
	X_test, y_test = load_svmlight_file(test_fp)

	#dtrain = xgb.DMatrix(train_fp)
	#dtest = xgb.DMatrix(test_fp)

	# hyper-parameters come from a JSON file in the working directory
	params = {}
	with open("lr_reg.params", 'r') as f:
		params = json.load(f)
	print "[%s] [INFO] params: %s\n" % (t_now(), str(params))

	model = linear_model.Ridge (alpha = params['alpha'])
	model.fit(X_train, y_train)
	pred = model.predict(X_test)
	#model = xgb.train( params, dtrain, params['n_round'])
	#model = xgb.train( params, dtrain, params['n_round'], obj = customed_obj_1)
	#pred = model.predict(dtest, ntree_limit=params['n_round'])
	#pred = model.predict(dtest)

	# predictions are floored at 1.0 before writing
	f = open(pred_fp, 'w')
	for i in range(len(keys)):
		f.write(keys[i] + "," + str(max(1.0, pred[i])) + "\n")
	f.close()

	return 0
开发者ID:HouJP,项目名称:di-tech-16,代码行数:30,代码来源:lr_reg.py


示例7: main

def main():
    """Train a random-sampling SVM on the census dataset and report timing,
    remaining support vectors, and score."""
    # census hyper-parameters
    svm_para = {"C": 10.0, "kernel": "rbf", "gamma": 1.667, "verbose": False}
    X_train, y_train = datasets.load_svmlight_file(r"./dataset/census.train")

    # test random sampling
    RS_SVM = RandomSamplingSVM(svm_para)
    start_time = time.time()
    model = RS_SVM.train_one_half_v2(X_train, y_train)

    # FIX: the original accessed model.n_support_ BEFORE the None check,
    # so a failed training run crashed with AttributeError instead of
    # printing the error message.
    if model is None:
        print("Can not train the dataset", flush=True)
        return

    print("Remain SVs: " + str(model.n_support_), flush=True)
    print("--- %s seconds ---" % (time.time() - start_time), flush=True)

    # NOTE(review): scores on the *training* file; presumably a held-out
    # test file was intended -- confirm.
    X_test, y_test = datasets.load_svmlight_file(r"./dataset/census.train")
    ratio = model.score(X_test, y_test)
    print(ratio)
    print("--- %s seconds ---" % (time.time() - start_time), flush=True)
开发者ID:viethoangcr,项目名称:thesis,代码行数:32,代码来源:RS_SVM_v2.py


示例8: train_predict

def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
                  n_fold=5):
    """Grid-search an XGBoost classifier, refit the best model on all the
    training data, and save validation/test probability predictions."""
    # feature-set name: base file name minus its trailing 10 characters
    feature_name = os.path.basename(train_file)[:-10]
    logging.basicConfig(format='%(asctime)s   %(levelname)s   %(message)s',
                        level=logging.DEBUG,
                        filename='esb_xg_grid_colsub_{}.log'.format(feature_name))

    logging.info('Loading training and test data...')
    X_trn, y_trn = load_svmlight_file(train_file)
    X_test_mat, _ = load_svmlight_file(test_file)

    base_clf = xgb.XGBClassifier()
    param_grid = {'learning_rate': [.01, .03, .05], 'max_depth': [4, 5, 6],
                  'n_estimators': [400, 600]}
    folds = StratifiedKFold(y_trn, n_folds=n_fold, shuffle=True,
                            random_state=2015)
    searcher = GridSearchCV(base_clf, param_grid, scoring='log_loss',
                            verbose=1, cv=folds)

    logging.info('Cross validation for grid search...')
    searcher.fit(X_trn, y_trn)
    # in-sample probabilities from the refit best estimator
    p = searcher.predict_proba(X_trn)[:, 1]

    logging.info('best model = {}'.format(searcher.best_estimator_))
    logging.info('best score = {:.4f}'.format(searcher.best_score_))

    logging.info('Retraining with 100% data...')
    searcher.best_estimator_.fit(X_trn, y_trn)
    p_tst = searcher.best_estimator_.predict_proba(X_test_mat)[:, 1]

    logging.info('Saving predictions...')
    np.savetxt(predict_valid_file, p, fmt='%.6f')
    np.savetxt(predict_test_file, p_tst, fmt='%.6f')
开发者ID:drivendata,项目名称:countable-care-3rd-place,代码行数:32,代码来源:train_predict_esb_xg_grid_colsub.py


示例9: scale_mnist8m

def scale_mnist8m():
    """Densify the mnist8m 6-vs-8 svmlight dumps and persist them with
    joblib for faster reloading."""
    from sklearn.datasets import load_svmlight_file


    print "loading train",datetime.datetime.now()
    dd_train = load_svmlight_file(base_folder_mnist + "mnist8m_6_8_train.libsvm")
    print "loading test", datetime.datetime.now()
    dd_test = load_svmlight_file(base_folder_mnist + "mnist8m_6_8_test.libsvm")

    Xtrain = dd_train[0]
    Xtest = dd_test[0]
    Ytrain = dd_train[1]
    Ytest = dd_test[1]

    # Rebuild both CSR matrices with a fixed column count so train and test
    # align.  NOTE(review): 786 columns looks like a typo for MNIST's 784
    # pixels -- confirm against the dump that produced these files.
    Xtrain = csr_matrix((Xtrain.data, Xtrain.indices, Xtrain.indptr), shape=(Xtrain.shape[0], 786))
    Xtest = csr_matrix((Xtest.data, Xtest.indices, Xtest.indptr), shape=(Xtest.shape[0], 786))
    from sklearn.externals import joblib


    print "densifying train",datetime.datetime.now()
    Xtrain = Xtrain.todense()
    print "densifying test",datetime.datetime.now()
    Xtest = Xtest.todense()

    # dump (features, labels) tuples as plain ndarrays
    print "dumping train",datetime.datetime.now()
    joblib.dump((np.asarray(Xtrain),Ytrain),base_folder_mnist + "mnist8m_6_8_train_reshaped")
    #joblib.load(base_folder + "mnist8m_6_8_train_touple_small")
    print "dumping test",datetime.datetime.now()
    joblib.dump((np.asarray(Xtest),Ytest),base_folder_mnist + "mnist8m_6_8_test_reshaped")
    print "finished",datetime.datetime.now()
开发者ID:nikste,项目名称:doubly_random_svm,代码行数:30,代码来源:dataio.py


示例10: svm

def svm():
    """Score features, train an RBF one-vs-rest SVC, and dump predictions
    to result.txt."""
    # load data
    x_train, y_train = load_svmlight_file("12trainset")
    x_train.todense()
    x_test, y_test = load_svmlight_file("12testdata")
    x_test.todense()

    # univariate feature selection: top 9 features by ANOVA F-value
    selector = SelectKBest(f_classif, 9).fit(x_train, y_train)
    x_new = selector.transform(x_train)
    x_newtest = selector.transform(x_test)
    print(selector.scores_)
    print(x_new.shape)
    print(selector.get_support())

    # classifier -- NOTE(review): fit on the un-selected x_train, not
    # x_new; confirm whether the selection was meant to feed the SVC
    ovrclf = OneVsRestClassifier(SVC(C=2, gamma=2), -1)
    ovrclf.fit(x_train, y_train)
    y_pred = ovrclf.predict(x_test)

    # write result
    with open("result.txt", "w") as fw:
        for label in y_pred.tolist():
            fw.write(str(label) + '\n')
    print(np.array(y_pred).shape)

    target_names = ['0', '1', '2', '3']
    # start_time is a module-level global set at import time
    print(time.time() - start_time)
开发者ID:lkprof,项目名称:sema,代码行数:29,代码来源:svm.py


示例11: test_dump

def test_dump():
    """dump_svmlight_file output must reload to the same data, for both
    sparse and dense inputs, both index bases, and both float dtypes."""
    X_sparse, y = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()

    for X in (X_sparse, X_dense):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                buf = BytesIO()
                dump_svmlight_file(X.astype(dtype), y, buf, zero_based=zero_based)
                buf.seek(0)

                header = buf.readline()
                assert_in("scikit-learn %s" % sklearn.__version__, header)
                header = buf.readline()
                basis = "zero" if zero_based else "one"
                assert_in(basis + "-based", header)

                X_back, y_back = load_svmlight_file(buf, dtype=dtype,
                                                    zero_based=zero_based)
                assert_equal(X_back.dtype, dtype)
                # float32 round-trips with less precision; allow a rounding
                # error at the last stored decimal place
                precision = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(X_dense.astype(dtype),
                                          X_back.toarray(), precision)
                assert_array_equal(y, y_back)
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:33,代码来源:test_svmlight_format.py


示例12: test_load_with_long_qid

def test_load_with_long_qid():
    """Files with 64-bit (long) qid values must load and round-trip intact."""
    data = b("""
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807  0:1440446648 1:72048431380967004 2:236784985""")
    X, y, qid = load_svmlight_file(BytesIO(data), query_id=True)

    expected_X = [[1, 2, 3],
                  [1440446648, 72048431380967004, 236784985],
                  [1440446648, 72048431380967004, 236784985],
                  [1440446648, 72048431380967004, 236784985]]
    expected_y = [1, 0, 0, 3]
    expected_qid = [0, 72048431380967004, -9223372036854775807,
                    9223372036854775807]

    def check(X_got, y_got, qid_got=None):
        # shared assertions for every round-trip stage
        assert_array_equal(y_got, expected_y)
        assert_array_equal(X_got.toarray(), expected_X)
        if qid_got is not None:
            assert_array_equal(qid_got, expected_qid)

    check(X, y, qid)

    # dump with the qids and reload, first keeping the query ids ...
    f = BytesIO()
    dump_svmlight_file(X, y, f, query_id=qid, zero_based=True)
    f.seek(0)
    X, y, qid = load_svmlight_file(f, query_id=True, zero_based=True)
    check(X, y, qid)

    # ... then discarding them
    f.seek(0)
    X, y = load_svmlight_file(f, query_id=False, zero_based=True)
    check(X, y)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:32,代码来源:test_svmlight_format.py


示例13: test_load_with_offsets

def test_load_with_offsets(sparsity, n_samples, n_features):
    """Loading a file in three offset/length chunks must equal loading it
    in one piece."""
    rng = np.random.RandomState(0)
    dense = rng.uniform(low=0.0, high=1.0, size=(n_samples, n_features))
    if sparsity:
        dense[dense < sparsity] = 0.0
    X = sp.csr_matrix(dense)
    y = rng.randint(low=0, high=2, size=n_samples)

    f = BytesIO()
    dump_svmlight_file(X, y, f)
    f.seek(0)

    size = len(f.getvalue())

    # pick split marks that are likely to fall in the middle of a row
    mark_0 = 0
    mark_1 = size // 3
    mark_2 = 4 * size // 5
    length_0 = mark_1 - mark_0
    length_1 = mark_2 - mark_1

    # read the file back as three independent CSR chunks
    chunks = [
        load_svmlight_file(f, n_features=n_features,
                           offset=mark_0, length=length_0),
        load_svmlight_file(f, n_features=n_features,
                           offset=mark_1, length=length_1),
        load_svmlight_file(f, n_features=n_features,
                           offset=mark_2),
    ]

    y_concat = np.concatenate([c[1] for c in chunks])
    X_concat = sp.vstack([c[0] for c in chunks])
    assert_array_almost_equal(y, y_concat)
    assert_array_almost_equal(X.toarray(), X_concat.toarray())
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:33,代码来源:test_svmlight_format.py


示例14: loadData

def loadData():
    """Load the text and following feature sets and shuffle them in unison."""
    # the target from the second file overwrites the first; both files are
    # assumed to carry identical labels in the same row order
    text_features, target = load_svmlight_file('dataset/text.scale')
    follow_features, target = load_svmlight_file('dataset/following.scale')

    text_features, follow_features, target = shuffle(
        text_features, follow_features, target)

    return (text_features, follow_features, target)
开发者ID:pyongjoo,项目名称:twitter-research,代码行数:7,代码来源:ml_cotrain.py


示例15: check_data_compatibility

 def check_data_compatibility(self):
     """Return True if self.input_path parses as an svmlight file.

     Any parser exception is reported to stdout and swallowed, yielding
     False.
     """
     try:
         load_svmlight_file(self.input_path)
         return True
     except Exception as ex:
         # Python 2 only: ex.message was removed in Python 3 (use str(ex))
         print ex.message
         return False
开发者ID:patrickyeh,项目名称:datalabsdk-python,代码行数:7,代码来源:DataType.py


示例16: load_data

def load_data(dataset1, dataset2=None, make_dense=False):
    """Loads the dataset(s) given in the the svmlight / libsvm format

    **Parameters**

    * dataset1 (*str*) - Path to the file of the first dataset.
    * dataset2 (*str or None*) - If not None, path to the file of second dataset
    * make_dense (*boolean*) - Whether to return dense matrices instead of sparse ones

    **Returns**

    * (X_pool, X_test, y_pool, y_test) - Pool and test files if two files are provided
    * (X, y) - The single dataset

    """
    if not dataset2:
        # single-dataset mode
        X, y = load_svmlight_file(dataset1)
        if make_dense:
            X = X.todense()
        return X, y

    # two-dataset mode: force the second file onto the first file's
    # feature count so the matrices are column-aligned
    X_pool, y_pool = load_svmlight_file(dataset1)
    num_feat = X_pool.shape[1]
    X_test, y_test = load_svmlight_file(dataset2, n_features=num_feat)
    if make_dense:
        X_pool = X_pool.todense()
        X_test = X_test.todense()
    return (X_pool, X_test, y_pool, y_test)
开发者ID:haiamour,项目名称:AL,代码行数:29,代码来源:run_al_cl.py


示例17: test

def test():
    """Grid-search a one-vs-rest SVC on the svmlight train/test sets and
    report the best parameters plus a classification report."""
    x_train, y_train = load_svmlight_file("D:/traindata/12trainset")
    x_train.todense()
    x_test, y_test = load_svmlight_file("D:/traindata/12testset")
    x_test.todense()
    print(x_train.shape)

    # classifier: RBF SVC wrapped one-vs-rest, all cores (-1)
    ovrclf = OneVsRestClassifier(SVC(kernel='rbf'), -1)

    # full parameter grid (kept for reference; not searched below)
    parameters = [{'estimator__C': [2**-5, 2**-4, 2**-3, 2**-2, 2**-1, 1,
                                    2**1, 2**2, 2**3, 2**4, 2**5],
                   'estimator__kernel': ['rbf'],
                   'estimator__gamma': [2**-5, 2**-4, 2**-3, 2**-2, 2**-1, 1,
                                        2**1, 2**2, 2**3, 2**4, 2**5]},
                  {'estimator__C': [2**-5, 2**-4, 2**-3, 2**-2, 2**-1, 1,
                                    2**1, 2**2, 2**3, 2**4, 2**5],
                   'estimator__kernel': ['linear']}]
    # reduced grid that is actually searched
    para = {'estimator__C': [2**-5, 2**-4],
            'estimator__kernel': ['rbf'],
            'estimator__gamma': [2**-1, 1]}

    # custom scoring (lower score_func is better)
    sougou_score = make_scorer(score_func, greater_is_better=False)
    # stratified 5-fold cross-validation iterator
    sfk = c_v.StratifiedKFold(y_train, shuffle=True, n_folds=5, random_state=0)
    # grid search over the reduced grid
    gsclf = g_s.GridSearchCV(ovrclf, param_grid=para, cv=sfk, scoring=sougou_score)
    gsclf.fit(x_train, y_train)
    print("best score: ", gsclf.best_score_)
    print("best parameters: ", gsclf.best_params_)
    y_pred = gsclf.predict(x_test)

    # evaluation
    target_names = ['0', '1', '2', '3']
    squared_err = np.sum((np.array(y_pred) - np.array(y_test)) ** 2)
    print(classification_report(y_test, y_pred, target_names=target_names))
    print("sougouVal: ", float(squared_err) / y_pred.shape[0])
    # start_time is a module-level global set at import time
    print(time.time() - start_time)
开发者ID:lkprof,项目名称:sema,代码行数:35,代码来源:svm.py


示例18: test_load_compressed

def test_load_compressed():
    """Gzip- and bzip2-compressed copies must load identically to the raw
    datafile."""
    X, y = load_svmlight_file(datafile)

    # gzip round trip
    with NamedTemporaryFile(prefix="sklearn-test", suffix=".gz") as tmp:
        tmp.close()  # necessary under windows
        with open(datafile, "rb") as f, gzip.open(tmp.name, "wb") as fh_out:
            shutil.copyfileobj(f, fh_out)
        Xgz, ygz = load_svmlight_file(tmp.name)
        # the handle was closed and the file rewritten manually, so it must
        # also be removed manually
        os.remove(tmp.name)
    assert_array_almost_equal(X.toarray(), Xgz.toarray())
    assert_array_almost_equal(y, ygz)

    # bzip2 round trip
    with NamedTemporaryFile(prefix="sklearn-test", suffix=".bz2") as tmp:
        tmp.close()  # necessary under windows
        with open(datafile, "rb") as f, BZ2File(tmp.name, "wb") as fh_out:
            shutil.copyfileobj(f, fh_out)
        Xbz, ybz = load_svmlight_file(tmp.name)
        # same manual-cleanup situation as the gzip case
        os.remove(tmp.name)
    assert_array_almost_equal(X.toarray(), Xbz.toarray())
    assert_array_almost_equal(y, ybz)
开发者ID:mikebotazzo,项目名称:scikit-learn,代码行数:26,代码来源:test_svmlight_format.py


示例19: train_and_test

def train_and_test(domain_dir, sentences):
    """Fit logistic regression on the domain's training vectors, predict on
    the test vectors, and write extracted relations next to the test data."""
    train_dir = os.path.join(domain_dir, "train")
    test_dir = os.path.join(domain_dir, "test")
    X_train, y_train = load_svmlight_file(os.path.join(train_dir, "feature_vector"))
    X_test, y_test = load_svmlight_file(os.path.join(test_dir, "feature_vector"))

    clf = LogisticRegression(C=1.0, intercept_scaling=1, dual=False,
                             fit_intercept=True, penalty="l2", tol=0.0001)
    print("fit..")
    clf.fit(X_train, y_train)
    print("fit end...")

    # training-set F1 as a sanity check
    print(f1_score(y_train, clf.predict(X_train)))

    y = clf.predict(X_test)
    out = open(os.path.join(test_dir, "relation.classifier"), "w", encoding="utf8")
    pred_idx = 0  # y is indexed by candidate pair, running across sentences
    for sentence in sentences:
        has_relation = False
        lines = ["S\t{0}".format(sentence.text)]
        for pair in sentence.candidate_relation:
            if y[pred_idx] != 0:
                has_relation = True
                lines.append("R\t{0}\t{1}\t{2}\t{3}".format(
                    sentence.print_phrase(pair[0]).lower(),
                    sentence.print_phrase(pair[1]).lower(),
                    list(pair[0]),
                    list(pair[1])))
            pred_idx += 1
        # only sentences with at least one predicted relation are written
        if has_relation:
            for line in lines:
                print(line, file=out)
    out.close()
开发者ID:AntNLP,项目名称:opie,代码行数:32,代码来源:test_relation.py


示例20: train_predict

def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
                  C, n_fold=5):
    """Cross-validate an L2 logistic regression at strength C, retrain on
    all the training data, and save validation/test predictions."""
    logging.basicConfig(format='%(asctime)s   %(levelname)s   %(message)s',
                        level=logging.DEBUG, filename='lr_{}.log'.format(C))

    logging.info('Loading training and test data...')
    X, y = load_svmlight_file(train_file)
    X_tst, _ = load_svmlight_file(test_file)

    clf = LR(penalty='l2', dual=True, C=C, class_weight='auto',
             random_state=2015)

    cv = StratifiedKFold(y, n_folds=n_fold, shuffle=True, random_state=2015)

    logging.info('Cross validation...')
    # out-of-fold probability predictions plus summed per-fold log loss
    p_val = np.zeros_like(y)
    total_loss = 0.
    for idx_trn, idx_val in cv:
        clf.fit(X[idx_trn], y[idx_trn])
        p_val[idx_val] = clf.predict_proba(X[idx_val])[:, 1]
        total_loss += log_loss(y[idx_val], p_val[idx_val])

    logging.info('Log Loss = {:.4f}'.format(total_loss))

    logging.info('Retraining with 100% data...')
    clf.fit(X, y)
    p_tst = clf.predict_proba(X_tst)[:, 1]

    logging.info('Saving predictions...')
    np.savetxt(predict_valid_file, p_val, fmt='%.6f')
    np.savetxt(predict_test_file, p_tst, fmt='%.6f')
开发者ID:drivendata,项目名称:countable-care-3rd-place,代码行数:32,代码来源:train_predict_lr.py



注:本文中的sklearn.datasets.load_svmlight_file函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python datasets.load_svmlight_files函数代码示例发布时间:2022-05-27
下一篇:
Python datasets.load_linnerud函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2023 极客世界.|Sitemap