
Python datasets.dump_svmlight_file Function Code Examples


This article collects and summarizes typical usage examples of the Python function sklearn.datasets.dump_svmlight_file. If you are wondering how dump_svmlight_file is used in practice, or are looking for concrete examples of calling it, the curated code samples below should help.



A total of 20 code examples of the dump_svmlight_file function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
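
Before the collected examples, here is a minimal round-trip sketch of the API itself: dump a small dense matrix and its labels to svmlight/libsvm format and load them back. The toy data and the file name "example.svm" are purely illustrative and not taken from any of the projects below.

import numpy as np
from sklearn.datasets import dump_svmlight_file, load_svmlight_file

# A tiny dense feature matrix (3 samples x 4 features) and its labels.
X = np.array([[1.0, 0.0, 2.5, 0.0],
              [0.0, 3.0, 0.0, 0.0],
              [4.0, 0.0, 0.0, 5.5]])
y = np.array([1, -1, 1])

# Write the data in svmlight/libsvm format. zero_based=False produces
# 1-based feature indices, which external tools such as LIBSVM expect;
# the optional comment is stored in the file header.
dump_svmlight_file(X, y, "example.svm", zero_based=False, comment="toy data")

# Read it back; X2 comes back as a scipy.sparse CSR matrix.
X2, y2 = load_svmlight_file("example.svm", zero_based=False)
assert np.allclose(X, X2.toarray())
assert np.array_equal(y, y2)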

Example 1: test_dump_concise

def test_dump_concise():
    one = 1
    two = 2.1
    three = 3.01
    exact = 1.000000000000001
    # loses the last decimal place
    almost = 1.0000000000000001
    X = [[one, two, three, exact, almost],
         [1e9, 2e18, 3e27, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0]]
    y = [one, two, three, exact, almost]
    f = BytesIO()
    dump_svmlight_file(X, y, f)
    f.seek(0)
    # make sure it's using the most concise format possible
    assert_equal(f.readline(),
                 b("1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n"))
    assert_equal(f.readline(), b("2.1 0:1000000000 1:2e+18 2:3e+27\n"))
    assert_equal(f.readline(), b("3.01 \n"))
    assert_equal(f.readline(), b("1.000000000000001 \n"))
    assert_equal(f.readline(), b("1 \n"))
    f.seek(0)
    # make sure it's correct too :)
    X2, y2 = load_svmlight_file(f)
    assert_array_almost_equal(X, X2.toarray())
    assert_array_equal(y, y2)
Developer: BrenBarn, Project: scikit-learn, Lines: 28, Source: test_svmlight_format.py


Example 2: fit

    def fit(self, X, Y):

        self.labels=list(set(Y))
        if len(self.labels) > 2 :
            self.multiclass=True
            #print 'multiclass'
        else:
            self.multiclass=False
            
        self.train_fname =self.base_str +'-svmcmd-train' +  '.dat'
        self.model_fname =self.train_fname + '.model'
        dump_svmlight_file(X,Y,self.train_fname ,zero_based=False)
        if self.multiclass:
            command_line=path_to_train_program+'gtsvm_initialize {0} -f {1} -o {2}  -m 1 '.format(self.param_str, self.train_fname , self.model_fname )
        else:
            command_line=path_to_train_program+'gtsvm_initialize -f {1} -o {2} {0}'.format(self.param_str, self.train_fname , self.model_fname )
        args = shlex.split(command_line)
        p = subprocess.Popen(args)
        p.wait()
        command_line=path_to_train_program+'gtsvm_optimize -i {0} -o {1} -e {2} -n {3}'.format(self.model_fname,self.model_fname,self.tol,self.max_iter)    
        args = shlex.split(command_line)
        p = subprocess.Popen(args,stderr=subprocess.PIPE)
        p.wait()
        opt_err_str=p.stderr.read() ##gtsvm is too buggy
        if len(opt_err_str) < 1: 
            command_line=path_to_train_program+'gtsvm_shrink -i {0}  -o {1}'.format(self.model_fname,self.model_fname)
            args = shlex.split(command_line)
            p = subprocess.Popen(args)
            p.wait()
            self.train_fail=False
        else :
            self.train_fail=True
            
        
        return self
Developer: niitsuma, Project: gtsvm, Lines: 35, Source: gtsvm.py


Example 3: predict

 def predict(self, X):
     if isinstance(X,list):
         self.test_n_sample=len(X)
     else:
         self.test_n_sample=X.shape[0]
     Y=[1]*self.test_n_sample
     self.test_fname =self.base_str +'-svmcmd-test' +  '.dat'
     self.predict_fname =self.base_str +'-svmcmd-predict' +  '.dat'
     dump_svmlight_file(X,Y,self.test_fname ,zero_based=False)
     command_line=path_to_train_program+'gtsvm_classify -f {0}  -i {1} -o {2}'.format(self.test_fname , self.model_fname, self.predict_fname )
     args = shlex.split(command_line)
     p = subprocess.Popen(args)
     p.wait()
     if self.train_fail:
         return [max(self.labels)+1]*self.test_n_sample
     
     if self.multiclass : 
         f = open(self.predict_fname, 'rb')
         self.predicted_weight = map(lambda row: map(float,row), list(csv.reader(f)))
         f.close()
         Y_predict=map(np.argmax, self.predicted_weight)
     else :
         self.predicted_weight = np.loadtxt( self.predict_fname)
         Y_predict=map(int,map(round,self.predicted_weight))
     return Y_predict
Developer: niitsuma, Project: gtsvm, Lines: 25, Source: gtsvm.py


Example 4: generate_weekday_newbuyer_exposure

def generate_weekday_newbuyer_exposure(df):
    """
    Add new-customer count and exposure count features.
    """
    X = df[['uv_0612_0618', 'uv_weekday', 'uv_weekend', 'no_subsidy_exposure', 'newbuyer_6_18']]
    y = df.uv_0626_0702
    dump_svmlight_file(X, y, './uv_weekday_weekend_newbuyer_exposure_without_outliers.dat')
Developer: lujiaying, Project: jiayinglu, Lines: 7, Source: generate_libsvm_format.py


Example 5: generate_week

def generate_week(df):
    """
    Generate a single feature (1-dimensional).
    """
    X = df[['uv_0612_0618']]
    y = df.uv_0626_0702
    dump_svmlight_file(X, y, './uv_week.dat')
Developer: lujiaying, Project: jiayinglu, Lines: 7, Source: generate_libsvm_format.py


Example 6: executa_extracao_n

def executa_extracao_n(base_treino, metodo, n=1):
    inicio = time()    
    
    lista_imagens = arq.busca_arquivos(base_treino, "*.png")
    n_imgs_treino = len(lista_imagens)
    
    for lado in range(8,n+1,4):
        atributos = []    
        rotulos = []     
            
        arq_treino = base_treino + "base_PFTAS_"+str(lado)+"x"+str(lado)+".svm"
        ##  START OF THE ATTRIBUTE EXTRACTION PROCESS
        
        for arq_imagem in lista_imagens: 
            print("Arquivo: " + arq_imagem)
            imagem = mh.imread(arq_imagem) 
            if imagem is not None:  # 'imagem != None' would compare a NumPy array element-wise
                classe, _ = ex.classe_arquivo(arq_imagem)             
                print("executa_extracao_n - shape imagem:" + str(imagem.shape))
                # Extract the attributes and generate the patch files for the training set
                atrs,rots = extrai_pftas_patches_n(imagem, classe, lado)                            
                atributos += atrs
                rotulos += rots
        
        dump_svmlight_file(atributos, rotulos, arq_treino)
    
    log("Extraidos atributos da base " + base_treino + " utilizando " + metodo + "\n para " + str(n_imgs_treino) + "imagens") 
  
    # Print the execution time
    log(str(time()-inicio) + "EXTRAÇÃO")     
Developer: willianfatec, Project: PatchWiser, Lines: 30, Source: testes.py


Example 7: generate_weekday_weekend

def generate_weekday_weekend(df):
    """
    Generate 3-dimensional features.
    """
    X = df[['uv_0612_0618', 'uv_weekday', 'uv_weekend']]
    y = df.uv_0626_0702
    dump_svmlight_file(X, y, './uv_weekday_weekend.dat')
Developer: lujiaying, Project: jiayinglu, Lines: 7, Source: generate_libsvm_format.py


Example 8: test_dump

def test_dump():
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()

    for X in (Xs, Xd):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                f = BytesIO()
                # we need to pass a comment to get the version info in;
                # LibSVM doesn't grok comments so they're not put in by
                # default anymore.
                dump_svmlight_file(X.astype(dtype), y, f, comment="test",
                                   zero_based=zero_based)
                f.seek(0)

                comment = f.readline()
                assert_in("scikit-learn %s" % sklearn.__version__, comment)
                comment = f.readline()
                assert_in(["one", "zero"][zero_based] + "-based", comment)

                X2, y2 = load_svmlight_file(f, dtype=dtype,
                                            zero_based=zero_based)
                assert_equal(X2.dtype, dtype)
                if dtype == np.float32:
                    assert_array_almost_equal(
                        # allow a rounding error at the last decimal place
                        Xd.astype(dtype), X2.toarray(), 4)
                else:
                    assert_array_almost_equal(
                        # allow a rounding error at the last decimal place
                        Xd.astype(dtype), X2.toarray(), 15)
                assert_array_equal(y, y2)
Developer: yzhy, Project: scikit-learn, Lines: 32, Source: test_svmlight_format.py


Example 9: save_all_data_in_svmlight_format

 def save_all_data_in_svmlight_format(self,
                                      file_path,
                                      extraction_method,
                                      label_type):
     label_list, feature_vector_list = self.extract_all_data(extraction_method, label_type)
     with open(file_path, 'wb') as f:
         datasets.dump_svmlight_file(feature_vector_list, label_list, f)
Developer: t-usui, Project: COMES, Lines: 7, Source: data_processor.py


Example 10: dump_svmlight

def dump_svmlight(X_matrix, Y, feature_names, output_filename, feature_id_offset = 0):
  dump_svmlight_file(X_matrix, Y, output_filename)
  contents = None
  with open(output_filename) as output_file:
    contents = '#' + ' '.join(feature_names) + '\n' + ''.join(output_file.readlines())
  with open(output_filename, 'w') as output_file:
    output_file.write(contents)
Developer: hczhu, Project: script-tools, Lines: 7, Source: translate_feature_format.py


Example 11: batch_fit

    def batch_fit(self, Xs, ys, dump=True):
        qids = [np.array([i] * len(ys[i])) for i in range(len(ys))]
        print "dumping data to Xtrain.data"
        if dump:
            dump_svmlight_file(
                np.concatenate(Xs), np.concatenate(ys), "Xtrain.data", zero_based=False, query_id=np.concatenate(qids)
            )

        print "now learning"

        print call(
            [
                self.path + "svm_hmm_learn",
                "-c",
                "%d" % self.C,
                "--t",
                "%d" % self.t,
                "--e",
                "%d" % self.e,
                "Xtrain.data",
                "svmhmm-model.dat",
            ]
        )

        return self
Developer: Hanshan1988, Project: smartphone-activity-recognition, Lines: 25, Source: svmhmm.py


Example 12: load_training_data

def load_training_data(file_location=str, load_from_database=False, limit=int(1000), clean_dataset=True):
    """
    If ```load_from_database``` is True, retrieves and stores data from database to file.

    Arguments:
        file_location (str): Path + filename of libsvm file to save/load (e.g. 'training_data')
        load_from_database (bool): Should data be retrieved from database?
        limit (int): Amount of records to retrieve from database (default=1000)
        clean_dataset (bool): Should questions be cleaned (e.g. remove code samples, hexadecimals, numbers, etc)?

    Returns:
         (pandas.DataFrame.from_csv, sklearn.datasets.load_svmlight_file):
         Tuple containing a pandas.DataFrame (all data retrieved from database) and
         tuple with training data (load_svmlight_file)

    See:
        | ```MySQLDatabase().retrieve_training_data```
        | ```pandas.DataFrame.to_csv```
        | ```pandas.DataFrame.from_csv```
        | ```sklearn.datasets.dump_svmlight_file```
        | ```sklearn.datasets.load_svmlight_file```
    """
    svm_file = file_location + ".dat"
    csv_file = file_location + ".csv"
    if load_from_database:
        comment = u"label: (-1: Bad question, +1: Good question); features: (term_id, frequency)"
        MySQLDatabase().set_vote_value_params()
        data = MySQLDatabase().retrieve_training_data(limit, clean_dataset)
        # create a term-document matrix
        vectorizer = CountVectorizer(analyzer='word', min_df=0.01, stop_words="english")
        td_matrix = vectorizer.fit_transform(data.get(QUESTION_TEXT_KEY))
        data.to_csv(csv_file)
        dump_svmlight_file(td_matrix, data[CLASS_LABEL_KEY], f=svm_file, comment=comment)
    return DataFrame.from_csv(csv_file), load_svmlight_file(svm_file)
Developer: klAndersen, Project: IMT4904_MasterThesis_Code, Lines: 34, Source: test_all_algorithms.py


Example 13: test_load_with_offsets

def test_load_with_offsets(sparsity, n_samples, n_features):
    rng = np.random.RandomState(0)
    X = rng.uniform(low=0.0, high=1.0, size=(n_samples, n_features))
    if sparsity:
        X[X < sparsity] = 0.0
    X = sp.csr_matrix(X)
    y = rng.randint(low=0, high=2, size=n_samples)

    f = BytesIO()
    dump_svmlight_file(X, y, f)
    f.seek(0)

    size = len(f.getvalue())

    # put some marks that are likely to happen anywhere in a row
    mark_0 = 0
    mark_1 = size // 3
    length_0 = mark_1 - mark_0
    mark_2 = 4 * size // 5
    length_1 = mark_2 - mark_1

    # load the original sparse matrix into 3 independent CSR matrices
    X_0, y_0 = load_svmlight_file(f, n_features=n_features,
                                  offset=mark_0, length=length_0)
    X_1, y_1 = load_svmlight_file(f, n_features=n_features,
                                  offset=mark_1, length=length_1)
    X_2, y_2 = load_svmlight_file(f, n_features=n_features,
                                  offset=mark_2)

    y_concat = np.concatenate([y_0, y_1, y_2])
    X_concat = sp.vstack([X_0, X_1, X_2])
    assert_array_almost_equal(y, y_concat)
    assert_array_almost_equal(X.toarray(), X_concat.toarray())
Developer: AlexisMignon, Project: scikit-learn, Lines: 33, Source: test_svmlight_format.py


Example 14: subsample_to_file

def subsample_to_file(svm_file, out_dir, out_name, multilabel=False,
                      row_ratio=0.5, col_ratio=0.3, random_state=12):
  """
  Example:

  '''python
     # run the following command in the current directory will create a
     # `tmp` folder, if not already exists, and generate a file called
     # `a9a_sub` from the original file `./data/a9a`. Both files are
     # in libsvm format.
     subsample_to_file("./data/a9a", "./tmp", "a9a_sub")
     # read the subsampled file and make sure its number of rows is half of
     # that of a9a and its number of column is roughly third of a9a (123)
     X, y = load_svmlight_file('./tmp/a9a_sub')
     assert X.shape == (16280, 36)
  '''

  """
  assert 1 >= row_ratio > 0, \
         "Row ratio {row_ratio} must be (0, 1]" \
         .format(**locals())
  assert 1 >= col_ratio > 0, \
         "Col ratio {col_ratio} must be (0, 1]" \
         .format(**locals())
  X, y = load_svmlight_file(svm_file, multilabel=multilabel)
  n, m = X.shape
  subn = int(n*row_ratio)
  subm = int(m*col_ratio)
  rst = np.random.RandomState(random_state)
  ridx = rst.choice(n, subn, replace=False)
  cidx = rst.choice(m, subm, replace=False)
  mkdir_p(out_dir)
  out_file = os.path.join(out_dir, out_name)
  dump_svmlight_file(X[ridx,:][:,cidx], y[ridx],
                     out_file, multilabel=multilabel)
Developer: mktal, Project: peregrine, Lines: 35, Source: utils.py


Example 15: save_libfm

def save_libfm(X_sprs_mat, y_array, f):
    
    print("Save LibFM Format")
    
    dump_svmlight_file(X_sprs_mat, y_array, f)
    
    return
Developer: nancyya, Project: Predictors, Lines: 7, Source: dataProcessing.py


Example 16: test_load_with_long_qid

def test_load_with_long_qid():
    # load svmfile with longint qid attribute
    data = b("""
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807  0:1440446648 1:72048431380967004 2:236784985""")
    X, y, qid = load_svmlight_file(BytesIO(data), query_id=True)

    true_X = [[1,          2,                 3],
             [1440446648, 72048431380967004, 236784985],
             [1440446648, 72048431380967004, 236784985],
             [1440446648, 72048431380967004, 236784985]]

    true_y = [1, 0, 0, 3]
    trueQID = [0, 72048431380967004, -9223372036854775807, 9223372036854775807]
    assert_array_equal(y, true_y)
    assert_array_equal(X.toarray(), true_X)
    assert_array_equal(qid, trueQID)

    f = BytesIO()
    dump_svmlight_file(X, y, f, query_id=qid, zero_based=True)
    f.seek(0)
    X, y, qid = load_svmlight_file(f, query_id=True, zero_based=True)
    assert_array_equal(y, true_y)
    assert_array_equal(X.toarray(), true_X)
    assert_array_equal(qid, trueQID)

    f.seek(0)
    X, y = load_svmlight_file(f, query_id=False, zero_based=True)
    assert_array_equal(y, true_y)
    assert_array_equal(X.toarray(), true_X)
Developer: AlexisMignon, Project: scikit-learn, Lines: 32, Source: test_svmlight_format.py


Example 17: create_train_test

def create_train_test(n_samples, doc2vec, save_svmlight=True):

    print "Creating train & test sets..."

    # Create labelled data arrays.

    data = np.zeros((n_samples, doc2vec.size))
    labels = np.zeros(n_samples)

    for i in range(n_samples / 2):

        prefix_train_pos = 'TRAIN_POS_' + str(i)
        prefix_train_neg = 'TRAIN_NEG_' + str(i)

        data[i] = doc2vec.model.docvecs[prefix_train_pos]
        data[n_samples / 2 + i] = doc2vec.model.docvecs[prefix_train_neg]

        labels[i] = 1

    # Split in train and validation arrays.

    train, test, train_labels, test_labels = train_test_split(
        data, labels, test_size=0.3, random_state=42)

    if save_svmlight:

        current_path = os.path.abspath(
            os.path.join(os.getcwd(), os.pardir))

        dump_svmlight_file(train, train_labels, current_path + "/Data/Processed/TrainSet.svm")
        dump_svmlight_file(test, test_labels, current_path + "/Data/Processed/TestSet.svm")

    return train, test, train_labels, test_labels
Developer: bailingwjw, Project: OpinionMining, Lines: 33, Source: Classifying.py


Example 18: test_dump

def test_dump():
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()

    for X in (Xs, Xd):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                f = BytesIO()
                dump_svmlight_file(X.astype(dtype), y, f, zero_based=zero_based)
                f.seek(0)

                comment = f.readline()
                assert_in("scikit-learn %s" % sklearn.__version__, comment)
                comment = f.readline()
                assert_in(["one", "zero"][zero_based] + "-based", comment)

                X2, y2 = load_svmlight_file(f, dtype=dtype, zero_based=zero_based)
                assert_equal(X2.dtype, dtype)
                if dtype == np.float32:
                    assert_array_almost_equal(
                        # allow a rounding error at the last decimal place
                        Xd.astype(dtype),
                        X2.toarray(),
                        4,
                    )
                else:
                    assert_array_almost_equal(
                        # allow a rounding error at the last decimal place
                        Xd.astype(dtype),
                        X2.toarray(),
                        15,
                    )
                assert_array_equal(y, y2)
Developer: kkuunnddaann, Project: scikit-learn, Lines: 33, Source: test_svmlight_format.py


Example 19: data_dump

    def data_dump(self, f, X_train, X_test, y_train, y_test):
        from sklearn.datasets import dump_svmlight_file
        ddd = dict()
        new_y_train = []
        last = 0
        for yy in y_train:
            if yy in ddd:
                yy = (ddd[yy])
            else:
                ddd[yy] = last
                yy = last
                last += 1
            new_y_train.append(yy)

        dump_svmlight_file(X_train, new_y_train, f + ".svmlight.train")
        
        new_y_test = []
        for yy in y_test:
            if yy in ddd:
                yy = (ddd[yy])
            else:
                ddd[yy] = last
                yy = last
                last += 1
            new_y_test.append(yy)
        
        dump_svmlight_file(X_test, new_y_test, f + ".svmlight.test")
Developer: osmanbaskaya, Project: mapping-impact, Lines: 27, Source: classifier_eval.py


Example 20: test_dump_comment

def test_dump_comment():
    X, y = load_svmlight_file(datafile)
    X = X.toarray()

    f = BytesIO()
    ascii_comment = "This is a comment\nspanning multiple lines."
    dump_svmlight_file(X, y, f, comment=ascii_comment, zero_based=False)
    f.seek(0)

    X2, y2 = load_svmlight_file(f, zero_based=False)
    assert_array_almost_equal(X, X2.toarray())
    assert_array_equal(y, y2)

    # XXX we have to update this to support Python 3.x
    utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc"
    f = BytesIO()
    assert_raises(UnicodeDecodeError, dump_svmlight_file, X, y, f, comment=utf8_comment)

    unicode_comment = utf8_comment.decode("utf-8")
    f = BytesIO()
    dump_svmlight_file(X, y, f, comment=unicode_comment, zero_based=False)
    f.seek(0)

    X2, y2 = load_svmlight_file(f, zero_based=False)
    assert_array_almost_equal(X, X2.toarray())
    assert_array_equal(y, y2)

    f = BytesIO()
    assert_raises(ValueError, dump_svmlight_file, X, y, f, comment="I've got a \0.")
Developer: kkuunnddaann, Project: scikit-learn, Lines: 29, Source: test_svmlight_format.py



Note: The sklearn.datasets.dump_svmlight_file examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub/MSDocs. The code snippets were selected from open-source projects contributed by many developers, and copyright remains with the original authors; refer to each project's license before redistributing or using the code. Do not reproduce this article without permission.

