
Python classification.LogisticRegressionWithLBFGS Class Code Examples


This article compiles typical usage examples of the Python class pyspark.mllib.classification.LogisticRegressionWithLBFGS. If you have been wondering what LogisticRegressionWithLBFGS does, how to use it, or where to find worked examples, the curated class examples below should help.



Twenty code examples of the LogisticRegressionWithLBFGS class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
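As a warm-up before the community examples, here is a minimal, self-contained sketch of the typical train/predict cycle for this class (the toy dataset and the local SparkContext are assumptions added for illustration):

from pyspark import SparkContext
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from pyspark.mllib.regression import LabeledPoint

sc = SparkContext(appName="lbfgs-minimal")  # assumed local Spark context

# Toy binary-classification data: a label followed by two features
data = sc.parallelize([
    LabeledPoint(0.0, [0.0, 1.0]),
    LabeledPoint(1.0, [1.0, 0.0]),
    LabeledPoint(0.0, [0.1, 0.9]),
    LabeledPoint(1.0, [0.9, 0.2]),
])

# Train a logistic regression model with the L-BFGS optimizer
model = LogisticRegressionWithLBFGS.train(data, iterations=100)

# Compare predictions against the labels to measure training error
labels_and_preds = data.map(lambda p: (p.label, model.predict(p.features)))
train_err = labels_and_preds.filter(lambda vp: vp[0] != vp[1]).count() / float(data.count())
print("Training Error = " + str(train_err))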

Example 1: cross_validation_lr

def cross_validation_lr(Data_1, Data_2, Data_3, regType, num_iter):
    # Fold 1: train on Data_1 + Data_2, test on Data_3
    model_train_1 = LogisticRegressionWithLBFGS.train(Data_1.union(Data_2),
                                                      regType=regType, iterations=num_iter, numClasses=5)

    # Evaluate model on test instances and compute test error
    predictions_1 = model_train_1.predict(Data_3.map(lambda x: x.features))
    labelsAndPredictions_1 = Data_3.map(lambda lp: lp.label).zip(predictions_1)
    testMSE_1 = labelsAndPredictions_1.map(lambda (v, p): (v + 0.5 - p) * (v + 0.5 - p)).sum() /\
        float(Data_3.count())

    # Fold 2: train on Data_2 + Data_3, test on Data_1
    model_train_2 = LogisticRegressionWithLBFGS.train(Data_2.union(Data_3),
                                                      regType=regType, iterations=num_iter, numClasses=5)

    predictions_2 = model_train_2.predict(Data_1.map(lambda x: x.features))
    labelsAndPredictions_2 = Data_1.map(lambda lp: lp.label).zip(predictions_2)
    testMSE_2 = labelsAndPredictions_2.map(lambda (v, p): (v + 0.5 - p) * (v + 0.5 - p)).sum() /\
        float(Data_1.count())

    # Fold 3: train on Data_3 + Data_1, test on Data_2
    model_train_3 = LogisticRegressionWithLBFGS.train(Data_3.union(Data_1),
                                                      regType=regType, iterations=num_iter, numClasses=5)

    predictions_3 = model_train_3.predict(Data_2.map(lambda x: x.features))
    labelsAndPredictions_3 = Data_2.map(lambda lp: lp.label).zip(predictions_3)
    testMSE_3 = labelsAndPredictions_3.map(lambda (v, p): (v + 0.5 - p) * (v + 0.5 - p)).sum() /\
        float(Data_2.count())

    # Average test MSE across the three folds
    return (testMSE_1 + testMSE_2 + testMSE_3) / 3
Developer: USF-ML2, Project: Rectastic-, Lines: 33, Source: LR_models.py
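For orientation, a hypothetical call of the helper above; fold_1, fold_2, and fold_3 stand for three roughly equal LabeledPoint RDD folds of one dataset (placeholder names, not from the original project):

# Hypothetical 3-fold cross-validation call over assumed RDD folds
avg_mse = cross_validation_lr(fold_1, fold_2, fold_3, regType="l2", num_iter=100)
print("Average test MSE across the three folds = " + str(avg_mse))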


Example 2: test_train

    def test_train(self, df, target, train_split, test_split, regularization=None, num_of_iterations=100):
        try:
            LOGGER.info("Generating logistic regression model")

            spark_df = self.sql_context.createDataFrame(df)
            feature_columns = spark_df.columns
            feature_columns.remove(target)

            train, test = spark_df.randomSplit([train_split, test_split], seed=1000000)

            X_train = train.select(*feature_columns).map(lambda x: list(x))
            y_train = train.select(target).map(lambda x: x[0])

            zipped = y_train.zip(X_train)
            train_data = zipped.map(lambda x: LabeledPoint(x[0], x[1]))

            numOfClasses = len(df[target].unique())

            logistic_model = LogisticRegressionWithLBFGS.train(train_data,
                                                               numClasses=numOfClasses, regParam=0,
                                                               regType=regularization, intercept=True,
                                                               iterations=num_of_iterations, validateData=False)

            X_test = test.select(*feature_columns).map(lambda x: list(x))
            y_test = test.select(target).map(lambda x: x[0])

            prediction = X_test.map(lambda lp: (float(logistic_model.predict(lp))))
            prediction_and_label = prediction.zip(y_test)

            LOGGER.info(prediction_and_label.map(lambda labelAndPred: labelAndPred[0] == labelAndPred[1]).mean())
        except Exception as e:
            raise e
Developer: bertomartin, Project: fifthel-2016-workshop, Lines: 32, Source: logistic_regression.py


Example 3: lrTest

def lrTest(sqlContext, dataset_rdd, positive_negative_rate):
	# Split positives and negatives, then sample 80% of each for training
	dataset_positive = dataset_rdd.filter(lambda e: e[1] > 0.5)
	dataset_negative = dataset_rdd.filter(lambda e: e[1] < 0.5)
	train_positive = dataset_positive.sample(False, 0.8)
	test_positive = dataset_positive.subtract(train_positive)
	train_negative = dataset_negative.sample(False, 0.8)
	test_negative = dataset_negative.subtract(train_negative)
	trainset_rdd = train_positive.union(train_negative)
	testset_rdd = test_positive.union(test_negative)
	trainset = trainset_rdd.map(lambda e: LabeledPoint(e[1], e[2:]))
	trainset_nums = trainset.count()
	testset = testset_rdd.map(lambda e: LabeledPoint(e[1], e[2:]))
	testset_nums = testset.count()
	trainset_positive = train_positive.count()
	testset_positive = test_positive.count()
	model = LogisticRegressionWithLBFGS.train(trainset, iterations=100)
	predict = testset.map(lambda p: (p.label, model.predict(p.features)))
	hitALL = predict.filter(lambda e: e[0] == e[1]).count()
	hitPositive = predict.filter(lambda e: e[0] == e[1] and (e[0] > 0.5)).count()
	positive = predict.filter(lambda e: e[1] > 0.5).count()
	recallPositive = hitPositive / float(testset_positive)
	precision = hitPositive / float(positive)
	accuracy = hitALL / float(testset.count())
	# F1 score: harmonic mean of precision and recall
	F_Value = 2 / (1 / precision + 1 / recallPositive)
	return (trainset_nums, testset_nums, trainset_positive, testset_positive, positive, hitPositive, precision, recallPositive, accuracy, F_Value, model)
Developer: fighting410381, Project: youmi, Lines: 25, Source: spark_script.py
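A hedged sketch of how the returned tuple might be unpacked; sqlContext and dataset_rdd (rows assumed to be of the form (id, label, feature_1, ..., feature_n)) are illustrative assumptions, and note that the third argument is never used inside the helper:

# Hypothetical call against an assumed dataset_rdd
(train_n, test_n, train_pos, test_pos, predicted_pos,
 hit_pos, precision, recall, accuracy, f_value, model) = lrTest(sqlContext, dataset_rdd, 0.8)
print("precision=%f recall=%f F1=%f" % (precision, recall, f_value))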


Example 4: seg_model_lr

def seg_model_lr(train_data, test_data, regType, num_iter):
    removelist_train = set(['stars', 'business_id', 'bus_id', 'b_id', 'review_id', 'user_id'])
    newlist_train = [v for i, v in enumerate(train_data.columns) if v not in removelist_train]

    # Putting data in vector assembler form
    assembler_train = VectorAssembler(inputCols=newlist_train, outputCol="features")

    transformed_train = assembler_train.transform(train_data.fillna(0))

    # Creating input dataset in the form of labeled points for training the model
    data_train = (transformed_train.select("features", "stars")).map(lambda row: LabeledPoint(row.stars, row.features))

    # Training the model using the Logistic Regression classifier
    # (collect + parallelize forces the training data onto 5 partitions)
    model_train = LogisticRegressionWithLBFGS.train(sc.parallelize(data_train.collect(), 5),
                                                    regType=regType, iterations=num_iter, numClasses=5)

    # Creating a list of features to be used for predictions
    removelist_final = set(['business_id', 'bus_id', 'b_id', 'review_id', 'user_id'])
    newlist_final = [v for i, v in enumerate(test_data.columns) if v not in removelist_final]

    # Putting data in vector assembler form
    assembler_final = VectorAssembler(inputCols=newlist_final, outputCol="features")

    transformed_final = assembler_final.transform(test_data.fillna(0))

    # Creating input dataset to be used for predictions
    data_final = transformed_final.select("features", "review_id")

    # Predicting ratings using the developed model
    predictions = model_train.predict(data_final.map(lambda x: x.features))
    labelsAndPredictions = data_final.map(lambda row: row.review_id).zip(predictions)
    return labelsAndPredictions
Developer: USF-ML2, Project: Rectastic-, Lines: 32, Source: LR_models.py


Example 5: train

    def train(self, df, target, regularization=None, num_of_iterations=100):
        try:
            LOGGER.info("Generating logistic regression model")

            spark_df = self.sql_context.createDataFrame(df)
            feature_columns = spark_df.columns
            feature_columns.remove(target)


            X_train = spark_df.select(*feature_columns).map(lambda x: list(x))
            y_train = spark_df.select(target).map(lambda x: x[0])

            zipped = y_train.zip(X_train)
            train_data = zipped.map(lambda x: LabeledPoint(x[0], x[1]))
            numOfClasses = len(df[target].unique())

            logistic_model = LogisticRegressionWithLBFGS.train(train_data,
                                                               numClasses=numOfClasses, regParam=0,
                                                               regType=regularization, intercept=True,
                                                               iterations=num_of_iterations, validateData=False)


            self.model = logistic_model

        except Exception as e:
            raise e
Developer: bertomartin, Project: fifthel-2016-workshop, Lines: 26, Source: logistic_regression.py


Example 6: RunLogit

def RunLogit(tf):
	rdd = tf.map(parseAsLabeledPoints)
	train, test = rdd.randomSplit([.8, .2])
	numCat = len(genCats)
	model = LogisticRegressionWithLBFGS.train(train, numClasses=numCat, iterations=100)
	predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label))
	accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()

	print 'Accuracy of Logit = ', accuracy * 100
	print "Test Error = ", (1.0 - accuracy) * 100
Developer: Sunhick, Project: music-cognita, Lines: 10, Source: genre_classification.py


Example 7: train_model

def train_model(training_rdd, **kwargs):
    """
    Train a classifier model using an RDD training dataset.
    :param training_rdd: the RDD of the training dataset
    :param kwargs: additional key-value params for the training (if any)
    :return: the trained LogisticRegressionModel
    """
    return LogisticRegressionWithLBFGS.train(training_rdd,
                                             regType=_REGULARIZATION,
                                             intercept=_INTERCEPT,
                                             **kwargs)
Developer: cmantas, Project: asap.cslab, Lines: 11, Source: imr_classification.py
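A usage sketch under the assumption that training_points is an RDD of LabeledPoints; any extra keyword arguments are forwarded verbatim to LogisticRegressionWithLBFGS.train:

# training_points: assumed RDD[LabeledPoint]; iterations/numClasses pass through **kwargs
model = train_model(training_points, iterations=200, numClasses=2)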


Example 8: regression

def regression(reg_data):
    (trainingData, testData) = reg_data.randomSplit([0.7, 0.3])
    lrmodel = LogisticRegressionWithLBFGS.train(trainingData)
    labelsAndPreds = testData.map(lambda p: (p.label, lrmodel.predict(p.features)))

    # All rates below are computed on the held-out test split
    testErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(testData.count())
    falsePos = labelsAndPreds.filter(lambda (v, p): v != p and v == 0.0).count() / float(testData.filter(lambda lp: lp.label == 0.0).count())
    falseNeg = labelsAndPreds.filter(lambda (v, p): v != p and v == 1.0).count() / float(testData.filter(lambda lp: lp.label == 1.0).count())

    print "*** Error Rate: %f ***" % testErr
    print "*** False Positive Rate: %f ***" % falsePos
    print "*** False Negative Rate: %f ***" % falseNeg
Developer: Jiangshangmin, Project: gensim-doc2vec-spark, Lines: 12, Source: movie_review.py


Example 9: validation_lr

def validation_lr(trainingData, testData, regType, num_iter):
    # Training the model using the Logistic Regression classifier
    model_train = LogisticRegressionWithLBFGS.train(trainingData, regType=regType, iterations=num_iter, numClasses=5)

    # Evaluate model on test instances and compute test error
    predictions = model_train.predict(testData.map(lambda x: x.features))
    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
    testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
        float(testData.count())
    return testMSE
Developer: USF-ML2, Project: Rectastic-, Lines: 14, Source: LR_models.py


Example 10: training

def training(path):
	#import dataset into RDD
	raw_data = sc.textFile(path)
	#parse raw data into label bag-of-words pairs
	parsed_data = raw_data.map(lambda line: parse_line(line))
	#separate into training set and test set
	training_set, test_set = parsed_data.randomSplit([0.6, 0.4], 17)
	#get features for model training
	features = feature_extraction(training_set)
	labeled_points_training = training_set.map(lambda line: construct_labeled_point(line, features))
	labeled_points_test = test_set.map(lambda line: construct_labeled_point(line, features))
	#train logistic regression model
	lrModel = LogisticRegressionWithLBFGS.train(labeled_points_training)
	#train naive bayes model
	nbModel = NaiveBayes.train(labeled_points_training)
	return lrModel, nbModel, labeled_points_test
Developer: JiayingYu, Project: twitter_event_monitor_Spark, Lines: 16, Source: classifier.py


Example 11: logisticRegression

def logisticRegression(features, sc, output_n):
	features_and_label = features.collect()
	# First 70 records for training, the rest for testing
	training_features_labels = features_and_label[0:70]
	testing_features_labels = features_and_label[70:]

	labeled_training = []
	labeled_testing = []
	for x in training_features_labels:
		labeled_training.append(LabeledPoint(x[0], x[1]))

	for y in testing_features_labels:
		labeled_testing.append(LabeledPoint(y[0], y[1]))

	# MLlib's train expects RDDs, so parallelize both lists
	train = sc.parallelize(labeled_training)
	test = sc.parallelize(labeled_testing)

	logregression_model = LogisticRegressionWithLBFGS.train(train)
	predictions = test.map(lambda line: (line.label, float(logregression_model.predict(line.features))))
	return predictions
Developer: gitofsid, Project: StocksPrediction-ML, Lines: 19, Source: classifiers_for_stocks_replace_feat.py


Example 12: create_model

def create_model(name, training):
    if name == 'logistic':
        print_box()
        print "Logistic Regression Model"
        print_box()
        model = LogisticRegressionWithLBFGS.train(training)
    elif name == 'tree':
        print_box()
        print "Decision Tree Model"
        print_box()
        model = DecisionTree.trainClassifier(training, numClasses=2, categoricalFeaturesInfo={},
                                     impurity='gini', maxDepth=5, maxBins=32)
    elif name == 'rf':
        print_box()
        print "Random Forest Model"
        print_box()
        model = RandomForest.trainClassifier(training, numClasses=2, categoricalFeaturesInfo={},
                                    numTrees=15, featureSubsetStrategy="auto", impurity='gini', maxDepth=5, maxBins=50)
    else:
        # Fail fast on an unrecognized name instead of raising UnboundLocalError below
        raise ValueError("Unknown model name: %s" % name)

    return model
Developer: ayushsagar, Project: big-data-analytics, Lines: 20, Source: models.py
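For completeness, a sketch of how the factory might be invoked; training is an assumed RDD of LabeledPoints with binary labels:

# Select the model family by name: 'logistic', 'tree', or 'rf'
model = create_model('logistic', training)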


Example 13: main

def main(input_file_path):

    print('=====>>>>>')
    data = sc.textFile(input_file_path)
    training_data_RDD = data.filter(lambda line: line.split(',')[4] != '' and line.split(',')[0] != 'INDEX')
    unseen_data_RDD = data.filter(lambda line: line.split(',')[4] == '')

    training_data_pddf = create_pddf(training_data_RDD)
    training_data_df = sqlContext.createDataFrame(training_data_pddf)
    print(training_data_df.head())

    parsed_data = rdd_to_labeled_point(training_data_df.rdd)
    parsed_data.persist()
    # A correctly parsed record looks like: LabeledPoint(1.0, [1.0, 8.6662186586, 6.98047693487])
    logisticRegressionWithLBFGS = LogisticRegressionWithLBFGS.train(parsed_data, iterations=500, numClasses=100)

    labels_and_preds = parsed_data.map(lambda lp: [lp.label, logisticRegressionWithLBFGS.predict(lp.features)])
    # Accuracy: fraction of records whose predicted class matches the label
    accuracy = labels_and_preds.filter(lambda ele: int(ele[0]) == int(ele[1])).count() / float(parsed_data.count())
    print("Training Accuracy on training data = " + str(accuracy))

    unseen_data_pddf = create_pddf(unseen_data_RDD)
    unseen_data_df = sqlContext.createDataFrame(unseen_data_pddf)
    unseen_parsed_data = rdd_to_index_featurs(unseen_data_df.rdd)
    unseen_parsed_data.persist()

    file = open('/Users/1002720/Documents/workspace/SNU-project/data/BDA2Project/1-GenderPrediction/result2.csv', 'w',
                encoding='utf-8')
    file.write('INDEX,AGE\n')
    for data in unseen_parsed_data.collect():
        file.write(str(data[0]) + ',' + str(logisticRegressionWithLBFGS.predict(data[1])) + '\n')
    # print(labels_and_preds.collect())



    parsed_data.unpersist()
    unseen_parsed_data.unpersist()
    print('=====>>>>>')
Developer: Ggoals, Project: SNU-project, Lines: 41, Source: 2-logistic_regression_with_LBFGS.py


Example 14: train

    def train(self, feat='tfidf'):
        """
        Trains a multinomal NaiveBayes classifier on TFIDF features.

        Parameters
        ---------
        Spark DataFrame with columns:
        key: (label, filepath) tuple
        tf: Term-frequency Sparse Vector.
        IDF: TFIDF Sparse Vector.

        Returns
        ---------
        model: MLLib NaiveBayesModel object, trained.
        test_score: Accuracy of the model on test dataset.
        """
        if not self.lp_path:
            self.labeled_points = self.make_labeled_points(self.extract_features())
        self.make_train_test(self.test_size)

        train_rdd = self.labeled_points.join(self.y_train) \
                        .map(lambda (key, (lp, label)): lp) \
                        .repartition(self.n_part).cache()

        if self.model_type == 'naive_bayes':
            nb = NaiveBayes()
            self.model = nb.train(train_rdd)

        elif self.model_type == 'log_reg':
            n_classes = len(self.unique_ratings())
            features = train_rdd.map(lambda lp: LabeledPoint(lp.label, lp.features.toArray()))
            logreg = LogisticRegressionWithLBFGS.train(features, numClasses=n_classes)
            self.model = logreg

        return self
Developer: Nathx, Project: parental_advisory_ml, Lines: 37, Source: spark_model.py


Example 15: processData

def processData(sc):
	#load and parse the data
	raw_data = sc.textFile(DATA_FILE)
	raw_data.persist()	
	
	print "Train data size {}".format(raw_data.count()) 
	# map data to a format needed for logistic regression
	parsedData = raw_data.map(mapper)
	
	print "Sample of input to algorithm ", parsedData.take(10)
	
	# Train model
	t0 = time()	
	model = LogisticRegressionWithLBFGS.train(parsedData)
	t1 = time() - t0
	print "Classifier trained in {} seconds".format(round(t1, 3))

	labelsAndPreds = parsedData.map(lambda point: (point.label, model.predict(point.features)))
	
	# Evaluating the model on training data
	trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())

	# Report the training error
	print("Training Error = " + str(trainErr))
Developer: ashishsjsu, Project: Spark101, Lines: 24, Source: LogisticRegression2.py


Example 16: main

def main():
    appName = "BadOrGood;zl"
    
    conf = (SparkConf()
            .setAppName(appName)
            .set("spark.executor.memory", "5g")
            .set("spark.executor.cores","3")
            .set("spark.executor.instance", "3")
            )
    sc = SparkContext(conf = conf)
    hc = HiveContext(sc)

    #fetch data
    #filepath = '/sshomework_zl/BadOrGood/AllDataRowrdd'
    #fetchDataToFile(hc, filepath)
    
    #load data
    # AllDataRawrdd = sc.pickleFile(filepath) \
                    # .map( lambda _: {'label':int(_.status), 'feature':extractFeature(_)} ) \
                    # .repartition(10)
    
    AllDataRawrdd = sc.pickleFile('/pickleData').repartition(10)
    
    
    #standardizer for train and test data
    model = StandardScaler(True, True) \
            .fit( AllDataRawrdd \
                  .map( lambda _: Vectors.dense(_['feature']) ) 
            )
    labels = AllDataRawrdd.map(lambda _: _['label'])
    featureTransformed = model.transform( AllDataRawrdd.map(lambda _: _['feature']) )
    AllDataRawrdd = labels \
                    .zip(featureTransformed) \
                    .map( lambda _: { 'label':_[0], 'feature':_[1] } )
    #sampling
    trainDataRawrdd, testDataRawrdd = AllDataRawrdd.randomSplit(weights=[0.7, 0.3], seed=100)
    trainDatardd = trainDataRawrdd.map( lambda _: LabeledPoint( _['label'], _['feature'] ) ).persist()
    testDatardd = testDataRawrdd.map( lambda _: {'label': _['label'], 'feature': list(_['feature']) } ).persist()
    
    #prediction & test
    lrmLBFGS = LogisticRegressionWithLBFGS.train(trainDatardd, iterations=3000, regParam=0.01, regType="l1")
    resultrdd = test(lrmLBFGS, testDatardd)
    lrmLBFGSFone = fone(resultrdd)
    lrmLBFGSac = accuracy(resultrdd)

    lrmSGD = LogisticRegressionWithSGD.train(trainDatardd, iterations=3000, step=0.1, regParam=0.01, regType="l1")
    resultrdd = test(lrmSGD, testDatardd)
    lrmSGDFone = fone(resultrdd)
    lrmSGDac = accuracy(resultrdd)
  
    dt = DecisionTree.trainClassifier(trainDatardd, 2, {}, maxDepth=10)
    resultrdd = test(dt, testDatardd)
    dtFone = fone(resultrdd)
    dtac = accuracy(resultrdd)
  
    rf = RandomForest.trainClassifier(trainDatardd, 2, {}, 10)
    resultrdd = test(rf, testDatardd)
    rfFone = fone(resultrdd)
    rfac = accuracy(resultrdd)

    print "LR_LBFGS f1 is : %f, ac is : %f" % (lrmLBFGSFone, lrmLBFGSac)
    print "LR_SGD f1 is : %f, ac is : %f" % (lrmSGDFone, lrmSGDac)
    print "Decision Tree f1 is: %f, ac is : %f" % (dtFone, dtac)
    print "Random Forest f1 is: %f, ac is : %f" % (rfFone, rfac)

    print lrmLBFGS.weights
    print lrmSGD.weights

    sc.stop()
Developer: retanoj, Project: ss_homework, Lines: 69, Source: BadOrGood.py


Example 17: range

# NOTE: snippet from a larger script -- `ss` is an iterable of (train_index, test_index)
# splits, and X, Y, iter_number are defined in the surrounding code
Err = 0.0
results = []
for train_index, test_index in ss:
	X_training, Y_training, X_test, Y_test = [], [], [], []
	for i in train_index:
		X_training.append(X[i])
		Y_training.append(Y[i])
	for i in test_index:
		X_test.append(X[i])
		Y_test.append(Y[i])
		
	parsedData = []
	for i in range(0, len(X_training)):
		parsedData.append(LabeledPoint(Y_training[i], X_training[i]))
		
	model = LogisticRegressionWithLBFGS.train(sc.parallelize(parsedData))
		
	testErr = 0
	for i in range(0, len(X_test)):
		a = Y_test[i]
		b = model.predict(X_test[i])
		#b = 1
		if a != b:
			testErr += 1
		
	Err += float(testErr) / float(len(X_test))

	 
print ("AVG test error: %.6f" % 
	(Err/iter_number))
Developer: Patechoc, Project: labs-untested, Lines: 30, Source: logisticRegressionWithLBFGS.py


Example 18: help

print irisTrainRDD.take(2)
print irisTestRDD.take(2)

# COMMAND ----------

# MAGIC %md
# MAGIC Now, we can use MLlib's logistic regression on our `RDD` of `LabeledPoints`.  Note that we'll use `LogisticRegressionWithLBFGS` as it tends to converge faster than `LogisticRegressionWithSGD`.

# COMMAND ----------

from pyspark.mllib.classification import LogisticRegressionWithLBFGS
help(LogisticRegressionWithLBFGS)

# COMMAND ----------

mllibModel = LogisticRegressionWithLBFGS.train(irisTrainRDD, iterations=1000, regParam=0.0)

# COMMAND ----------

# MAGIC %md
# MAGIC Let's calculate our accuracy using `RDDs`.

# COMMAND ----------

rddPredictions = mllibModel.predict(irisTestRDD.values())
predictAndLabels = rddPredictions.zip(irisTestRDD.keys())

mllibAccuracy = predictAndLabels.map(lambda (p, l): p == l).mean()
print 'MLlib model accuracy: {0:.3f}'.format(mllibAccuracy)
Developer: Inscrutive, Project: spark, Lines: 29, Source: V.py


Example 19: LabeledPoint

# NOTE: snippet from a larger script -- this is the tail of a parsePoint-style parser;
# the imports (os, hashlib, pyspark as py) appear earlier in the source file
        j = 0
    return LabeledPoint(float(int(hashlib.md5(datapoints[3]).hexdigest(), 16) / pow(10, 38)), datapoints[1:3])

working_directory = os.getcwd()
working_directory = working_directory + "/"

configuration = py.SparkConf()                              # set up the Spark configuration
sContext = py.SparkContext(conf=configuration)              # create the Spark context
sContext.defaultParallelism
data = sContext.textFile(working_directory + "Test-TrainingData_SVM.csv")
testdata = sContext.textFile("/media/vyassu/OS/Users/vyas/Documents/Assigments/BigData/AudioData/KL/")

print testdata.take(1)

parsedData = data.map(parsePoint)
print parsedData.take(10)
# Build the modelLogisticRegressionWithLBFGS
model = LogisticRegressionWithLBFGS.train(parsedData, iterations=10,numClasses=7)

# Evaluating the model on training data
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
print("Training Error = " + str(trainErr))

# # Save and load model
# model.save(sc, "myModelPath")
# sameModel = SVMModel.load(sc, "myModelPath")
Developer: smeera380, Project: DeepSentiment, Lines: 31, Source: SpeechNetSVM_Spark.py


Example 20: LabeledPoint

	
	# NOTE: snippet from a larger script -- this is the tail of its parsePoint() function;
	# raw_data is loaded earlier in the source file
	# Cancelled becomes the 8th column now, and total columns in the data = 8
	label = clean_line_split[7]
	nonLabel = clean_line_split[0:7]

	return LabeledPoint(label, nonLabel)

parsedData = raw_data.map (parsePoint)
#divide training and test data by 70-30 rule
(training, test) = parsedData.randomSplit ([0.7, 0.3], seed=11L)
training.cache ()

#start timer at this point
startTime = datetime.now()
#build the model
model = LogisticRegressionWithLBFGS.train (training, numClasses=3)

#evaluate the model on training data
labelAndPreds = test.map (lambda x: (x.label, model.predict (x.features)))

#labelAndPreds = testData.map (lambda x: (x.label, model.predict (x.features)))
trainErr = labelAndPreds.filter (lambda (w, x): w != x).count () / float (test.count ())

print ('Time consumed = '), (datetime.now() - startTime)

print ("Training error = " + str (trainErr))

#save and load model
model.save(sc, "LRW-95-08")
sameModel = LogisticRegressionModel.load(sc, "LRW-95-08")
sc.stop ()
Developer: bmewing, Project: spark_vs_r, Lines: 30, Source: logistic_regression-wide.py



Note: The pyspark.mllib.classification.LogisticRegressionWithLBFGS class examples in this article were compiled from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by their respective developers; copyright remains with the original authors, and any use or redistribution must follow the corresponding project's license. Please do not reproduce without permission.

