• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python regression.LinearRegressionWithSGD类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 pyspark.mllib.regression.LinearRegressionWithSGD 的典型用法代码示例。如果您正苦于以下问题:Python LinearRegressionWithSGD 类的具体用法?Python LinearRegressionWithSGD 怎么用?Python LinearRegressionWithSGD 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了 LinearRegressionWithSGD 类的 20 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。

示例1: do_all

def do_all(f_path, out_name):
    """Fit a linear regression on the rows of a text file and save per-row predictions.

    Each line of ``f_path`` is parsed by ``parseKeepD``; rows whose first
    element is ``None`` are dropped. Column means and variances of the
    features are published through the module globals ``means`` and
    ``varis`` before every point is passed through ``conv_label_pt``
    (presumably the scaler that reads those globals -- confirm against its
    definition).

    Args:
        f_path: Path handed to ``SparkContext.textFile``.
        out_name: Suffix appended to the module-level ``out_loc`` to form the
            location given to ``saveAsTextFile``.

    Returns:
        Tuple ``(intercept, weights, se, disparity, ssr, N)``: the fitted
        model's intercept and weights, the result of
        ``std_errors(data, ssr / N, N)``, the RDD of
        ``(label, prediction, row[1])`` triples, the sum of squared
        residuals and the row count.

    NOTE(review): the SparkContext is stopped before this function returns,
    so the returned ``disparity`` RDD is most likely unusable by the caller.
    """
    global means
    global varis

    sc = SparkContext()
    try:
        data = sc.textFile(f_path)
        data = data.map(parseKeepD).filter(lambda p: p[0] is not None)

        # Column statistics of the raw features, shared through globals.
        features = data.map(lambda x: x[0].features)
        summary = Statistics.colStats(features)
        means = summary.mean()
        varis = summary.variance()

        # Convert every labelled point, keeping the second tuple element as is.
        data = data.map(lambda y: (conv_label_pt(y[0]), y[1]))

        # Train without regularisation, fitting an intercept.
        model = LinearRegressionWithSGD().train(
            data.map(lambda x: x[0]), intercept=True, regType='none')

        # (label, prediction, second tuple element) for every row.
        disparity = data.map(
            lambda p: (p[0].label, model.predict(p[0].features), p[1]))

        # Sum of squared residuals and row count feed the standard errors.
        ssr = disparity.map(lambda x: (x[0] - x[1]) ** 2).sum()
        N = disparity.count()
        MSE = ssr / float(N)
        se = std_errors(data, MSE, N)

        disparity.saveAsTextFile(out_loc + out_name)
        return model.intercept, model.weights, se, disparity, ssr, N
    finally:
        # Always release the context, including when a step above raises.
        sc.stop()
开发者ID:ssz225,项目名称:bigdata_final,代码行数:35,代码来源:spark_reg_local.py


示例2: test_regression

    def test_regression(self):
        """Check that each regressor predicts the sign of the label on a tiny dataset.

        The SGD-based linear models and the tree-based regressors are each
        trained on four labelled points and must predict ``<= 0`` for the
        negative-label inputs and ``> 0`` for the positive-label ones. The
        SGD trainers are then run again with explicit ``initialWeights`` and
        must not raise ``ValueError``.
        """
        from pyspark.mllib.regression import LinearRegressionWithSGD, LassoWithSGD, \
            RidgeRegressionWithSGD
        from pyspark.mllib.tree import DecisionTree, RandomForest, GradientBoostedTrees

        points = [
            LabeledPoint(-1.0, [0, -1]),
            LabeledPoint(1.0, [0, 1]),
            LabeledPoint(-1.0, [0, -2]),
            LabeledPoint(1.0, [0, 2])
        ]
        rdd = self.sc.parallelize(points)
        inputs = [pt.features.tolist() for pt in points]

        def check_signs(fitted):
            # Even positions carry label -1.0, odd positions label 1.0.
            for position, vector in enumerate(inputs):
                if position % 2 == 0:
                    self.assertTrue(fitted.predict(vector) <= 0)
                else:
                    self.assertTrue(fitted.predict(vector) > 0)

        sgd_trainers = (LinearRegressionWithSGD, LassoWithSGD, RidgeRegressionWithSGD)
        for trainer in sgd_trainers:
            check_signs(trainer.train(rdd, iterations=10))

        categoricalFeaturesInfo = {0: 2}  # feature 0 has 2 categories
        check_signs(DecisionTree.trainRegressor(
            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, maxBins=4))
        check_signs(RandomForest.trainRegressor(
            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, numTrees=10, maxBins=4, seed=1))
        check_signs(GradientBoostedTrees.trainRegressor(
            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, numIterations=4))

        # Explicit initial weights must be accepted by every SGD trainer.
        try:
            for trainer in sgd_trainers:
                trainer.train(rdd, initialWeights=array([1.0, 1.0]), iterations=10)
        except ValueError:
            self.fail()
开发者ID:1ambda,项目名称:spark,代码行数:59,代码来源:tests.py


示例3: iterateLRwSGDBatch

def iterateLRwSGDBatch(iterNums, stepSizes, fractions, train, valid):
    """Print training and validation RMSE for every hyper-parameter combination.

    For each (iterations, step, mini-batch fraction) triple a model with an
    intercept is trained on ``train``; one line is printed with the three
    settings followed by the root of the mean squared error on ``train`` and
    on ``valid``.
    """
    def root_mean_sq(dataset, fitted):
        # Pair each prediction with its label, then take sqrt(mean(squared diff)).
        paired = dataset.map(lambda x: (fitted.predict(x.features), x.label))
        return math.sqrt(paired.map(lambda p: pow(p[0]-p[1],2)).mean())

    for n_iter in iterNums:
        for step_size in stepSizes:
            for batch_frac in fractions:
                fitted = LinearRegressionWithSGD().train(
                    train, intercept=True, iterations=n_iter, step=step_size,
                    miniBatchFraction=batch_frac)
                train_err = root_mean_sq(train, fitted)
                valid_err = root_mean_sq(valid, fitted)
                print("%d, %5.3f %5.3f -> %.4f, %.4f" % (n_iter, step_size, batch_frac, train_err, valid_err))
开发者ID:AkiraKane,项目名称:first-edition,代码行数:11,代码来源:ch07-listings.py


示例4: iterateLRwSGD

def iterateLRwSGD(iterNums, stepSizes, train, valid):
    """Print training and validation RMSE for every (iterations, step) pair.

    A model with an intercept is trained on ``train`` for each combination;
    each printed line holds the two settings followed by the root of the
    mean squared error on ``train`` and on ``valid``.
    """
    from pyspark.mllib.regression import LinearRegressionWithSGD
    import math

    def score(dataset, fitted):
        # Predictions are cast to float before being paired with the label.
        paired = dataset.map(lambda x: (float(fitted.predict(x.features)), x.label))
        squared = paired.map(lambda p: pow(p[0]-p[1],2))
        return math.sqrt(squared.mean())

    for n_iter in iterNums:
        for step_size in stepSizes:
            trainer = LinearRegressionWithSGD()
            fitted = trainer.train(train, iterations=n_iter, step=step_size, intercept=True)
            train_err = score(train, fitted)
            valid_err = score(valid, fitted)
            print("%d, %5.3f -> %.4f, %.4f" % (n_iter, step_size, train_err, valid_err))
开发者ID:AkiraKane,项目名称:first-edition,代码行数:12,代码来源:ch07-listings.py


示例5: test_regression

    def test_regression(self):
        """Check the SGD linear regressors on features built with ``self.scipy_matrix``.

        Each model is trained with default settings on four labelled points
        and must predict ``<= 0`` for the negative-label inputs and ``> 0``
        for the positive-label ones.
        """
        from pyspark.mllib.regression import LinearRegressionWithSGD, LassoWithSGD, \
                RidgeRegressionWithSGD

        # (label, value stored at index 1 of a length-2 matrix)
        spec = [(-1.0, -1.0), (1.0, 1.0), (-1.0, -2.0), (1.0, 2.0)]
        points = [LabeledPoint(label, self.scipy_matrix(2, {1: value}))
                  for label, value in spec]
        rdd = self.sc.parallelize(points)
        inputs = [pt.features for pt in points]

        for trainer in (LinearRegressionWithSGD, LassoWithSGD, RidgeRegressionWithSGD):
            fitted = trainer.train(rdd)
            # Inputs alternate negative label / positive label.
            self.assertTrue(fitted.predict(inputs[0]) <= 0)
            self.assertTrue(fitted.predict(inputs[1]) > 0)
            self.assertTrue(fitted.predict(inputs[2]) <= 0)
            self.assertTrue(fitted.predict(inputs[3]) > 0)
开发者ID:EronWright,项目名称:spark,代码行数:29,代码来源:tests.py


示例6: regression

def regression():
    """Train, evaluate and save an L2-regularised linear model on the joined results.

    Reads ``output + "/joinedResults"`` as parquet via the module-level
    ``sqlContext``, turns every row into
    ``(avg_prcp, yy, latitude, longitude)``, converts the rows with
    ``parsePointPrediction`` using per-column maxima/minima, trains on a
    seeded 80/20 split, prints the test mean squared error and saves the
    model under ``output + "/modelpath"``.

    Raises:
        ValueError: from ``reduce`` if the test split is empty.
    """
    # Read the data from the joinedResults directory as a parquet file.
    datadf = sqlContext.read.parquet(output + "/joinedResults")
    datadf.show()
    data = datadf.rdd.map(
        lambda w: (float(w.avg_prcp), int(w.yy), float(w.latitude), float(w.longitude)))

    # Tuples compare element by element, so element 0 of the max/min row is
    # the extreme precipitation value.
    max_prcp = data.max()
    min_prcp = data.min()

    lat = data.map(lambda x: x[2]).cache()
    min_lat = lat.min()
    max_lat = lat.max()

    longt = data.map(lambda x: x[3]).cache()
    min_long = longt.min()
    max_long = longt.max()

    # The year bounds are fixed constants rather than taken from the data.
    max_ = [max_prcp[0], float(2050), max_lat, max_long]
    min_ = [min_prcp[0], float(1990), min_lat, min_long]

    # Change the format to fit the LinearRegression library.
    parsedData = data.map(lambda x: parsePointPrediction(x, max_, min_)).cache()

    # Split data approximately into training (80%) and test (20%).
    trainData, testData = parsedData.randomSplit([0.8, 0.2], seed=0)
    trainData.cache()
    testData.cache()

    # Parameters were found by trial and error. ``intercept`` is passed as a
    # real boolean instead of the string "true".
    model = LinearRegressionWithSGD.train(
        trainData, iterations=500, regType="l2", regParam=10, intercept=True)

    # Evaluate the model on test data. Indexing the pair instead of using a
    # tuple-parameter lambda keeps this valid on both Python 2 and Python 3.
    valuesAndPreds = testData.map(lambda p: (p.label, model.predict(p.features)))
    MSE = valuesAndPreds.map(lambda vp: (vp[0] - vp[1]) ** 2) \
        .reduce(lambda x, y: x + y) / valuesAndPreds.count()
    print("Mean Squared Error = " + str(MSE))

    model.save(sc, output + "/modelpath")
开发者ID:sasoltan,项目名称:DroughtPercipitation,代码行数:34,代码来源:finalcode.py


示例7: evaluate

def evaluate(train, test, iterations, step, regParam, regType, intercept):
    """Train a linear model and return ``[rmse, mae, rmsle]`` measured on ``test``.

    Args:
        train: Training data passed to ``LinearRegressionWithSGD.train``.
        test: RDD whose elements expose ``label`` and ``features``.
        iterations, step, regParam, regType, intercept: Forwarded to
            ``LinearRegressionWithSGD.train``.

    Returns:
        A list ``[rmse, mae, rmsle]``: the square root of the mean of
        ``squarred_error``, the mean of ``abs_error``, and the square root of
        the mean of ``squared_log_error`` over the (label, prediction) pairs.
    """
    model = LinearRegressionWithSGD.train(
        train, iterations, step, regParam=regParam, regType=regType, intercept=intercept)
    tp = test.map(lambda p: (p.label, model.predict(p.features)))

    # Pairs are unpacked with ``*`` because tuple-parameter lambdas are a
    # SyntaxError on Python 3.
    rmse = np.sqrt(tp.map(lambda pair: squarred_error(*pair)).mean())
    # A mean absolute error is not square-rooted.
    mae = tp.map(lambda pair: abs_error(*pair)).mean()
    # Use this call's own predictions; ``true_vs_predicted`` is not defined here.
    rmsle = np.sqrt(tp.map(lambda pair: squared_log_error(*pair)).mean())

    return [rmse, mae, rmsle]
开发者ID:kevllino,项目名称:WeatherPred,代码行数:8,代码来源:weather_predict.py


示例8: get_best_result

def get_best_result(best_step_size, training_lp, testing_lp, iterations):
    """Train with the chosen step size and describe the RMSE on the test set.

    Args:
        best_step_size: Step size passed to the SGD trainer.
        training_lp: Training data for ``LinearRegressionWithSGD.train``.
        testing_lp: RDD whose elements expose ``label`` and ``features``.
        iterations: Number of SGD iterations.

    Returns:
        A human-readable string containing the step size and the RMSE.

    Raises:
        ValueError: from ``reduce`` if ``testing_lp`` is empty.
    """
    model = LinearRegressionWithSGD.train(
        training_lp, iterations=iterations, step=best_step_size, regType='l2')
    values_and_preds = testing_lp.map(lambda p: (p.label, model.predict(p.features)))

    # Indexing the pair keeps the lambda valid on Python 3.
    sum_sq_err = values_and_preds.map(lambda vp: (vp[0] - vp[1]) ** 2).reduce(operator.add)
    # Divide by the sample count so this is a mean, not a sum, of squared errors.
    MSE = sum_sq_err / float(values_and_preds.count())
    RMSE = math.sqrt(MSE)

    result_str = 'best step size got by cross validation cv: ' + str(best_step_size) + ', lowest RMSE: ' + str(RMSE)
    return result_str
开发者ID:Veterun,项目名称:SparkPythonHanhan,代码行数:8,代码来源:tfidf_cv_lowestRMSE_normalized.py


示例9: getRMSE

def getRMSE(step_array):
    """Return ``(step, validation RMSE)`` for each SGD step size in ``step_array``.

    A model is trained on the module-level ``train_featureScoreTimeRDD`` with
    5000 iterations for each step size and scored on the module-level
    ``val_featureScoreTimeRDD``.

    Returns:
        A list of ``(step, rmse)`` tuples in the order of ``step_array``.
    """
    valRMSE_list = []
    for step in step_array:
        model = LinearRegressionWithSGD.train(
            train_featureScoreTimeRDD, iterations=5000, step=step)
        labelsAndPreds = val_featureScoreTimeRDD.map(
            lambda p: (p.label, model.predict(p.features)))
        # Indexing the pair keeps the lambda valid on Python 3.
        sum_sq_err = labelsAndPreds.map(lambda vp: (vp[0] - vp[1]) ** 2) \
            .reduce(lambda x, y: x + y)
        valMSE = sum_sq_err / val_featureScoreTimeRDD.count()
        valRMSE_list.append((step, valMSE ** 0.5))
    return valRMSE_list
开发者ID:shaileshr,项目名称:SentimentAnalysis,代码行数:9,代码来源:Qn6.py


示例10: linearRegression

def linearRegression(features, sc, output_n):
    """Train on the first 70 collected rows and predict rows 70-115.

    Args:
        features: RDD that is collected to the driver. Its elements must
            expose ``label`` and ``features``, since the prediction step
            reads those attributes.
        sc: SparkContext used to turn the two slices back into RDDs.
        output_n: Unused; kept so existing callers keep working.

    Returns:
        ``(model, prediction)`` where ``prediction`` is an RDD of
        ``(label, predicted value)`` pairs for the test slice.

    NOTE(review): ``iterations=0`` is kept from the original; it presumably
    leaves the weights at their initial values -- confirm this is intended.
    """
    features_and_label = features.collect()

    # Wire the slices into the RDDs the trainer and predictor use; the names
    # training_data / testing_data were never defined in this function.
    training_data = sc.parallelize(features_and_label[0:70])
    testing_data = sc.parallelize(features_and_label[70:116])

    linearregression_model = LinearRegressionWithSGD.train(
        training_data, iterations=0, regParam=200)
    prediction = testing_data.map(
        lambda line: (line.label, linearregression_model.predict(line.features)))
    return linearregression_model, prediction
开发者ID:gitofsid,项目名称:StocksPrediction-ML,代码行数:10,代码来源:classifiers_for_stocks.py


示例11: test_regression

    def test_regression(self):
        """Check that each regressor, with default settings, predicts the label's sign.

        The three SGD linear models and the decision-tree, random-forest and
        gradient-boosted-tree regressors are each trained on four labelled
        points and must predict ``<= 0`` for negative-label inputs and
        ``> 0`` for positive-label inputs.
        """
        from pyspark.mllib.regression import LinearRegressionWithSGD, LassoWithSGD, \
            RidgeRegressionWithSGD
        from pyspark.mllib.tree import DecisionTree, RandomForest, GradientBoostedTrees

        samples = [
            LabeledPoint(-1.0, [0, -1]),
            LabeledPoint(1.0, [0, 1]),
            LabeledPoint(-1.0, [0, -2]),
            LabeledPoint(1.0, [0, 2])
        ]
        rdd = self.sc.parallelize(samples)
        vectors = [s.features.tolist() for s in samples]

        def verify(fitted):
            # Samples alternate negative label / positive label.
            self.assertTrue(fitted.predict(vectors[0]) <= 0)
            self.assertTrue(fitted.predict(vectors[1]) > 0)
            self.assertTrue(fitted.predict(vectors[2]) <= 0)
            self.assertTrue(fitted.predict(vectors[3]) > 0)

        verify(LinearRegressionWithSGD.train(rdd))
        verify(LassoWithSGD.train(rdd))
        verify(RidgeRegressionWithSGD.train(rdd))

        categoricalFeaturesInfo = {0: 2}  # feature 0 has 2 categories
        verify(DecisionTree.trainRegressor(
            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo))
        verify(RandomForest.trainRegressor(
            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, numTrees=100))
        verify(GradientBoostedTrees.trainRegressor(
            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo))
开发者ID:greatyan,项目名称:spark,代码行数:52,代码来源:tests.py


示例12: get_best_stepsize

def get_best_stepsize(step_sizes, training_lp, testing_lp, iterations):
    """Try each step size and report the one with the lowest test RMSE.

    Args:
        step_sizes: Iterable of candidate SGD step sizes.
        training_lp: Training data for ``LinearRegressionWithSGD.train``.
        testing_lp: RDD whose elements expose ``label`` and ``features``.
        iterations: Number of SGD iterations per candidate.

    Returns:
        A string naming the best step size and its RMSE. With no candidates
        the string reports step size ``0`` and RMSE ``inf``.

    Raises:
        ValueError: from ``reduce`` if ``testing_lp`` is empty and at least
            one candidate is tried.
    """
    best_stepsize = 0
    lowest_RMSE = float("inf")
    test_count = None  # computed once, on first use; the test set never changes

    for step_size in step_sizes:
        model = LinearRegressionWithSGD.train(
            training_lp, iterations=iterations, step=step_size)
        values_and_preds = testing_lp.map(
            lambda p: (p.label, model.predict(p.features)))
        # Indexing the pair keeps the lambda valid on Python 3.
        sum_sq_err = values_and_preds.map(
            lambda vp: (vp[0] - vp[1]) ** 2).reduce(operator.add)
        if test_count is None:
            test_count = float(testing_lp.count())
        # Divide by the sample count so the reported figure is a true RMSE.
        RMSE = math.sqrt(sum_sq_err / test_count)
        if RMSE < lowest_RMSE:
            lowest_RMSE = RMSE
            best_stepsize = step_size

    result_str = 'best step size: ' + str(best_stepsize) + ', lowest RMSE: ' + str(lowest_RMSE)
    return result_str
开发者ID:Veterun,项目名称:SparkPythonHanhan,代码行数:14,代码来源:word2vec_best_RMSE.py


示例13: LinearRegression

def LinearRegression(filename, sc):
    """Train a linear model on ``filename`` and print its training-set MSE.

    Args:
        filename: Path of the text file to load. It is now used as given; the
            original overwrote it with a developer-specific absolute path.
        sc: SparkContext used to read the file.

    Each line is converted by the module-level ``parsePoint``.

    Raises:
        ValueError: from ``reduce`` if the file yields no rows.
    """
    data = sc.textFile(filename)
    parsedData = data.map(parsePoint)

    # Train the model.
    model = LinearRegressionWithSGD.train(parsedData)

    # Evaluate the model on the training data. Indexing the pair keeps the
    # lambda valid on Python 3.
    valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
    MSE = valuesAndPreds.map(lambda vp: (vp[0] - vp[1]) ** 2) \
        .reduce(lambda x, y: x + y) / valuesAndPreds.count()
    print("\n\n\n\n\n\nMean Squared Error = " + str(MSE) + "\n\n\n\n\n")
开发者ID:bangjieliu,项目名称:SparkService,代码行数:16,代码来源:linear_regression.py


示例14: test_spark

def test_spark():
    """Smoke-test Spark MLlib: fit a linear model on the bundled lpsa data and print its MSE.

    Uses the module-level ``sc`` to read the file, prints the parsed points,
    the mean squared error on the training data and the model's string form.
    """
    def parsePoint(line):
        # First number on the line is the label, the rest are the features.
        values = [float(x) for x in line.replace(',', ' ').split(' ')]
        return LabeledPoint(values[0], values[1:])

    data = sc.textFile(r"/usr/local/Cellar/apache-spark/1.6.1/libexec/data/mllib/ridge-data/lpsa.data")
    parsedData = data.map(parsePoint)
    # print() with a single argument behaves the same on Python 2 and 3.
    print(parsedData.collect())

    # Build the model.
    model = LinearRegressionWithSGD.train(parsedData)

    # Evaluate the model on the training data. Indexing the pair keeps the
    # lambda valid on Python 3.
    valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
    MSE = valuesAndPreds.map(lambda vp: (vp[0] - vp[1]) ** 2) \
        .reduce(lambda x, y: x + y) / valuesAndPreds.count()
    print("Mean Squared Error = " + str(MSE))
    print("Model coefficients: " + str(model))
开发者ID:WarnWang,项目名称:Dissertation,代码行数:17,代码来源:spark_test.py


示例15: algo

def algo(a):
    """Run the algorithm selected by ``a`` on the module-level ``data``/``target``.

    Only ``a == 'sgd'`` is handled: a linear model is trained for 10
    iterations on the single labelled point built from ``target`` and
    ``data``, the absolute prediction for ``week`` is printed, followed by
    the elapsed wall-clock time in seconds. Any other value does nothing
    beyond the setup steps.

    Args:
        a: Name of the algorithm to run.
    """
    global data
    global week
    global target
    test = week
    # Result is unused; the call is kept in case ``week.map`` is eager and
    # ``convert`` matters -- NOTE(review): confirm and remove if not.
    week_target = week.map(convert)
    data_final = LabeledPoint(target, data)

    if a == 'sgd':
        # Start the timer that the final print reports; the original left
        # this assignment commented out, so ``time_0`` was undefined.
        time_0 = time.time()
        # parallelize() needs a collection, so wrap the single point in a list.
        lrm = LinearRegressionWithSGD.train(
            sc.parallelize([data_final]), iterations=10, initialWeights=np.array([1.0]))
        print(abs(lrm.predict(test)))
        print(time.time() - time_0)
开发者ID:mmeoni,项目名称:LHCDataAnalysis,代码行数:17,代码来源:ensemble.py


示例16: linearRegression

def linearRegression(features, sc, output_n):
    """Train on the first 70 collected rows and predict the remaining ones.

    Args:
        features: RDD of ``(label, feature_vector)`` pairs; it is collected
            to the driver and split by position.
        sc: SparkContext used to distribute the training and test points.
        output_n: Unused; kept so existing callers keep working.

    Returns:
        RDD of ``(label, predicted value as float)`` pairs for the test rows.

    NOTE(review): ``iterations=0`` is kept from the original; it presumably
    leaves the weights at their initial values -- confirm this is intended.
    """
    features_and_label = features.collect()

    labeled_training = [LabeledPoint(row[0], row[1]) for row in features_and_label[0:70]]
    labeled_testing = [LabeledPoint(row[0], row[1]) for row in features_and_label[70:]]

    # The trainer expects an RDD, not a Python list, so distribute both sets.
    train = sc.parallelize(labeled_training)
    test = sc.parallelize(labeled_testing)

    linearregression_model = LinearRegressionWithSGD.train(
        train, iterations=0, regParam=200)
    predictions = test.map(
        lambda line: (line.label, float(linearregression_model.predict(line.features))))
    return predictions
开发者ID:gitofsid,项目名称:StocksPrediction-ML,代码行数:19,代码来源:classifiers_for_stocks_replace_feat.py


示例17: linearRegression_f

def linearRegression_f(mode):
    """Train the regressor selected by ``mode`` and return its training-set MSE.

    Args:
        mode: ``"no_reg"`` (plain linear regression), ``"L1_reg"`` (Lasso) or
            ``"L2_reg"`` (Ridge).

    Returns:
        Mean squared error over the module-level ``parsedData``.

    Raises:
        ValueError: if ``mode`` is not one of the three supported values.
            The original printed a message and then failed later because no
            model had been assigned.
    """
    if mode == "no_reg":
        model = LinearRegressionWithSGD.train(parsedData)
    elif mode == "L1_reg":
        model = LassoWithSGD.train(parsedData)
    elif mode == "L2_reg":
        model = RidgeRegressionWithSGD.train(parsedData)
    else:
        raise ValueError(
            "ERROR Mode: {!r}; expected 'no_reg', 'L1_reg' or 'L2_reg'".format(mode))

    # Pair each true label with the model's prediction for that point.
    valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))

    # Mean of the squared differences. Indexing the pair keeps the lambda
    # valid on Python 3.
    MSE = valuesAndPreds.map(lambda vp: (vp[0] - vp[1]) ** 2) \
        .reduce(lambda x, y: x + y) / valuesAndPreds.count()

    return MSE
开发者ID:ZaphyrRobin,项目名称:linear_regression_bill_vs_tip,代码行数:19,代码来源:tip_linear_regression.py


示例18: LinearRegression

def LinearRegression(trainFile, testFile, taskid, sc):
    """Train on a LibSVM file and print the mean squared error on a second one.

    Args:
        trainFile: Path of the LibSVM-format training file.
        testFile: Path of the LibSVM-format test file.
        taskid: Unused; kept so existing callers keep working.
        sc: SparkContext used to load both files.

    Raises:
        ValueError: from ``reduce`` if the test file yields no rows.
    """
    trainData = MLUtils.loadLibSVMFile(sc, trainFile)
    testData = MLUtils.loadLibSVMFile(sc, testFile)

    # Train the model.
    model = LinearRegressionWithSGD.train(trainData)

    # Evaluate the model on the test data. Each pair is (label, prediction).
    # Indexing the pair keeps the lambda valid on Python 3.
    labelsAndPredictions = testData.map(lambda p: (p.label, model.predict(p.features)))
    MSE = labelsAndPredictions.map(lambda vp: (vp[0] - vp[1]) ** 2) \
        .reduce(lambda x, y: x + y) / labelsAndPredictions.count()
    print("\n\n\n\n\n\nMean Squared Error = " + str(MSE) + "\n\n\n\n\n")
开发者ID:honeycombcmu,项目名称:SparkService,代码行数:20,代码来源:linear_regression.py


示例19: get_best_stepsize

def get_best_stepsize(step_sizes, training_lp, iterations, cv_trails):
    """Pick the SGD step size with the lowest average RMSE over 4 random folds.

    ``training_lp`` is split once into four equally weighted random parts.
    For every candidate step size a model is trained on the data minus one
    part and scored on that part; the per-fold RMSEs are averaged.

    Args:
        step_sizes: Iterable of candidate step sizes.
        training_lp: RDD of labelled points to cross-validate on.
        iterations: Number of SGD iterations per model.
        cv_trails: Unused; kept so existing callers keep working. The average
            is taken over the folds actually evaluated, so a value of 0 no
            longer raises and a negative value no longer inverts the ranking.

    Returns:
        The best step size, or ``0`` when ``step_sizes`` is empty.

    Raises:
        ValueError: from ``reduce`` if a fold is empty.
    """
    best_stepsize = 0
    lowest_RMSE = float("inf")
    num_folds = 4
    held_out_parts = training_lp.randomSplit([1] * num_folds)

    # The (training, held-out) pairing does not depend on the step size, so
    # build it once.
    folds = [(training_lp.subtract(held_out), held_out) for held_out in held_out_parts]

    for step_size in step_sizes:
        total_RMSE = 0.0
        for cv_training, cv_testing in folds:
            model = LinearRegressionWithSGD.train(
                cv_training, iterations=iterations, step=step_size)
            values_and_preds = cv_testing.map(
                lambda p: (p.label, model.predict(p.features)))
            # Indexing the pair keeps the lambda valid on Python 3.
            sum_sq_err = values_and_preds.map(
                lambda vp: (vp[0] - vp[1]) ** 2).reduce(operator.add)
            # Normalise by fold size so folds of different sizes are comparable.
            MSE = sum_sq_err / float(values_and_preds.count())
            total_RMSE += math.sqrt(MSE)
        avg_RMSE = total_RMSE / num_folds
        if avg_RMSE < lowest_RMSE:
            lowest_RMSE = avg_RMSE
            best_stepsize = step_size

    return best_stepsize
开发者ID:Veterun,项目名称:SparkPythonHanhan,代码行数:22,代码来源:tfidf_cv_lowestRMSE_normalized.py


示例20: train_amount_model

    def train_amount_model(self, model, data, i):
        """Train the amount-prediction model with the configured method.

        Args:
            model: Previously trained model or ``None``. It is forwarded to
                the neural-network trainer, and its ``weights`` seed the
                linear regression when it is not ``None``.
            data: Local collection of training points; distributed with
                ``self.sc.parallelize``.
            i: Unused; kept so existing callers keep working.

        Returns:
            The newly trained model.

        Raises:
            ValueError: if ``self.amount_prediction_method`` is not one of
                the supported methods.
        """
        rdd_data = self.sc.parallelize(data)
        self.logger.info('Start to train the amount model')
        method = self.amount_prediction_method

        if method == self.ARTIFICIAL_NEURAL_NETWORK:
            input_num = self.feature_num
            # Floor division keeps layer sizes integral on Python 3 and is
            # what ``/`` already did for ints on Python 2.
            layers = [input_num, input_num // 3 * 2, input_num // 3, 1]

            neural_network = NeuralNetworkSpark(layers=layers, bias=0)
            return neural_network.train(rdd_data, method=neural_network.BP, seed=1234, learn_rate=0.0001,
                                        iteration=15, model=model)

        if method == self.RANDOM_FOREST:
            return RandomForest.trainRegressor(rdd_data, categoricalFeaturesInfo={}, numTrees=40,
                                               featureSubsetStrategy="auto", impurity='variance', maxDepth=20,
                                               maxBins=32)

        if method == self.LINEAR_REGRESSION:
            # Warm-start from the previous model's weights when there is one.
            return LinearRegressionWithSGD.train(rdd_data, iterations=10000, step=0.001,
                                                 initialWeights=model.weights if model is not None else None)

        message = "Unknown training method {}".format(method)
        self.logger.error(message)
        raise ValueError(message)
开发者ID:WarnWang,项目名称:Dissertation,代码行数:23,代码来源:composition_prediction_system.py



注:本文中的pyspark.mllib.regression.LinearRegressionWithSGD类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python regression.RidgeRegressionWithSGD类代码示例发布时间:2022-05-26
下一篇:
Python regression.LassoWithSGD类代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap