
Python linalg.Vectors Class Code Examples


This article collects typical usage examples of the pyspark.ml.linalg.Vectors class in Python. If you have been wondering what exactly the Vectors class does, how to use it, or where to find examples of it in use, the curated class code examples below may help.



The following presents 20 code examples of the Vectors class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
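Before the examples, a quick orientation: pyspark.ml.linalg.Vectors is a factory class whose dense() and sparse() static methods build the local vector objects that populate the "features" column in the snippets below. A minimal sketch of that core API:

    from pyspark.ml.linalg import Vectors

    # Dense vector: stores every entry explicitly.
    dv = Vectors.dense([1.0, 0.0, 3.0])

    # Sparse vector: size plus (index, value) pairs; omitted entries are 0.0.
    sv = Vectors.sparse(3, [0, 2], [1.0, 3.0])

    # Both representations describe the same vector.
    assert (dv.toArray() == sv.toArray()).all()

    # Static helpers on the Vectors class itself.
    print(Vectors.norm(dv, 2))               # Euclidean norm
    print(Vectors.squared_distance(dv, sv))  # 0.0 here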

Example 1: test_save_load_simple_estimator

    def test_save_load_simple_estimator(self):
        temp_path = tempfile.mkdtemp()
        dataset = self.spark.createDataFrame(
            [(Vectors.dense([0.0]), 0.0),
             (Vectors.dense([0.4]), 1.0),
             (Vectors.dense([0.5]), 0.0),
             (Vectors.dense([0.6]), 1.0),
             (Vectors.dense([1.0]), 1.0)] * 10,
            ["features", "label"])

        lr = LogisticRegression()
        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
        evaluator = BinaryClassificationEvaluator()

        # test save/load of CrossValidator
        cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
        cvModel = cv.fit(dataset)
        cvPath = temp_path + "/cv"
        cv.save(cvPath)
        loadedCV = CrossValidator.load(cvPath)
        self.assertEqual(loadedCV.getEstimator().uid, cv.getEstimator().uid)
        self.assertEqual(loadedCV.getEvaluator().uid, cv.getEvaluator().uid)
        self.assertEqual(loadedCV.getEstimatorParamMaps(), cv.getEstimatorParamMaps())

        # test save/load of CrossValidatorModel
        cvModelPath = temp_path + "/cvModel"
        cvModel.save(cvModelPath)
        loadedModel = CrossValidatorModel.load(cvModelPath)
        self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid)
Developer: Brett-A, Project: spark, Lines: 29, Source: test_tuning.py


Example 2: test_equals

 def test_equals(self):
     indices = [1, 2, 4]
     values = [1., 3., 2.]
     self.assertTrue(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 0., 2.]))
     self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 1., 0., 2.]))
     self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 0., 2.]))
     self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 2., 2.]))
Developer: JkSelf, Project: spark, Lines: 7, Source: test_linalg.py


Example 3: test_expose_sub_models

    def test_expose_sub_models(self):
        temp_path = tempfile.mkdtemp()
        dataset = self.spark.createDataFrame(
            [(Vectors.dense([0.0]), 0.0),
             (Vectors.dense([0.4]), 1.0),
             (Vectors.dense([0.5]), 0.0),
             (Vectors.dense([0.6]), 1.0),
             (Vectors.dense([1.0]), 1.0)] * 10,
            ["features", "label"])
        lr = LogisticRegression()
        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
        evaluator = BinaryClassificationEvaluator()
        tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator,
                                   collectSubModels=True)
        tvsModel = tvs.fit(dataset)
        self.assertEqual(len(tvsModel.subModels), len(grid))

        # Test the default value for option "persistSubModel" to be "true"
        testSubPath = temp_path + "/testTrainValidationSplitSubModels"
        savingPathWithSubModels = testSubPath + "cvModel3"
        tvsModel.save(savingPathWithSubModels)
        tvsModel3 = TrainValidationSplitModel.load(savingPathWithSubModels)
        self.assertEqual(len(tvsModel3.subModels), len(grid))
        tvsModel4 = tvsModel3.copy()
        self.assertEqual(len(tvsModel4.subModels), len(grid))

        savingPathWithoutSubModels = testSubPath + "cvModel2"
        tvsModel.write().option("persistSubModels", "false").save(savingPathWithoutSubModels)
        tvsModel2 = TrainValidationSplitModel.load(savingPathWithoutSubModels)
        self.assertEqual(tvsModel2.subModels, None)

        for i in range(len(grid)):
            self.assertEqual(tvsModel.subModels[i].uid, tvsModel3.subModels[i].uid)
Developer: Brett-A, Project: spark, Lines: 33, Source: test_tuning.py


Example 4: test_java_object_gets_detached

    def test_java_object_gets_detached(self):
        df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
                                         (0.0, 2.0, Vectors.sparse(1, [], []))],
                                        ["label", "weight", "features"])
        lr = LinearRegression(maxIter=1, regParam=0.0, solver="normal", weightCol="weight",
                              fitIntercept=False)

        model = lr.fit(df)
        summary = model.summary

        self.assertIsInstance(model, JavaWrapper)
        self.assertIsInstance(summary, JavaWrapper)
        self.assertIsInstance(model, JavaParams)
        self.assertNotIsInstance(summary, JavaParams)

        error_no_object = 'Target Object ID does not exist for this gateway'

        self.assertIn("LinearRegression_", model._java_obj.toString())
        self.assertIn("LinearRegressionTrainingSummary", summary._java_obj.toString())

        model.__del__()

        with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object):
            model._java_obj.toString()
        self.assertIn("LinearRegressionTrainingSummary", summary._java_obj.toString())

        try:
            summary.__del__()
        except:
            pass

        with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object):
            model._java_obj.toString()
        with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object):
            summary._java_obj.toString()
Developer: Brett-A, Project: spark, Lines: 35, Source: test_wrapper.py


Example 5: test_persistence

 def test_persistence(self):
     # Test save/load for LDA, LocalLDAModel, DistributedLDAModel.
     df = self.spark.createDataFrame([
         [1, Vectors.dense([0.0, 1.0])],
         [2, Vectors.sparse(2, {0: 1.0})],
     ], ["id", "features"])
     # Fit model
     lda = LDA(k=2, seed=1, optimizer="em")
     distributedModel = lda.fit(df)
     self.assertTrue(distributedModel.isDistributed())
     localModel = distributedModel.toLocal()
     self.assertFalse(localModel.isDistributed())
     # Define paths
     path = tempfile.mkdtemp()
     lda_path = path + "/lda"
     dist_model_path = path + "/distLDAModel"
     local_model_path = path + "/localLDAModel"
     # Test LDA
     lda.save(lda_path)
     lda2 = LDA.load(lda_path)
     self._compare(lda, lda2)
     # Test DistributedLDAModel
     distributedModel.save(dist_model_path)
     distributedModel2 = DistributedLDAModel.load(dist_model_path)
     self._compare(distributedModel, distributedModel2)
     # Test LocalLDAModel
     localModel.save(local_model_path)
     localModel2 = LocalLDAModel.load(local_model_path)
     self._compare(localModel, localModel2)
     # Clean up
     try:
         rmtree(path)
     except OSError:
         pass
Developer: Brett-A, Project: spark, Lines: 34, Source: test_algorithms.py


Example 6: test_output_columns

 def test_output_columns(self):
     df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
                                      (1.0, Vectors.sparse(2, [], [])),
                                      (2.0, Vectors.dense(0.5, 0.5))],
                                     ["label", "features"])
     lr = LogisticRegression(maxIter=5, regParam=0.01)
     ovr = OneVsRest(classifier=lr, parallelism=1)
     model = ovr.fit(df)
     output = model.transform(df)
     self.assertEqual(output.columns, ["label", "features", "rawPrediction", "prediction"])
Developer: Brett-A, Project: spark, Lines: 10, Source: test_algorithms.py


Example 7: test_copy

 def test_copy(self):
     df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
                                      (1.0, Vectors.sparse(2, [], [])),
                                      (2.0, Vectors.dense(0.5, 0.5))],
                                     ["label", "features"])
     lr = LogisticRegression(maxIter=5, regParam=0.01)
     ovr = OneVsRest(classifier=lr)
     ovr1 = ovr.copy({lr.maxIter: 10})
     self.assertEqual(ovr.getClassifier().getMaxIter(), 5)
     self.assertEqual(ovr1.getClassifier().getMaxIter(), 10)
     model = ovr.fit(df)
     model1 = model.copy({model.predictionCol: "indexed"})
     self.assertEqual(model1.getPredictionCol(), "indexed")
Developer: Brett-A, Project: spark, Lines: 13, Source: test_algorithms.py


Example 8: test_parallelism_doesnt_change_output

 def test_parallelism_doesnt_change_output(self):
     df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
                                      (1.0, Vectors.sparse(2, [], [])),
                                      (2.0, Vectors.dense(0.5, 0.5))],
                                     ["label", "features"])
     ovrPar1 = OneVsRest(classifier=LogisticRegression(maxIter=5, regParam=.01), parallelism=1)
     modelPar1 = ovrPar1.fit(df)
     ovrPar2 = OneVsRest(classifier=LogisticRegression(maxIter=5, regParam=.01), parallelism=2)
     modelPar2 = ovrPar2.fit(df)
     for i, model in enumerate(modelPar1.models):
         self.assertTrue(np.allclose(model.coefficients.toArray(),
                                     modelPar2.models[i].coefficients.toArray(), atol=1E-4))
         self.assertTrue(np.allclose(model.intercept, modelPar2.models[i].intercept, atol=1E-4))
Developer: Brett-A, Project: spark, Lines: 13, Source: test_algorithms.py


Example 9: test_support_for_weightCol

 def test_support_for_weightCol(self):
     df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8), 1.0),
                                      (1.0, Vectors.sparse(2, [], []), 1.0),
                                      (2.0, Vectors.dense(0.5, 0.5), 1.0)],
                                     ["label", "features", "weight"])
     # classifier inherits hasWeightCol
     lr = LogisticRegression(maxIter=5, regParam=0.01)
     ovr = OneVsRest(classifier=lr, weightCol="weight")
     self.assertIsNotNone(ovr.fit(df))
     # classifier doesn't inherit hasWeightCol
     dt = DecisionTreeClassifier()
     ovr2 = OneVsRest(classifier=dt, weightCol="weight")
     self.assertIsNotNone(ovr2.fit(df))
Developer: Brett-A, Project: spark, Lines: 13, Source: test_algorithms.py


Example 10: test_offset

    def test_offset(self):

        df = self.spark.createDataFrame(
            [(0.2, 1.0, 2.0, Vectors.dense(0.0, 5.0)),
             (0.5, 2.1, 0.5, Vectors.dense(1.0, 2.0)),
             (0.9, 0.4, 1.0, Vectors.dense(2.0, 1.0)),
             (0.7, 0.7, 0.0, Vectors.dense(3.0, 3.0))], ["label", "weight", "offset", "features"])

        glr = GeneralizedLinearRegression(family="poisson", weightCol="weight", offsetCol="offset")
        model = glr.fit(df)
        self.assertTrue(np.allclose(model.coefficients.toArray(), [0.664647, -0.3192581],
                                    atol=1E-4))
        self.assertTrue(np.isclose(model.intercept, -1.561613, atol=1E-4))
Developer: Brett-A, Project: spark, Lines: 13, Source: test_algorithms.py


Example 11: ztest_toPandas

 def ztest_toPandas(self):
     data = [(Vectors.dense([0.1, 0.2]),),
             (Vectors.sparse(2, {0: 0.3, 1: 0.4}),),
             (Vectors.sparse(2, {0: 0.5, 1: 0.6}),)]
     df = self.sql.createDataFrame(data, ["features"])
     self.assertEqual(df.count(), 3)
     pd = self.converter.toPandas(df)
     self.assertEqual(len(pd), 3)
     self.assertTrue(isinstance(pd.features[0], csr_matrix),
                     "Expected pd.features[0] to be csr_matrix but found: %s" %
                     type(pd.features[0]))
     self.assertEqual(pd.features[0].shape[0], 3)
     self.assertEqual(pd.features[0].shape[1], 2)
     self.assertEqual(pd.features[0][0,0], 0.1)
     self.assertEqual(pd.features[0][0,1], 0.2)
Developer: Sandy4321, Project: spark-sklearn, Lines: 15, Source: converter_test.py


Example 12: test_binomial_logistic_regression_with_bound

    def test_binomial_logistic_regression_with_bound(self):

        df = self.spark.createDataFrame(
            [(1.0, 1.0, Vectors.dense(0.0, 5.0)),
             (0.0, 2.0, Vectors.dense(1.0, 2.0)),
             (1.0, 3.0, Vectors.dense(2.0, 1.0)),
             (0.0, 4.0, Vectors.dense(3.0, 3.0)), ], ["label", "weight", "features"])

        lor = LogisticRegression(regParam=0.01, weightCol="weight",
                                 lowerBoundsOnCoefficients=Matrices.dense(1, 2, [-1.0, -1.0]),
                                 upperBoundsOnIntercepts=Vectors.dense(0.0))
        model = lor.fit(df)
        self.assertTrue(
            np.allclose(model.coefficients.toArray(), [-0.2944, -0.0484], atol=1E-4))
        self.assertTrue(np.isclose(model.intercept, 0.0, atol=1E-4))
Developer: Brett-A, Project: spark, Lines: 15, Source: test_algorithms.py


Example 13: test_bisecting_kmeans_summary

 def test_bisecting_kmeans_summary(self):
     data = [(Vectors.dense(1.0),), (Vectors.dense(5.0),), (Vectors.dense(10.0),),
             (Vectors.sparse(1, [], []),)]
     df = self.spark.createDataFrame(data, ["features"])
     bkm = BisectingKMeans(k=2)
     model = bkm.fit(df)
     self.assertTrue(model.hasSummary)
     s = model.summary
     self.assertTrue(isinstance(s.predictions, DataFrame))
     self.assertEqual(s.featuresCol, "features")
     self.assertEqual(s.predictionCol, "prediction")
     self.assertTrue(isinstance(s.cluster, DataFrame))
     self.assertEqual(len(s.clusterSizes), 2)
     self.assertEqual(s.k, 2)
     self.assertEqual(s.numIter, 20)
Developer: Brett-A, Project: spark, Lines: 15, Source: test_training_summary.py


Example 14: test_kmeans_summary

 def test_kmeans_summary(self):
     data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),
             (Vectors.dense([9.0, 8.0]),), (Vectors.dense([8.0, 9.0]),)]
     df = self.spark.createDataFrame(data, ["features"])
     kmeans = KMeans(k=2, seed=1)
     model = kmeans.fit(df)
     self.assertTrue(model.hasSummary)
     s = model.summary
     self.assertTrue(isinstance(s.predictions, DataFrame))
     self.assertEqual(s.featuresCol, "features")
     self.assertEqual(s.predictionCol, "prediction")
     self.assertTrue(isinstance(s.cluster, DataFrame))
     self.assertEqual(len(s.clusterSizes), 2)
     self.assertEqual(s.k, 2)
     self.assertEqual(s.numIter, 1)
Developer: Brett-A, Project: spark, Lines: 15, Source: test_training_summary.py


Example 15: test_kmean_pmml_basic

 def test_kmean_pmml_basic(self):
     # Most of the validation is done in the Scala side, here we just check
     # that we output text rather than parquet (e.g. that the format flag
     # was respected).
     data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),
             (Vectors.dense([9.0, 8.0]),), (Vectors.dense([8.0, 9.0]),)]
     df = self.spark.createDataFrame(data, ["features"])
     kmeans = KMeans(k=2, seed=1)
     model = kmeans.fit(df)
     path = tempfile.mkdtemp()
     km_path = path + "/km-pmml"
     model.write().format("pmml").save(km_path)
     pmml_text_list = self.sc.textFile(km_path).collect()
     pmml_text = "\n".join(pmml_text_list)
     self.assertIn("Apache Spark", pmml_text)
     self.assertIn("PMML", pmml_text)
Developer: Brett-A, Project: spark, Lines: 16, Source: test_persistence.py


Example 16: test_vector_size_hint

    def test_vector_size_hint(self):
        df = self.spark.createDataFrame(
            [(0, Vectors.dense([0.0, 10.0, 0.5])),
             (1, Vectors.dense([1.0, 11.0, 0.5, 0.6])),
             (2, Vectors.dense([2.0, 12.0]))],
            ["id", "vector"])

        sizeHint = VectorSizeHint(
            inputCol="vector",
            handleInvalid="skip")
        sizeHint.setSize(3)
        self.assertEqual(sizeHint.getSize(), 3)

        output = sizeHint.transform(df).head().vector
        expected = DenseVector([0.0, 10.0, 0.5])
        self.assertEqual(output, expected)
Developer: Brett-A, Project: spark, Lines: 16, Source: test_feature.py


Example 17: test_linear_regression_pmml_basic

 def test_linear_regression_pmml_basic(self):
     # Most of the validation is done in the Scala side, here we just check
     # that we output text rather than parquet (e.g. that the format flag
     # was respected).
     df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
                                      (0.0, 2.0, Vectors.sparse(1, [], []))],
                                     ["label", "weight", "features"])
     lr = LinearRegression(maxIter=1)
     model = lr.fit(df)
     path = tempfile.mkdtemp()
     lr_path = path + "/lr-pmml"
     model.write().format("pmml").save(lr_path)
     pmml_text_list = self.sc.textFile(lr_path).collect()
     pmml_text = "\n".join(pmml_text_list)
     self.assertIn("Apache Spark", pmml_text)
     self.assertIn("PMML", pmml_text)
Developer: JingchengDu, Project: spark, Lines: 16, Source: test_persistence.py


Example 18: test_tweedie_distribution

    def test_tweedie_distribution(self):

        df = self.spark.createDataFrame(
            [(1.0, Vectors.dense(0.0, 0.0)),
             (1.0, Vectors.dense(1.0, 2.0)),
             (2.0, Vectors.dense(0.0, 0.0)),
             (2.0, Vectors.dense(1.0, 1.0)), ], ["label", "features"])

        glr = GeneralizedLinearRegression(family="tweedie", variancePower=1.6)
        model = glr.fit(df)
        self.assertTrue(np.allclose(model.coefficients.toArray(), [-0.4645, 0.3402], atol=1E-4))
        self.assertTrue(np.isclose(model.intercept, 0.7841, atol=1E-4))

        model2 = glr.setLinkPower(-1.0).fit(df)
        self.assertTrue(np.allclose(model2.coefficients.toArray(), [-0.6667, 0.5], atol=1E-4))
        self.assertTrue(np.isclose(model2.intercept, 0.6667, atol=1E-4))
Developer: Brett-A, Project: spark, Lines: 16, Source: test_algorithms.py


Example 19: reduce

def reduce(inputpath, alg, k):
    # sc (SparkContext), Row, Vectors, and the pca() helper are defined
    # elsewhere in views.py.
    n_data = 0
    n_features = 0
    result = "successful!"
    inputdir = os.path.dirname(inputpath)
    print("inputdir: " + inputdir + result)

    # Count rows and record the feature width of the raw input file.
    input_n = 0
    with open(inputpath, 'r') as inputfile:
        for line in inputfile:
            input_n = len(line.split(" "))
            n_data += 1

    if int(k) >= input_n:
        result = "reduced features must be smaller than input features."
        print(result)
    else:
        # Parse "label f1 f2 ..." rows into a DataFrame of dense vectors.
        lines = sc.textFile(inputpath).map(lambda x: x.split(" "))
        lines = lines.map(lambda x: (x[0], [float(y) for y in x[1:]]))
        df = lines.map(lambda x: Row(labels=x[0], features=Vectors.dense(x[1]))).toDF()

        if alg == "pca":
            pca(inputdir, df, alg, k)  # writes the reduced data set to disk

        # Re-read the reduced output to report its dimensions and file size.
        output_data = inputdir + "/" + alg + str(k) + "_Data"
        file_size = str(os.stat(output_data).st_size)
        counter = 0
        n_features = '0'
        with open(output_data, 'r') as inputfile:
            for line in inputfile:
                n_features = str(len(line.split(" ")))
                counter += 1
        n_data = str(counter)

        result = "File: " + os.path.basename(output_data) + '</br>'
        result += ("Path: " + os.path.dirname(output_data) + '/' +
                   alg + str(k) + "_Features/" + '</br>')
        result += "Dimension: " + n_data + " x " + n_features + "</br>"
        result += "Size: " + file_size + ' bytes'
        print(result)

    print("Dimension reduction finished!")

    context = {'n_data': n_data, 'n_features': n_features, 'result': result}
    return context
Developer: eason001, Project: imPro, Lines: 60, Source: views.py


Example 20: test_parallel_evaluation

 def test_parallel_evaluation(self):
     dataset = self.spark.createDataFrame(
         [(Vectors.dense([0.0]), 0.0),
          (Vectors.dense([0.4]), 1.0),
          (Vectors.dense([0.5]), 0.0),
          (Vectors.dense([0.6]), 1.0),
          (Vectors.dense([1.0]), 1.0)] * 10,
         ["features", "label"])
     lr = LogisticRegression()
     grid = ParamGridBuilder().addGrid(lr.maxIter, [5, 6]).build()
     evaluator = BinaryClassificationEvaluator()
     tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
     tvs.setParallelism(1)
     tvsSerialModel = tvs.fit(dataset)
     tvs.setParallelism(2)
     tvsParallelModel = tvs.fit(dataset)
     self.assertEqual(tvsSerialModel.validationMetrics, tvsParallelModel.validationMetrics)
Developer: Brett-A, Project: spark, Lines: 17, Source: test_tuning.py



Note: The pyspark.ml.linalg.Vectors class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The code snippets are selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.

