
Python linalg.Vectors Class Code Examples

This article compiles typical usage examples of the Python class pyspark.mllib.linalg.Vectors. If you have been wondering what the Vectors class is for, how to call it, or what working code that uses it looks like, the curated class examples below should help.



The following presents 20 code examples of the Vectors class, ordered by popularity.
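
Before turning to the examples, here is a minimal sketch of the two constructors used throughout this page, Vectors.dense and Vectors.sparse. It is not drawn from any of the projects below and only assumes a local PySpark installation (no SparkContext is needed to construct vectors):

from pyspark.mllib.linalg import Vectors

# Dense vector: every entry is stored explicitly.
dv = Vectors.dense([1.0, 0.0, 3.0])

# Sparse vector: a size plus the non-zero entries, given either as
# parallel index/value lists or as an {index: value} dict.
sv1 = Vectors.sparse(3, [0, 2], [1.0, 3.0])
sv2 = Vectors.sparse(3, {0: 1.0, 2: 3.0})

assert sv1 == sv2
assert dv == sv1  # dense/sparse equality compares entries (Spark 2.0+)

# L2 norm and squared Euclidean distance.
print(Vectors.norm(dv, 2))                # 3.1622776601683795
print(Vectors.squared_distance(dv, sv1))  # 0.0

Sparse construction is preferred when most entries are zero, which is why several examples below pass Vectors.sparse(n, [], []) as an all-zero feature vector.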

Example 1: test_glr_summary

 def test_glr_summary(self):
     from pyspark.mllib.linalg import Vectors
     df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
                                      (0.0, 2.0, Vectors.sparse(1, [], []))],
                                     ["label", "weight", "features"])
     glr = GeneralizedLinearRegression(family="gaussian", link="identity", weightCol="weight",
                                       fitIntercept=False)
     model = glr.fit(df)
     self.assertTrue(model.hasSummary)
     s = model.summary
     # test that api is callable and returns expected types
     self.assertEqual(s.numIterations, 1)  # this should default to a single iteration of WLS
     self.assertTrue(isinstance(s.predictions, DataFrame))
     self.assertEqual(s.predictionCol, "prediction")
     self.assertTrue(isinstance(s.residuals(), DataFrame))
     self.assertTrue(isinstance(s.residuals("pearson"), DataFrame))
     coefStdErr = s.coefficientStandardErrors
     self.assertTrue(isinstance(coefStdErr, list) and isinstance(coefStdErr[0], float))
     tValues = s.tValues
     self.assertTrue(isinstance(tValues, list) and isinstance(tValues[0], float))
     pValues = s.pValues
     self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float))
     self.assertEqual(s.degreesOfFreedom, 1)
     self.assertEqual(s.residualDegreeOfFreedom, 1)
     self.assertEqual(s.residualDegreeOfFreedomNull, 2)
     self.assertEqual(s.rank, 1)
     self.assertTrue(isinstance(s.solver, basestring))
     self.assertTrue(isinstance(s.aic, float))
     self.assertTrue(isinstance(s.deviance, float))
     self.assertTrue(isinstance(s.nullDeviance, float))
     self.assertTrue(isinstance(s.dispersion, float))
     # test evaluation (with training dataset) produces a summary with same values
     # one check is enough to verify a summary is returned, Scala version runs full test
     sameSummary = model.evaluate(df)
     self.assertAlmostEqual(sameSummary.deviance, s.deviance)
Author: A7mech | Project: spark | Lines: 35 | Source: tests.py


Example 2: test_equals

 def test_equals(self):
     indices = [1, 2, 4]
     values = [1., 3., 2.]
     self.assertTrue(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 0., 2.]))
     self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 1., 0., 2.]))
     self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 0., 2.]))
     self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 2., 2.]))
Author: drewrobb | Project: spark | Lines: 7 | Source: test_linalg.py


Example 3: test_logistic_regression_summary

 def test_logistic_regression_summary(self):
     from pyspark.mllib.linalg import Vectors
     sqlContext = SQLContext(self.sc)
     df = sqlContext.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
                                      (0.0, 2.0, Vectors.sparse(1, [], []))],
                                     ["label", "weight", "features"])
     lr = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight", fitIntercept=False)
     model = lr.fit(df)
     self.assertTrue(model.hasSummary)
     s = model.summary
     # test that api is callable and returns expected types
     self.assertTrue(isinstance(s.predictions, DataFrame))
     self.assertEqual(s.probabilityCol, "probability")
     self.assertEqual(s.labelCol, "label")
     self.assertEqual(s.featuresCol, "features")
     objHist = s.objectiveHistory
     self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float))
     self.assertGreater(s.totalIterations, 0)
     self.assertTrue(isinstance(s.roc, DataFrame))
     self.assertAlmostEqual(s.areaUnderROC, 1.0, 2)
     self.assertTrue(isinstance(s.pr, DataFrame))
     self.assertTrue(isinstance(s.fMeasureByThreshold, DataFrame))
     self.assertTrue(isinstance(s.precisionByThreshold, DataFrame))
     self.assertTrue(isinstance(s.recallByThreshold, DataFrame))
     # test evaluation (with training dataset) produces a summary with same values
     # one check is enough to verify a summary is returned, Scala version runs full test
     sameSummary = model.evaluate(df)
     self.assertAlmostEqual(sameSummary.areaUnderROC, s.areaUnderROC)
Author: Bella-Lin | Project: spark | Lines: 28 | Source: tests.py


Example 4: test_nnclassifier_in_pipeline

    def test_nnclassifier_in_pipeline(self):

        if self.sc.version.startswith("1"):
            from pyspark.mllib.linalg import Vectors

            df = self.sqlContext.createDataFrame(
                [(Vectors.dense([2.0, 1.0]), 1.0),
                 (Vectors.dense([1.0, 2.0]), 2.0),
                 (Vectors.dense([2.0, 1.0]), 1.0),
                 (Vectors.dense([1.0, 2.0]), 2.0),
                 ], ["features", "label"])

            scaler = MinMaxScaler().setInputCol("features").setOutputCol("scaled")
            model = Sequential().add(Linear(2, 2))
            criterion = ClassNLLCriterion()
            classifier = NNClassifier(model, criterion, MLlibVectorToTensor([2]))\
                .setBatchSize(4) \
                .setLearningRate(0.01).setMaxEpoch(1).setFeaturesCol("scaled")

            pipeline = Pipeline(stages=[scaler, classifier])

            pipelineModel = pipeline.fit(df)

            res = pipelineModel.transform(df)
            assert type(res).__name__ == 'DataFrame'
Author: ru003ar | Project: analytics-zoo | Lines: 25 | Source: test_nn_classifier.py


Example 5: test_save_load

 def test_save_load(self):
     temp_path = tempfile.mkdtemp()
     sqlContext = SQLContext(self.sc)
     dataset = sqlContext.createDataFrame(
         [(Vectors.dense([0.0]), 0.0),
          (Vectors.dense([0.4]), 1.0),
          (Vectors.dense([0.5]), 0.0),
          (Vectors.dense([0.6]), 1.0),
          (Vectors.dense([1.0]), 1.0)] * 10,
         ["features", "label"])
     lr = LogisticRegression()
     grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
     evaluator = BinaryClassificationEvaluator()
     cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
     cvModel = cv.fit(dataset)
     cvPath = temp_path + "/cv"
     cv.save(cvPath)
     loadedCV = CrossValidator.load(cvPath)
     self.assertEqual(loadedCV.getEstimator().uid, cv.getEstimator().uid)
     self.assertEqual(loadedCV.getEvaluator().uid, cv.getEvaluator().uid)
     self.assertEqual(loadedCV.getEstimatorParamMaps(), cv.getEstimatorParamMaps())
     cvModelPath = temp_path + "/cvModel"
     cvModel.save(cvModelPath)
     loadedModel = CrossValidatorModel.load(cvModelPath)
     self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid)
Author: Bella-Lin | Project: spark | Lines: 25 | Source: tests.py


Example 6: test_append_bias_with_sp_vector

 def test_append_bias_with_sp_vector(self):
     data = Vectors.sparse(3, {0: 2.0, 2: 2.0})
     expected = Vectors.sparse(4, {0: 2.0, 2: 2.0, 3: 1.0})
     # Returned value must be SparseVector
     ret = MLUtils.appendBias(data)
     self.assertEqual(ret, expected)
     self.assertEqual(type(ret), SparseVector)
Author: drewrobb | Project: spark | Lines: 7 | Source: test_util.py


Example 7: test_persistence

 def test_persistence(self):
     # Test save/load for LDA, LocalLDAModel, DistributedLDAModel.
     sqlContext = SQLContext(self.sc)
     df = sqlContext.createDataFrame([
         [1, Vectors.dense([0.0, 1.0])],
         [2, Vectors.sparse(2, {0: 1.0})],
     ], ["id", "features"])
     # Fit model
     lda = LDA(k=2, seed=1, optimizer="em")
     distributedModel = lda.fit(df)
     self.assertTrue(distributedModel.isDistributed())
     localModel = distributedModel.toLocal()
     self.assertFalse(localModel.isDistributed())
     # Define paths
     path = tempfile.mkdtemp()
     lda_path = path + "/lda"
     dist_model_path = path + "/distLDAModel"
     local_model_path = path + "/localLDAModel"
     # Test LDA
     lda.save(lda_path)
     lda2 = LDA.load(lda_path)
     self._compare(lda, lda2)
     # Test DistributedLDAModel
     distributedModel.save(dist_model_path)
     distributedModel2 = DistributedLDAModel.load(dist_model_path)
     self._compare(distributedModel, distributedModel2)
     # Test LocalLDAModel
     localModel.save(local_model_path)
     localModel2 = LocalLDAModel.load(local_model_path)
     self._compare(localModel, localModel2)
     # Clean up
     try:
         rmtree(path)
     except OSError:
         pass
Author: bsangee | Project: spark | Lines: 35 | Source: tests.py


Example 8: test_model_transform

    def test_model_transform(self):
        weight = Vectors.dense([3, 2, 1])

        densevec = Vectors.dense([4, 5, 6])
        sparsevec = Vectors.sparse(3, [0], [1])
        eprod = ElementwiseProduct(weight)
        self.assertEqual(eprod.transform(densevec), DenseVector([12, 10, 6]))
        self.assertEqual(
            eprod.transform(sparsevec), SparseVector(3, [0], [3]))
Author: HodaAlemi | Project: spark | Lines: 9 | Source: tests.py


Example 9: test_right_number_of_results

 def test_right_number_of_results(self):
     num_cols = 1001
     sparse_data = [
         LabeledPoint(0.0, Vectors.sparse(num_cols, [(100, 2.0)])),
         LabeledPoint(0.1, Vectors.sparse(num_cols, [(200, 1.0)]))
     ]
     chi = Statistics.chiSqTest(self.sc.parallelize(sparse_data))
     self.assertEqual(len(chi), num_cols)
     self.assertIsNotNone(chi[1000])
Author: greatyan | Project: spark | Lines: 9 | Source: tests.py


Example 10: _get_train_data

 def _get_train_data(self):
     sql_context = SQLContext(self.sc)
     l = [
         (1, Vectors.dense([1, 2, 3]), 1.0),
         (2, Vectors.dense([1, 2, 3]), 0.0),
         (3, Vectors.dense([1, 2, 3]), 1.0),
         (4, Vectors.dense([1, 2, 3]), 0.0),
     ]
     return sql_context.createDataFrame(l, ['id', 'features', 'label'])
Author: ngarneau | Project: sentiment-analysis | Lines: 9 | Source: pipelines.py


Example 11: test_parse_vector

 def test_parse_vector(self):
     a = DenseVector([3, 4, 6, 7])
     self.assertTrue(str(a), '[3.0,4.0,6.0,7.0]')
     self.assertTrue(Vectors.parse(str(a)), a)
     a = SparseVector(4, [0, 2], [3, 4])
     self.assertTrue(str(a), '(4,[0,2],[3.0,4.0])')
     self.assertTrue(Vectors.parse(str(a)), a)
     a = SparseVector(10, [0, 1], [4, 5])
     self.assertTrue(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), a)
Author: HodaAlemi | Project: spark | Lines: 9 | Source: tests.py


Example 12: test_idf_model

 def test_idf_model(self):
     data = [
         Vectors.dense([1, 2, 6, 0, 2, 3, 1, 1, 0, 0, 3]),
         Vectors.dense([1, 3, 0, 1, 3, 0, 0, 2, 0, 0, 1]),
         Vectors.dense([1, 4, 1, 0, 0, 4, 9, 0, 1, 2, 0]),
         Vectors.dense([2, 1, 0, 3, 0, 0, 5, 0, 2, 3, 9])
     ]
     model = IDF().fit(self.sc.parallelize(data, 2))
     idf = model.idf()
     self.assertEqual(len(idf), 11)
Author: HodaAlemi | Project: spark | Lines: 10 | Source: tests.py


Example 13: test_output_columns

 def test_output_columns(self):
     df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
                                      (1.0, Vectors.sparse(2, [], [])),
                                      (2.0, Vectors.dense(0.5, 0.5))],
                                     ["label", "features"])
     lr = LogisticRegression(maxIter=5, regParam=0.01)
     ovr = OneVsRest(classifier=lr)
     model = ovr.fit(df)
     output = model.transform(df)
     self.assertEqual(output.columns, ["label", "features", "prediction"])
Author: A7mech | Project: spark | Lines: 10 | Source: tests.py


Example 14: load_data_rdd

def load_data_rdd(csv_file, shuffle=True, train=True):
    if shuffle:
        shuffle_csv(csv_file)
    data = sc.textFile(data_path + csv_file)
    data = data.filter(lambda x:x.split(',')[0] != 'id').map(lambda line: line.split(','))
    if train:
        data = data.map(
            lambda line: (Vectors.dense(np.asarray(line[1:-1]).astype(np.float32)),
            str(line[-1]).replace('Class_', '')) )
    else:
        data = data.map(lambda line: (Vectors.dense(np.asarray(line[1:]).astype(np.float32)), "1") )
    return data
Author: thisiskofi | Project: elephas | Lines: 12 | Source: ml_pipeline_otto.py


Example 15: parseEntry

def parseEntry(xx):

    mindate=datetime.datetime(datetime.MINYEAR, 1, 1,1,1)
    xx=xx.split('\t')
    a_virtual=xx[0]
    browser=xx[1]
    referrer=xx[2]
    a_user_key=xx[3]
    try:
        birthyear=int(xx[4])
        age=2015-birthyear
    except Exception as _:
        birthyear=xx[4]
        age=-1
    gender=xx[5]
    #print(xx)
    #print(xx[6])
    if xx[6]!='NAN':
        reg_date=datetime.datetime.strptime(xx[6],'%Y-%m-%d')
    else:
        reg_date=mindate
    device=xx[7]
    date=datetime.datetime.strptime(xx[8],'%d-%m-%Y')
    tdiff=datetime.timedelta(hours=int(xx[9]))
    date=date+tdiff
    year=date.year
    month=date.month
    day=date.day
    hour=int(xx[9])
    weekday=date.weekday()

    if reg_date>mindate:
        days_since_registration=(date-reg_date).days
    else:
        days_since_registration=-1

    metrics=list([int(x.replace(',0','')) for x in xx[10:]])
    visits=metrics[0]
    visits_betalt=metrics[1]
    pageviews=metrics[2]
    pageview_nothome=metrics[3]
    pageview_betalt=metrics[4]

    timegroup_pvs=Vectors.sparse(maxInd,[(intervalIndDict[(weekday,hour)],pageviews)])
    timegroup_visit=Vectors.sparse(maxInd,[(intervalIndDict[(weekday,hour)],1.)])

    return Row(browser=browser,a_user_key=a_user_key,age=age,\
               day=day,hour=hour,date=date,weekday=weekday,pv=pageviews,\
               pv_nh=pageview_nothome,pv_bet=pageview_betalt,referrer=referrer,\
               device=device,gender=gender,days_since_registration=days_since_registration,\
               reg_date=reg_date,timegroup_pvs=timegroup_pvs,timegroup_visit=timegroup_visit,\
               a_virtual=a_virtual)
Author: Froskekongen | Project: content-consumption | Lines: 52 | Source: consume_profiles_spark_2.py


Example 16: remove_time_dependent_effects

 def remove_time_dependent_effects(self, ts):
     """
     Given a timeseries, apply inverse operations to obtain the original series of underlying errors.
     Parameters
     ----------
     ts:
         Time series of observations with this model's characteristics as a Numpy array
     
     returns the time series with removed time-dependent effects as a Numpy array
     """
     destts = Vectors.dense(np.array([0] * len(ts)))
     result =  self._jmodel.removeTimeDependentEffects(_py2java(self._ctx, Vectors.dense(ts)), _py2java(self._ctx, destts))
     return _java2py(self._ctx, result.toArray())
Author: BabelTower | Project: spark-timeseries | Lines: 13 | Source: _model.py


Example 17: create_rows_for_rdd

def create_rows_for_rdd(x):
    """

    :param x:
    :return:
    """
    features = list(x[1])
    l = len(features) - 1
    label = float(features.pop(l))
    meta_data = x[0]
    return Row(label=label,
               features=Vectors.dense(features),
               meta_data=Vectors.dense(meta_data))
Author: USF-ML2 | Project: SKYNET- | Lines: 13 | Source: modeling_utils.py


Example 18: load_data_frame

def load_data_frame(csv_file, shuffle=True, train=True):
    if shuffle:
        shuffle_csv(csv_file)
    data = sc.textFile('/home/minglu/dist_spark/data/' + csv_file) # This is an RDD, which will later be transformed to a data frame
    data = data.filter(lambda x:x.split(',')[0] != 'label').map(lambda line: line.split(','))
    if train:
        data = data.map(
            lambda line: (Vectors.dense(np.asarray(line[1:]).astype(np.float32)),
                          'class_'+str(line[0]),int(line[0])) )
    else:
        # Test data gets dummy labels. We need the same structure as in Train data
        data = data.map( lambda line: (Vectors.dense(np.asarray(line[1:]).astype(np.float32)),'class_'+str(line[0]),int(line[0])) ) 
    return sqlcontext.createDataFrame(data, ['features', 'category','label'])
Author: ChienHsiung | Project: python | Lines: 13 | Source: spark101.py


Example 19: test_copy

 def test_copy(self):
     df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
                                      (1.0, Vectors.sparse(2, [], [])),
                                      (2.0, Vectors.dense(0.5, 0.5))],
                                     ["label", "features"])
     lr = LogisticRegression(maxIter=5, regParam=0.01)
     ovr = OneVsRest(classifier=lr)
     ovr1 = ovr.copy({lr.maxIter: 10})
     self.assertEqual(ovr.getClassifier().getMaxIter(), 5)
     self.assertEqual(ovr1.getClassifier().getMaxIter(), 10)
     model = ovr.fit(df)
     model1 = model.copy({model.predictionCol: "indexed"})
     self.assertEqual(model1.getPredictionCol(), "indexed")
Author: A7mech | Project: spark | Lines: 13 | Source: tests.py


Example 20: add_time_dependent_effects

 def add_time_dependent_effects(self, ts):
     """
     Given a timeseries, apply a model to it.
     
     Parameters
     ----------
     ts:
         Time series of i.i.d. observations as a Numpy array
     
     returns the time series with added time-dependent effects as a Numpy array.
     """
     destts = Vectors.dense([0] * len(ts))
     result =  self._jmodel.addTimeDependentEffects(_py2java(self._ctx, Vectors.dense(ts)), _py2java(self._ctx, destts))
     return _java2py(self._ctx, result.toArray())
Author: BabelTower | Project: spark-timeseries | Lines: 14 | Source: _model.py



Note: the pyspark.mllib.linalg.Vectors examples above were collected from open-source projects hosted on GitHub, MSDocs, and similar platforms. Copyright of each snippet remains with its original author; consult the corresponding project's license before redistributing or reusing the code, and do not repost without permission.

