本文整理汇总了Python中pyspark.mllib.linalg._convert_to_vector函数的典型用法代码示例。如果您正苦于以下问题：Python _convert_to_vector函数的具体用法？Python _convert_to_vector怎么用？Python _convert_to_vector使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了_convert_to_vector函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: appendBias
def appendBias(data):
    """
    Return a copy of the input vector with a trailing bias term of `1.0`.

    :param data: Any value accepted by `_convert_to_vector`.
    :return: A `SparseVector` one element longer when the converted input
             is sparse; otherwise the result of converting the extended
             array back with `_convert_to_vector`.
    """
    vector = _convert_to_vector(data)
    if not isinstance(vector, SparseVector):
        # Dense path: extend the underlying array and convert it back.
        return _convert_to_vector(np.append(vector.toArray(), 1.0))
    # Sparse path: the bias lands on the new last index, i.e. the old size.
    biasedIndices = np.append(vector.indices, len(vector))
    biasedValues = np.append(vector.values, 1.0)
    return SparseVector(len(vector) + 1, biasedIndices, biasedValues)
开发者ID:AsafZ,项目名称:spark,代码行数:12,代码来源:util.py
示例2: _regression_train_wrapper
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """
    Train a model on an RDD of LabeledPoint and wrap the result in `modelClass`.

    :param train_func: Callable invoked as `train_func(data, weights_vector)`.
        When `modelClass` is `LogisticRegressionModel` it must return
        `(weights, intercept, numFeatures, numClasses)`; otherwise it must
        return `(weights, intercept)`.
    :param modelClass: Class instantiated with the values `train_func` returns.
    :param data: RDD whose first element must be a `LabeledPoint`.
    :param initial_weights: Starting weights, or None to start from a list of
        zeros with one entry per feature of the first data point.
    :return: An instance of `modelClass`.
    :raises TypeError: If the first element of `data` is not a LabeledPoint.
    """
    # NOTE(review): imported locally, presumably to avoid a circular import
    # with the classification module -- confirm before hoisting.
    from pyspark.mllib.classification import LogisticRegressionModel

    first = data.first()
    if not isinstance(first, LabeledPoint):
        raise TypeError("data should be an RDD of LabeledPoint, but got %s" % type(first))
    if initial_weights is None:
        # Reuse the element fetched above instead of calling data.first()
        # a second time, which would launch another job on the RDD.
        initial_weights = [0.0] * len(first.features)
    startVector = _convert_to_vector(initial_weights)
    if modelClass == LogisticRegressionModel:
        weights, intercept, numFeatures, numClasses = train_func(data, startVector)
        return modelClass(weights, intercept, numFeatures, numClasses)
    weights, intercept = train_func(data, startVector)
    return modelClass(weights, intercept)
开发者ID:BeforeRain,项目名称:spark,代码行数:14,代码来源:regression.py
示例3: update
def update(self, data, decayFactor, timeUnit):
    """Refresh the cluster centers from a new batch of data.

    :param data:
      RDD holding the new points used for this update.
    :param decayFactor:
      Forgetfulness of the previous centroids; coerced with `float()`.
    :param timeUnit:
      Either "batches" or "points". With "points" the decay factor is
      raised to the power of the number of new points; with "batches"
      the decay factor is used as is.
    :return: This model, with `centers` and `_clusterWeights` replaced.
    :raises TypeError: If `data` is not an RDD.
    :raises ValueError: If `timeUnit` is neither "batches" nor "points".
    """
    if not isinstance(data, RDD):
        raise TypeError("Data should be of an RDD, got %s." % type(data))
    points = data.map(_convert_to_vector)
    decay = float(decayFactor)
    if timeUnit not in ("batches", "points"):
        raise ValueError(
            "timeUnit should be 'batches' or 'points', got %s." % timeUnit)
    currentCenters = [_convert_to_vector(c) for c in self.centers]
    result = callMLlibFunc(
        "updateStreamingKMeansModel", currentCenters, self._clusterWeights,
        points, decay, timeUnit)
    # result[0] holds the new centers, result[1] the new cluster weights.
    self.centers = array(result[0])
    self._clusterWeights = list(result[1])
    return self
开发者ID:11wzy001,项目名称:spark,代码行数:26,代码来源:clustering.py
示例4: _regression_train_wrapper
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """
    Train a model on an RDD of LabeledPoint and wrap the result in `modelClass`.

    :param train_func: Callable invoked as `train_func(data, weights_vector)`;
        must return a `(weights, intercept)` pair.
    :param modelClass: Class instantiated as `modelClass(weights, intercept)`.
    :param data: RDD whose first element must be a `LabeledPoint`.
    :param initial_weights: Starting weights. None or an empty list/tuple
        selects a list of zeros with one entry per feature of the first
        data point.
    :return: An instance of `modelClass`.
    :raises ValueError: If the first element of `data` is not a LabeledPoint.
    """
    first = data.first()
    if not isinstance(first, LabeledPoint):
        # Wrap in a 1-tuple: if `first` is itself a tuple, `"%s" % first`
        # would raise TypeError instead of reporting the offending element.
        raise ValueError("data should be an RDD of LabeledPoint, but got %s" % (first,))
    # Avoid `initial_weights or ...`: truth-testing a multi-element NumPy
    # array raises ValueError. Empty lists/tuples still fall back to zeros.
    useDefault = initial_weights is None or (
        isinstance(initial_weights, (list, tuple)) and not initial_weights)
    if useDefault:
        # Reuse the element fetched above rather than calling data.first() again.
        initial_weights = [0.0] * len(first.features)
    weights, intercept = train_func(data, _convert_to_vector(initial_weights))
    return modelClass(weights, intercept)
开发者ID:dnprock,项目名称:spark,代码行数:7,代码来源:regression.py
示例5: predict
def predict(self, x):
    """
    Predict the label of one or more examples.

    :param x: Data point (feature vector),
              or an RDD of data points (feature vectors).
    :return: For an RDD, an RDD wrapping the Java model's predictions
             (an RDD built from an empty list when the input has no
             elements); for a single point, the value returned by the
             Java model's `predict`.
    """
    pickler = PickleSerializer()
    if isinstance(x, RDD):
        # Bulk prediction
        first = x.take(1)
        if not first:
            return self._sc.parallelize([])
        # Only pay for the conversion when elements are not already Vectors.
        if not isinstance(first[0], Vector):
            x = x.map(_convert_to_vector)
        jPred = self._java_model.predict(x._to_java_object_rdd()).toJavaRDD()
        jpyrdd = self._sc._jvm.PythonRDD.javaToPython(jPred)
        return RDD(jpyrdd, self._sc, BatchedSerializer(pickler, 1024))

    # Assume x is a single data point: pickle it and let the JVM-side
    # SerDe rebuild the vector before predicting.
    payload = bytearray(pickler.dumps(_convert_to_vector(x)))
    vec = self._sc._jvm.SerDe.loads(payload)
    return self._java_model.predict(vec)
示例6: predictSoft
def predictSoft(self, x):
    """
    Compute the membership of point `x`, or of each point in RDD `x`,
    to all mixture components.

    :param x: A vector, or an RDD of vectors, representing data points.
    :return: For a single vector, the `toArray()` form of the model's
             "predictSoft" result; for an RDD, an RDD whose rows have
             each been wrapped with `pyarray.array("d", row)`.
    """
    if not isinstance(x, RDD):
        return self.call("predictSoft", _convert_to_vector(x)).toArray()

    # Split the per-component (mu, sigma) pairs into two parallel tuples.
    components = [(g.mu, g.sigma) for g in self.gaussians]
    means, sigmas = zip(*components)
    points = x.map(_convert_to_vector)
    mixtureWeights = _convert_to_vector(self.weights)
    memberships = callMLlibFunc(
        "predictSoftGMM", points, mixtureWeights, means, sigmas)
    return memberships.map(lambda row: pyarray.array("d", row))
开发者ID:Raynes,项目名称:spark,代码行数:16,代码来源:clustering.py
示例7: predict
def predict(self, x):
    """
    Predict the dependent variable for a vector `x` of independent
    variable values.

    :param x: Any value accepted by `_convert_to_vector`.
    :return: `weights . x + intercept`.
    """
    features = _convert_to_vector(x)
    weighted = self.weights.dot(features)
    return weighted + self.intercept
开发者ID:31z4,项目名称:spark,代码行数:7,代码来源:regression.py
示例8: train
def train(
    cls,
    rdd,
    k,
    maxIterations=100,
    runs=1,
    initializationMode="k-means||",
    seed=None,
    initializationSteps=5,
    epsilon=1e-4,
    initialModel=None,
):
    """Train a k-means clustering model.

    :param rdd: RDD of points; each element is passed through
        `_convert_to_vector` before training.
    :param k, maxIterations, runs, initializationMode, seed,
        initializationSteps, epsilon: Forwarded unchanged to the
        "trainKMeansModel" backend call.
    :param initialModel: Optional `KMeansModel` whose `clusterCenters` are
        forwarded as initial centers; None forwards an empty list.
    :return: A `KMeansModel` built from the trained model's cluster centers.
    :raises TypeError: If `initialModel` is given but is not a `KMeansModel`.
    """
    clusterInitialModel = []
    if initialModel is not None:
        if not isinstance(initialModel, KMeansModel):
            # TypeError is a subclass of Exception, so callers that caught
            # the previous bare Exception keep working.
            raise TypeError(
                "initialModel is of " + str(type(initialModel)) + ". It needs "
                "to be of <type 'KMeansModel'>"
            )
        clusterInitialModel = [_convert_to_vector(c) for c in initialModel.clusterCenters]
    model = callMLlibFunc(
        "trainKMeansModel",
        rdd.map(_convert_to_vector),
        k,
        maxIterations,
        runs,
        initializationMode,
        seed,
        initializationSteps,
        epsilon,
        clusterInitialModel,
    )
    centers = callJavaFunc(rdd.context, model.clusterCenters)
    return KMeansModel([c.toArray() for c in centers])
开发者ID:BeforeRain,项目名称:spark,代码行数:34,代码来源:clustering.py
示例9: save
def save(self, sc, path):
    """
    Persist this model at the given path.

    :param sc: Context object providing the `_jvm` gateway and `_jsc` handle.
    :param path: Destination handed to the Java model's `save`.
    """
    centerVectors = [_convert_to_vector(center) for center in self.centers]
    javaCenters = _py2java(sc, centerVectors)
    # Build the JVM-side KMeansModel from the converted centers and let it
    # perform the actual save.
    javaModel = sc._jvm.org.apache.spark.mllib.clustering.KMeansModel(javaCenters)
    javaModel.save(sc._jsc.sc(), path)
开发者ID:11wzy001,项目名称:spark,代码行数:7,代码来源:clustering.py
示例10: computeCost
def computeCost(self, rdd):
    """
    Return the K-means cost (sum of squared distances of points to
    their nearest center) for this model on the given data.

    :param rdd: RDD of points; each is passed through `_convert_to_vector`.
    """
    points = rdd.map(_convert_to_vector)
    centerVectors = [_convert_to_vector(center) for center in self.centers]
    return callMLlibFunc("computeCostKmeansModel", points, centerVectors)
开发者ID:GuoNing89,项目名称:Study,代码行数:8,代码来源:clustering.py
示例11: test_serialize
def test_serialize(self):
    """Check that scipy sparse column matrices convert to the expected
    SparseVector, both directly and after a dumps/loads round trip."""
    from scipy.sparse import lil_matrix

    lil = lil_matrix((4, 1))
    lil[1, 0] = 1
    lil[3, 0] = 2
    expected = SparseVector(4, {1: 1, 3: 2})

    # Direct conversion: the LIL matrix itself, then each other format.
    self.assertEqual(expected, _convert_to_vector(lil))
    for converter in ("tocsc", "tocoo", "tocsr", "todok"):
        self.assertEqual(expected, _convert_to_vector(getattr(lil, converter)()))

    def roundTrip(matrix):
        # NOTE(review): `ser` is not defined in this function; presumably a
        # module-level serializer in the test file -- confirm.
        return ser.loads(ser.dumps(_convert_to_vector(matrix)))

    # Round trip: same formats as above except COO.
    self.assertEqual(expected, roundTrip(lil))
    for converter in ("tocsc", "tocsr", "todok"):
        self.assertEqual(expected, roundTrip(getattr(lil, converter)()))
开发者ID:drewrobb,项目名称:spark,代码行数:18,代码来源:test_linalg.py
示例12: predict
def predict(self, x):
    """
    Predict the dependent variable for a single vector, or for every
    vector in an RDD, of independent variable values.

    :param x: A vector-like value, or an RDD of such values.
    :return: `weights . x + intercept` for a single input; for an RDD,
             the RDD obtained by mapping this method over its elements.
    """
    if not isinstance(x, RDD):
        features = _convert_to_vector(x)
        return self.weights.dot(features) + self.intercept
    return x.map(self.predict)
开发者ID:BeforeRain,项目名称:spark,代码行数:9,代码来源:regression.py
示例13: setInitialWeights
def setInitialWeights(self, initialWeights):
    """
    Set the initial value of weights.

    This must be set before running trainOn and predictOn.

    :param initialWeights: Any value accepted by `_convert_to_vector`.
    :return: This object, after `_model` has been replaced by a
             `LinearRegressionModel` with the given weights and intercept 0.
    """
    weightVector = _convert_to_vector(initialWeights)
    self._model = LinearRegressionModel(weightVector, 0)
    return self
开发者ID:BeforeRain,项目名称:spark,代码行数:9,代码来源:regression.py
示例14: predict
def predict(self, x):
    """
    Return the most likely class for a data vector
    or an RDD of vectors.

    :param x: A vector-like value, or an RDD of such values.
    :return: The entry of `self.labels` at the argmax of
             `pi + x . theta^T`; for an RDD, the RDD obtained by mapping
             this method over its elements.
    """
    if isinstance(x, RDD):
        return x.map(lambda point: self.predict(point))
    features = _convert_to_vector(x)
    scores = self.pi + features.dot(self.theta.transpose())
    best = numpy.argmax(scores)
    return self.labels[best]
开发者ID:vijaykiran,项目名称:spark,代码行数:9,代码来源:classification.py
示例15: predict_all
def predict_all(self, x):
    """
    Rank every label for a data vector, or for each vector in an RDD.

    :param x: A vector-like value, or an RDD of such values.
    :return: A list of `(int_label, scaled_score)` pairs sorted by score,
             highest first; for an RDD, the RDD obtained by mapping this
             method over its elements.
    """
    if isinstance(x, RDD):
        return x.map(lambda point: self.predict_all(point))
    features = _convert_to_vector(x)
    rawScores = self.pi + features.dot(self.theta.transpose())
    # NOTE(review): `scale` is defined outside this block; its exact
    # normalization is not visible here -- confirm.
    scaledScores = scale(rawScores)
    intLabels = [int(label) for label in self.labels]
    return sorted(zip(intLabels, scaledScores), key=lambda pair: pair[1], reverse=True)
开发者ID:AlexFridman,项目名称:Multi-label-classification-with-spark,代码行数:9,代码来源:mlbayes.py
示例16: predict
def predict(self, x):
    """
    Classify a single data point as 1 or 0.

    :param x: Any value accepted by `_convert_to_vector`.
    :return: 1 when the logistic of `weights . x + _intercept` exceeds
             0.5, otherwise 0.
    """
    features = _convert_to_vector(x)
    margin = self.weights.dot(features) + self._intercept
    if margin > 0:
        probability = 1 / (1 + exp(-margin))
    else:
        # For non-positive margins only exp(margin) is evaluated, so the
        # exponent passed to exp() is never positive.
        expMargin = exp(margin)
        probability = expMargin / (1 + expMargin)
    if probability > 0.5:
        return 1
    return 0
开发者ID:aman010,项目名称:spark,代码行数:9,代码来源:classification.py
示例17: test_convert_to_vector
def test_convert_to_vector(self):
    """Check that a CSC matrix with unsorted indices still converts to
    the expected SparseVector."""
    from scipy.sparse import csc_matrix

    # Build a CSC matrix whose row indices are deliberately out of order.
    columnPointers = array([0, 2])
    rowIndices = array([3, 1])
    values = array([2.0, 1.0])
    unsorted = csc_matrix((values, rowIndices, columnPointers))
    self.assertFalse(unsorted.has_sorted_indices)

    expected = SparseVector(4, {1: 1, 3: 2})
    self.assertEqual(expected, _convert_to_vector(unsorted))
开发者ID:drewrobb,项目名称:spark,代码行数:10,代码来源:test_linalg.py
示例18: predict
def predict(self, x):
    """
    Predict values for a single data point or an RDD of points using
    the model trained.

    :param x: A vector-like value, or an RDD of such values.
    :return: Whatever the model's "predict" call returns for the
             converted input.
    """
    converted = x.map(_convert_to_vector) if isinstance(x, RDD) else _convert_to_vector(x)
    return self.call("predict", converted)
开发者ID:Liuchang0812,项目名称:spark,代码行数:10,代码来源:tree.py
示例19: transform
def transform(self, vector):
    """
    Computes the Hadamard product of the vector.

    :param vector: A vector-like value, or an RDD of such values; each is
                   passed through `_convert_to_vector` first.
    :return: The result of the "elementwiseProductVector" call with
             `self.scalingVector` and the converted input.
    """
    if not isinstance(vector, RDD):
        converted = _convert_to_vector(vector)
    else:
        converted = vector.map(_convert_to_vector)
    return callMLlibFunc("elementwiseProductVector", self.scalingVector, converted)
开发者ID:ChenZhongPu,项目名称:Simba,代码行数:10,代码来源:feature.py
示例20: findSynonyms
def findSynonyms(self, word, num):
    """
    Find "num" number of words closest in similarity to "word".

    word can be a string or vector representation.
    Returns a dataframe with two fields word and similarity (which
    gives the cosine similarity).
    """
    # NOTE(review): `basestring` is not a Python 3 builtin; presumably the
    # module aliases it for Python 3 -- confirm.
    query = word if isinstance(word, basestring) else _convert_to_vector(word)
    return self._call_java("findSynonyms", query, num)
开发者ID:alope107,项目名称:spark,代码行数:10,代码来源:feature.py
注:本文中的pyspark.mllib.linalg._convert_to_vector函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论