本文整理汇总了Python中sklearn.decomposition.NMF类的典型用法代码示例。如果您正苦于以下问题：Python NMF类的具体用法？Python NMF怎么用？Python NMF使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了NMF类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: reduceDimensionality
def reduceDimensionality(n_components=100):
    """Reduce the board-game feature table to ``n_components`` NMF features.

    Reads ``data/gameData.csv``, rescales the numeric columns that are
    present, leaves out the string/identifier columns, factorizes the
    remaining matrix with NMF and writes the result to
    ``data/reducedGameFeatures.csv`` (board game id in the first column,
    followed by the latent features).

    Args:
        n_components: Number of latent features NMF should produce.
    """
    # import the csv into a pandas df
    df = pd.read_csv('data/gameData.csv')

    # Min-max scale the numeric columns that exist in the file. Note the
    # extra division by 2: each column ends up in [0, 0.5], not [0, 1].
    numericColumns = ['maxPlayers', 'maxPlaytime', 'minAge', 'minPlayers', 'minPlaytime', 'playtime']
    colsToNormalize = [col for col in numericColumns if col in df.columns]
    df[colsToNormalize] = df[colsToNormalize].apply(
        lambda x: (x - x.min()) / (x.max() - x.min()) / 2)

    # String columns and identifiers are excluded from the factorization.
    colsToDrop = ['artists', 'categories', 'designers', 'families', 'publishers', 'mechanics', 'boardGameId', 'yearPublished']

    # Keep the board game id column so it can be attached to the output.
    boardGameIds = df['boardGameId']

    # DataFrame.as_matrix() is gone from current pandas; selecting the
    # columns and taking .values yields the same array.
    featureCols = [col for col in df.columns if col not in colsToDrop]
    arr = df[featureCols].values
    # Missing values (and 0/0 from constant columns above) become 0.
    arr = np.nan_to_num(arr)

    # Perform NMF with n_components latent dimensions
    model = NMF(n_components=n_components)
    W = model.fit_transform(arr)

    # Prepend the ids as column 0 and write everything out as CSV.
    W = np.insert(W, 0, boardGameIds, axis=1)
    np.savetxt("data/reducedGameFeatures.csv", W, delimiter=",")
开发者ID:hutchasaurus1,项目名称:boardGameRecommender,代码行数:27,代码来源:dimensionalityReduction.py
示例2: nmf_model2
def nmf_model2(n_topics, document_term_mat):
    """Factorize a document-term matrix with NMF.

    Args:
        n_topics: Number of NMF components to extract.
        document_term_mat: Matrix handed to ``NMF.fit_transform``.

    Returns:
        A ``(W, H)`` tuple: the ``fit_transform`` output and the fitted
        model's ``components_``.
    """
    model = NMF(l1_ratio=0.0, n_components=n_topics)
    doc_topic_weights = model.fit_transform(document_term_mat)
    return doc_topic_weights, model.components_
开发者ID:maryamre,项目名称:final_project,代码行数:7,代码来源:main0.py
示例3: extractTemplate
def extractTemplate(y, w=d_w, h=d_h, n_components=nc):
    """Learn NMF spectral templates from the magnitude STFT of a signal.

    Args:
        y: Signal passed to ``librosa.core.stft``.
        w: FFT size (``n_fft``); defaults to the module-level ``d_w``.
        h: Hop length; defaults to the module-level ``d_h``.
        n_components: Number of NMF components; defaults to the
            module-level ``nc``.

    Returns:
        The transposed ``components_`` of the fitted model.
    """
    # max_iter and beta are read from module-level settings.
    nmf = NMF(n_components=n_components, max_iter=max_iter, beta=beta)
    spectrogram = librosa.core.stft(y, n_fft=w, hop_length=h)
    # The magnitude spectrogram is transposed before fitting, so the
    # learned components are transposed back on the way out.
    magnitude_frames = np.abs(spectrogram).T
    nmf.fit_transform(magnitude_frames)
    return nmf.components_.T
开发者ID:LemonATsu,项目名称:VIOCLA-source-separation,代码行数:7,代码来源:NMF.py
示例4: do_NMF
def do_NMF(sparse_matrix):
    """Project a matrix onto three NMF components and report the run time.

    Args:
        sparse_matrix: Input handed to ``NMF.fit_transform``.

    Returns:
        The ``fit_transform`` output for a 3-component model.
    """
    started_at = time.time()
    print("* Performing NMF on sparse matrix ... ")
    coordinates = NMF(n_components=3).fit_transform(sparse_matrix)
    elapsed = time.time() - started_at
    print("done in %0.3fs." % elapsed)
    return coordinates
开发者ID:hclent,项目名称:BioNLP-literature-tool,代码行数:7,代码来源:kmeans1.py
示例5: __Factorize_NMF
def __Factorize_NMF(self, K):
    """Factorize ``self._mat`` into user and item factor matrices.

    Args:
        K: Number of latent factors (NMF components).

    Returns:
        ``(user_fmat, item_fmat)``: the ``fit_transform`` output and the
        transposed ``components_`` of the fitted model.
    """
    model = NMF(n_components=K, max_iter=self._iteration)
    # fit_transform() fits the model itself; a separate fit() call first
    # would only run the whole factorization a second time.
    user_fmat = model.fit_transform(self._mat)
    item_fmat = model.components_.T
    return user_fmat, item_fmat
开发者ID:BinXia,项目名称:KeepLearning,代码行数:7,代码来源:MatrixFactorization.py
示例6: applyNMF
def applyNMF(self, number_of_clusters, country_specific_tweets):
    """Cluster tweets into topics with NMF.

    Progress and diagnostics are printed only when ``self.results`` is
    truthy.

    Args:
        number_of_clusters: Number of NMF components.
        country_specific_tweets: Input forwarded to
            ``self.extractFeatures``.

    Returns:
        ``(name, parameters, top10, labels)``: the literal ``"nmf"``, the
        model's ``get_params()`` dict, and the two values produced by
        ``self.printTopicCluster`` (labels converted to a numpy array).
    """
    name = "nmf"
    train, feature_names = self.extractFeatures(country_specific_tweets, False)

    # Fit the NMF model
    if self.results:
        print("Fitting the NMF model", end=" - ")
    started_at = time()
    model = NMF(n_components=number_of_clusters, random_state=1, alpha=.1, l1_ratio=.5)
    nmf = model.fit(train)
    if self.results:
        print("done in %0.3fs." % (time() - started_at))
        print("\nNMF:")

    parameters = nmf.get_params()
    if self.results:
        print("Parameter: " + str(parameters))

    # Topic-term weights and per-document topic weights.
    topics = nmf.components_
    doc_topic = nmf.transform(train)

    top10, raw_labels = self.printTopicCluster(topics, doc_topic, feature_names)
    labels = numpy.asarray(raw_labels)

    if self.results:
        score = metrics.silhouette_score(train, labels)
        print("Silhouette Coefficient {0}: {1}".format(name, score))

    return name, parameters, top10, labels
开发者ID:michaelprummer,项目名称:datascience,代码行数:30,代码来源:clustering.py
示例7: nmf_df
def nmf_df(sym, k, coll):
    """Build per-day mean NMF features for documents whose text matches ``sym``.

    Args:
        sym: Pattern compiled with ``re.compile`` and matched against each
            document's ``'text'`` field; also stored in the output's
            ``'sym'`` column.
        k: Number of NMF components.
        coll: Collection object exposing ``find`` (presumably a MongoDB
            collection; confirm against the caller). Returned documents
            must carry ``'text'`` and ``'created_at'``.

    Returns:
        DataFrame indexed by ``date`` holding the daily mean of the columns
        ``lat0`` .. ``lat{k-1}`` plus a constant ``sym`` column.
    """
    data = list(coll.find({'text': {'$in': [re.compile(sym)]}}))
    sents = [doc['text'] for doc in data]
    dates = [str(doc['created_at']) for doc in data]

    vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
    X = vectorizer.fit_transform(sents)

    model = NMF(n_components=k, init='random', random_state=0)
    latent_features = model.fit_transform(X)

    df = pd.DataFrame(latent_features)
    # range() works on Python 2 and 3 (the original used xrange).
    df.columns = ['lat' + str(n) for n in range(len(df.columns))]

    # Truncate every timestamp to midnight so rows group by calendar day.
    # pd.datetools.normalize_date no longer exists in pandas;
    # Series.dt.normalize() is the equivalent.
    timestamps = pd.to_datetime(pd.Series(dates, index=df.index))
    df['date'] = timestamps.dt.normalize()

    grouped_data = df.groupby(['date']).mean()
    grouped_data['sym'] = sym
    return grouped_data
开发者ID:gravity226,项目名称:NASDAQ,代码行数:31,代码来源:clustering.py
示例8: tfidf_nmf
def tfidf_nmf(release_texts, n_components=10, max_features=None):
    '''
    Build a tf-idf representation of the texts and fit an NMF model on it.

    INPUT:
    - release_texts: documents passed to the tf-idf vectorizer
    - n_components: number of latent features for the NMF model to find
    - max_features: max number of features (vocabulary size) for the tfidf model to consider

    OUTPUT:
    - tfidf_vectorizer: fitted tfidf model object
    - tfidf_sparse: tfidf matrix, l1-normalized so each row sums to one
    - nmf: fitted NMF model object
    - W: output of nmf.transform on the normalized tfidf matrix
    '''
    # tfidf model, using the project's custom stop-word list
    vectorizer = TfidfVectorizer(
        max_df=0.95,
        min_df=2,
        stop_words=make_stop_words(),
        max_features=max_features,
    )
    raw_tfidf = vectorizer.fit_transform(release_texts)

    # normalize row-wise so each row sums to one
    doc_term = normalize(raw_tfidf, norm='l1', axis=1)

    # nmf model: fit first, then transform the same matrix
    factorizer = NMF(n_components=n_components, random_state=1)
    doc_weights = factorizer.fit(doc_term).transform(doc_term)

    return vectorizer, doc_term, factorizer, doc_weights
开发者ID:NashC,项目名称:startup_press_release_analyzer,代码行数:27,代码来源:model.py
示例9: hog2hognmf
def hog2hognmf(hog_feature):
    """Transform HOG feature into HOG-NMF feature.

    Parameters
    ----------
    hog_feature: np.ndarray
        HOG feature, indexed as a flat vector. The slicing below expects
        3780 values (7 * 500 + 280).

    Returns
    -------
    np.ndarray
        Flat vector made of the 8 sum-normalised columns of H followed by
        the 500 sum-normalised rows of W (two values each).
    """
    mat = np.zeros((500, 8), dtype=np.float32)
    NMFmodel = NMF(n_components=2, init="random", random_state=0)

    # Transform 3780 into 500 * 8: seven full columns, then the remaining
    # 280 values at the top of the eighth column (the rest stays zero).
    for i in range(7):
        mat[:, i] = hog_feature[i * 500 : (i + 1) * 500]
    mat[:280, 7] = hog_feature[3500:]

    W = NMFmodel.fit_transform(mat)
    H = NMFmodel.components_

    # Scale every column of H to sum to one; columns whose sum is exactly
    # zero are multiplied by zero instead, avoiding a division by zero.
    col_sums = H.sum(axis=0)
    zero_cols = col_sums == 0
    H[:, zero_cols] *= 0.0
    H[:, ~zero_cols] /= col_sums[~zero_cols]

    # Same treatment for every row of W.
    row_sums = W.sum(axis=1)
    zero_rows = row_sums == 0
    W[zero_rows, :] *= 0.0
    W[~zero_rows, :] /= row_sums[~zero_rows][:, np.newaxis]

    # One concatenation instead of hundreds of np.append calls. H is
    # emitted column by column and W row by row; the leading empty float32
    # array keeps the dtype promotion of the incremental version.
    return np.concatenate(
        (np.array([], dtype=np.float32), H.T.ravel(), W.ravel()))
开发者ID:eriche2016,项目名称:yatpd,代码行数:32,代码来源:hog2hognmf.py
示例10: nmf
def nmf(self, **kwargs):
    """Perform dimensionality reduction using NMF.

    All keyword arguments are forwarded to the ``NMF`` constructor.
    Returns a new ``Space`` wrapping the reduced matrix.
    """
    reduced_matrix = NMF(**kwargs).fit_transform(self.matrix)
    # TODO: it is incorrect to pass self.column_labels! There are not column labels.
    return Space(reduced_matrix, self.row_labels, self.column_labels)
开发者ID:masteradamo,项目名称:fowler.corpora,代码行数:7,代码来源:models.py
示例11: get_topics_nmf
def get_topics_nmf(urls, num_topics):
    '''Extract latent topics from a corpus of documents via NMF.

    Input: urls - iterable of document links, each passed to get_text();
           num_topics - number of NMF components to fit.
    Output: (M, topics, text, title_list, urls) where M is the tf-idf
    matrix, topics holds one space-joined string of the ten
    highest-weighted lemmatized terms per component, and text is the
    cleaned corpus.
    '''
    article_info = [get_text(url) for url in urls]

    # The first element of every get_text() result is extended into one
    # flat text list.
    text = []
    for thing in article_info:
        text.extend(thing[0])
    text = clean_pdf_text(text)

    tfidf_math = TfidfVectorizer(max_features=100, stop_words=math_stop(),
                                 ngram_range=(1, 1), decode_error='ignore')
    M = tfidf_math.fit_transform(text)

    # Build the lemmatizer once instead of once per vocabulary word.
    lemmatizer = WordNetLemmatizer()
    feature_names = [lemmatizer.lemmatize(word)
                     for word in tfidf_math.get_feature_names()]

    nmf = NMF(n_components=num_topics)
    nmf.fit(M)

    # argsort()[:-11:-1] yields the indices of the ten largest weights,
    # largest first.
    topics = [" ".join(feature_names[i] for i in topic.argsort()[:-10 - 1:-1])
              for topic in nmf.components_]

    # NOTE(review): title_list is never assigned in this function; unless
    # it is a module-level name this raises NameError - confirm where the
    # titles are supposed to come from.
    return M, topics, text, title_list, urls
开发者ID:ColinFerguson,项目名称:Project,代码行数:28,代码来源:math_scraping_and_recommending_functions.py
示例12: get_LDA
def get_LDA(X, num_components=10, show_topics=True):
    ''' Latent Dirichlet Allocation by NMF.
    21 Nov 2015, Keunwoo Choi
    LDA for a song-tag matrix. The motivation is same as get_LSI.
    With NMF, it is easier to explain what each topic represent - by inspecting 'H' matrix,
    where X ~= X' = W*H as a result of NMF.
    It is also good to have non-negative elements, straight-forward for both W and H.

    Args:
        X: matrix to factorize.
        num_components: number of NMF components (topics).
        show_topics: when true, print the top ten tags of every topic.

    Returns:
        W with every non-zero row divided by its maximum, then divided by
        the overall maximum.

    Side effects: pickles the fitted model and the per-topic tag rankings
    under PATH_DATA.
    '''
    from sklearn.decomposition import NMF
    nmf = NMF(init='nndsvd', n_components=num_components, max_iter=400)  # 400 is too large, but it doesn't hurt.
    W = nmf.fit_transform(X)
    H = nmf.components_
    print('=' * 60)
    print("NMF done with k=%d, average error:%2.4f" % (num_components, nmf.reconstruction_err_ / (X.shape[0] * X.shape[1])))

    # Context managers make sure every file handle is closed again.
    with open(PATH_DATA + FILE_DICT['sorted_tags'], 'r') as f_tags:
        moodnames = cP.load(f_tags)  # list, 100

    term_rankings = []
    for topic_index in range(H.shape[0]):
        # indices of the ten largest weights of this topic, largest first
        top_indices = np.argsort(H[topic_index, :])[::-1][0:10]
        term_ranking = [moodnames[i] for i in top_indices]
        term_rankings.append(term_ranking)
        if show_topics:
            print("Topic %d: %s" % (topic_index, ", ".join(term_ranking)))
    print('=' * 60)

    with open(PATH_DATA + 'NMF_object.cP', 'w') as f_model:
        cP.dump(nmf, f_model)
    with open(PATH_DATA + ('topics_strings_%d_components.cP' % num_components), 'w') as f_topics:
        cP.dump(term_rankings, f_topics)

    # Scale each row by its own maximum; all-zero rows are left untouched.
    for row_idx, row in enumerate(W):
        row_max = np.max(row)
        if row_max != 0:
            W[row_idx] = row / row_max
    return W / np.max(W)  # return normalised matrix, [0, 1]
开发者ID:keunwoochoi,项目名称:magnatagatune,代码行数:35,代码来源:main_prepare.py
示例13: test_nmf_fit_close
def test_nmf_fit_close(solver):
    """Check that NMF reconstructs a small random non-negative matrix closely."""
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(6, 5))
    pnmf = NMF(5, solver=solver, init='nndsvdar', random_state=0,
               max_iter=600)
    # Test that the fit is not too far away
    reconstruction_error = pnmf.fit(X).reconstruction_err_
    assert_less(reconstruction_error, 0.1)
开发者ID:kjacks21,项目名称:scikit-learn,代码行数:7,代码来源:test_nmf.py
示例14: fit_nmf
def fit_nmf(tfidf):
    '''Fit NMF on a tfidf matrix and collect the top words per topic.

    Relies on the module-level n_topics, n_top_words and tfidf_vectorizer.
    Returns the fitted model together with whatever print_top_words
    returns for it.
    '''
    model = NMF(n_components=n_topics, random_state=1, alpha=.1, l1_ratio=.5)
    model.fit(tfidf)
    feature_names = tfidf_vectorizer.get_feature_names()
    topic_words = print_top_words(model, feature_names, n_top_words)
    return model, topic_words
开发者ID:scsherm,项目名称:Congress_work,代码行数:7,代码来源:topic_modeling2.py
示例15: produceEncoding
def produceEncoding(trainX, nComponents):
    '''Fit an NMF model with nComponents components on the training
    data matrix and return the fitted model.'''
    settings = dict(
        n_components=nComponents,
        solver='cd',
        tol=1e-4,
        max_iter=200,
        alpha=0.0,
    )
    encoder = NMF(**settings)
    encoder.fit(trainX)
    return encoder
开发者ID:potachen,项目名称:COS424_Project,代码行数:7,代码来源:nmf.py
示例16: extract_template
def extract_template(comps, music_stft):
    """Split an STFT into the part explained by fixed templates and the rest.

    Args:
        comps: Template matrix; one NMF component per column.
        music_stft: STFT to separate; its magnitude is factorized.

    Returns:
        ``(template, residual)``: ``music_stft`` multiplied by a binary
        mask and by the mask's complement.
    """
    n_templates = comps.shape[1]
    # NMF with one component per template column and caller-supplied
    # starting matrices.
    nmf = NMF(n_components=n_templates, init='custom')

    # Random starting factors, drawn W first and H second.
    W = np.random.rand(comps.shape[0], n_templates)
    H = np.random.rand(n_templates, music_stft.shape[1])
    # W is replaced by the templates that should be extracted ...
    W[:, 0:n_templates] = comps

    # ... and update_W=False asks the factorization to leave it alone.
    comps_music = nmf.fit_transform(np.abs(music_stft), W=W, H=H, update_W=False)
    acts_music = nmf.components_

    # Reconstruction of the magnitude from templates and activations.
    reconstruction = comps_music.dot(acts_music)

    # Ratio of the reconstruction to the element-wise larger of itself and
    # the input magnitude; 0/0 entries are turned into 0.
    ceiling = np.maximum(reconstruction, np.abs(music_stft))
    soft_mask = np.nan_to_num(np.divide(reconstruction, ceiling))

    # Rounding turns the ratio into a binary mask.
    mask = np.round(soft_mask)

    # template - extracted template, residual - everything that's leftover.
    template = np.multiply(music_stft, mask)
    residual = np.multiply(music_stft, 1 - mask)
    return template, residual
开发者ID:interactiveaudiolab,项目名称:separation_segmentation_ismir,代码行数:34,代码来源:functions.py
示例17: extract_reconstruction_errors
def extract_reconstruction_errors(comps, music_stft, window_length, hop):
    """Measure how well fixed templates explain sliding windows of an STFT.

    Args:
        comps: Template matrix; one NMF component per column.
        music_stft: STFT whose columns are scanned window by window.
        window_length: Number of columns per window.
        hop: Number of columns the window advances each step.

    Returns:
        List with the model's ``reconstruction_err_`` for every window
        that starts at ``0, hop, 2*hop, ...`` while
        ``start + window_length < music_stft.shape[1]``.

    Raises:
        ValueError: If ``hop`` is not positive; the window would never
            advance and the scan would not terminate.
    """
    if hop <= 0:
        raise ValueError("hop must be positive, got %r" % (hop,))

    K = comps.shape[1]
    # initialize transformer (non-negative matrix factorization) with K components
    transformer = NMF(n_components=K, init='custom')
    # W is drawn randomly and then fully overwritten by comps below; the
    # draw is kept so the global random stream matches earlier runs.
    W = np.random.rand(comps.shape[0], K)

    errors = []
    start = 0
    while start + window_length < music_stft.shape[1]:
        block = music_stft[:, start:start + window_length]
        H = np.random.rand(K, block.shape[1])
        W[:, 0:K] = comps
        # update_W=False asks the factorization to keep the templates fixed.
        params = {'W': W, 'H': H, 'update_W': False}
        transformer.fit_transform(np.abs(block), **params)
        # Only the fitted error is needed, so the reconstruction itself is
        # not computed.
        errors.append(transformer.reconstruction_err_)
        start = start + hop
    return errors
开发者ID:interactiveaudiolab,项目名称:separation_segmentation_ismir,代码行数:25,代码来源:functions.py
示例18: _make_test_matrix
def _make_test_matrix(self, matrix, test_decomp='svd'):
    '''
    Input: a matrix
    Output: a recomposed estimated ratings matrix

    Decomposes input matrix according to decomposition type
    and then makes an estimated ratings matrix.

    test_decomp selects the decomposition: 'svd' or 'nmf'. Any other
    value returns None.
    '''
    if test_decomp == 'svd':
        _, s1, V = svd(matrix)
        # Only self.test_how is used; an earlier assignment from
        # self.s_option was overwritten immediately and has been removed.
        s = self._get_s(s1, self.test_how)
        return np.dot(self.matrix_1.U, np.dot(s, V))
    if test_decomp == 'nmf':
        model = NMF()
        H = model.fit_transform(matrix)
        # Debug output, written so it runs on Python 2 and 3.
        print(H)
        W = model.components_
        # The left factor comes from self.matrix_1, not from this fit.
        return np.dot(self.matrix_1.H, W)
    return None
开发者ID:supwest,项目名称:capstone,代码行数:29,代码来源:matrix_builder.py
示例19: extract_reconstruction_error_beats
def extract_reconstruction_error_beats(comps, music_stft, beats):
    """Measure how well fixed templates explain each beat-to-beat segment.

    Args:
        comps: Template matrix; one NMF component per column.
        music_stft: STFT whose columns are cut at the beat positions.
        beats: Sequence of column indices; consecutive entries delimit
            one segment.

    Returns:
        List with one ``norm`` distance per segment between the weighted
        reconstruction and the magnitude of the weighted segment.
    """
    n_templates = comps.shape[1]
    # NMF with one component per template column and caller-supplied
    # starting matrices.
    nmf = NMF(n_components=n_templates, init='custom')
    # W starts random and is overwritten with the templates for every segment.
    W = np.random.rand(comps.shape[0], n_templates)

    # A segment spans from (lookback + 1) beats back up to the current beat.
    lookback = 0

    # One weight per STFT row; currently all ones, scaled by their maximum.
    weight = np.array([1] * music_stft.shape[0])
    weight = weight / np.max(weight)

    errors = []
    for beat_idx in range(lookback + 1, len(beats)):
        seg_begin = beats[beat_idx - (lookback + 1)]
        seg_end = beats[beat_idx]
        block = music_stft[:, seg_begin:seg_end]

        H = np.random.rand(n_templates, block.shape[1])
        W[:, 0:n_templates] = comps
        # update_W=False asks the factorization to keep the templates fixed.
        comps_block = nmf.fit_transform(np.abs(block), W=W, H=H, update_W=False)
        acts_block = nmf.components_

        # Reconstruct the segment, then weight both it and the input.
        weighted_reconstruction = comps_block.dot(acts_block).T * weight
        weighted_block = block.T * weight
        errors.append(norm(weighted_reconstruction - np.abs(weighted_block)))
    return errors
开发者ID:interactiveaudiolab,项目名称:separation_segmentation_ismir,代码行数:30,代码来源:functions.py
示例20: nnMatrixFactorisation
def nnMatrixFactorisation(data, labels, new_dimension):
    """Reduce ``data`` to ``new_dimension`` features with NMF and time it.

    Args:
        data: Matrix handed to ``NMF.fit_transform``.
        labels: Not used by this function.
        new_dimension: Number of NMF components.

    Returns:
        ``(reduced, seconds)``: the transformed data and the elapsed
        wall-clock time of model construction plus fitting.
    """
    # Parenthesised so the line is valid on Python 3 as well as Python 2.
    print("non negative matrix factorisation...")
    start = time.time()
    mf = NMF(n_components=new_dimension)
    reduced = mf.fit_transform(data)
    end = time.time()
    return (reduced, end - start)
开发者ID:sebastian-alfers,项目名称:master-thesis,代码行数:7,代码来源:dimensionality_reduction.py
注：本文中的sklearn.decomposition.NMF类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论