本文整理汇总了Python中sklearn.neighbors.KernelDensity类的典型用法代码示例。如果您正苦于以下问题：Python KernelDensity类的具体用法？Python KernelDensity怎么用？Python KernelDensity使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了KernelDensity类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test2
def test2():
    """Fit a Gaussian KDE to a small 1-D sample and plot its log-density.

    The sample is the concatenation of three evenly spaced ranges
    (0..10, 2..4 and 7..10, ten points each).  The curve drawn is the
    *log* density returned by ``score_samples``, evaluated at 1000 points
    on [0, 10].
    """
    segments = (
        np.linspace(0, 10, 10),
        np.linspace(2, 4, 10),
        np.linspace(7, 10, 10),
    )
    # KernelDensity expects a 2-D (n_samples, n_features) array.
    samples = np.concatenate(segments)[:, np.newaxis]

    estimator = KernelDensity(kernel='gaussian', bandwidth=0.75)
    estimator.fit(samples)

    grid = np.linspace(0, 10, 1000)[:, np.newaxis]
    plt.plot(grid, estimator.score_samples(grid))
    plt.show()
开发者ID:sophistcxf,项目名称:ThirdLibTest,代码行数:7,代码来源:test_kde.py
示例2: kdescatter
def kdescatter(xs, ys, log_color=False, atol=1e-4, rtol=1e-4,
               n_jobs=1, n_samp_scaling=100, n_samp_tuning=1000, ax=None,
               **kwargs):
    """
    Scatter-plot ``(xs, ys)`` with every point coloured by a kernel density
    estimate of the point cloud.

    Parameters
    ----------
    xs, ys : array-like
        Coordinates of the points (same length).
    log_color : bool
        If true, colour by log-density; otherwise by density.
    atol, rtol : float
        Absolute / relative tolerance forwarded to the final
        ``KernelDensity`` estimator that scores every point.
    n_jobs : int
        Forwarded to ``GridSearchCV`` for the bandwidth search.
    n_samp_scaling : int
        Maximum number of points subsampled to set the bandwidth scale.
    n_samp_tuning : int
        Maximum number of points subsampled for the bandwidth grid search.
    ax : object with a ``scatter`` method, optional
        Where to draw; defaults to ``matplotlib.pyplot``.
    **kwargs
        Forwarded to ``ax.scatter``.  ``linewidths``, ``s`` and ``cmap``
        get defaults (0, 20, 'winter') when not supplied.
    """
    if ax is None:
        import matplotlib.pyplot as plt
        ax = plt

    kwargs.setdefault('linewidths', 0)
    kwargs.setdefault('s', 20)
    kwargs.setdefault('cmap', 'winter')

    X = np.asarray([xs, ys]).T
    n = X.shape[0]

    # Candidate bandwidths span four decades around the square root of the
    # median squared pairwise distance of a random subsample.
    samp_X = X[np.random.choice(n, min(n_samp_scaling, n), replace=False)]
    median_sqdist = np.median(euclidean_distances(samp_X, squared=True))
    bws = np.logspace(-2, 2, num=10) * np.sqrt(median_sqdist)

    # Pick the bandwidth by cross-validation on a second random subsample.
    est = GridSearchCV(KernelDensity(), {'bandwidth': bws}, n_jobs=n_jobs)
    est.fit(X[np.random.choice(n, min(n_samp_tuning, n), replace=False)])
    bw = est.best_params_['bandwidth']

    # atol/rtol were previously accepted but never used; forward them so the
    # caller-visible tolerances actually control the density evaluation.
    kde = KernelDensity(bandwidth=bw, atol=atol, rtol=rtol)
    kde.fit(X)
    densities = kde.score_samples(X)
    if not log_color:
        # score_samples returns log-density; exponentiate in place.
        np.exp(densities, out=densities)
    ax.scatter(xs, ys, c=densities, **kwargs)
开发者ID:dougalsutherland,项目名称:hsfuap,代码行数:26,代码来源:kde_scatter.py
示例3: max_prob
def max_prob(df):
    """
    Return, for every row of ``df``, the value at which a KDE of that row's
    non-missing entries is largest.

    Parameters
    ----------
    df : pandas.DataFrame
        Each row is treated as an independent 1-D sample; NaNs are dropped.

    Returns
    -------
    list
        One entry per row, in index order: the KDE peak location rounded to
        4 decimals, or ``nan`` when the row has no non-missing values.
    """
    modes = []
    for ind in df.index:
        d = df.loc[ind].dropna().values
        if len(d) == 0:
            # np.nan (lower case) exists in every NumPy release; the np.NaN
            # alias was removed in NumPy 2.0.
            modes.append(np.nan)
            continue
        # Evaluate the density on 50 evenly spaced points spanning the row.
        x_grid = np.linspace(d.min(), d.max(), 50).reshape(-1, 1)
        kde = KernelDensity().fit(d.reshape(-1, 1))
        vals = np.exp(kde.score_samples(x_grid)).round(4)
        centre = x_grid[vals.argmax()][0]
        # TODO first element adds unnecessary decimal places (use decimal places class to fix)
        modes.append(round(centre, 4))
    return modes
开发者ID:koosha,项目名称:stock-anomaly,代码行数:28,代码来源:functions.py
示例4: surface_density
def surface_density(c, bandwidth=0.2, grid_step=0.02):
    """
    Given particle positions as a coordinate object, compute the
    surface density using a kernel density estimate.

    Parameters
    ----------
    c : coordinate object
        Must expose ``c.l.degree`` and ``c.b.degree`` (particle positions).
    bandwidth : float
        Bandwidth of the Epanechnikov kernel.
    grid_step : float
        Spacing of the evaluation grid, which covers 2..9.1 on the first
        axis and 26.5..33.6 on the second.

    Returns
    -------
    grid : numpy.ndarray
        Evaluation points, one ``(x, y)`` pair per row.
    log_dens : numpy.ndarray
        ``log10`` of the estimated density, shaped like the meshgrid.  The
        Epanechnikov kernel has compact support, so grid points far from
        every particle have zero density and therefore ``-inf`` here.

    Raises
    ------
    ImportError
        If scikit-learn is not available.
    """
    if not HAS_SKLEARN:
        raise ImportError("scikit-learn is required to use this function.")

    xgrid = np.arange(2., 9.+0.1, grid_step)  # deg
    ygrid = np.arange(26.5, 33.5+0.1, grid_step)  # deg
    meshies = np.meshgrid(xgrid, ygrid)
    # Build a real list: on Python 3 ``map`` is an iterator, which
    # np.vstack does not accept.
    grid = np.vstack([np.ravel(m) for m in meshies]).T

    x = c.l.degree
    y = c.b.degree
    skypos = np.vstack((x, y)).T

    kde = KernelDensity(bandwidth=bandwidth, kernel='epanechnikov')
    kde.fit(skypos)

    dens = np.exp(kde.score_samples(grid)).reshape(meshies[0].shape)
    log_dens = np.log10(dens)

    return grid, log_dens
开发者ID:adrn,项目名称:ophiuchus,代码行数:26,代码来源:plot.py
示例5: plot_sklearn_kde
def plot_sklearn_kde(df, support, column='AirTime', bins=50):
    """
    Plots a KDE and a histogram using sklearn.KernelDensity.
    Uses Gaussian kernels.
    The bandwidth is obtained from ``get_silverman_bandwidth(df, column)``.

    Parameters
    ----------
    df: A pandas.DataFrame
    support: A 1-d numpy array.
             Input data points for the probability density function.
    column: Name of the column of ``df`` to plot.
    bins: Number of histogram bins.

    Returns
    -------
    A matplotlib.axes.Axes instance.
    """
    bw = get_silverman_bandwidth(df, column)
    kde = KernelDensity(kernel='gaussian', bandwidth=bw)

    # Convert to a plain array first: pandas no longer supports
    # multi-dimensional indexing (``series[:, np.newaxis]``) on a Series.
    x = np.asarray(df[column])
    kde.fit(x[:, np.newaxis])
    y = kde.score_samples(support[:, np.newaxis])

    fig, ax = plt.subplots(figsize=(8, 5))
    # ``density=True`` replaces the ``normed`` keyword removed from matplotlib.
    ax.hist(np.ravel(x), bins=bins, alpha=0.5,
            color=sns.xkcd_rgb["denim blue"], density=True)
    # score_samples returns log-density; plot the density itself.
    ax.plot(support, np.exp(y))
    ax.set_xlabel(column, fontsize=14)
    ax.set_ylabel('Density', fontsize=14)
    ax.set_title('Kernel Density Plot', fontsize=14)
    sns.despine(ax=ax, offset=5, trim=True)

    return ax
开发者ID:nwngeek212,项目名称:UnsupervisedLearning,代码行数:35,代码来源:helper.py
示例6: kde_opt4
def kde_opt4(df_cell_train_feats, y_train, df_cell_test_feats):
    """
    Score test rows against per-class, per-feature 1-D density estimates.

    For each class label ``0 .. n_class-1`` and each prepared feature, a 1-D
    KDE is fitted on the training rows of that class and evaluated at the
    test values; the per-feature densities are multiplied together.

    Parameters
    ----------
    df_cell_train_feats, df_cell_test_feats : pandas.DataFrame
        Must contain the columns ``hour``, ``weekday``, ``accuracy``,
        ``x`` and ``y``.
    y_train : array-like
        Class label per training row; rows are selected with
        ``y_train == i`` for ``i`` in ``range(n_class)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_test, n_class)`` holding the product of the
        per-feature densities for each test row and class.
    """

    def prepare_feats(df):
        # Derived feature frame: fractional weekday and log10 accuracy.
        return pd.DataFrame({
            "hour": df["hour"],
            "weekday": df["weekday"] + df["hour"] / 24.,
            "accuracy": df["accuracy"].apply(lambda acc: np.log10(acc)),
            "x": df["x"],
            "y": df["y"],
        })

    logging.info("train kde_opt4 model")
    train_kde = prepare_feats(df_cell_train_feats)
    test_kde = prepare_feats(df_cell_test_feats)

    n_test = len(test_kde)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((n_test, n_class), "d")

    for label in range(n_class):
        class_rows = train_kde[y_train == label]
        likelihood = np.ones(n_test, "d")
        for feat in train_kde.columns.values:
            train_vals = class_rows[feat].values
            test_vals = test_kde[feat].values
            bgk_result = kdeBGK10(train_vals)
            if bgk_result is not None:
                # Use the bandwidth supplied by kdeBGK10.
                bandwidth, _mesh, _density = bgk_result
                kde = KernelDensity(kernel='gaussian', metric='manhattan', bandwidth=bandwidth)
                kde.fit(train_vals[:, np.newaxis])
                likelihood *= np.exp(kde.score_samples(test_vals[:, np.newaxis]))
            else:
                # Fallback: scipy KDE with a shrunken Scott factor.
                kde = gaussian_kde(train_vals, "scott")
                kde = gaussian_kde(train_vals, kde.factor * 0.741379)
                likelihood *= kde.evaluate(test_vals)
        y_test_pred[:, label] += likelihood

    return y_test_pred
开发者ID:aikinogard,项目名称:5th_place_solution_facebook_check_ins,代码行数:31,代码来源:model.py
示例7: kde_sklearn
def kde_sklearn(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scikit-learn

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.
    **kwargs
        Passed unchanged to the ``KernelDensity`` constructor.

    Returns
    -------
    out : numpy.array
        Density estimate, one value per row of `grid`.
    """
    estimator = KernelDensity(**kwargs)
    estimator.fit(data)
    # score_samples() yields log-densities; exponentiate to get the pdf.
    return np.exp(estimator.score_samples(grid))
开发者ID:jwhendy,项目名称:plotnine,代码行数:25,代码来源:density.py
示例8: draw_posterior_kld_hist
def draw_posterior_kld_hist(X_kld, X_vae, f_name, bins=25):
    """
    Plot KDE-smoothed histograms of two samples and save them as a PDF.

    Parameters
    ----------
    X_kld, X_vae : numpy.ndarray
        Samples to compare (flattened before use); drawn with a solid line
        labelled 'ORK' and a dashed line labelled 'VAR' respectively.
    f_name : str or file-like
        Destination passed to ``Figure.savefig``.
    bins : int
        The Gaussian bandwidth is ``(max - min) / bins`` of each sample.
    """
    import matplotlib.pyplot as plt
    # make a figure and configure an axis
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.set_xlabel('Posterior KLd Density')
        ax.set_title('Posterior KLds: Over-regularized vs. Standard')
        # Repeated plot() calls already overlay on the same axes;
        # Axes.hold() was removed from matplotlib, so it is not called.
        for (X, style, label) in [(X_kld, '-', 'ORK'), (X_vae, '--', 'VAR')]:
            X_samp = X.ravel()[:, np.newaxis]
            X_min = np.min(X_samp)
            X_max = np.max(X_samp)
            X_range = X_max - X_min
            sigma = X_range / float(bins)
            # extend the plotted range by a quarter of the data range
            plot_min = X_min - (X_range / 4.0)
            plot_max = X_max + (X_range / 4.0)
            plot_X = np.linspace(plot_min, plot_max, 1000)[:, np.newaxis]
            # make a kernel density estimator for the data in X
            kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
            ax.plot(plot_X, np.exp(kde.score_samples(plot_X)),
                    linestyle=style, label=label)
        ax.legend()
        # 'papertype' and 'frameon' are no longer accepted by savefig.
        fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                    orientation='portrait', format='pdf',
                    transparent=False, bbox_inches=None, pad_inches=0.1)
    finally:
        # release the figure even if plotting or saving failed
        plt.close(fig)
    return
开发者ID:Philip-Bachman,项目名称:NN-Python,代码行数:30,代码来源:WalkoutResults.py
示例9: kde_sklearn
def kde_sklearn(x, x_grid, bandwidth=0.2, **kwargs):
    """Univariate kernel density estimate using scikit-learn.

    ``x`` holds the 1-D sample and ``x_grid`` the 1-D evaluation points;
    both are turned into column vectors before being handed to
    ``KernelDensity``.  Extra keyword arguments go to the estimator's
    constructor.  Returns the density (not log-density) at ``x_grid``.
    """
    train_column = x[:, np.newaxis]
    grid_column = x_grid[:, np.newaxis]
    estimator = KernelDensity(bandwidth=bandwidth, **kwargs)
    estimator.fit(train_column)
    # score_samples() gives log-likelihoods, hence the exp().
    return np.exp(estimator.score_samples(grid_column))
开发者ID:wrshoemaker,项目名称:MicroMETE,代码行数:7,代码来源:generateFigures.py
示例10: pdf
def pdf(self, token, years, bandwidth=5):
    """
    Estimate a density function from a token's rank series.

    Each year in ``self.series(token)`` is repeated ``round(wpm)`` times to
    form the sample the KDE is fitted on.

    Args:
        token (str)
        years (range): Years at which to evaluate the density.
        bandwidth (int): Kernel bandwidth, in years.

    Returns: OrderedDict {year: density}
    """
    series = self.series(token)

    data = []
    for year, wpm in series.items():
        data += [year] * round(wpm)

    data = np.array(data)[:, np.newaxis]

    kde = KernelDensity(bandwidth=bandwidth).fit(data)

    # Materialize once so any iterable can be both scored and zipped.
    years = list(years)
    if not years:
        return OrderedDict()

    # The estimator needs 2-D input, so score every year in one call rather
    # than passing a bare scalar to score().
    grid = np.array(years)[:, np.newaxis]
    densities = np.exp(kde.score_samples(grid))

    return OrderedDict(zip(years, densities))
开发者ID:davidmcclure,项目名称:history-of-literature,代码行数:28,代码来源:wpm.py
示例11: plot_kde_histogram2
def plot_kde_histogram2(X1, X2, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X1/X2. Assume data is 1D.

    Parameters
    ----------
    X1, X2 : numpy.ndarray
        Samples to compare (flattened before use); X1 is drawn with a solid
        line and X2 with a dashed line.
    f_name : str or file-like
        Destination passed to ``Figure.savefig``.
    bins : int
        The Gaussian bandwidth is ``(max - min) / bins`` of each sample.
    """
    import matplotlib.pyplot as plt
    # make a figure and configure an axis
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        # Repeated plot() calls already overlay on the same axes;
        # Axes.hold() was removed from matplotlib, so it is not called.
        for (X, style) in [(X1, '-'), (X2, '--')]:
            X_samp = X.ravel()[:, np.newaxis]
            X_min = np.min(X_samp)
            X_max = np.max(X_samp)
            X_range = X_max - X_min
            sigma = X_range / float(bins)
            # extend the plotted range by a third of the data range
            plot_min = X_min - (X_range / 3.0)
            plot_max = X_max + (X_range / 3.0)
            plot_X = np.linspace(plot_min, plot_max, 1000)[:, np.newaxis]
            # make a kernel density estimator for the data in X
            kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
            ax.plot(plot_X, np.exp(kde.score_samples(plot_X)), linestyle=style)
        # 'papertype' and 'frameon' are no longer accepted by savefig.
        fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                    orientation='portrait', format=None,
                    transparent=False, bbox_inches=None, pad_inches=0.1)
    finally:
        # release the figure even if plotting or saving failed
        plt.close(fig)
    return
开发者ID:Philip-Bachman,项目名称:Sequential-Generation,代码行数:27,代码来源:utils.py
示例12: plot_kde_histogram
def plot_kde_histogram(X, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X. Assume data is univariate.

    Parameters
    ----------
    X : numpy.ndarray
        Sample (flattened before use).  At most 1,000,000 randomly chosen
        values are used.  The caller's array is left untouched.
    f_name : str or file-like
        Destination passed to ``Figure.savefig``.
    bins : int
        The Gaussian bandwidth is ``(max - min) / bins`` of the sample.
    """
    import matplotlib.pyplot as plt
    # permutation() returns a shuffled copy; shuffling X.ravel() in place
    # would reorder the caller's array whenever ravel() returns a view.
    X = np.random.permutation(X.ravel())
    X = X[0:min(X.shape[0], 1000000)]
    X_samp = X[:, np.newaxis]
    X_min = np.min(X_samp)
    X_max = np.max(X_samp)
    X_range = X_max - X_min
    sigma = X_range / float(bins)
    # extend the plotted range by a third of the data range
    plot_min = X_min - (X_range / 3.0)
    plot_max = X_max + (X_range / 3.0)
    plot_X = np.linspace(plot_min, plot_max, 1000)[:, np.newaxis]
    # make a kernel density estimator for the data in X
    kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
    # make a figure
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.plot(plot_X, np.exp(kde.score_samples(plot_X)))
        # 'papertype' and 'frameon' are no longer accepted by savefig.
        fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                    orientation='portrait', format=None,
                    transparent=False, bbox_inches=None, pad_inches=0.1)
    finally:
        # release the figure even if plotting or saving failed
        plt.close(fig)
    return
开发者ID:Philip-Bachman,项目名称:Sequential-Generation,代码行数:28,代码来源:utils.py
示例13: find_kernel
def find_kernel(data, numgrid = 1000, bw = 0.002):
    """
    Fit a Gaussian KDE to the first two columns of ``data``, draw a filled
    contour plot of the density on a fixed rectangular grid and return it.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; only columns 0 and 1 are used as the point coordinates.
    numgrid : int
        Number of grid points along each axis.
    bw : float
        Kernel bandwidth.

    Returns
    -------
    numpy.ndarray
        Density values of shape ``(numgrid, numgrid)``.
    """
    Xtrain = data[:, 0:2]

    # Set up the data grid for the contour plot
    # (linspace takes ``num``; the grid size now honours ``numgrid``).
    xgrid = np.linspace(-74.1, -73.65, num=numgrid)
    ygrid = np.linspace(40.5, 40.8, num=numgrid)
    X, Y = np.meshgrid(xgrid, ygrid)

    # NOTE(review): query points are assembled as (Y, X), so columns 0 and 1
    # of ``data`` are presumably in that same (y, x) order -- confirm.
    xy = np.vstack([Y.ravel(), X.ravel()]).T

    plt.figure()

    # construct a kernel density estimate of the distribution;
    # KernelDensity.fit ignores ``y``, so no target column is passed.
    kde = KernelDensity(bandwidth=bw,
                        kernel='gaussian')
    kde.fit(Xtrain)

    # evaluate the density on every grid point
    Z = np.exp(kde.score_samples(xy))
    Z = Z.reshape(X.shape)

    # plot contours of the density
    levels = np.linspace(0, Z.max(), 25)
    plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
    plt.title('BK CRIME')
    plt.show()

    return Z
开发者ID:mnlouie,项目名称:routeoptimize,代码行数:27,代码来源:crime_kernel_weighting.py
示例14: KDE_plt
def KDE_plt(categories, inter_arrivals):
    """
    Fit one Gaussian KDE (bandwidth 4) per category, save a plot of each
    density over a histogram of the samples, and return the estimators.

    Parameters
    ----------
    categories, inter_arrivals
        Passed through to ``extract_cat_samples`` and
        ``max_interarrival_mean``; one figure is produced per entry of
        ``categories``.

    Returns
    -------
    list
        The fitted ``KernelDensity`` objects, in category order.
    """
    KDEs = []
    for i in range(len(categories)):
        # single inter-arrivals in this category
        samples = extract_cat_samples(inter_arrivals, categories, i)
        X = np.asarray(samples)
        kde = KernelDensity(kernel='gaussian', bandwidth=4).fit(X)
        KDEs.append(kde)  # to use for prob_return()

        max_sample = max_interarrival_mean(categories, inter_arrivals, i)
        X_plot = np.linspace(0, 1.5*max_sample, 2000)[:, np.newaxis]
        log_dens = kde.score_samples(X_plot)

        fig = plt.figure(i)
        plt.plot(X_plot[:, 0], np.exp(log_dens), '-',
                 label="kernel = '{0}'".format('gaussian'))
        # ``density=True`` replaces the ``normed`` keyword removed from
        # matplotlib; alpha runs from 0 (transparent) to 1 (opaque).
        plt.hist(combine_inner_lists(samples), bins=40, density=True,
                 color="cyan", alpha=.3, label="histogram")
        plt.xlabel("inter-arrival time (days)")
        plt.ylabel("PDF")
        plt.legend()
        save_as = './app/static/img/cat_result/kde/kdeplt_cat'+str(i)+'.png'  # dump result into kde folder
        plt.savefig(save_as)
        plt.show(block=False)
        plt.close(fig)
    return KDEs
开发者ID:huangbow,项目名称:TigerInsight,代码行数:27,代码来源:prediction.py
示例15: test_kernel_density_sampling
def test_kernel_density_sampling(n_samples=100, n_features=3):
    """Check ``KernelDensity.sample`` for supported and unsupported kernels.

    Verifies the shape of the drawn samples, that every sample lies close
    to some training point, that kernels without sampling support raise
    ``NotImplementedError``, and that a default ``sample()`` call returns
    a ``(1, 1)`` array for 1-D data.
    """
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)

    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw samples; bandwidth is passed by keyword because the
        # constructor parameters are keyword-only in current scikit-learn
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range: distance from each
        # *sample* to its nearest training point (querying X against
        # itself would always give zero and test nothing)
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(samp, return_distance=True)

        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1))
开发者ID:BasilBeirouti,项目名称:scikit-learn,代码行数:32,代码来源:test_kde.py
示例16: kde
def kde(self, term, bandwidth=2000, samples=1000, kernel='gaussian'):
    """
    Estimate the kernel density of the instances of term in the text.

    Args:
        term (str): A stemmed term.
        bandwidth (int): The kernel bandwidth.
        samples (int): The number of evenly-spaced sample points.
        kernel (str): The kernel function.

    Returns:
        np.array: The density at each sample point, multiplied by
        ``len(self.tokens) / samples``.
    """
    # Offsets of the term, as a column vector for the estimator.
    offsets = np.array(self.terms[term])[:, np.newaxis]

    estimator = KernelDensity(kernel=kernel, bandwidth=bandwidth)
    estimator.fit(offsets)

    # Evenly spaced evaluation points across the whole token range.
    n_tokens = len(self.tokens)
    x_axis = np.linspace(0, n_tokens, samples)[:, np.newaxis]
    density = np.exp(estimator.score_samples(x_axis))

    # Scale by the spacing between sample points.
    return density * (n_tokens / samples)
开发者ID:ChengQikai,项目名称:textplot,代码行数:27,代码来源:text.py
示例17: kde_fit_quantiles
def kde_fit_quantiles(rtquants, nsamples=1000, bw=.1):
    """Fit a Gaussian KDE to quantile estimates and draw samples from it.

    ``rtquants`` is passed directly to ``KernelDensity.fit``; ``bw`` is the
    kernel bandwidth.  Returns ``nsamples`` draws from the fitted
    estimator as a flat array (e.g. to pass to sns.kdeplot()).
    """
    estimator = KernelDensity(kernel='gaussian', bandwidth=bw)
    estimator.fit(rtquants)
    drawn = estimator.sample(n_samples=nsamples)
    return drawn.flatten()
开发者ID:dunovank,项目名称:radd_kd,代码行数:7,代码来源:analyze.py
示例18: EstimateDensity
def EstimateDensity(self, name, df, histogram, f, s, ax):
    """
    Plot the distribution of column ``s`` for each value 0..4 of column ``f``.

    Parameters
    ----------
    name : sequence of str
        ``name[0]`` labels the grouping column, ``name[1]`` the plotted one.
    df : pandas.DataFrame
    histogram : bool
        If true, draw a grouped bar histogram via ``pl.hist``; otherwise
        draw one exponential-kernel KDE curve per group on ``ax``.
    f, s : column labels
        Grouping column and value column.
    ax : matplotlib axes
        Receives the KDE curves, the legend and the title.
    """
    # NOTE(review): the original indentation was lost; the legend/title
    # calls are assumed to run after both branches -- confirm.
    if histogram:
        # desired output is in histogram format
        finRes = []
        lab = []
        # ``range`` works on both Python 2 and 3 (``xrange`` is 2-only).
        for i in range(5):
            res = np.array(df[df[f] == i][s])
            if res.shape[0] > 0:
                finRes.append(res)
                lab.append(name[0] + ' = ' + str(i))
        # ``density=True`` replaces the ``normed`` keyword removed from
        # matplotlib.
        pl.hist(finRes, bins=2, density=True, histtype='bar', label=lab)
    else:
        # desired output is a simple line plot of the estimated density
        for i in range(5):
            res = np.array(df[df[f] == i][s])
            if res.shape[0] > 0:
                res = res.reshape(res.shape[0], 1)
                X_plot = np.array(np.linspace(-1, 5, 20)).reshape(20, 1)
                kde = KernelDensity(kernel='exponential', bandwidth=0.05)
                kde.fit(res)
                log_dens = kde.score_samples(X_plot)
                ax.plot(X_plot, np.exp(log_dens), label=name[0] + ' = ' + str(i))
    ax.legend()
    ax.set_title(name[1] + " distrubution for changing " + name[0])
开发者ID:ugur47,项目名称:AllState_Purchase_Prediction_Kaggle_Challange,代码行数:25,代码来源:reporting.py
示例19: xy_kde
def xy_kde(xy, bandwidth, N_grid=100, levels=(0.8, 0.6, 0.4, 0.2)):
    """
    Evaluate a 2-D Gaussian KDE on a regular grid and compute contour
    heights enclosing given fractions of the gridded density.

    Parameters
    ----------
    xy : numpy.ndarray
        Points, one ``(x, y)`` pair per row.
    bandwidth : float
        Kernel bandwidth.
    N_grid : int
        Number of grid cells along each axis, spanning the data range.
    levels : sequence of float
        Cumulative fractions of the summed grid density for which a
        contour height is wanted.

    Returns
    -------
    H : numpy.ndarray
        Density at the cell centres, shape ``(N_grid, N_grid)``.
    V : numpy.ndarray
        Contour heights, sorted ascending.
    x_grid, y_grid : numpy.ndarray
        Meshgrid of the cell centres.
    bandwidth : float
        The bandwidth that was used.
    """
    x_edges = np.linspace(np.min(xy[:, 0]), np.max(xy[:, 0]), N_grid + 1)
    y_edges = np.linspace(np.min(xy[:, 1]), np.max(xy[:, 1]), N_grid + 1)

    # cell centres: left edge plus half the cell width
    x_centres = x_edges[:-1] + np.diff(x_edges) / 2
    y_centres = y_edges[:-1] + np.diff(y_edges) / 2

    x_grid, y_grid = np.meshgrid(x_centres, y_centres)
    xy_grid = np.array([np.ravel(x_grid), np.ravel(y_grid)]).T

    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(xy)
    H = np.exp(kde.score_samples(xy_grid).reshape(N_grid, N_grid))

    # this bit is taken from the corner_plot.py method.
    ######################################
    # Sort densities from highest to lowest and accumulate their share of
    # the total; the height for a level is the last density whose
    # cumulative share is still within that level.
    Hflat = H.flatten()
    inds = np.argsort(Hflat)[::-1]
    Hflat = Hflat[inds]
    sm = np.cumsum(Hflat)
    sm /= sm[-1]
    V = np.empty(len(levels))
    for i, v0 in enumerate(levels):
        try:
            V[i] = Hflat[sm <= v0][-1]
        except IndexError:
            # no cell fits inside this level: fall back to the peak density
            V[i] = Hflat[0]
    #####################################
    V = np.sort(V)

    return H, V, x_grid, y_grid, bandwidth
开发者ID:RossHart,项目名称:astro_codes,代码行数:29,代码来源:contours.py
示例20: sklearn_density
def sklearn_density(sample_points, evaluation_points):
    """
    Estimate the probability density function from which a set of sample
    points was drawn and return the estimated density at the evaluation points.

    ``sample_points`` is an ``(n, d)`` array; ``evaluation_points`` must be
    compatible with it column-wise.
    """
    from sklearn.neighbors import KernelDensity

    # Silverman bandwidth estimator
    n, d = sample_points.shape
    bandwidth = (n * (d + 2) / 4.)**(-1. / (d + 4))

    # Standardize each column so a single bandwidth suits all of them;
    # the density is rescaled by sigma at the end to correct the area.
    mu = mean(sample_points, axis=0)
    sigma = std(sample_points, axis=0)
    data = (sample_points - mu)/sigma
    points = (evaluation_points - mu)/sigma

    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
                        rtol=1e-6, atol=1e-6)
    kde.fit(data)
    log_pdf = kde.score_samples(points)

    # undo the x scaling on the data points
    return exp(log_pdf)/np.prod(sigma)
开发者ID:aschankler,项目名称:bumps,代码行数:34,代码来源:entropy.py
注：本文中的sklearn.neighbors.KernelDensity类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论