• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python kde.KernelDensity类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.neighbors.kde.KernelDensity的典型用法代码示例。如果您正苦于以下问题:Python KernelDensity类的具体用法?Python KernelDensity怎么用?Python KernelDensity使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了KernelDensity类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: simplify3

def simplify3(nk):
    """Smooth a histogram by averaging between KDE density minima.

    Draws samples from the discrete distribution defined by ``nk``, fits a
    Gaussian KDE, locates the local minima of the estimated density, and
    replaces each segment of ``nk`` between consecutive minima with the
    segment's mean value.

    Parameters:
        nk: array-like of non-negative counts/weights.

    Returns:
        np.ndarray of the same length as ``nk`` with each inter-minimum
        segment replaced by its average.
    """
    result = []
    nk = np.array(nk)
    # Normalise to a probability vector for the discrete sampler.
    xk = nk / float(np.sum(nk))

    sdiv = 1000  # resolution of the KDE evaluation grid
    X_plot = np.linspace(0, len(xk), sdiv)[:, np.newaxis]
    # NOTE(review): b=7 caps the support at 7 regardless of len(xk) -- confirm.
    custm = stats.rv_discrete(name='custm', a=0, b=7, values=(range(len(xk)), xk))
    yk = custm.rvs(size=100000)
    # Gaussian KDE over the sampled values.
    X = yk.reshape(-1, 1)
    kde = KernelDensity(kernel='gaussian', bandwidth=0.6).fit(X)
    log_dens = kde.score_samples(X_plot)
    # Local minima/maxima of the (log) density, rescaled from grid
    # coordinates back to positions in nk.
    mi, ma = argrelextrema(log_dens, np.less)[0], argrelextrema(log_dens, np.greater)[0]
    mi = np.rint(mi * float(len(xk)) / float(sdiv))
    ma = np.rint(ma * float(len(xk)) / float(sdiv))
    start = 0
    # Average nk over each segment delimited by consecutive minima.
    # range() replaces xrange(): xrange is Python-2-only (NameError on py3).
    for i in mi:
        i = int(i)
        if start != i:
            val = np.average(nk[start:i])
            for j in range(start, i):
                result.append(val)
        start = i
    # Trailing segment from the last minimum to the end.
    val = np.average(nk[start:])
    for j in range(start, len(nk)):
        result.append(val)
    return np.array(result)
开发者ID:leaguilar,项目名称:playground,代码行数:34,代码来源:test4.py


示例2: kdewrap

def kdewrap(indata, kernel):
    """Score ``indata`` under a KDE whose bandwidth is cross-validated.

    The bandwidth is chosen by 10-fold grid search over [0.1, 1.0] (30
    candidates); returns the per-sample log-density under the final model
    fitted with the requested ``kernel``.
    """
    column = indata[:, np.newaxis]
    search = GridSearchCV(
        KernelDensity(),
        {'bandwidth': np.linspace(0.1, 1.0, 30)},
        cv=10,  # 10-fold cross-validation
    )
    search.fit(column)
    best_bw = search.best_params_["bandwidth"]
    model = KernelDensity(kernel=kernel, bandwidth=best_bw).fit(column)
    return model.score_samples(column)
开发者ID:Upward-Spiral-Science,项目名称:orange-panda,代码行数:7,代码来源:benchmarking.py


示例3: OneClassKDE

class OneClassKDE(BaseClassifier):
    """One-class outlier detector backed by a Gaussian KDE.

    A random half of the training data fits the KDE; the other half
    calibrates a log-density threshold at the ``perc_keep`` percentile.
    ``predict`` labels samples whose log-density falls below the threshold
    as outliers (-1), everything else as inliers (+1).
    """
    _fit_params = ["bandwidth"]
    _predict_params = []

    def __init__(self, *args, **kwargs):
        self.bandwidth = kwargs["bandwidth"]
        self.perc_keep = kwargs["perc_keep"]

    def fit(self, data, **kwargs):
        """Fit the KDE on a random half of ``data`` and set the threshold."""
        self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)

        # Boolean mask splitting rows ~50/50 into fit/calibration sets.
        # bool replaces the numpy.bool alias removed in NumPy >= 1.24.
        idx = numpy.random.randint(2, size=len(data)).astype(bool)
        print(idx)

        self.kde.fit(data[idx, :])
        # Threshold chosen so ~perc_keep% of held-out scores stay above it.
        self.training_score = self.kde.score_samples(data[~idx, :])
        self.direct_thresh = numpy.percentile(self.training_score, 100 - self.perc_keep)

        print('training', self.training_score.min(), self.training_score.mean(),
              self.training_score.max(), self.direct_thresh)
        print(self.direct_thresh)

    def predict(self, data):
        """Return +1 for inliers and -1 for outliers."""
        score = self.kde.score_samples(data)
        self.score = score
        res = (score < self.direct_thresh)
        print('test', self.score.min(), self.score.mean(), self.score.max())
        print(res.sum(), "of", len(self.score), 'outliers')

        # True (outlier) -> 1*-2+1 = -1; False (inlier) -> 0*-2+1 = +1.
        return res.astype(numpy.uint8) * -2 + 1

    def decision_function(self, data=None):
        # Scores from the most recent predict() call; data is ignored.
        return self.score
开发者ID:CellH5,项目名称:cellh5apps,代码行数:34,代码来源:learner.py


示例4: _importance_preprocess_uni

def _importance_preprocess_uni(states, rewards, gradients, p_tar, p_gen):
    """Accumulate per-step importance-sampling quantities for a batch of episodes.

    ``states``/``rewards``/``gradients``/``p_tar``/``p_gen`` are parallel
    per-episode sequences.  A KDE fitted over all visited states supplies a
    state-density term that is added to the generator log-probabilities.
    Returns the structure from _create_episode_info() with its accumulator
    lists extended.
    """
    res = _create_episode_info()

    flat_states = [s for traj in states for s in traj]
    # TODO Pass in as args?
    kde = KernelDensity(kernel='gaussian', bandwidth=0.25)
    kde.fit(flat_states)

    # zip replaces itertools.izip: izip is Python-2-only, zip works on both.
    for ss, rs, gs, ps, qs in zip(states, rewards, gradients, p_tar, p_gen):

        state_probs = kde.score_samples(ss)
        # Cumulative log-probs along the trajectory; the generator side gets
        # the state-density correction.
        traj_p = np.cumsum(ps)  # + np.mean(state_probs)
        traj_q = np.cumsum(qs) + state_probs
        traj_grads = np.cumsum(gs, axis=0)
        # Reward-to-go and its gradient-weighted counterpart.
        r_acc = np.cumsum(rs[::-1])[::-1]
        r_grad = (r_acc * traj_grads.T).T

        res.r_grads.extend(r_grad)
        res.traj_p_tar.extend(traj_p)
        res.traj_p_gen.extend(traj_q)
        res.traj_grads.extend(traj_grads)
        res.traj_r.extend(r_acc)

        # Used for estimating fisher
        res.act_grads.extend(gs)
        res.state_act_p_tar.extend(traj_p)
        res.state_act_p_gen.extend(traj_q)

    return res
开发者ID:Humhu,项目名称:percepto,代码行数:29,代码来源:policy_gradient.py


示例5: xy_kde

def xy_kde(xy, bandwidth, N_grid=100, levels=[0.8, 0.6, 0.4, 0.2]):
    """2-D Gaussian KDE of points ``xy`` plus mass-enclosing contour values.

    Evaluates the density on an N_grid x N_grid mesh spanning the data, then
    for each fraction in ``levels`` finds the density value whose
    super-level set encloses that fraction of the total mass (for contour
    plotting, as in corner_plot.py).

    Returns:
        (H, V, x_grid, y_grid): density grid, sorted contour values, and
        the meshgrid coordinate arrays.
    """
    x_edges = np.linspace(np.min(xy[:, 0]), np.max(xy[:, 0]), N_grid + 1)
    y_edges = np.linspace(np.min(xy[:, 1]), np.max(xy[:, 1]), N_grid + 1)
    # Bin centres of the evaluation mesh.
    x_centres = np.array([x_edges[b] + (x_edges[b + 1] - x_edges[b]) / 2
                          for b in range(N_grid)])
    y_centres = np.array([y_edges[b] + (y_edges[b + 1] - y_edges[b]) / 2
                          for b in range(N_grid)])
    x_grid, y_grid = np.meshgrid(x_centres, y_centres)
    xy_grid = np.array([np.ravel(x_grid), np.ravel(y_grid)]).T
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(xy)
    H = np.exp(kde.score_samples(xy_grid).reshape(N_grid, N_grid))
    # this bit is taken from the corner_plot.py method.
    ######################################
    Hflat = H.flatten()
    inds = np.argsort(Hflat)[::-1]
    Hflat = Hflat[inds]
    sm = np.cumsum(Hflat)
    sm /= sm[-1]
    V = np.empty(len(levels))
    for i, v0 in enumerate(levels):
        try:
            V[i] = Hflat[sm <= v0][-1]
        except IndexError:
            # Was a bare `except:`; only an empty selection can fail here,
            # in which case fall back to the peak density.
            V[i] = Hflat[0]
    #####################################
    V = np.sort(V)

    return H, V, x_grid, y_grid
开发者ID:RossHart,项目名称:astro_codes,代码行数:29,代码来源:kde_plotting.py


示例6: estimate_distribution

def estimate_distribution(samples, h=0.1, n_points=100):
    """Estimate a 1-D density on [-1, 1] with a KDE of bandwidth ``h``.

    Returns:
        (xs, ys): the ``n_points`` evaluation grid and the list of density
        values at those points.
    """
    kde = KernelDensity(bandwidth=h)
    kde.fit(samples[:, np.newaxis])
    xs = np.linspace(-1.0, 1.0, n_points)
    # score_samples expects a 2-D (n_samples, 1) array and returns
    # log-densities; the old per-point kde.score([x]) call passed a 1-D list
    # (rejected by modern scikit-learn) and recomputed the fit sum per point.
    ys = list(np.exp(kde.score_samples(xs[:, np.newaxis])))
    return xs, ys
开发者ID:rnowling,项目名称:pop-gen-models,代码行数:7,代码来源:plot_sampled_phis.py


示例7: kernel_estimation

def kernel_estimation(test, train_n, train_p):
    """Score ``test`` rows by the ratio of two Gaussian KDE densities.

    Fits one KDE to the negative training set and one to the positive set,
    then returns log(p_density / n_density + 1) per test row, flooring the
    denominator with a small constant when the negative density is exactly
    zero.
    """
    relevance_score = []
    result_n = []
    result_p = []

    X_n = np.array(train_n)
    X_p = np.array(train_p)
    Y = np.array(test)

    # Bandwidths were chosen offline (see the grid search commented out in
    # the original source).
    kde_n = KernelDensity(kernel='gaussian', bandwidth=0.999).fit(X_n)
    kde_p = KernelDensity(kernel='gaussian', bandwidth=4.772).fit(X_p)
    for i in range(len(Y)):
        # score_samples expects a 2-D array; slice Y[i:i+1] keeps the row
        # 2-D and [0] takes the scalar directly -- the old code extracted it
        # by str()-formatting and .replace(), which is fragile and lossy.
        result_n.append(np.exp(kde_n.score_samples(Y[i:i + 1])[0]))
        result_p.append(np.exp(kde_p.score_samples(Y[i:i + 1])[0]))
        if i % 1000 == 0:
            print(i)  # progress indicator

    for i in range(len(result_n)):
        if result_n[i] == 0.0:
            # Floor the denominator to avoid division by zero.
            relevance_score.append(np.log(result_p[i] / 1.8404e-17 + 1))
        else:
            relevance_score.append(np.log(result_p[i] / result_n[i] + 1))

    return relevance_score
开发者ID:heyunh2015,项目名称:ecnu_cds2015,代码行数:30,代码来源:ltr_kde.py


示例8: sklearn_kde_plot

def sklearn_kde_plot(dataframe, choose_choice, topic_name, fold_num):
    """Plot a Gaussian-KDE density estimate of ``dataframe``'s values.

    Evaluates a bandwidth-0.01 Gaussian KDE on a 500-point grid from the
    data minimum up to a fixed upper bound of 10, draws the density curve
    plus a jittered rug of the raw observations, and shows the figure.
    The title combines ``choose_choice``, ``topic_name`` and ``fold_num``.
    """
    N = dataframe.values.size
    X = dataframe.values[:, np.newaxis]

    # Evaluation grid: data minimum up to the hard-coded upper bound of 10.
    X_plot = np.linspace(min(dataframe.values), 10, num=500)[:, np.newaxis]

    # (The original also computed an unused two-Gaussian "true density"
    # reference curve; that dead code has been removed.)
    fig, ax = plt.subplots()

    kde = KernelDensity(kernel='gaussian', bandwidth=0.01).fit(X)
    log_dens = kde.score_samples(X_plot)
    ax.plot(X_plot[:, 0], np.exp(log_dens), '-', label="kernel = '{0}'".format('gaussian'))

    ax.text(6, 0.38, "N={0} points".format(N))
    ax.legend(loc='upper right')
    # Rug plot of the raw observations, jittered just below the axis.
    ax.plot(X[:, 0], -0.005 - 0.005 * np.random.random(X.shape[0]), '+k')

    ax.set_xlim(0, 10)
    ax.set_ylim(-0.02, 1.0)
    ax.set_xlabel("Delta Follower")
    ax.set_ylabel("Density")
    plt.title('Density - ' + choose_choice + ' (' + topic_name + ', ' + fold_num + ')')
    plt.show()
    return
开发者ID:shredktp,项目名称:follower_tweet_analysis,代码行数:35,代码来源:08_Kernel_Density_Estimation.py


示例9: basic_properties

def basic_properties( sequences , axess=None, labl = None, logscale=[False], markr='.', clr='k',offset=0, alfa = 0.8,
                      distir = [False,False,False, False], bandwidths = [3, 0.1,0.01,1], limits = [(1,50),(0,1),(0,1),(1,25)] ):
    """Plot distributional summaries of several sequences on a grid of axes.

    For sequence i: if distir[i] == 0, draw a normalised frequency plot of
    the histogram; otherwise draw a Gaussian KDE (bandwidth bandwidths[i],
    clipped to limits[i]); distir[i] == 2 additionally cumulates the density
    into a CDF.  ``offset`` selects the axes row, so repeated calls can
    layer different datasets onto the same figure.

    NOTE(review): the list defaults are mutable and shared across calls;
    they are not mutated here, but confirm before changing.
    Returns the axes array.
    """
    if axess is None:
        fig,axess = plt.subplots( 3, len(sequences),False,False, squeeze=False,figsize=(len(sequences)*3,8))#'col'
    # The second subplots_adjust overrides the first; kept as in the original.
    plt.subplots_adjust(left=0.12, bottom=0.05, right=0.95, top=0.94,   wspace=0.28, hspace=0.1)
    plt.subplots_adjust(left=0.45, bottom=0.05, right=0.95, top=0.94,   wspace=0.28, hspace=1.2)

    for i in range(0,len(sequences)):
        ax = axess[offset][i]
        seq = sequences[i]
        smax =max(seq)
        smin =min(seq)

        if distir[i]==0:
            # Frequency plot: one bin per integer value when the support is
            # wider than 1, otherwise 100 bins over (0, smax).
            #print seq
            freqs , bin_edges = np.histogram(seq,  smax+1 if smax>1 else 100, range = (0,smax+1) if smax>1 else (0,smax))#, normed = True, density=True)
            bin_centers =  (bin_edges[:-1] + bin_edges[1:])/2.
            vals = range(0,smax+1) if smax>1 else bin_centers
            freqs=freqs*1.0/sum(freqs)
            #remove zeros
            y = np.array(freqs)
            nz_indexes = np.nonzero(y)
            y = y[nz_indexes]
            x = np.array(vals)[nz_indexes]
            ax.plot(x, y,':', label=labl, alpha =alfa, color = clr ,  marker ='.')
        else :
            # KDE branch: clip to limits[i], subsample to at most 10k points,
            # then fit a Gaussian KDE with the per-sequence bandwidth.
            X = np.array(seq)
            X = [ x for x in X if x>=limits[i][0] and x<=limits[i][1]]
    #         X= (np.abs(X))
#             print len(X)
            X = np.random.choice(X, size=min(10000, len(X)))
            X = X[:, np.newaxis]
            kde = KernelDensity(kernel = 'gaussian', bandwidth=bandwidths[i]).fit(X)#,atol=atols[i],kernel = 'tophat'kernel='gaussian'
#             if 'x' in logscale[i] : 
#                 X_plot = np.logspace( limits[i][0],  limits[i][1], 1000)[:, np.newaxis]
#             else :
            X_plot = np.linspace(limits[i][0], limits[i][1], 1000)[:, np.newaxis]
    
            log_dens = kde.score_samples(X_plot) #
    #         ax.fill(X_plot[:, 0], np.exp(log_dens), alpha =0.5, label=labl)
            Y  =  np.exp(log_dens)
            # distir[i] == 2 means plot the cumulative density instead.
            if  distir[i]==2: Y = np.cumsum(Y)
            ax.plot(X_plot[:, 0],Y, '-',label=labl, alpha =alfa, color = clr ,markersize=2,  marker ='')
    
            # Filled polygon under the curve, closed at both x-limits.
            verts = [(limits[i][0]-1e-6, 0)] + list(zip(X_plot[:, 0],Y)) + [(limits[i][1]+1e-6, 0)]
            poly = Polygon(verts, facecolor=clr,  alpha =alfa ) #, edgecolor='0.5')
            ax.add_patch(poly)
    #         ax.set_yticks([])
    #         ax.set_ylim(bottom=-0.02)
            ax.set_xlim(limits[i][0],limits[i][1])
            
        # Per-axis log scaling, only when one flag was given per sequence.
        if len(logscale)==len(sequences): 
            if 'x' in logscale[i] : 
                ax.set_xscale('log')
            if 'y' in logscale[i] : 
                ax.set_yscale('log')
                if i<3: ax.set_ylim(bottom=0.001)
#         ax.legend()
#         plt.show(block=False)
    return axess
开发者ID:rabbanyk,项目名称:FARZ,代码行数:60,代码来源:draw_plots.py


示例10: pdf

def pdf(data: list):
    """Gaussian-KDE density of ``data`` evaluated on a 100-point grid
    spanning [min(data), max(data)]."""
    column = [[value] for value in data]
    estimator = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(column)
    grid = [[point] for point in np.linspace(min(data), max(data), 100)]
    return np.exp(estimator.score_samples(grid))
开发者ID:lkkim-skku,项目名称:OneClass,代码行数:8,代码来源:GraphPlot.py


示例11: find_max_density_point

def find_max_density_point(point_list):
    """Return the input point with the highest KDE density.

    NaN rows are filtered out first; if nothing remains, a [nan, nan, nan]
    triple is returned instead.
    """
    point_list, _ = remove_nan(point_list)
    if point_list.shape[0] == 0:
        # No valid points remain after NaN filtering.
        return [float('nan'), float('nan'), float('nan')]
    density_model = KernelDensity(kernel='gaussian', bandwidth=0.01).fit(point_list)
    log_probs = density_model.score_samples(point_list)
    densest = point_list[np.argmax(log_probs)]
    return densest
开发者ID:goolygu,项目名称:ros-deep-vision,代码行数:9,代码来源:distribution.py


示例12: histLine

def histLine(axes, data, minmax, color):
    """Draw a smoothed (KDE-based) histogram line of ``data`` over ``minmax``."""
    xmin, xmax = minmax
    samples = data.reshape(-1, 1)
    # Bandwidth: one hundredth of the plotted range.
    estimator = KernelDensity(bandwidth=(xmax - xmin) / 100.0).fit(samples)
    grid = np.linspace(xmin, xmax, 100).reshape(-1, 1)
    density = np.exp(estimator.score_samples(grid))
    axes.plot(grid, density, color=color)
开发者ID:dr-dos-ok,项目名称:kaggle-higgs-boson,代码行数:9,代码来源:plot2.py


示例13: createfeatmat

def createfeatmat(N):
    """Feature matrix of KDE log-densities on an N x N grid.

    Row i holds the log-density of the point pair vals[i] evaluated at
    every grid coordinate from getgridcoords(N).
    """
    grid = getgridcoords(N).T
    n_rows = len(vals)
    featmat = np.zeros((n_rows, N ** 2))
    for row in range(n_rows):
        points = np.array([vals[row][0], vals[row][1]]).T
        # Bandwidth shrinks with grid resolution.
        estimator = KernelDensity(bandwidth=0.5 / (N - 1), kernel="gaussian")
        estimator.fit(points)
        featmat[row, :] = estimator.score_samples(grid)
    return featmat
开发者ID:elifriedman,项目名称:CauseEffectChallenge,代码行数:9,代码来源:distgridmodel.py


示例14: estimate_distribution

def estimate_distribution(samples, h=0.1, n_points=100):
    """KDE density of ``samples`` evaluated over its own [min, max] range.

    Returns:
        (xs, ys): the ``n_points`` evaluation grid and density values.
    """
    kde = KernelDensity(bandwidth=h)
    min_xs = min(samples)
    max_xs = max(samples)
    samples = samples[:, np.newaxis]
    kde.fit(samples)
    xs = np.linspace(min_xs, max_xs, n_points)
    ys = np.exp(kde.score_samples(xs[:, np.newaxis]))
    # print() replaces the Python-2-only print statement (SyntaxError on py3).
    print(xs.shape, ys.shape, sum(ys))
    return xs, ys
开发者ID:rnowling,项目名称:pop-gen-models,代码行数:10,代码来源:plot_sampled_loci_log_prob.py


示例15: kde_sklearn

def kde_sklearn(x, x_grid, bandwidth=0.2, **kwargs):
    """Kernel Density Estimation with Scikit-learn.

    Fits a KDE to ``x`` and returns the density on ``x_grid``, renormalised
    so the curve integrates to 1 over the supplied grid.
    """
    model = KernelDensity(bandwidth=bandwidth, **kwargs)
    model.fit(x[:, np.newaxis])
    # score_samples() returns the log-likelihood of the samples.
    log_pdf = model.score_samples(x_grid[:, np.newaxis])
    pdf = np.exp(log_pdf)
    # Trapezoidal integral over the grid used as the normalising constant.
    area = np.trapz(pdf, x_grid)
    return pdf / area
开发者ID:astroclark,项目名称:grbeams,代码行数:10,代码来源:beamingangle.py


示例16: fit

    def fit(self, X, y):
        """Fit a 24 x 7 (hour-of-day x day-of-week) baseline model from X, y.

        X: column 1 holds an hour encoding and column 2 a weekday encoding;
        both are rescaled here via ``normalize`` to 0-23 and 0-6.
        y: target values, one per row of X.

        Depending on self.strategy the cell value for each (hour, day) bin is:
          'mean'   -- mean of the bin's targets (0-count bins stay 0);
          'median' -- median of the bin's targets (NaN for empty bins);
          'kernel' -- argmax of a Gaussian KDE fitted to the bin's targets,
                      a mode-like estimate (NaN for empty bins).
        Stores the matrix in self._model and returns self.
        """
        a = np.zeros((24, 7))
        hours = np.copy(X[:, 1])
        weekdays = np.copy(X[:, 2])
        # normalize() maps to [0, 1]; scale up to index ranges 0-23 / 0-6.
        hours = 23 * normalize(hours)
        weekdays = 6 * normalize(weekdays)

        if self.strategy == 'mean':
            # Sum targets per bin, then divide by per-bin counts.
            counts = a.copy()
            for i, row in enumerate(zip(hours, weekdays)):
                hour = int(row[0])
                day = int(row[1])
                counts[hour, day] += 1
                a[hour, day] += y[i]

            # Avoid division by zero for empty bins.
            counts[counts == 0] = 1
            self._model = a / counts

        elif self.strategy in ('median', 'kernel'):

            # this is a 3d array 
            groups = [[[] for i in range(7)] for j in range(24)]

            # Collect the raw target values per (hour, day) bin.
            for i, row in enumerate(zip(hours, weekdays)):
                hour = int(row[0])
                day = int(row[1])
                groups[hour][day].append(y[i])

            if self.strategy == 'median':
                for i, j in np.ndindex((24, 7)):
                    if groups[i][j]:
                        a[i,j] = np.median(groups[i][j])
                    else:
                        a[i,j] = np.nan
            elif self.strategy == 'kernel':
                # kernel method computes a kernel density for each of the
                # bins and determines the most probably value ('mode' of sorts)
                grid = np.linspace(np.min(y), np.max(y), 1000)[:, np.newaxis]
                for i, j in np.ndindex((24, 7)):
                    if groups[i][j]:
                        npgroups = np.array(groups[i][j])[np.newaxis]
                        kernel = KernelDensity(kernel='gaussian', \
                                                bandwidth=0.2).fit(npgroups.T)
                        density = kernel.score_samples(grid)
                        # Grid point with the highest density is the estimate.
                        dmax = np.max(density)
                        imax = np.where(density==dmax)
                        a[i,j] = grid[imax, 0]
                    else:
                        a[i,j] = np.nan

            self._model = a

        # smooth the model here if there are nans
        return self
开发者ID:CenterForTheBuiltEnvironment,项目名称:mave,代码行数:54,代码来源:estimators.py


示例17: plot_stan_trc

def plot_stan_trc(dftrc):
    """
    Create simple plots of parameter distributions and traces from
    output of pystan sampling. Emulates pymc traceplots.

    dftrc: DataFrame with one column per parameter, one row per sample.
    Shows a two-column figure: KDE density (left) and trace (right) per
    parameter.
    """

    fig, ax2d = plt.subplots(nrows=dftrc.shape[1], ncols=2, figsize=(14, 1.8*dftrc.shape[1]),
                                facecolor='0.99', edgecolor='k')
    fig.suptitle('Distributions and traceplots for {} samples'.format(
                                dftrc.shape[0]),fontsize=14)
    fig.subplots_adjust(wspace=0.2, hspace=0.5)

    # create density and traceplot, per parameter coeff
    for i, (ax1d, col) in enumerate(zip(ax2d, dftrc.columns)):

        samples = dftrc[col].values
        # KDE bandwidth: sample range rounded to the nearest power of ten,
        # divided by 20.
        scale = (10**np.round(np.log10(samples.max() - samples.min()))) / 20
        kde = KernelDensity(bandwidth=scale).fit(samples.reshape(-1, 1))
        x = np.linspace(samples.min(), samples.max(), 100).reshape(-1, 1)
        y = np.exp(kde.score_samples(x))
        clr = sns.color_palette()[0]

        # density plot: KDE curve, dotted 95% interval, red mean line + label
        ax1d[0].plot(x, y, color=clr, linewidth=1.4)
        ax1d[0].vlines(np.percentile(samples, [2.5, 97.5]), ymin=0, ymax=y.max()*1.1,
                       alpha=1, linestyles='dotted', colors=clr, linewidth=1.2)
        mn = np.mean(samples)
        ax1d[0].vlines(mn, ymin=0, ymax=y.max()*1.1,
                       alpha=1, colors='r', linewidth=1.2)
        ax1d[0].annotate('{:.2f}'.format(mn), xy=(mn,0), xycoords='data'
                    ,xytext=(5,10), textcoords='offset points', rotation=90
                    ,va='bottom', fontsize='large', color='#AA0022')
        # (A duplicate fontsize-10 set_title here and an unused counter `k`
        # were dead code in the original and have been removed; the title is
        # set once below.)

        # traceplot: raw chain, dotted 95% interval, red mean line
        ax1d[1].plot(np.arange(len(samples)),samples, alpha=0.2, color=clr, linestyle='solid'
                              ,marker=',', markerfacecolor=clr, markersize=10)
        ax1d[1].hlines(np.percentile(samples,[2.5, 97.5]), xmin=0, xmax=len(samples),
                       alpha=1, linestyles='dotted', colors=clr)
        ax1d[1].hlines(np.mean(samples), xmin=0, xmax=len(samples), alpha=1, colors='r')

        ax1d[0].set_title('{}'.format(col), fontdict={'fontsize':14})#,'fontweight':'bold'})
        #ax1d[0].legend(loc='best', shadow=True)

        _ = [ax1d[j].axes.grid(True, linestyle='-', color='lightgrey') for j in range(2)]

    plt.subplots_adjust(top=0.94)
    plt.show()
开发者ID:Volodymyrk,项目名称:pymc3_vs_pystan,代码行数:53,代码来源:convenience_functions.py


示例18: densityEst

def densityEst(a, x, p, knn=1, Mode='G'):
    """One-dimensional density estimation.

    Two modes of operation:
      knn == 0: fixed-bandwidth kernel density estimate via scikit-learn;
                ``p`` is the bandwidth.
      knn == 1: k-nearest-neighbour estimate; ``p`` is the neighbour count.
    Two kernel types are supported: Mode=='T' (triangular) and
    Mode=='G' (Gaussian).

    a: vector of samples.  x: evaluation points (NOTE: resized in place to
    shape (N, 1), mutating the caller's array).
    Returns (x, densities), or None after printing a message on bad input.
    """
    N = len(x)
    x.resize(N, 1)
    l = len(a)
    a = num.array(a)
    a.resize(l, 1)
    if knn == 0:
        try:
            from sklearn.neighbors.kde import KernelDensity
        except ImportError:
            # print() replaces Python-2-only print statements throughout.
            print('Error:Please install sklearn package...')
            return
        if Mode == 'T':
            S = 'linear'  # sklearn's 'linear' kernel is the triangular kernel
        elif Mode == 'G':
            S = 'gaussian'
        else:
            print('Currently only G(gaussian) and T(triangular) Modes are supported')
            return
        kde = KernelDensity(kernel=S, bandwidth=p).fit(a)
        return (x, num.exp(kde.score_samples(x)))
    elif knn == 1:
        try:
            from sklearn.neighbors import NearestNeighbors
        except ImportError:
            print('Error:Please install sklearn package...')
            return
        neigh = NearestNeighbors(n_neighbors=p)
        neigh.fit(a)
        dist, index = neigh.kneighbors(x)
        # Adaptive bandwidth at each point: distance to its k-th neighbour.
        H = dist[:, -1]
        est = [0.0] * N
        for i, point_v in enumerate(x):
            point = point_v[0]
            h = H[i]
            est[i] = sum(kernel((a - point) / h, Mode)) / (l * h)
        return (x, est)
    else:
        print('knn must be 0 or 1')
        return
开发者ID:kamalshadi,项目名称:NDTdataProcessing,代码行数:53,代码来源:spd.py


示例19: train_rlos

def train_rlos(data, show_chart=False):
    """Train LOS estimator.

    Fits a tophat KDE of patient length-of-stay (rlos) per
    (sex, age_group, sline) triplet.  Triplets with fewer than
    ``training_threshold`` samples are skipped.  With ``show_chart`` set,
    plots the training histogram and a histogram of KDE samples per triplet.

    Returns:
        dict mapping (sex, age_group, sline) -> fitted KernelDensity.
    """
    freq = {}
    for row in data:
        sex = int(row["sex"])
        age = fp.split_age(int(row["age"]))
        sline = row["sline"]
        rlos = int(row["rlos"])

        if rlos == 0:
            # print() replaces Python-2-only print statements throughout.
            print("RLOS equals zero for sex %d, age %d, SL %s" % (sex, age, sline))

        # Renamed from 'tuple' so the builtin is not shadowed.
        key = (sex, age, sline)
        freq.setdefault(key, [])
        freq[key].append(rlos)

    result = {}
    for key, train_data in freq.items():
        (sex, age, sline) = key
        if len(train_data) < training_threshold:
            print("Too small training set (<%d) for sex %d, age %d, SL %s. Data will be skipped. " %
                  (training_threshold, sex, age, sline))
            continue

        X = np.array([train_data]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(X)
        # Sampler helper: draws `size` LOS values, rounded to whole days.
        kdef = lambda size: [round(l[0]) for l in kde.sample(size).tolist()]
        result[key] = kde

        if show_chart:
            # Cap the number of drawn samples at 500 for plotting speed.
            samples = kdef(len(train_data)) if len(train_data) < 500 else kdef(500)

            # hist for train data
            plt.subplot(211)
            plt.title("RLOS train data for Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(train_data)

            # estimated density
            plt.subplot(212)
            plt.title("Estimated density Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(samples)

            plt.show()

    return result
开发者ID:andrewshir,项目名称:CollIntel,代码行数:53,代码来源:DensityEstimation.py


示例20: pda_single

def pda_single(synth_data, data, bandwidth=.1):
    """Probability density approximation for one condition.

    Fits an Epanechnikov-kernel KDE to ``synth_data`` and returns the
    log-density of ``data`` under it.  ``bandwidth`` may be the string
    'silverman' to apply Silverman's rule of thumb.
    """
    synth_col = synth_data[:, np.newaxis]
    data_col = data[:, np.newaxis]
    if bandwidth == 'silverman':
        # Silverman's rule: 0.9 * min(sd, IQR/1.34) * n**(-1/5).
        lower, upper = scoreatpercentile(synth_col, [25, 75])
        iqr = upper - lower
        sd = np.std(synth_col)
        bandwidth = .9 * min(sd, iqr / 1.34) * len(data_col) ** (-1. / 5)

    model = KernelDensity(kernel='epanechnikov', bandwidth=bandwidth).fit(synth_col)
    return model.score_samples(data_col)
开发者ID:twiecki,项目名称:accumodel,代码行数:13,代码来源:likelihoods.py



注:本文中的sklearn.neighbors.kde.KernelDensity类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python neural_network.BernoulliRBM类代码示例发布时间:2022-05-27
下一篇:
Python kd_tree.KDTree类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2023 极客世界.|Sitemap