Python numpy.bincount Function Code Examples


This article collects and summarizes typical usage examples of the numpy.bincount function in Python. If you are wrestling with questions such as: what exactly does the Python bincount function do? How is bincount called? Where can I find examples of bincount in use? Then the hand-picked code examples below may be just what you need.



The following presents 20 code examples of the bincount function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.
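
Before the examples, a minimal standalone sketch of what numpy.bincount itself does may be useful: it counts how often each non-negative integer occurs in an array, optionally summing per-value weights and padding the output to a minimum length. The values below are made up purely for illustration.

import numpy as np

labels = np.array([0, 1, 1, 3, 1, 0])                 # non-negative integer input
counts = np.bincount(labels)                           # occurrences of 0..max(labels): [2, 3, 0, 1]
weighted = np.bincount(labels, weights=[1., 2., 2., 5., 2., 1.])   # per-value weighted sums
padded = np.bincount(labels, minlength=6)              # output padded to at least length 6
print(counts, weighted, padded)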

Example 1: test_stratified_shuffle_split_iter

def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50)
          ]

    for y in ys:
        sss = cval.StratifiedShuffleSplit(y, 6, test_size=0.33,
                                          random_state=0)
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Checks if folds keep classes proportions
            p_train = (np.bincount(np.unique(y[train],
                                   return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                  return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
Developer: AlexisMignon, Project: scikit-learn, Lines of code: 27, Source: test_cross_validation.py
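
The pattern above, pairing numpy.unique(..., return_inverse=True) with numpy.bincount, is a common way to get class proportions when the labels are not already consecutive integers starting at zero. A minimal standalone sketch of that idiom (the label values are illustrative only):

import numpy as np

y = np.array([-1, -1, -1, 1, 1, 7])
# return_inverse maps arbitrary labels onto 0..n_classes-1, which is the
# input format bincount expects
_, inverse = np.unique(y, return_inverse=True)
proportions = np.bincount(inverse) / float(len(y))
print(proportions)   # [0.5, 0.333..., 0.166...] for labels -1, 1, 7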


Example 2: stats

 def stats(self, out_counts, out_adj, adj_index = string.ascii_letters + ' '):
   """Given two input arrays this adds to them the statistics of the contained text. The first array is of length 256, and counts the instances of character codes. The second array is 2D, with ['a', 'b'] being the number of times a 'b' follows an 'a'. It is indexed by adj_index however, and character pairs that contain a character not included are not counted."""
   
   # Counts are relatively easy - convert and histogram...
   text_codes = numpy.fromstring(self.text.encode('utf8'), dtype=numpy.uint8)
   out_counts += numpy.bincount(text_codes, minlength=256)
   
   # Adjacencies require a little more sneakiness...
   # First convert the codes array into an index into adj_index, with entries that
   # are not in it mapped to cap, an out-of-range value that is filtered out below...
   adj_codes = numpy.fromstring(adj_index, dtype=numpy.uint8)
   
   cap = len(adj_index) * len(adj_index)
   conversion = numpy.empty(256, dtype=numpy.int64)
   conversion[:] = cap
   conversion[adj_codes] = numpy.arange(adj_codes.shape[0])
   
   text_codes = conversion[text_codes]
   
   # Now take adjacent pairs, and calculate the 1D index in out_adj matrix...
   pos = (text_codes[:-1] * len(adj_index)) + text_codes[1:]
   
   # Lose values that are too large - they are pairs we do not record...
   pos = pos[pos < cap]
   
   # Histogram and sum into the adjacency matrix...
   if pos.shape[0]>0:
     out_adj += numpy.bincount(pos, minlength=cap).reshape((len(adj_index),len(adj_index)))
Developer: eosbamsi, Project: helit, Lines of code: 27, Source: block.py
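
Example 2 reduces character-pair counting to a single 1D bincount by encoding each pair (a, b) as the flat index a * n + b and reshaping the result into an n x n matrix. Here is a stripped-down sketch of that trick with a tiny made-up symbol sequence:

import numpy as np

n = 3                                    # alphabet size
codes = np.array([0, 1, 1, 2, 0, 1])     # symbol sequence, all values in range(n)
pairs = codes[:-1] * n + codes[1:]       # flat index of each adjacent pair
adjacency = np.bincount(pairs, minlength=n * n).reshape(n, n)
print(adjacency)                          # adjacency[a, b] == number of times b follows a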


Example 3: check_min_samples_leaf

def check_min_samples_leaf(name):
    X, y = hastie_X, hastie_y

    # Test if leaves contain more than leaf_count training examples
    ForestEstimator = FOREST_ESTIMATORS[name]

    # test boundary value
    assert_raises(ValueError,
                  ForestEstimator(min_samples_leaf=-1).fit, X, y)
    assert_raises(ValueError,
                  ForestEstimator(min_samples_leaf=0).fit, X, y)

    est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = np.bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), 4,
                   "Failed with {0}".format(name))

    est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1,
                          random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = np.bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), len(X) * 0.25 - 1,
                   "Failed with {0}".format(name))
Developer: henrywoo, Project: scikit-learn, Lines of code: 30, Source: test_forest.py


Example 4: test_stratified_shuffle_split_iter

def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50),
          np.concatenate([[i] * (100 + i) for i in range(11)]),
          [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3],
          ['1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '3', '3'],
          ]

    for y in ys:
        sss = StratifiedShuffleSplit(6, test_size=0.33,
                                     random_state=0).split(np.ones(len(y)), y)
        y = np.asanyarray(y)  # To make it indexable for y[train]
        # this is how test-size is computed internally
        # in _validate_shuffle_split
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Checks if folds keep classes proportions
            p_train = (np.bincount(np.unique(y[train],
                                   return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                  return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
Developer: alexandercbooth, Project: scikit-learn, Lines of code: 33, Source: test_split.py


Example 5: update_nogrid

    def update_nogrid(self, params):

        endog = self.model.endog_li
        cached_means = self.model.cached_means
        varfunc = self.model.family.variance

        dep_params = np.zeros(self.max_lag + 1)
        dn = np.zeros(self.max_lag + 1)
        for i in range(self.model.num_group):

            expval, _ = cached_means[i]
            stdev = np.sqrt(varfunc(expval))
            resid = (endog[i] - expval) / stdev

            j1, j2 = np.tril_indices(len(expval))
            dx = np.abs(self.time[i][j1] - self.time[i][j2])
            ii = np.flatnonzero(dx <= self.max_lag)
            j1 = j1[ii]
            j2 = j2[ii]
            dx = dx[ii]

            vs = np.bincount(dx, weights=resid[
                             j1] * resid[j2], minlength=self.max_lag + 1)
            vd = np.bincount(dx, minlength=self.max_lag + 1)

            ii = np.flatnonzero(vd > 0)
            dn[ii] += 1
            if len(ii) > 0:
                dep_params[ii] += vs[ii] / vd[ii]

        dep_params /= dn
        self.dep_params = dep_params[1:] / dep_params[0]
Developer: Bonfils-ebu, Project: statsmodels, Lines of code: 32, Source: cov_struct.py


Example 6: uniform_paa

            def uniform_paa(directory, min_time, max_time, bin_count, timeseries_index, attribute_index):
                """
                Create waveforms using a piecewise aggregate approximation.

                :param directory: working directory for the timeseries
                :param min_time:
                :param max_time:
                :param bin_count:
                :param timeseries_index:
                :param attribute_index:
                :return: computed time series
                """
                import h5py
                import numpy
                import os
                import slycat.hdf5

                bin_edges = numpy.linspace(min_time, max_time, bin_count + 1)
                bin_times = (bin_edges[:-1] + bin_edges[1:]) / 2
                with h5py.File(os.path.join(directory, "timeseries-%s.hdf5" % timeseries_index), "r") as file:
                    original_times = slycat.hdf5.ArraySet(file)[0].get_data(0)[:]
                    original_values = slycat.hdf5.ArraySet(file)[0].get_data(attribute_index + 1)[:]
                bin_indices = numpy.digitize(original_times, bin_edges[1:])
                bin_counts = numpy.bincount(bin_indices, minlength=bin_count + 1)[1:]
                bin_sums = numpy.bincount(bin_indices, original_values, minlength=bin_count + 1)[1:]
                lonely_bins = (bin_counts < 2)
                bin_counts[lonely_bins] = 1
                bin_sums[lonely_bins] = numpy.interp(bin_times, original_times, original_values)[lonely_bins]
                bin_values = bin_sums / bin_counts
                return {
                    "input-index": timeseries_index,
                    "times": bin_times,
                    "values": bin_values,
                }
Developer: sandialabs, Project: slycat, Lines of code: 34, Source: slycat-agent-compute-timeseries.py
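
Example 6 computes per-bin averages with two bincount calls: one counting how many samples fall in each bin and one summing the sample values via the weights argument. A stripped-down sketch of that pattern, with made-up data and without the interpolation fallback for sparse bins:

import numpy as np

times = np.array([0.1, 0.4, 0.5, 0.9, 1.7])
values = np.array([10., 20., 30., 40., 50.])
bin_edges = np.linspace(0.0, 2.0, 5)           # 4 bins of width 0.5
bin_idx = np.digitize(times, bin_edges[1:])    # bin index of every sample

counts = np.bincount(bin_idx, minlength=4)                  # samples per bin
sums = np.bincount(bin_idx, weights=values, minlength=4)    # value sum per bin
means = sums / np.maximum(counts, 1)           # avoid dividing by zero in empty bins
print(means)                                   # [15., 35., 0., 50.]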


Example 7: relaxation

def relaxation(nodes, links):
    """ Gauss-Seidel relaxation for links """

    sources_idx = links["source"]
    targets_idx = links["target"]
    sources = nodes[sources_idx]
    targets = nodes[targets_idx]
    distances = links["distance"]
    strengths = links["strength"]

    D = targets["position"] - sources["position"]
    L = np.sqrt((D * D).sum(axis=1))

    # This avoid to test L != 0 (I = np.where(L>0))
    L = np.where(L, L, np.NaN)
    L = strengths * (L - distances) / L

    # Replace nan by 0, i.e. where L was 0
    L = np.nan_to_num(L)

    D *= L.reshape(len(L), 1)
    K = sources["weight"] / (sources["weight"] + targets["weight"])
    K = K.reshape(len(K), 1)

    # Note that a direct nodes['position'][links['source']] += K*D*(1-F)
    # wouldn't work as expected because of repeated indices
    F = nodes["fixed"][sources_idx].reshape(len(links), 1)
    W = K * D * (1 - F) * 0.1
    nodes["position"][:, 0] += np.bincount(sources_idx, W[:, 0], minlength=len(nodes))
    nodes["position"][:, 1] += np.bincount(sources_idx, W[:, 1], minlength=len(nodes))

    F = nodes["fixed"][targets_idx].reshape(len(links), 1)
    W = (1 - K) * D * (1 - F) * 0.1
    nodes["position"][:, 0] -= np.bincount(targets_idx, W[:, 0], minlength=len(nodes))
    nodes["position"][:, 1] -= np.bincount(targets_idx, W[:, 1], minlength=len(nodes))
Developer: Eric89GXL, Project: gl-agg, Lines of code: 35, Source: demo-graph.py
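
The comment in Example 7 is the key point: with repeated indices, a fancy-indexed "positions[idx] += delta" applies only one update per unique index, while np.bincount(idx, weights=delta) accumulates all of them. A small sketch of the difference (values are illustrative):

import numpy as np

idx = np.array([0, 0, 2])         # index 0 appears twice
delta = np.array([1.0, 1.0, 5.0])

naive = np.zeros(3)
naive[idx] += delta               # repeated index 0 updated only once -> [1., 0., 5.]

accum = np.zeros(3)
accum += np.bincount(idx, weights=delta, minlength=3)   # all updates kept -> [2., 0., 5.]
print(naive, accum)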


Example 8: sanity_checks

def sanity_checks(R):
    #extract out condition names and assess that we have nice uniform time point distributions
    condnames = np.array([x[0] for x in list(R.columns)])
    tps = np.array([x[1] for x in list(R.columns)])
    conds = np.unique(condnames)
    #commence sanity checks
    if len(conds)!=2:
        sys.stderr.write('ERROR: More than two treatment specifications detected. Exiting\n')
        sys.exit(1)
    if np.sum(condnames==conds[0])!=np.sum(condnames==conds[1]):
        sys.stderr.write('ERROR: Unbalanced number of data points between the two treatments. Exiting\n')
        sys.exit(1)
    #okay, so if we made it this far we have the same number of data points and only two condition names
    tp_conds = []
    for cond in conds:
        cond_tps = tps[condnames==cond]
        #we should have the same exact number of reps per time point
        #so if we count up how many reps we have per time point, there should only be one unique value
        tpholder, inverse = np.unique(cond_tps, return_inverse=True)
        if len(np.unique(np.bincount(inverse)))!=1:
            sys.stderr.write('ERROR: Non-uniformity of time points for replicates detected in condition '+cond+'. Exiting\n')
            sys.exit(1)
        #well, if not, then we're fine and can store information
        tp_conds.append(tpholder)
        Nrepl = np.bincount(inverse)[0]
    #one last sanity check - are the time points the same?
    if not np.array_equal(tp_conds[0],tp_conds[1]):
        sys.stderr.write('ERROR: Different time points specified across the two treatments. Exiting\n')
        sys.exit(1)
Developer: cyversewarwick, Project: gp2s, Lines of code: 29, Source: run_two_sample.py


Example 9: infer_labels

def infer_labels(x, wu, wp, z=None, y=None):
    t_max = []
    E_max = -1000000000
    for t0 in [1,2,3]:
        for t1 in [1,2,3]:
            for t2 in [1,2,3]:
                for t3 in [1,2,3]:
                    for t4 in [1,2,3]:
                        t = [t0,t1,t2,t3,t4]
                        if z is not None:
                            if not np.all(np.bincount(t) == z):
                                continue
                        E = compute_energy(x,wu,wp,t)
                        if y is not None:
                            if y.full_labeled:
                                E += np.sum(t!=y.full)
                            else:
                                w1 = np.zeros(4)
                                w2 = np.zeros(4)
                                tw = np.bincount(t)
                                w1[:tw.shape[0]] = tw
                                w2[:y.weak.shape[0]] = y.weak
                                E += np.sum(np.abs(w1 - w2))
                        if E > E_max:
                            t_max = t
                            E_max = E
    
    return t_max
Developer: aiwagan, Project: latent_ssvm, Lines of code: 28, Source: simple_dataset.py
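
Example 9 compares label histograms built with bincount, padding them to a common length first; bincount output only runs up to the largest value present, so histograms of different label vectors can have different lengths. A minimal sketch of that padding idiom with made-up labels:

import numpy as np

t = np.array([1, 1, 2])
y_full = np.array([1, 2, 2])
# pad both histograms to the same length before comparing element-wise
hist_t = np.bincount(t, minlength=4)        # [0, 2, 1, 0]
hist_y = np.bincount(y_full, minlength=4)   # [0, 1, 2, 0]
print(np.all(hist_t == hist_y))             # False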


Example 10: _elbo_grad_common

    def _elbo_grad_common(self, fep_mean, fep_sd, vcp_mean, vcp_sd,
                          vc_mean, vc_sd):

        # p(vc | vcp) contributions
        m = vcp_mean[self.ident]
        s = vcp_sd[self.ident]
        u = vc_mean**2 + vc_sd**2
        ve = np.exp(2*(s**2 - m))
        dm = u * ve - 1
        ds = -2 * u * ve * s
        vcp_mean_grad = np.bincount(self.ident, weights=dm)
        vcp_sd_grad = np.bincount(self.ident, weights=ds)

        vc_mean_grad = -vc_mean.copy() * ve
        vc_sd_grad = -vc_sd.copy() * ve

        # p(vcp) contributions
        vcp_mean_grad -= vcp_mean / self.vcp_p**2
        vcp_sd_grad -= vcp_sd / self.vcp_p**2

        # p(b) contributions
        fep_mean_grad = -fep_mean.copy() / self.fe_p**2
        fep_sd_grad = -fep_sd.copy() / self.fe_p**2

        return (fep_mean_grad, fep_sd_grad, vcp_mean_grad, vcp_sd_grad,
                vc_mean_grad, vc_sd_grad)
Developer: BranYang, Project: statsmodels, Lines of code: 26, Source: bayes_mixed_glm.py


Example 11: _bincount_mapper

def _bincount_mapper(ex, tiles, minlength=None):
  if len(tiles) > 1:
    result = np.bincount(tiles[0], weights=tiles[1], minlength=minlength)
  else:
    result = np.bincount(tiles[0], minlength=minlength)
  result_ex = extent.from_shape(result.shape)
  yield result_ex, result
Developer: rgardner, Project: spartan, Lines of code: 7, Source: builtins.py


Example 12: joint_and_marginals

def joint_and_marginals(labels1,labels2,smoothing=0.0):
    """
    marginal and joint distributions for a sequence of observations
    from a pair of discrete random variables, with additive smoothing on the
    joint distribution and the marginals in such a way that 
    marginal(smooth(conditional)) = smooth(marginal)
    """
    smoothing = float(smoothing)
    if len(labels1) != len(labels2):
        raise ValueError("label lists must have the same length")
    
    set1 = set(labels1)
    set2 = set(labels2)
    n1 = len(set1)
    n2 = len(set2)
    l2i1 = dict(zip(set1,range(len(set1))))
    l2i2 = dict(zip(set2,range(len(set2))))
    l1 = array([l2i1[l] for l in labels1])
    l2 = array([l2i2[l] for l in labels2])
    
    d1 = bincount(l1) + smoothing*n2
    d1 = d1/d1.sum()
    d2 = bincount(l2) + smoothing*n1
    d2 = d2/d2.sum()
    
    a_true = full((n1,n2),smoothing)
    for i,j in zip(l1,l2):
        a_true[i,j] += 1.0
    a_true = a_true/a_true.sum()
    
    return d1,d2,a_true
Developer: mattHawthorn, Project: carefree-automated-language-models, Lines of code: 31, Source: distributions.py


Example 13: get_indicator

    def get_indicator(tm):
        if isinstance(tm,np.ndarray):
            edges_plain = np.zeros_like(image)

            counts = np.bincount(tm.ravel())
            for c in xrange(image.shape[2]):
                vals = np.bincount(tm.ravel(),image[:,:,c].ravel())
                edges_plain[:,:,c] = (vals/counts)[tm]
                
            return edges_plain
        else:
            indicator = np.zeros(image_lab.shape[:2]+(3,),np.float32)
            indicator_map = tm.copy_map_for_image(indicator)
            
            tm_color = tm.copy_map_for_image(image)        
            
            #data_management.add_array('diff_mat',diff_mat) steps,precondition_runs,accept_ratio
            for loc in xrange(len(tm)):
                key = tm.key_from_index(loc)
                im_data = np.reshape(tm_color[key],(-1,3))
                color = np.mean(im_data,axis=0)
                
                for c in xrange(color.shape[0]):
                    indicator_map[key][:,:,c] = color[c]
                
            return indicator
Developer: ylockerman, Project: multi-scale-label-map-extraction, Lines of code: 26, Source: SLIC_compare_video.py


Example 14: make_batch_prediction_ensemble

 def make_batch_prediction_ensemble(self,phi_x):
     m, nsub, nfeat = np.shape(phi_x);
     hat = np.zeros(m);
     sub_hat = self.predictor.predict(np.reshape(phi_x,(m*nsub,nfeat)));
     #TODO:return_rec
     for i in range(m):
         votes = sub_hat[i*nsub:(i+1)*nsub]
         vote_bins = np.bincount(votes);
         vote_bins = np.append(vote_bins,np.zeros(7-np.size(vote_bins)));
         vote_bins_sort = np.sort(vote_bins);
         vote_bins_sort = vote_bins_sort[::-1]; #Descending
         #if vote_bins_sort[0] - vote_bins_sort[1] <= 1:
             #Small margin vote. Use back-up predictor
         if vote_bins_sort[0] - vote_bins_sort[1] == 1:
             #Retest ties
             tie_votes = self.tie_predictor.predict(phi_x[i,:,:]);
             tie_vote_bins = np.bincount(tie_votes) #Ensemble: aggregate votes
             tie_vote_bins = np.append(tie_vote_bins,np.zeros(7-np.size(tie_vote_bins)));
             total_vote_bins = tie_vote_bins + 1.1*vote_bins; #tie breaker is rbf
             tie_maxvote = np.max(total_vote_bins); #Get highest vote total
             tie_argmaxx = np.where(np.array(total_vote_bins)==tie_maxvote)[0]; #Find all regions with that vote total
             if np.size(tie_argmaxx)>1:
                 hat[i] = np.random.choice(tie_argmaxx);
             else:
                 #No Tie
                 hat[i] = tie_argmaxx[0];
         else:
             hat[i]=np.argmax(vote_bins);
     return hat
Developer: jsun2013, Project: AudioLocator, Lines of code: 29, Source: audiolearning.py
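
At its core, Example 14 is majority voting: tally the ensemble's predicted labels with bincount and take the argmax. A minimal sketch of that voting idiom without the tie-breaking logic (the class count of 7 follows the example; the votes themselves are made up):

import numpy as np

votes = np.array([2, 3, 2, 5, 2])            # predictions from the sub-classifiers
tally = np.bincount(votes, minlength=7)      # one slot per possible class label
winner = np.argmax(tally)                    # majority-vote prediction
print(winner)                                # 2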


Example 15: plot_val_train

def plot_val_train(list_train,fig_name,epoch):
   import matplotlib.pyplot as plt

   if(epoch==-1):
       nd=numpy.array([[int(b), int(c), d] for (b, c, d) in list_train]) #all error
       idx=map(int,nd[:,0])
       err=nd[:,2]
       y=numpy.bincount(idx, err)[1:len(idx)+1] / numpy.bincount(idx)[1:len(idx)+1]
       x =[x+1 for x in range(len(y))]
       plt.title('Train Error change with epoch')
       plt.xlabel('epoch (x)')
   else:
       y = numpy.array([[b, c, d] for (b, c, d) in list_train if b==epoch ])[:,2] #all error
       x =numpy.array([[b, c, d] for (b, c, d) in list_train if b==epoch ])[:,1]  #all error
       plt.title('Train Error change with minibatch')
       plt.xlabel('minibatch (x)')


   plt.plot(x, y)
   plt.ylabel('error (y)')
   plt.grid(True)
   #f.subplots_adjust(hspace=0)
   plt.savefig(fig_name)
   #plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
   plt.show()
Developer: Seleucia, Project: CNNRNet, Lines of code: 25, Source: plot_data.py


Example 16: remove_wrongly_sized_connected_components

 def remove_wrongly_sized_connected_components(self, a, min_size, max_size, in_place):
     """
     Adapted from http://github.com/jni/ray/blob/develop/ray/morpho.py
     (MIT License)
     """
     bin_out = self.BinaryOut.value
     
     original_dtype = a.dtype
         
     if not in_place:
         a = a.copy()
     if min_size == 0 and (max_size is None or max_size > numpy.prod(a.shape)): # shortcut for efficiency
         return a
     
     try:
         component_sizes = numpy.bincount( a.ravel() )
     except TypeError:
         # On 32-bit systems, must explicitly convert from uint32 to int
         # (This fix is just for VM testing.)
         component_sizes = numpy.bincount( numpy.asarray(a.ravel(), dtype=int) )
     bad_sizes = component_sizes < min_size
     if max_size is not None:
         numpy.logical_or( bad_sizes, component_sizes > max_size, out=bad_sizes )
     
     bad_locations = bad_sizes[a]
     a[bad_locations] = 0
     if (bin_out):
         # Replace non-zero values with 1
         numpy.place(a,a,1)
     return numpy.array(a, dtype=original_dtype)
Developer: JensNRAD, Project: lazyflow, Lines of code: 30, Source: opFilterLabels.py
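
Example 16 uses the bincount result as a lookup table indexed by the label array itself: bad_sizes[a] broadcasts a per-label boolean into a per-pixel mask in one step. A minimal sketch of that idiom on a tiny made-up label image:

import numpy as np

labels = np.array([[0, 1, 1],
                   [2, 2, 2],
                   [2, 2, 0]])
sizes = np.bincount(labels.ravel())      # pixels per label: [2, 2, 5]
too_small = sizes < 3                    # per-label boolean
mask = too_small[labels]                 # broadcast back to a per-pixel mask
labels[mask] = 0                         # erase the undersized components
print(labels)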


Example 17: _make_cm

def _make_cm(X,M,R):
    N = len(X)

    # we pregenerate all indices
    
    i_idx,j_idx  = np.triu_indices(N - M)
    
    # We start by making Cm
    Em = _embed_seq(X, 1, M)
    dif =  np.abs(Em[i_idx] - Em[j_idx])
    max_dist = np.max(dif, 1)
    inrange_cm = max_dist <= R


    in_range_i = i_idx[inrange_cm]
    in_range_j = j_idx[inrange_cm]


    Cm = np.bincount(in_range_i, minlength=N-M+1)
    Cm += np.bincount(in_range_j, minlength=N-M+1)

    inrange_last = np.max(np.abs(Em[:-1] - Em[-1]),1) <= R
    Cm[inrange_last] += 1
    # all matches + self match
    Cm[-1] += np.sum(inrange_last) + 1

    return Cm.astype(np.float), in_range_i, in_range_j
Developer: StellaAthena, Project: pyrem, Lines of code: 27, Source: univariate.py


Example 18: get_events_number

def get_events_number(data, id_column='event_id'):
    """
    :return: number of B events
    """
    _, data_ids = numpy.unique(data[id_column], return_inverse=True)
    weights = numpy.bincount(data_ids, weights=data.N_sig_sw) / numpy.bincount(data_ids)
    return numpy.sum(weights)
Developer: tata-antares, Project: tagging_LHCb, Lines of code: 7, Source: utils.py
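
Example 18 divides a weighted bincount by an unweighted one to get a per-group mean, with the group ids coming from numpy.unique's inverse indices. A self-contained sketch with made-up event ids and weights (the column names in the original are specific to that project):

import numpy as np

event_id = np.array([101, 101, 205, 205, 205])
weight = np.array([0.2, 0.4, 1.0, 1.0, 4.0])

_, group = np.unique(event_id, return_inverse=True)
per_event_mean = np.bincount(group, weights=weight) / np.bincount(group)
print(per_event_mean)    # mean weight per event: [0.3, 2.0]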


Example 19: display_roc

def display_roc():
    thresholds = np.linspace(0, 1, 21)
    for hash_name in hash_names:
        tpr = []
        fpr = []
        with open(hash_name + ".same", 'r+b') as f:
            same_family_dm = np.array(cPickle.load(f))
        same_family_uniqw, same_family_inverse = np.unique(same_family_dm, return_inverse=True)
        same_family_dmlist = dict(zip(same_family_uniqw, np.bincount(same_family_inverse)))
        with open(hash_name + ".diff", 'r+b') as f:
            diff_family_dm = np.array(cPickle.load(f))
        diff_family_uniqw, diff_family_inverse = np.unique(diff_family_dm, return_inverse=True)
        diff_family_dmlist = dict(zip(diff_family_uniqw, np.bincount(diff_family_inverse)))
        for threshold in thresholds:
            tp = fp = 0
            for dm in same_family_dmlist:
                if dm <= threshold:
                    tp += same_family_dmlist[dm]
            for dm in diff_family_dmlist:
                if dm <= threshold:
                    fp += diff_family_dmlist[dm]
            tpr.append(tp*1.0/same_family_dm.size)
            fpr.append(fp*1.0/diff_family_dm.size)
        print sm.auc(fpr, tpr)
        print "Fuzzy hashing algorithm: %s, AUC: %f" %(hash_name, sm.auc(fpr, tpr))
        plt.figure(0)
        plt.plot(fpr, tpr, label=hash_name)
        plt.ylim(0.75, 1)
        plt.legend(loc='best')
        plt.title("ROC curve for different algorithms")
        plt.xlabel("False posive rate")
        plt.ylabel("True posive rate")
    plt.show()
Developer: xia0pin9, Project: malcluster, Lines of code: 33, Source: fzeval.py


Example 20: compute_B_prob_using_part_prob

def compute_B_prob_using_part_prob(data, probs, weight_column='N_sig_sw', event_id_column='event_id', signB_column='signB',
                                   sign_part_column='signTrack', normed_signs=False, prior_probs=None, functor=None):
    """
    Compute p(B+) using probs for parts of event (tracks/vertices).
    
    :param data: pandas.DataFrame, data
    :param probs: probabilities for parts of events, numpy.array of shape [n_samples]
    :param weight_column: column for weights in data
    :param event_id_column: column for event id in data
    :param signB_column: column for event B sign in data
    :param sign_part_column: column for part sign in data
    
    :return: B sign array, B weight array, B+ prob array, B event id
    """
    result_event_id, data_ids = numpy.unique(data[event_id_column].values, return_inverse=True)
    if prior_probs is None:
        log_probs = numpy.log(probs) - numpy.log(1 - probs)
    else:
        new_probs = prior_probs * (1 - probs) + (1 - prior_probs) * probs
        log_probs = numpy.log(new_probs) - numpy.log(1 - new_probs)
    sign_weights = numpy.ones(len(log_probs))
    if normed_signs:
        for sign in [-1, 1]:
            maskB = (data[signB_column].values == sign)
            maskPart = (data[sign_part_column].values == 1)
            sign_weights[maskB * maskPart] = sum(maskB * (~maskPart)) * 1. /  sum(maskB * maskPart)
    log_probs *= sign_weights * data[sign_part_column].values
    result_logprob = numpy.bincount(data_ids, weights=log_probs)
    # simply reconstructing original
    result_label = numpy.bincount(data_ids, weights=data[signB_column].values) / numpy.bincount(data_ids)
    result_weight = numpy.bincount(data_ids, weights=data[weight_column]) / numpy.bincount(data_ids)
    return result_label, result_weight, expit(result_logprob), result_event_id
Developer: tata-antares, Project: tagging_LHCb, Lines of code: 32, Source: utils.py
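
Example 20 aggregates per-track log-odds into a per-event probability: bincount with weights sums the log-odds within each event, and expit converts the sum back to a probability. A minimal sketch of that aggregation step, assuming scipy is available and using made-up track probabilities:

import numpy as np
from scipy.special import expit

event = np.array([0, 0, 1, 1])                 # event id of each track
p_track = np.array([0.6, 0.7, 0.4, 0.45])      # per-track probability, illustrative
log_odds = np.log(p_track) - np.log(1 - p_track)
p_event = expit(np.bincount(event, weights=log_odds))   # combined per-event probability
print(p_event)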



Note: The numpy.bincount examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The code snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution or use should follow the corresponding projects' licenses. Do not reproduce without permission.

