This article collects typical usage examples of the numpy.bincount function in Python. If you have been wondering how exactly to use bincount, how to call it, or what real-world examples look like, the hand-picked code samples below should help.
The following sections show 20 code examples of the bincount function, sorted by popularity by default.
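Before turning to the project examples, here is a minimal, self-contained sketch (written for this article, not taken from any of the projects below) of what numpy.bincount itself does: it counts how often each non-negative integer occurs, can pad the result to a minimum length, and can replace counting with per-bin summation when weights are given.

import numpy as np

x = np.array([0, 1, 1, 3, 2, 1, 7])

# Basic use: counts[i] is how many times the value i occurs in x.
counts = np.bincount(x)
print(counts)                          # [1 3 1 1 0 0 0 1]

# minlength pads the result so it has at least that many bins.
print(np.bincount(x, minlength=10))    # [1 3 1 1 0 0 0 1 0 0]

# With weights, each occurrence of i contributes its weight instead of 1,
# so bincount acts as a per-bin sum (a vectorized "scatter add").
w = np.array([0.5, 1.0, 1.0, 2.0, 0.3, 1.0, 0.1])
print(np.bincount(x, weights=w))       # [0.5 3.  0.3 2.  0.  0.  0.  0.1]

Many of the examples below are variations on these three calls.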
Example 1: test_stratified_shuffle_split_iter

def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50)
          ]

    for y in ys:
        sss = cval.StratifiedShuffleSplit(y, 6, test_size=0.33,
                                          random_state=0)
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Check that folds keep the class proportions
            p_train = (np.bincount(np.unique(y[train],
                                             return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                            return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])

Developer: AlexisMignon, Project: scikit-learn, Lines: 27, Source: test_cross_validation.py
Example 2: stats

def stats(self, out_counts, out_adj, adj_index=string.ascii_letters + ' '):
    """Given two input arrays this adds to them the statistics of the contained text.
    The first array is of length 256 and counts the instances of character codes.
    The second array is 2D, with ['a', 'b'] being the number of times a 'b' follows an 'a';
    it is indexed by adj_index, and character pairs that contain a character not included
    are not counted."""
    # Counts are relatively easy - convert and histogram...
    text_codes = numpy.fromstring(self.text.encode('utf8'), dtype=numpy.uint8)
    out_counts += numpy.bincount(text_codes, minlength=256)

    # Adjacencies require a little more sneakiness...
    # First convert the codes array into an index into adj_index, with entries that are
    # not in it set to cap, an out-of-range value that is dropped below...
    adj_codes = numpy.fromstring(adj_index, dtype=numpy.uint8)

    cap = len(adj_index) * len(adj_index)
    conversion = numpy.empty(256, dtype=numpy.int64)
    conversion[:] = cap
    conversion[adj_codes] = numpy.arange(adj_codes.shape[0])

    text_codes = conversion[text_codes]

    # Now take adjacent pairs and calculate the 1D index into the out_adj matrix...
    pos = (text_codes[:-1] * len(adj_index)) + text_codes[1:]

    # Lose values that are too large - they are pairs we do not record...
    pos = pos[pos < cap]

    # Histogram and sum into the adjacency matrix...
    if pos.shape[0] > 0:
        out_adj += numpy.bincount(pos, minlength=cap).reshape((len(adj_index), len(adj_index)))

Developer: eosbamsi, Project: helit, Lines: 27, Source: block.py
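The trick in Example 2, encoding a character pair (i, j) as the single integer i * n + j, histogramming the encoded pairs with bincount, and reshaping back to an n x n matrix, is a general way to build co-occurrence or transition-count matrices in one vectorized call. A small standalone sketch with made-up symbol codes (not part of the helit project):

import numpy as np

n = 4                                      # number of distinct symbols
codes = np.array([0, 1, 1, 2, 3, 1, 0])    # a sequence of symbol indices in [0, n)

# Adjacent pairs (codes[k], codes[k+1]) flattened to a single bin index.
pair_index = codes[:-1] * n + codes[1:]

# One bincount call builds the whole n x n transition-count matrix.
adj = np.bincount(pair_index, minlength=n * n).reshape(n, n)
print(adj)   # adj[a, b] = number of times symbol b immediately follows symbol a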
Example 3: check_min_samples_leaf

def check_min_samples_leaf(name):
    X, y = hastie_X, hastie_y

    # Test if leaves contain more than leaf_count training examples
    ForestEstimator = FOREST_ESTIMATORS[name]

    # test boundary value
    assert_raises(ValueError,
                  ForestEstimator(min_samples_leaf=-1).fit, X, y)
    assert_raises(ValueError,
                  ForestEstimator(min_samples_leaf=0).fit, X, y)

    est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = np.bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), 4,
                   "Failed with {0}".format(name))

    est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1,
                          random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = np.bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), len(X) * 0.25 - 1,
                   "Failed with {0}".format(name))

Developer: henrywoo, Project: scikit-learn, Lines: 30, Source: test_forest.py
Example 4: test_stratified_shuffle_split_iter

def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50),
          np.concatenate([[i] * (100 + i) for i in range(11)]),
          [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3],
          ['1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '3', '3'],
          ]

    for y in ys:
        sss = StratifiedShuffleSplit(6, test_size=0.33,
                                     random_state=0).split(np.ones(len(y)), y)
        y = np.asanyarray(y)  # To make it indexable for y[train]
        # this is how test-size is computed internally
        # in _validate_shuffle_split
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Check that folds keep the class proportions
            p_train = (np.bincount(np.unique(y[train],
                                             return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                            return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])

Developer: alexandercbooth, Project: scikit-learn, Lines: 33, Source: test_split.py
Example 5: update_nogrid

def update_nogrid(self, params):

    endog = self.model.endog_li
    cached_means = self.model.cached_means
    varfunc = self.model.family.variance

    dep_params = np.zeros(self.max_lag + 1)
    dn = np.zeros(self.max_lag + 1)
    for i in range(self.model.num_group):
        expval, _ = cached_means[i]
        stdev = np.sqrt(varfunc(expval))
        resid = (endog[i] - expval) / stdev

        j1, j2 = np.tril_indices(len(expval))
        dx = np.abs(self.time[i][j1] - self.time[i][j2])
        ii = np.flatnonzero(dx <= self.max_lag)
        j1 = j1[ii]
        j2 = j2[ii]
        dx = dx[ii]

        vs = np.bincount(dx, weights=resid[j1] * resid[j2],
                         minlength=self.max_lag + 1)
        vd = np.bincount(dx, minlength=self.max_lag + 1)
        ii = np.flatnonzero(vd > 0)
        dn[ii] += 1
        if len(ii) > 0:
            dep_params[ii] += vs[ii] / vd[ii]

    dep_params /= dn
    self.dep_params = dep_params[1:] / dep_params[0]

Developer: Bonfils-ebu, Project: statsmodels, Lines: 32, Source: cov_struct.py
Example 6: uniform_paa

def uniform_paa(directory, min_time, max_time, bin_count, timeseries_index, attribute_index):
    """
    Create waveforms using a piecewise aggregate approximation.
    :param directory: working directory for the timeseries
    :param min_time:
    :param max_time:
    :param bin_count:
    :param timeseries_index:
    :param attribute_index:
    :return: computed time series
    """
    import h5py
    import numpy
    import os
    import slycat.hdf5
    bin_edges = numpy.linspace(min_time, max_time, bin_count + 1)
    bin_times = (bin_edges[:-1] + bin_edges[1:]) / 2
    with h5py.File(os.path.join(directory, "timeseries-%s.hdf5" % timeseries_index), "r") as file:
        original_times = slycat.hdf5.ArraySet(file)[0].get_data(0)[:]
        original_values = slycat.hdf5.ArraySet(file)[0].get_data(attribute_index + 1)[:]
        bin_indices = numpy.digitize(original_times, bin_edges[1:])
        bin_counts = numpy.bincount(bin_indices, minlength=bin_count + 1)[1:]
        bin_sums = numpy.bincount(bin_indices, original_values, minlength=bin_count + 1)[1:]
        lonely_bins = (bin_counts < 2)
        bin_counts[lonely_bins] = 1
        bin_sums[lonely_bins] = numpy.interp(bin_times, original_times, original_values)[lonely_bins]
        bin_values = bin_sums / bin_counts
        return {
            "input-index": timeseries_index,
            "times": bin_times,
            "values": bin_values,
        }

Developer: sandialabs, Project: slycat, Lines: 34, Source: slycat-agent-compute-timeseries.py
Example 7: relaxation

def relaxation(nodes, links):
    """ Gauss-Seidel relaxation for links """

    sources_idx = links["source"]
    targets_idx = links["target"]
    sources = nodes[sources_idx]
    targets = nodes[targets_idx]
    distances = links["distance"]
    strengths = links["strength"]

    D = targets["position"] - sources["position"]
    L = np.sqrt((D * D).sum(axis=1))

    # This avoids testing L != 0 (I = np.where(L > 0))
    L = np.where(L, L, np.NaN)
    L = strengths * (L - distances) / L

    # Replace nan by 0, i.e. where L was 0
    L = np.nan_to_num(L)

    D *= L.reshape(len(L), 1)
    K = sources["weight"] / (sources["weight"] + targets["weight"])
    K = K.reshape(len(K), 1)

    # Note that a direct nodes['position'][links['source']] += K*D*(1-F)
    # wouldn't work as expected because of repeated indices
    F = nodes["fixed"][sources_idx].reshape(len(links), 1)
    W = K * D * (1 - F) * 0.1
    nodes["position"][:, 0] += np.bincount(sources_idx, W[:, 0], minlength=len(nodes))
    nodes["position"][:, 1] += np.bincount(sources_idx, W[:, 1], minlength=len(nodes))

    F = nodes["fixed"][targets_idx].reshape(len(links), 1)
    W = (1 - K) * D * (1 - F) * 0.1
    nodes["position"][:, 0] -= np.bincount(targets_idx, W[:, 0], minlength=len(nodes))
    nodes["position"][:, 1] -= np.bincount(targets_idx, W[:, 1], minlength=len(nodes))

Developer: Eric89GXL, Project: gl-agg, Lines: 35, Source: demo-graph.py
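The comment in Example 7 about repeated indices is worth spelling out: an in-place fancy-indexed addition like a[idx] += vals is buffered, so when an index repeats only one contribution survives, whereas bincount with weights (or np.add.at) accumulates every contribution. A tiny sketch with toy data (not from the gl-agg demo):

import numpy as np

idx = np.array([0, 0, 2])            # index 0 appears twice
vals = np.array([1.0, 1.0, 5.0])

a = np.zeros(3)
a[idx] += vals                       # buffered: the second write to a[0] overwrites the first
print(a)                             # [1. 0. 5.]

b = np.zeros(3)
b += np.bincount(idx, weights=vals, minlength=len(b))   # accumulates all contributions
print(b)                             # [2. 0. 5.]

c = np.zeros(3)
np.add.at(c, idx, vals)              # unbuffered equivalent
print(c)                             # [2. 0. 5.]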
Example 8: sanity_checks

def sanity_checks(R):
    # extract condition names and assess that we have nice uniform time point distributions
    condnames = np.array([x[0] for x in list(R.columns)])
    tps = np.array([x[1] for x in list(R.columns)])
    conds = np.unique(condnames)
    # commence sanity checks
    if len(conds) != 2:
        sys.stderr.write('ERROR: More than two treatment specifications detected. Exiting\n')
        sys.exit(1)
    if np.sum(condnames == conds[0]) != np.sum(condnames == conds[1]):
        sys.stderr.write('ERROR: Unbalanced number of data points between the two treatments. Exiting\n')
        sys.exit(1)
    # okay, so if we made it this far we have the same number of data points and only two condition names
    tp_conds = []
    for cond in conds:
        cond_tps = tps[condnames == cond]
        # we should have the same exact number of reps per time point,
        # so if we count up how many reps we have per time point, there should only be one unique value
        tpholder, inverse = np.unique(cond_tps, return_inverse=True)
        if len(np.unique(np.bincount(inverse))) != 1:
            sys.stderr.write('ERROR: Non-uniformity of time points for replicates detected in condition ' + cond + '. Exiting\n')
            sys.exit(1)
        # well, if not, then we're fine and can store information
        tp_conds.append(tpholder)
        Nrepl = np.bincount(inverse)[0]
    # one last sanity check - are the time points the same?
    if not np.array_equal(tp_conds[0], tp_conds[1]):
        sys.stderr.write('ERROR: Different time points specified across the two treatments. Exiting\n')
        sys.exit(1)

Developer: cyversewarwick, Project: gp2s, Lines: 29, Source: run_two_sample.py
Example 9: infer_labels

def infer_labels(x, wu, wp, z=None, y=None):
    t_max = []
    E_max = -1000000000
    for t0 in [1, 2, 3]:
        for t1 in [1, 2, 3]:
            for t2 in [1, 2, 3]:
                for t3 in [1, 2, 3]:
                    for t4 in [1, 2, 3]:
                        t = [t0, t1, t2, t3, t4]
                        if z is not None:
                            if not np.all(np.bincount(t) == z):
                                continue
                        E = compute_energy(x, wu, wp, t)
                        if y is not None:
                            if y.full_labeled:
                                E += np.sum(t != y.full)
                            else:
                                w1 = np.zeros(4)
                                w2 = np.zeros(4)
                                tw = np.bincount(t)
                                w1[:tw.shape[0]] = tw
                                w2[:y.weak.shape[0]] = y.weak
                                E += np.sum(np.abs(w1 - w2))
                        if E > E_max:
                            t_max = t
                            E_max = E
    return t_max

Developer: aiwagan, Project: latent_ssvm, Lines: 28, Source: simple_dataset.py
Example 10: _elbo_grad_common

def _elbo_grad_common(self, fep_mean, fep_sd, vcp_mean, vcp_sd,
                      vc_mean, vc_sd):

    # p(vc | vcp) contributions
    m = vcp_mean[self.ident]
    s = vcp_sd[self.ident]
    u = vc_mean**2 + vc_sd**2
    ve = np.exp(2*(s**2 - m))
    dm = u * ve - 1
    ds = -2 * u * ve * s
    vcp_mean_grad = np.bincount(self.ident, weights=dm)
    vcp_sd_grad = np.bincount(self.ident, weights=ds)

    vc_mean_grad = -vc_mean.copy() * ve
    vc_sd_grad = -vc_sd.copy() * ve

    # p(vcp) contributions
    vcp_mean_grad -= vcp_mean / self.vcp_p**2
    vcp_sd_grad -= vcp_sd / self.vcp_p**2

    # p(b) contributions
    fep_mean_grad = -fep_mean.copy() / self.fe_p**2
    fep_sd_grad = -fep_sd.copy() / self.fe_p**2

    return (fep_mean_grad, fep_sd_grad, vcp_mean_grad, vcp_sd_grad,
            vc_mean_grad, vc_sd_grad)

Developer: BranYang, Project: statsmodels, Lines: 26, Source: bayes_mixed_glm.py
Example 11: _bincount_mapper

def _bincount_mapper(ex, tiles, minlength=None):
    if len(tiles) > 1:
        result = np.bincount(tiles[0], weights=tiles[1], minlength=minlength)
    else:
        result = np.bincount(tiles[0], minlength=minlength)
    result_ex = extent.from_shape(result.shape)
    yield result_ex, result

Developer: rgardner, Project: spartan, Lines: 7, Source: builtins.py
Example 12: joint_and_marginals

def joint_and_marginals(labels1, labels2, smoothing=0.0):
    """
    marginal and joint distributions for a sequence of observations
    from a pair of discrete random variables, with additive smoothing on the
    joint distribution and the marginals in such a way that
    marginal(smooth(conditional)) = smooth(marginal)
    """
    smoothing = float(smoothing)
    if len(labels1) != len(labels2):
        raise ValueError("label lists must have the same length")
    set1 = set(labels1)
    set2 = set(labels2)
    n1 = len(set1)
    n2 = len(set2)
    l2i1 = dict(zip(set1, range(len(set1))))
    l2i2 = dict(zip(set2, range(len(set2))))

    l1 = array([l2i1[l] for l in labels1])
    l2 = array([l2i2[l] for l in labels2])

    d1 = bincount(l1) + smoothing * n2
    d1 = d1 / d1.sum()
    d2 = bincount(l2) + smoothing * n1
    d2 = d2 / d2.sum()

    a_true = full((n1, n2), smoothing)
    for i, j in zip(l1, l2):
        a_true[i, j] += 1.0
    a_true = a_true / a_true.sum()

    return d1, d2, a_true

Developer: mattHawthorn, Project: carefree-automated-language-models, Lines: 31, Source: distributions.py
Example 13: get_indicator

def get_indicator(tm):
    if isinstance(tm, np.ndarray):
        edges_plain = np.zeros_like(image)
        counts = np.bincount(tm.ravel())
        for c in xrange(image.shape[2]):
            vals = np.bincount(tm.ravel(), image[:, :, c].ravel())
            edges_plain[:, :, c] = (vals / counts)[tm]
        return edges_plain
    else:
        indicator = np.zeros(image_lab.shape[:2] + (3,), np.float32)
        indicator_map = tm.copy_map_for_image(indicator)
        tm_color = tm.copy_map_for_image(image)

        #data_management.add_array('diff_mat',diff_mat) steps,precondition_runs,accept_ratio
        for loc in xrange(len(tm)):
            key = tm.key_from_index(loc)
            im_data = np.reshape(tm_color[key], (-1, 3))
            color = np.mean(im_data, axis=0)
            for c in xrange(color.shape[0]):
                indicator_map[key][:, :, c] = color[c]
        return indicator

Developer: ylockerman, Project: multi-scale-label-map-extraction, Lines: 26, Source: SLIC_compare_video.py
Example 14: make_batch_prediction_ensemble

def make_batch_prediction_ensemble(self, phi_x):
    m, nsub, nfeat = np.shape(phi_x);
    hat = np.zeros(m);
    sub_hat = self.predictor.predict(np.reshape(phi_x, (m*nsub, nfeat)));
    #TODO:return_rec
    for i in range(m):
        votes = sub_hat[i*nsub:(i+1)*nsub]
        vote_bins = np.bincount(votes);
        vote_bins = np.append(vote_bins, np.zeros(7-np.size(vote_bins)));
        vote_bins_sort = np.sort(vote_bins);
        vote_bins_sort = vote_bins_sort[::-1];  # Descending
        #if vote_bins_sort[0] - vote_bins_sort[1] <= 1:
        # Small-margin vote: use back-up predictor
        if vote_bins_sort[0] - vote_bins_sort[1] == 1:
            # Retest ties
            tie_votes = self.tie_predictor.predict(phi_x[i, :, :]);
            tie_vote_bins = np.bincount(tie_votes)  # Ensemble: aggregate votes
            tie_vote_bins = np.append(tie_vote_bins, np.zeros(7-np.size(tie_vote_bins)));
            total_vote_bins = tie_vote_bins + 1.1*vote_bins;  # tie breaker is rbf
            tie_maxvote = np.max(total_vote_bins);  # Get highest vote total
            tie_argmaxx = np.where(np.array(total_vote_bins) == tie_maxvote)[0];  # Find all regions with that vote total
            if np.size(tie_argmaxx) > 1:
                hat[i] = np.random.choice(tie_argmaxx);
            else:
                # No tie
                hat[i] = tie_argmaxx[0];
        else:
            hat[i] = np.argmax(vote_bins);
    return hat

Developer: jsun2013, Project: AudioLocator, Lines: 29, Source: audiolearning.py
Example 15: plot_val_train

def plot_val_train(list_train, fig_name, epoch):
    import matplotlib.pyplot as plt
    if (epoch == -1):
        nd = numpy.array([[int(b), int(c), d] for (b, c, d) in list_train])  # all errors
        idx = map(int, nd[:, 0])
        err = nd[:, 2]
        y = numpy.bincount(idx, err)[1:len(idx)+1] / np.bincount(idx)[1:len(idx)+1]
        x = [x + 1 for x in range(len(y))]
        plt.title('Train Error change with epoch')
        plt.xlabel('epoch (x)')
    else:
        y = numpy.array([[b, c, d] for (b, c, d) in list_train if b == epoch])[:, 2]  # all errors
        x = numpy.array([[b, c, d] for (b, c, d) in list_train if b == epoch])[:, 1]  # all errors
        plt.title('Train Error change with minibatch')
        plt.xlabel('minibatch (x)')
    plt.plot(x, y)
    plt.ylabel('error (y)')
    plt.grid(True)
    #f.subplots_adjust(hspace=0)
    plt.savefig(fig_name)
    #plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
    plt.show()

Developer: Seleucia, Project: CNNRNet, Lines: 25, Source: plot_data.py
Example 16: remove_wrongly_sized_connected_components

def remove_wrongly_sized_connected_components(self, a, min_size, max_size, in_place):
    """
    Adapted from http://github.com/jni/ray/blob/develop/ray/morpho.py
    (MIT License)
    """
    bin_out = self.BinaryOut.value

    original_dtype = a.dtype
    if not in_place:
        a = a.copy()
    if min_size == 0 and (max_size is None or max_size > numpy.prod(a.shape)):  # shortcut for efficiency
        return a

    try:
        component_sizes = numpy.bincount(a.ravel())
    except TypeError:
        # On 32-bit systems, must explicitly convert from uint32 to int
        # (This fix is just for VM testing.)
        component_sizes = numpy.bincount(numpy.asarray(a.ravel(), dtype=int))

    bad_sizes = component_sizes < min_size
    if max_size is not None:
        numpy.logical_or(bad_sizes, component_sizes > max_size, out=bad_sizes)

    bad_locations = bad_sizes[a]
    a[bad_locations] = 0
    if (bin_out):
        # Replace non-zero values with 1
        numpy.place(a, a, 1)
    return numpy.array(a, dtype=original_dtype)

Developer: JensNRAD, Project: lazyflow, Lines: 30, Source: opFilterLabels.py
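Example 16 combines two bincount-friendly idioms: bincount over a flattened label image gives the size of every labelled component in one pass, and indexing the resulting boolean array with the label image itself (bad_sizes[a]) broadcasts a per-label decision back onto every pixel. A tiny sketch with made-up labels (not lazyflow code):

import numpy as np

labels = np.array([[0, 1, 1, 2],
                   [0, 1, 2, 2],
                   [3, 3, 2, 2]])

sizes = np.bincount(labels.ravel())   # size of each labelled component (label 0 included)
small = sizes < 3                     # per-label decision: drop components under 3 pixels

cleaned = labels.copy()
cleaned[small[labels]] = 0            # small[labels] maps the per-label decision to pixel positions
print(cleaned)                        # labels 0 and 3 are cleared, 1 and 2 survive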
Example 17: _make_cm

def _make_cm(X, M, R):
    N = len(X)

    # we pregenerate all indices
    i_idx, j_idx = np.triu_indices(N - M)

    # We start by making Cm
    Em = _embed_seq(X, 1, M)
    dif = np.abs(Em[i_idx] - Em[j_idx])
    max_dist = np.max(dif, 1)
    inrange_cm = max_dist <= R

    in_range_i = i_idx[inrange_cm]
    in_range_j = j_idx[inrange_cm]

    Cm = np.bincount(in_range_i, minlength=N-M+1)
    Cm += np.bincount(in_range_j, minlength=N-M+1)

    inrange_last = np.max(np.abs(Em[:-1] - Em[-1]), 1) <= R
    Cm[inrange_last] += 1
    # all matches + self match
    Cm[-1] += np.sum(inrange_last) + 1

    return Cm.astype(np.float), in_range_i, in_range_j

Developer: StellaAthena, Project: pyrem, Lines: 27, Source: univariate.py
Example 18: get_events_number

def get_events_number(data, id_column='event_id'):
    """
    :return: number of B events
    """
    _, data_ids = numpy.unique(data[id_column], return_inverse=True)
    weights = numpy.bincount(data_ids, weights=data.N_sig_sw) / numpy.bincount(data_ids)
    return numpy.sum(weights)

Developer: tata-antares, Project: tagging_LHCb, Lines: 7, Source: utils.py
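Example 18 relies on a compact idiom that also appears in Example 20 below: np.unique(..., return_inverse=True) turns arbitrary ids into dense integer group indices, and the ratio np.bincount(ids, weights=x) / np.bincount(ids) is then the per-group mean of x. A standalone sketch with made-up event ids and toy weights (only the idiom is taken from the example above):

import numpy as np

event_id = np.array([101, 101, 205, 205, 205, 999])
sweights = np.array([0.9, 0.9, 1.2, 1.2, 1.2, -0.3])   # per-track weights (toy values)

_, ids = np.unique(event_id, return_inverse=True)       # dense group indices 0..n_groups-1

# Per-event mean of the per-track weights: weighted sum per group / count per group.
per_event = np.bincount(ids, weights=sweights) / np.bincount(ids)
print(per_event)         # [ 0.9  1.2 -0.3]
print(per_event.sum())   # 1.8, the (signed) event count, as in get_events_number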
Example 19: display_roc

def display_roc():
    thresholds = np.linspace(0, 1, 21)
    for hash_name in hash_names:
        tpr = []
        fpr = []
        with open(hash_name + ".same", 'r+b') as f:
            same_family_dm = np.array(cPickle.load(f))
            same_family_uniqw, same_family_inverse = np.unique(same_family_dm, return_inverse=True)
            same_family_dmlist = dict(zip(same_family_uniqw, np.bincount(same_family_inverse)))
        with open(hash_name + ".diff", 'r+b') as f:
            diff_family_dm = np.array(cPickle.load(f))
            diff_family_uniqw, diff_family_inverse = np.unique(diff_family_dm, return_inverse=True)
            diff_family_dmlist = dict(zip(diff_family_uniqw, np.bincount(diff_family_inverse)))
        for threshold in thresholds:
            tp = fp = 0
            for dm in same_family_dmlist:
                if dm <= threshold:
                    tp += same_family_dmlist[dm]
            for dm in diff_family_dmlist:
                if dm <= threshold:
                    fp += diff_family_dmlist[dm]
            tpr.append(tp*1.0/same_family_dm.size)
            fpr.append(fp*1.0/diff_family_dm.size)
        print sm.auc(fpr, tpr)
        print "Fuzzy hashing algorithm: %s, AUC: %f" % (hash_name, sm.auc(fpr, tpr))
        plt.figure(0)
        plt.plot(fpr, tpr, label=hash_name)
    plt.ylim(0.75, 1)
    plt.legend(loc='best')
    plt.title("ROC curve for different algorithms")
    plt.xlabel("False positive rate")
    plt.ylabel("True positive rate")
    plt.show()

Developer: xia0pin9, Project: malcluster, Lines: 33, Source: fzeval.py
Example 20: compute_B_prob_using_part_prob

def compute_B_prob_using_part_prob(data, probs, weight_column='N_sig_sw', event_id_column='event_id', signB_column='signB',
                                   sign_part_column='signTrack', normed_signs=False, prior_probs=None, functor=None):
    """
    Compute p(B+) using probs for parts of event (tracks/vertices).

    :param data: pandas.DataFrame, data
    :param probs: probabilities for parts of events, numpy.array of shape [n_samples]
    :param weight_column: column for weights in data
    :param event_id_column: column for event id in data
    :param signB_column: column for event B sign in data
    :param sign_part_column: column for part sign in data
    :return: B sign array, B weight array, B+ prob array, B event id
    """
    result_event_id, data_ids = numpy.unique(data[event_id_column].values, return_inverse=True)
    if prior_probs is None:
        log_probs = numpy.log(probs) - numpy.log(1 - probs)
    else:
        new_probs = prior_probs * (1 - probs) + (1 - prior_probs) * probs
        log_probs = numpy.log(new_probs) - numpy.log(1 - new_probs)
    sign_weights = numpy.ones(len(log_probs))
    if normed_signs:
        for sign in [-1, 1]:
            maskB = (data[signB_column].values == sign)
            maskPart = (data[sign_part_column].values == 1)
            sign_weights[maskB * maskPart] = sum(maskB * (~maskPart)) * 1. / sum(maskB * maskPart)
    log_probs *= sign_weights * data[sign_part_column].values
    result_logprob = numpy.bincount(data_ids, weights=log_probs)
    # simply reconstructing original
    result_label = numpy.bincount(data_ids, weights=data[signB_column].values) / numpy.bincount(data_ids)
    result_weight = numpy.bincount(data_ids, weights=data[weight_column]) / numpy.bincount(data_ids)
    return result_label, result_weight, expit(result_logprob), result_event_id

Developer: tata-antares, Project: tagging_LHCb, Lines: 32, Source: utils.py
Note: the numpy.bincount examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation hosts. The code snippets are taken from open-source projects contributed by their respective authors; copyright remains with the original authors, and redistribution or use should follow the license of the corresponding project. Do not reproduce without permission.