本文整理汇总了Python中numpy.histogram函数的典型用法代码示例。如果您正苦于以下问题:Python histogram函数的具体用法?Python histogram怎么用?Python histogram使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了histogram函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: pp_plot
def pp_plot(f, p, nbins, ax=None):
    """Draw a P-P plot of the empirical CDFs of two samples, f and p.

    Both samples are histogrammed on one shared set of bin edges. The
    cumulative mass of ``f`` is plotted against the cumulative mass of ``p``
    together with the diagonal reference line.

    Parameters
    ----------
    f, p : sequence of numbers
        The two samples to compare.
    nbins : int
        Number of equal-width bins used when the samples contain more than
        ``nbins`` distinct values. Otherwise every distinct value gets its
        own bin.
    ax : axes-like, optional
        Object providing ``plot``, ``set_xlim`` and ``set_ylim``. Defaults to
        ``plt.gca()``.
    """
    if ax is None:
        ax = plt.gca()

    # Build the union without aliasing either input's list of unique values;
    # the previous code mutated the list it later measured.
    distinct = sorted(set(f) | set(p))
    if len(distinct) > nbins:
        bins = nbins
    else:
        bins = distinct
        # Close the last bin by repeating the final spacing; fall back to a
        # unit width when there is only one distinct value.
        step = bins[-1] - bins[-2] if len(bins) > 1 else 1
        bins.append(bins[-1] + step)

    ff, edges = np.histogram(f, bins=bins, density=True)
    fp, _ = np.histogram(p, bins=edges, density=True)
    widths = edges[1:] - edges[:-1]
    Ff = np.cumsum(ff * widths)
    Fp = np.cumsum(fp * widths)

    # Draw on the requested axes rather than on whatever pyplot considers
    # current.
    ax.plot([0, 1], [0, 1], c='dodgerblue', lw=2, alpha=.8)
    ax.plot(Ff, Fp, c='black', lw=2, alpha=.9)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
开发者ID:BaxterEaves,项目名称:ijcai-iml-2016,代码行数:28,代码来源:plot_utils.py
示例2: metal_con
def metal_con(filename, distances, real_dist, bins=35, limits=(-3.1, 0.2),
              avgs=1, detection=1, tag="out"):
    """Histogram the photometric metallicity of data convolved to each distance.

    The file is read with ``fi.read_data`` (comma-delimited when the name ends
    in ``.csv``) and shifted from ``real_dist`` to ``distances[0]``. A
    reference histogram is built from the cut, unconvolved data. For every
    entry of ``distances`` the data are then convolved, shifted and cut
    ``avgs`` times, and the resulting histograms are averaged with
    ``avg_hists``. The averaged histograms are passed to ``plot_hists`` and
    returned as a list, one entry per distance.

    ``detection`` is forwarded as ``deff`` to ``cut_data`` for every distance
    except the first, which always uses ``deff=0``.
    """
    delim = ',' if filename[-4:] == '.csv' else None
    raw = fi.read_data(filename, delim)
    data = shift_data(raw, real_dist, distances[0])

    # Difference of the two 5*(log10(d*1000) - 1) terms for the real and the
    # first target distance.
    modulus_real = 5.*(ma.log10(real_dist*1000) - 1.)
    modulus_first = 5.*(ma.log10(distances[0]*1000) - 1.)
    mod = modulus_real - modulus_first
    print("Effective Magnitude Shift = {0}, average g={1}".format(mod, sc.mean(data[:,2])))

    reference = cut_data(data, 4,2,3,5, deff=0, modulus=mod, full=1)
    ref_feh = get_photo_metal(reference[:,4], reference[:,2], reference[:,3])
    ref_hist = np.histogram(ref_feh, bins, limits)

    hist = []
    # Iterate over several runs per distance and average them.
    for index, target in enumerate(distances):
        print("#- Convolving to distance {0} kpc".format(target))
        deff = 0 if index == 0 else detection
        runs = []
        for _ in range(avgs):
            # Holds dist constant, applies appropriate errors for new distance.
            sample = con.convolve(data, real_dist, target)
            # Shift data so detection efficiency works correctly; has no
            # noticeable effect if deff=0.
            sample = shift_data(sample, distances[0], target)
            # Apply color cuts and detection efficiency to the shifted and
            # convolved data.
            sample = cut_data(sample, 4,2,3,5, deff=deff, modulus=None, full=0)
            print("Average g = {0}, total stars = {1}".format(sc.mean(sample[:,2]), len(sample[:,0])))
            feh = get_photo_metal(sample[:,4], sample[:,2], sample[:,3])
            runs.append(np.histogram(feh, bins, limits))
        hist.append(avg_hists(runs))

    plot_hists(hist, ref_hist, distances, tag)
    return hist
开发者ID:MNewby,项目名称:Newby-tools,代码行数:34,代码来源:metal_CA.py
示例3: spectrum
def spectrum(self, shape, surface_point, bound):
    """Return the wavelength count histogram recorded for one surface.

    The entries stored under ``shape.surface_identifier(surface_point)`` are
    filtered to those whose third field equals ``bound``. Their second field
    is converted to float and histogrammed in unit-width bins running from
    ``floor(min - 1)`` up to, but not including, ``ceil(max + 2)``.

    Returns
    -------
    Spectrum or None
        ``Spectrum(left_bin_edges, counts)``, or ``None`` when the key is not
        stored, has no entries, or has no entry matching ``bound``.
    """
    key = shape.surface_identifier(surface_point)
    # `in` replaces the Python-2-only dict.has_key().
    if key not in self.store:
        return None
    entries = self.store[key]
    if len(entries) == 0:
        return None

    wavelengths = np.array(
        [float(entry[1]) for entry in entries if entry[2] == bound])
    if wavelengths.size == 0:
        return None

    # One expression covers the single-value case too: min == max there, so
    # the edges equal what the former special-case branch produced.
    bins = np.arange(np.floor(wavelengths.min() - 1),
                     np.ceil(wavelengths.max() + 2))
    freq, bins = np.histogram(wavelengths, bins=bins)
    return Spectrum(bins[0:-1], freq)
开发者ID:GuzSku,项目名称:mcclanahoochie,代码行数:30,代码来源:Devices.py
示例4: generate_f_score_gate
def generate_f_score_gate(
        neg_smaple,
        pos_sample,
        chan,
        beta=1,
        theta=2,
        high=True):
    """
    Given a negative and a positive sample, calculate the 'optimal' threshold
    gate position from an approximate f-score calculation.

    Column ``chan`` of both samples is histogrammed as a density on the same
    1000 equal-width bins, whose edges come from the negative sample.
    ``diff_pseudo_f1`` is evaluated from the negative histogram's peak
    onwards, and the bin centre where it is largest becomes the cutoff.

    Returns ``ThresholdGate(cutoff, chan, 'g')`` when ``high`` is true and
    ``ThresholdGate(cutoff, chan, 'l')`` otherwise.
    """
    # `density=True` replaces the removed `normed=True`; the two agree for
    # equal-width bins, which is what is used here.
    neg_hist, bins = numpy.histogram(neg_smaple[:, chan], 1000, density=True)
    pos_hist, bins = numpy.histogram(pos_sample[:, chan], bins, density=True)

    xs = (bins[1:] + bins[:-1]) / 2.0
    x0 = numpy.argmax(neg_hist)
    dfa = diff_pseudo_f1(neg_hist[x0:], pos_hist[x0:], beta=beta, theta=theta)
    f_cutoff = xs[x0 + numpy.argmax(dfa)]

    return ThresholdGate(f_cutoff, chan, 'g' if high else 'l')
开发者ID:jfrelinger,项目名称:fcm,代码行数:27,代码来源:gate.py
示例5: test_plot
def test_plot():
    """Plot KDEs and histograms of a normal sample and of f(x) = 2x + 1 of it.

    Draws 50000 samples from a normal distribution with mean 2 and variance
    3, stores them in the module-level ``data``, and plots the Gaussian KDE
    of the samples and of the transformed samples, the normal pdf, and
    100-bin density histograms of both.
    """
    import math
    from numpy.random import normal
    from scipy import stats
    global data

    def f(x):
        return 2*x + 1

    mean = 2
    var = 3
    std = math.sqrt(var)

    # Use `mean` rather than a repeated literal so sample and pdf stay in sync.
    data = normal(loc=mean, scale=std, size=50000)
    d2 = f(data)
    # Only `stats` is imported above, so go through it rather than through
    # the `scipy` name, which this function never binds.
    n = stats.norm(mean, std)

    kde1 = stats.gaussian_kde(data, bw_method='silverman')
    kde2 = stats.gaussian_kde(d2, bw_method='silverman')
    xs = np.linspace(-10, 10, num=200)

    plt.plot(xs, kde1(xs))
    plt.plot(xs, kde2(xs))
    plt.plot(xs, n.pdf(xs), color='k')

    num_bins = 100
    # Each histogram is drawn against the right-hand edge of its bins.
    h = np.histogram(data, num_bins, density=True)
    plt.plot(h[1][1:], h[0], lw=4)
    h = np.histogram(d2, num_bins, density=True)
    plt.plot(h[1][1:], h[0], lw=4)
开发者ID:andreas-koukorinis,项目名称:Kalman-and-Bayesian-Filters-in-Python,代码行数:33,代码来源:nonlinear_plots.py
示例6: armar_vector_gris
def armar_vector_gris(self):
    """Return the 256-bin histogram of the histogram-equalised image.

    ``self.filename`` is read with ``cv2.imread`` using flag ``0``, passed
    through ``cv2.equalizeHist``, and the result is histogrammed over
    ``[0, 256]`` with 256 bins. Only the counts are returned.
    """
    img = cv2.imread(self.filename, 0)
    equ = cv2.equalizeHist(img)
    # The side-by-side stack and the histogram of the raw image were never
    # used, so they are no longer computed.
    histequ, _ = np.histogram(equ.flatten(), 256, [0, 256])
    return histequ
开发者ID:leanahabedian,项目名称:AAP-TP2,代码行数:7,代码来源:armar_vector.py
示例7: hist_average_quality
def hist_average_quality(self, fontsize=16, bins=None):
    """Plot stacked histograms of the mean quality of HQ and LQ reads.

    Each read's ``quality`` string is decoded, every character code is
    reduced by 33, and the values are averaged. HQ and LQ averages are
    histogrammed on the same bins and drawn as stacked bars. A second y-axis
    shows the number of reads remaining above each bin edge.

    ``bins`` defaults to ``range(0, 94)``.
    """
    def mean_qv(read):
        # Average of (character code - 33) over the decoded quality string.
        return pylab.mean([ord(char)-33 for char in read['quality'].decode()])

    hq_means = [mean_qv(read) for read in self.hq_sequence]
    lq_means = [mean_qv(read) for read in self.lq_sequence]

    if bins is None:
        bins = range(0,94)
    hq_counts, X = np.histogram(hq_means, bins=bins)
    lq_counts, X = np.histogram(lq_means, bins=bins)

    pylab.bar(X[1:], hq_counts, width=1, label="HQ")
    pylab.bar(X[1:], lq_counts, bottom=hq_counts, width=1, label="LQ")
    pylab.xlim([0.5, 93.5])
    pylab.xlabel("Isoform average QV")
    pylab.ylabel("# Isoform")
    pylab.legend(fontsize=fontsize)

    ax = pylab.twinx()
    combined = hq_counts + lq_counts
    N = np.sum(combined)
    # Starts at the total and drops by each bin's combined count.
    ax.plot(X, [N] + list(N - np.cumsum(combined)), "k")
开发者ID:sequana,项目名称:sequana,代码行数:26,代码来源:isoseq.py
示例8: calculate_spectrum
def calculate_spectrum(self):
    """Bin the Monte Carlo packets into flux spectra and store them on self.

    Packets with positive ``montecarlo_nu`` feed ``spec_flux_nu``; packets
    with negative ``montecarlo_nu`` feed ``spec_reabsorbed_nu``. Both use
    ``montecarlo_energies`` as weights on the ``spec_nu_bins`` edges. These
    two spectra and ``spec_virtual_flux_nu`` are divided by a common scale
    built from the simulation time, the first frequency step and
    ``4 * pi * distance**2``. Per-angstrom counterparts are then derived for
    all three.

    When ``tardis_config.sn_distance`` is ``None`` a distance of 10 pc,
    expressed in cm, is used and a message is logged.
    """
    if self.tardis_config.sn_distance is None:
        logger.info('Distance to supernova not selected assuming 10 pc for calculation of spectra')
        distance = units.Quantity(10, 'pc').to('cm').value
    else:
        distance = self.tardis_config.sn_distance

    # Select frequencies and weights with the SAME mask. The previous code
    # filtered the weights by the sign of the energies, which misaligns or
    # length-mismatches the arrays whenever the two sign patterns differ.
    positive_nu = self.montecarlo_nu > 0
    negative_nu = self.montecarlo_nu < 0

    self.spec_flux_nu = np.histogram(self.montecarlo_nu[positive_nu],
                                     weights=self.montecarlo_energies[positive_nu],
                                     bins=self.spec_nu_bins)[0]

    flux_scale = (self.time_of_simulation * (self.spec_nu[1] - self.spec_nu[0]) * (4 * np.pi * distance ** 2))

    self.spec_flux_nu /= flux_scale
    self.spec_virtual_flux_nu /= flux_scale

    self.spec_reabsorbed_nu = np.histogram(self.montecarlo_nu[negative_nu],
                                           weights=self.montecarlo_energies[negative_nu],
                                           bins=self.spec_nu_bins)[0]
    self.spec_reabsorbed_nu /= flux_scale

    self.spec_angstrom = units.Unit('Hz').to('angstrom', self.spec_nu, units.spectral())

    self.spec_flux_angstrom = (self.spec_flux_nu * self.spec_nu ** 2 / constants.c.cgs.value / 1e8)
    self.spec_reabsorbed_angstrom = (self.spec_reabsorbed_nu * self.spec_nu ** 2 / constants.c.cgs.value / 1e8)
    self.spec_virtual_flux_angstrom = (self.spec_virtual_flux_nu * self.spec_nu ** 2 / constants.c.cgs.value / 1e8)
开发者ID:nathanielatom,项目名称:tardis,代码行数:27,代码来源:model_radial_oned.py
示例9: makeCompression
def makeCompression(X, bin_size, plotting=False):
    """
    Compress every row of ``X`` into ``bin_size`` bin averages.

    For each row the column positions are spread evenly over
    ``[0, n_columns]`` and split into ``bin_size`` equal-width bins. The
    row's values are averaged within each bin.

    Parameters
    ----------
    X : pandas.DataFrame
        One spectrum per row.
    bin_size : int
        Number of output columns.
    plotting : bool, optional
        When true, plot the fourth compressed row with ``plt``.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``X`` with columns ``z0 .. z{bin_size - 1}``.
    """
    print("Compressing spectrum")
    rows = []
    for ind in X.index:
        # `.loc` replaces `.ix`, which pandas has removed.
        data = X.loc[ind, :].values
        start_bins = data.shape[0]
        base = np.linspace(0, start_bins, start_bins)
        bins = np.linspace(0, start_bins, bin_size + 1)
        bin_sums = np.histogram(base, bins=bins, weights=data)[0]
        # Count on the very same edges so sums and counts cannot disagree.
        bin_counts = np.histogram(base, bins=bins)[0]
        rows.append([ind] + list(bin_sums / bin_counts))

    newdf = pd.DataFrame(rows).set_index(0)
    newdf.columns = ["z" + str(i) for i in range(newdf.shape[1])]
    if plotting:
        newdf.iloc[3, :].plot()
        plt.show()
    print(newdf.head(10))
    return newdf
开发者ID:chrissly31415,项目名称:amimanera,代码行数:32,代码来源:soil.py
示例10: convert_3dps_to_1dps
def convert_3dps_to_1dps(self, k_edges):
    """Average the 3-D power spectrum in shells of |k| and store the result.

    The radial wavenumber of every cell is built from the three axis
    centres returned by ``get_k_bin_centre``. Non-finite entries of
    ``ps_3d`` are dropped, and the remaining values are averaged within the
    bins given by ``k_edges``.

    Sets ``self.kn_1d`` (number of cells per bin, as float) and
    ``self.ps_1d`` (mean power per bin, 0 for empty bins).
    """
    # One call that prints the same line on Python 2 and 3.
    print("convert 3d power ot 1d power " + str(self.boxshape))

    k_bin_x, k_bin_y, k_bin_z = self.get_k_bin_centre()
    k_bin_r = np.sqrt((k_bin_x**2)[:, None, None] +
                      (k_bin_y**2)[None, :, None] +
                      (k_bin_z**2)[None, None, :])

    # flatten() already returns a copy, so no extra deepcopy is needed.
    ps_3d_flatten = self.ps_3d.flatten()
    finite = np.isfinite(ps_3d_flatten)
    k_bin_r = k_bin_r.flatten()[finite]
    ps_3d_flatten = ps_3d_flatten[finite]

    kn_1d, edges = np.histogram(k_bin_r, k_edges)
    ps_1d, edges = np.histogram(k_bin_r, k_edges, weights=ps_3d_flatten)

    kn_1d = kn_1d.astype(float)
    populated = kn_1d != 0
    # Turn the per-bin sums into means; empty bins are reported as 0.
    ps_1d[populated] /= kn_1d[populated]
    ps_1d[~populated] = 0.

    self.kn_1d = kn_1d
    self.ps_1d = ps_1d
开发者ID:POFK,项目名称:Tide,代码行数:25,代码来源:functions.py
示例11: process_two_time
def process_two_time(lev, bufno, n,
                     g12, buf, num, num_buf, noqs, qind, nopr, dly):
    '''Helper for autocor_two_time: fill two-time entries for one level.

    Increments ``num[lev]``. For every delay slot handled at this level, the
    buffered frame at that delay and the frame at ``bufno`` are each summed
    per ``qind`` bin, as is their product. The ratio
    ``sum(IF*IP) / (sum(IP) * sum(IF)) * nopr`` is written into ``g12``.

    For an ``int`` ``n`` a single element ``g12[n-1, n-dly[ptr]-1]`` is set.
    Otherwise the same value is written along a diagonal run of
    ``2 * 2**(lev-1)`` offsets around that position.
    '''
    num[lev] += 1
    half_buf = int(num_buf/2)
    first = 0 if lev == 0 else half_buf

    for i in range(first, min(num[lev], num_buf)):
        ptr = lev*half_buf + i
        delayno = (bufno - i) % num_buf  # cyclic buffers
        past = buf[lev, delayno]
        present = buf[lev, bufno]

        I_t12 = np.histogram(qind, bins=noqs, weights=present*past)[0]
        I_t1 = np.histogram(qind, bins=noqs, weights=past)[0]
        I_t2 = np.histogram(qind, bins=noqs, weights=present)[0]
        value = I_t12/(I_t1 * I_t2) * nopr

        tind1 = n - 1
        tind2 = n - dly[ptr] - 1

        if isinstance(n, int):
            g12[tind1, tind2] = value
        else:
            nshift = 2**(lev-1)
            for offset in range(-nshift+1, nshift+1):
                g12[int(tind1 + offset), int(tind2 + offset)] = value
开发者ID:yugangzhang,项目名称:chxanalys,代码行数:25,代码来源:Time_Correlation_Functions.py
示例12: get_DT
def get_DT(T,s,e):
    """Return the diversity trajectory of s, e at times T.

    ``s`` and ``e`` are counted in the bins delimited by the sorted values of
    ``T`` plus one extra edge at ``T[0] + 1``. The per-bin difference
    ``count(s) - count(e)`` is reversed and accumulated, and the first
    ``len(T)`` values are returned.
    """
    # The + .0001 offset prevents problems with identical ages.
    edges = np.sort(np.append(T, T[0]+1)) + .0001
    starts, _ = np.histogram(s, bins=edges)
    ends, _ = np.histogram(e, bins=edges)
    net_change = starts - ends
    running_total = np.cumsum(net_change[::-1])
    return running_total[:len(T)]
开发者ID:carlosp420,项目名称:PyRate,代码行数:7,代码来源:lib_DD_likelihood.py
示例13: make_surrogates_epochs
def make_surrogates_epochs(epochs, check_pdf=False, random_state=None):
    '''
    Make surrogate epochs by shuffling the time points of every trial.

    The shuffling is performed in the time domain only, independently for
    each trial and channel, so the set of values (and therefore the
    probability density function) is preserved.

    Parameters
    ----------
    epochs : Epochs Object.
    check_pdf : Condition to test for equal probability density. (bool)
    random_state : Seed for random generator.

    Output
    ------
    Surrogate Epochs object

    Raises
    ------
    AssertionError
        If ``check_pdf`` is true and the histograms of the original and the
        surrogate data differ.
    '''
    from sklearn.utils import check_random_state
    rng = check_random_state(random_state)

    surrogate = epochs.copy()
    surr = surrogate.get_data()
    n_times = len(surrogate.times)
    for trial in range(len(surrogate)):
        for channel in range(len(surrogate.ch_names)):
            order = np.argsort(rng.randn(n_times))
            surr[trial, channel, :] = surr[trial, channel, order]
    surrogate._data = surr

    if check_pdf:
        # Compare original and shuffled data on identical bin edges. The
        # previous check referenced undefined names and compared an array
        # with the (counts, edges) tuple.
        # NOTE(review): assumes epochs.copy() does not share its data buffer
        # with `epochs` -- confirm against the Epochs implementation.
        hist, edges = np.histogram(epochs.get_data().flatten())
        hist_surr, _ = np.histogram(surr.flatten(), bins=edges)
        assert np.array_equal(hist, hist_surr), 'The histogram values are unequal.'
    return surrogate
开发者ID:dongqunxi,项目名称:jumeg,代码行数:33,代码来源:jumeg_utils.py
示例14: get_weights
def get_weights(target, actual, bins=10, cap=10, match=True):
    '''
    Re-weight an actual distribution to a target.

    Args:
        target (array/list): observations drawn from target distribution
        actual (array/list): observations drawn from distribution to
            match to the target.
        bins (numeric or list/array of numerics): bins to use to do weighting
        cap (numeric): maximum weight value.
        match (bool): whether to make the sum of weights in actual equal to
            the number of samples in target

    Returns:
        numpy.array: returns array of shape len(actual). Observations that
        fall outside the histogram range get weight 0, because the target
        has no mass there.
    '''
    target_counts, edges = np.histogram(target, bins=bins)
    counts, _ = np.histogram(actual, bins=edges)
    # Floor empty bins to avoid dividing by zero.
    counts = np.maximum(counts.astype(float), 0.0001)
    multiplier = np.minimum(target_counts / counts, cap)

    points = np.asarray(actual, dtype=float)
    n_bins = len(multiplier)
    # side='right' reproduces np.histogram's half-open [left, right) bins.
    # The previous left-sided lookup sent the minimum value to index -1 (the
    # LAST bin) and indexed past the end for values above the last edge.
    idx = np.searchsorted(edges, points, side='right') - 1
    # np.histogram closes the last bin on the right.
    idx[points == edges[-1]] = n_bins - 1
    inside = (idx >= 0) & (idx < n_bins)

    weights = np.zeros(points.shape, dtype=float)
    weights[inside] = multiplier[idx[inside]]

    if match:
        total = np.sum(weights)
        # Leave an all-zero weight vector alone instead of producing NaNs.
        if total > 0:
            weights *= (len(target) / total)
    return weights
开发者ID:makagan,项目名称:deep-jets,代码行数:32,代码来源:performance.py
示例15: createThetaHistogramBinList
def createThetaHistogramBinList(theta_bins, d0_resolution_data):
    """Histogram each sufficiently large d0 sample, paired with its theta.

    For every ``(theta, values)`` pair with more than 60 values, the values
    lying strictly between the smallest and largest edge of a first
    histogram are re-histogrammed with ``int(len(values) / 60.)`` bins, and
    ``(theta, hist, bins)`` is appended to the result. For smaller samples
    only their length is printed.

    NOTE(review): the earlier code tried to trim the data to the run of
    non-empty bins around the histogram peak, but it called ``.index`` on a
    NumPy array. That always raised and was swallowed by a bare ``except``,
    so the cuts stayed at the first and last edge and the loop ran exactly
    once. This version keeps that effective behaviour; confirm whether the
    iterative trimming should be implemented for real.
    """
    theta_histogram_bins_list = []
    for theta, d0_resolution_datum in zip(theta_bins, d0_resolution_data):
        n_values = len(d0_resolution_datum)
        if n_values <= 60:
            print(n_values)
            continue

        n_bins = int(n_values / 60.)
        _, bins = np.histogram(d0_resolution_datum, n_bins)
        lower, upper = bins[0], bins[-1]
        # A list (not a lazy filter object) so np.histogram also accepts it
        # on Python 3.
        trimmed = [x for x in d0_resolution_datum if lower < x < upper]
        hist, bins = np.histogram(trimmed, n_bins)
        theta_histogram_bins_list.append((theta, hist, bins))
    return theta_histogram_bins_list
开发者ID:Bristol-SiD-Development,项目名称:scripts,代码行数:29,代码来源:pylcioPrintD0ResolutionTheta.py
示例16: visualize_performance
def visualize_performance(self):
    """Plot the distributions of the intra- and inter-class comparison scores.

    Forwards labels (1 for intra, -1 for inter) and the concatenated scores
    to ``_common_visualize_performance``. It then draws a box plot of both
    score sets, saved as ``comparison_score_distribution.pdf``, and a
    normalised 50-bin histogram of each set over their common range.
    """
    intra = self._intra
    inter = self._inter

    labels = [1]*len(intra) + [-1]*len(inter)
    scores = intra+inter

    self._common_visualize_performance(labels, scores)

    plt.figure()
    plt.boxplot([intra, inter])
    plt.xticks([1, 2], ['intra', 'inter'])
    plt.title('Distribution of scores')
    plt.savefig('comparison_score_distribution.pdf')

    plt.figure()
    # Use the builtin min/max to combine the two extremes: np.min(a, b)
    # would interpret `b` as the `axis` argument.
    start = min(np.min(intra), np.min(inter))
    end = max(np.max(intra), np.max(inter))
    intra_hist, intra_bin = np.histogram(intra, 50, (start, end))
    inter_hist, inter_bin = np.histogram(inter, 50, (start, end))

    plt.plot(intra_bin[:-1], intra_hist/float(intra_hist.sum()), label='intra', color='blue')
    plt.plot(inter_bin[:-1], inter_hist/float(inter_hist.sum()), label='inter', color='red')
    plt.legend()
    plt.xlabel('Comparison scores')
    plt.ylabel('Probability')
    plt.title('Score distribution')
开发者ID:cbib,项目名称:SuperClass,代码行数:29,代码来源:classify.py
示例17: colour_hist
def colour_hist(data, cidx, spikes):
    """Plot per-parameter-value histograms of three error measures.

    Configurations in ``data`` are grouped by the value of their
    ``cidx``-th entry. A configuration is skipped when "it has OU spikes"
    XOR ``spikes`` is true. For every group a 5-bin density histogram of the
    ``sd``, ``md`` (x1e3) and rms/sq (x1e3) values is drawn into its own
    named figure. Each figure is then labelled, saved as a PDF and cleared.
    """
    pn = param_names[cidx]
    un = param_units[cidx]
    grouped_sd = {}
    grouped_md = {}
    grouped_rms = {}
    maxsd = 0
    maxmd = 0
    maxsq = 0
    for config in data:
        k = config[cidx]
        if ((len(data[config]["OU"]["spikes"]) > 0) ^ spikes):
            continue
        if k in grouped_sd:
            grouped_sd[k].append(data[config]["sd"])
            grouped_md[k].append(data[config]["md"]*1e3)
            grouped_rms[k].append(data[config]["rms"]*1e3)
        else:
            grouped_sd[k] = [data[config]["sd"]]
            grouped_md[k] = [data[config]["md"]*1e3]
            # NOTE(review): this branch reads "sq" while the append branch
            # reads "rms"; left untouched because the intended key cannot be
            # determined from here -- confirm against the data producer.
            grouped_rms[k] = [data[config]["sq"]*1e3]
        # Each running maximum is compared with ITSELF. The previous code
        # compared md and sq against maxsd, so they were not running maxima.
        maxsd = max(maxsd, data[config]["sd"])
        maxmd = max(maxmd, data[config]["md"]*1e3)
        maxsq = max(maxsq, data[config]["sq"]*1e3)

    histograms = (
        ("Spike distance histogram", grouped_sd, maxsd),
        ("Max difference histogram", grouped_md, maxmd),
        ("RMSE histogram", grouped_rms, maxsq),
    )
    for k in sorted(grouped_sd):
        for figure_name, grouped, upper in histograms:
            plt.figure(figure_name)
            # density=True replaces the removed normed=True; they agree for
            # the equal-width bins used here.
            y, x = np.histogram(grouped[k], bins=np.linspace(0, upper, 6), density=True)
            plt.plot(x[:-1], y, label="{} = {}".format(pn, display_in_unit(k, un)))

    sx = "spks" if spikes else "nspk"
    figures = (
        ("Spike distance histogram", "SPIKE-distance", "sdhist"),
        ("Max difference histogram", "Maximum difference (mV)", "mdhist"),
        ("RMSE histogram", "Root mean squared error (mV)", "rmshist"),
    )
    for figure_name, xlabel, prefix in figures:
        plt.figure(figure_name)
        plt.legend()
        plt.xlabel(xlabel)
        plt.ylabel("Number of samples")
        plt.savefig("{}_{}_{}.pdf".format(prefix, pn.replace("$",""), sx))
        plt.clf()
开发者ID:achilleas-k,项目名称:ou-lif-brian,代码行数:60,代码来源:print_stats.py
示例18: limitingMag
def limitingMag(self, raftId, ccdId):
    """Return the magnitude at which the detected fraction drops to 0.5.

    When ``hasMinuit`` is true, ``limitingMagMinuit`` is tried first, and any
    ordinary exception it raises falls through to the estimate below.

    The fallback histograms all stars (matched + blended + undetected) and
    the found stars (matched + blended) on ``self.bins``. It forms the found
    fraction in every populated bin, appends a zero one bin width beyond the
    last populated bin, and scans from the faint end for the first crossing
    of 0.5, which it interpolates linearly.

    Returns 0.0 when there is no populated bin or no crossing is found.
    """
    if hasMinuit:
        try:
            return self.limitingMagMinuit(raftId, ccdId)
        except Exception:
            # Best-effort: fall back to the histogram estimate, without
            # swallowing KeyboardInterrupt/SystemExit as a bare except did.
            pass

    matchedStar = num.array(self.matchedStar.get(raftId, ccdId))
    blendedStar = num.array(self.blendedStar.get(raftId, ccdId))
    undetectedStar = num.array(self.undetectedStar.get(raftId, ccdId))

    allStars = num.concatenate((matchedStar, blendedStar, undetectedStar))
    foundStars = num.concatenate((matchedStar, blendedStar))

    histAll = num.histogram(allStars, bins=self.bins)
    histFound = num.histogram(foundStars, bins=self.bins)

    magbins = 0.5 * (histAll[1][1:] + histAll[1][:-1])
    w = num.where(histAll[0] != 0)
    x = magbins[w]
    if len(x) == 0:
        # No stars in any bin: nothing to interpolate (x[-1] would raise).
        return 0.0
    n = 1.0 * histFound[0][w]
    d = 1.0 * histAll[0][w]
    y = n / d

    binsize = self.bins[1] - self.bins[0]
    x = num.append(x, x[-1] + binsize)
    y = num.append(y, 0.0)

    # NOTE(review): the scan stops at i == 2, so the pair (y[1], y[0]) is
    # never tested; kept as in the original -- confirm whether that is
    # intended.
    for i in num.arange(len(y) - 1, 1, -1):
        if y[i] <= 0.5 and y[i-1] > 0.5:
            return (0.5 - y[i-1]) / (y[i] - y[i-1]) * (x[i] - x[i-1]) + x[i-1]
    return 0.0
开发者ID:HyperSuprime-Cam,项目名称:testing_pipeQA,代码行数:31,代码来源:CompletenessQaTask.py
示例19: mark_lalo_anomoly
def mark_lalo_anomoly(lat, lon):
    """Mask pixels with abnormal values (0, etc.) in a lat/lon lookup table.

    Pixels where either array is zero are masked first. Then, for each array
    in turn, while its 10-bin histogram has a bin holding more than 30% of
    the non-zero pixel count, the mask is narrowed to the value range of the
    connected run of above-threshold bins that contains the largest bin,
    padded by half a bin width on each side.

    Masked pixels are overwritten in place with lat = 90 and lon = 0.

    Returns
    -------
    lat, lon, mask
        The modified input arrays and the boolean mask (True = kept).
    """
    # Ignore pixels with zero value.
    zero_mask = (lat != 0.) & (lon != 0.)
    # A single dominant bin above this count signals an anomaly.
    crowded_limit = np.sum(zero_mask) * 0.3

    # Ignore anomalous non-zero values by finding the most common data range
    # (d_min, d_max) from the histogram.
    mask = np.array(zero_mask, np.bool_)
    for data in (lat, lon):
        bin_value, bin_edge = np.histogram(data[mask], bins=10)
        # If there is an anomaly, the histogram won't be evenly distributed.
        while np.max(bin_value) > crowded_limit:
            # Find the contiguous bins around the largest bin: the normal
            # data range.
            threshold = ut.median_abs_deviation_threshold(bin_value, cutoff=3)
            bin_label = ndimage.label(bin_value > threshold)[0]
            peak_label = bin_label[np.argmax(bin_value)]
            idx = np.flatnonzero(bin_label == peak_label)

            # Convert to min/max data value.
            half_step = (bin_edge[1] - bin_edge[0]) / 2.
            d_min = bin_edge[idx[0]] - half_step
            d_max = bin_edge[idx[-1]+1] + half_step
            mask &= (data >= d_min) & (data <= d_max)
            bin_value, bin_edge = np.histogram(data[mask], bins=10)

    rejected = ~mask
    lat[rejected] = 90.
    lon[rejected] = 0.
    return lat, lon, mask
开发者ID:hfattahi,项目名称:PySAR,代码行数:27,代码来源:resample.py
示例20: get_grids
def get_grids(train, test, outputFile = False, train_output = None, test_output = None, n = 10, m = 10, x = 'x', y = 'y'):
    """Split train and test frames into an n-by-m grid of x/y cells.

    ``train`` and ``test`` may be DataFrames or CSV paths. The bin edges come
    from ``n`` (x) and ``m`` (y) equal-width histogram bins of the TRAINING
    columns only, which assumes the test set is a subset of the training set
    in terms of x and y coordinates. Rows are first split into x bars and
    each bar is then cut along y.

    When ``outputFile`` is true every cell is written to
    ``<train_output>/train_x<i>_y<j>.csv`` or
    ``<test_output>/test_x<i>_y<j>.csv``, using the first two items of each
    dictionary key as ``i`` and ``j``.

    Returns
    -------
    (trainDict, testDict)
        The dictionaries produced by ``cut_y_bars_in_x_bar``.
    """
    # `basestring` only exists on Python 2; fall back to `str` on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(train, string_types):
        train = pd.read_csv(train)
    if isinstance(test, string_types):
        test = pd.read_csv(test)

    # Cutoff values for the x and y axes, from the training set ONLY.
    x_count, x_cutoff = np.histogram(train[x], bins = n)
    y_count, y_cutoff = np.histogram(train[y], bins = m)

    # Transform cutoff values into step-wise (floor, ceiling) tuples.
    x_bin_tuple = [(floor, ceiling) for floor, ceiling in pairwise(x_cutoff)]
    y_bin_tuple = [(floor, ceiling) for floor, ceiling in pairwise(y_cutoff)]

    # List of N bars based on x values, for train and test.
    train_x_splits = split_df_rows_on_col_ranges(train, x, x_bin_tuple)
    test_x_splits = split_df_rows_on_col_ranges(test, x, x_bin_tuple)

    # Within each of the N x bars there are M splits on y; each is one cell.
    trainDict = cut_y_bars_in_x_bar(train_x_splits, y, y_bin_tuple)
    testDict = cut_y_bars_in_x_bar(test_x_splits, y, y_bin_tuple)

    if outputFile:
        for prefix, grids, out_dir in (('train_', trainDict, train_output),
                                       ('test_', testDict, test_output)):
            for key in grids:
                filename = prefix + 'x' + str(key[0]) + '_y' + str(key[1]) + '.csv'
                fullpath = os.path.join(out_dir, filename)
                grids[key].to_csv(fullpath, index = False)
    return (trainDict, testDict)
开发者ID:howardx,项目名称:FacebookV,代码行数:32,代码来源:fb_split_grid.py
注:本文中的numpy.histogram函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论