This article collects and summarizes typical usage examples of the numpy.recfromcsv function in Python. If you have been wondering what exactly numpy.recfromcsv does, how to call it, or what real-world uses look like, the hand-picked examples below may help.
The sections that follow present 20 code examples of recfromcsv, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
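Before the examples, a minimal sketch of the function itself (the in-memory CSV and its column names here are made up for illustration): recfromcsv is a thin convenience wrapper around numpy.genfromtxt that defaults to delimiter=",", names=True, dtype=None and lower-cased field names, and returns a record array. Newer NumPy releases deprecate and remove recfromcsv, so genfromtxt with those keyword arguments is the forward-compatible spelling.

import io
import numpy as np

# Hypothetical in-memory CSV standing in for a file on disk.
csv_text = io.StringIO("Name,Score,Passed\nalice,9.5,1\nbob,7.0,0")

# Roughly equivalent to np.genfromtxt(csv_text, delimiter=",", names=True,
# dtype=None, case_sensitive="lower"), viewed as a np.recarray.
records = np.recfromcsv(csv_text)

print(records.dtype.names)                       # ('name', 'score', 'passed')
print(records["score"].mean())                   # field access by lower-cased column name
print(records[records["passed"] == 1]["name"])   # boolean-mask row filtering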
Example 1: __init__
def __init__(self, tasks_roots, domain):
    """Initialize."""
    # scan for CSV files
    train_paths = []
    for tasks_root in tasks_roots:
        train_paths.extend(cargo.files_under(tasks_root, domain.extensions))
    logger.info("using %i tasks for training", len(train_paths))
    # fetch training data from each file
    self._run_lists = {}
    self._feature_vectors = {}
    for path in train_paths:
        # load run records
        run_data = numpy.recfromcsv("{0}.runs.csv".format(path), usemask=True)
        run_list = []
        for (run_solver, run_budget, run_cost, run_succeeded, run_answer) in run_data.tolist():
            record = RunRecord(run_solver, run_budget, run_cost, run_succeeded)
            run_list.append(record)
        self._run_lists[path] = run_list
        # load feature data
        feature_vector = numpy.recfromcsv("{0}.features.csv".format(path)).tolist()
        self._feature_vectors[path] = feature_vector
Author: buhman, Project: borg, Lines of code: 31, Source file: storage.py
Example 2: sort_battles
def sort_battles(self, results_filename='csv/mz_results_boulders.csv',
                 images_filename='csv/mz_images_boulders.csv',
                 out_filename='csv/mz_boulders_rank.csv'):
    p = np.recfromcsv(images_filename, names=True)
    objid = p.field('id')
    rank = np.zeros(objid.shape, np.int) - 1
    fracrank = np.zeros(objid.shape) - 1
    battles = np.recfromcsv(results_filename, names=True)
    # currently does not do anything with inconclusive battles
    battles = battles[battles.field('winner') > 0]
    first = battles['first_asset_id']
    second = battles['second_asset_id']
    winner = battles['winner']
    w = np.where(winner == 1, first, second)
    l = np.where(winner == 1, second, first)
    competitors = np.unique(np.concatenate((w, l)))
    self.competitors = self._asarray(competitors)
    self.winners = self._asarray(w)
    self.losers = self._asarray(l)
    self._consistency_check()
    self._setup_internal_variables()
    print('ncomp = %i, nwars = %i' % (self.ncomp, self.nwars))
    self.iterate()
    for r, id in enumerate(self.ranking):
        idx = (objid == id).nonzero()[0]
        if len(idx) < 1:
            print('Could not find objid match for id={}, rank={}'.format(id, r))
            continue  # skip unmatched ids; idx[0] below would raise IndexError otherwise
        idx = idx[0]
        rank[idx] = r
        fracrank[idx] = float(r) / self.ncomp
    np.savetxt(out_filename, np.asarray((objid, rank, fracrank)).T,
               fmt='%d,%d,%.3f',
               header=("objid,rank,fracrank"))
Author: zooniverse, Project: Moon-Zoo-Reduction, Lines of code: 33, Source file: boulder_wars_sort.py
Example 3: otherfunc
def otherfunc(roifiles, subjects):
    import numpy as np
    from matplotlib.mlab import rec2csv
    import os
    first = np.recfromcsv(roifiles[0])
    numcons = len(first.dtype.names) - 1
    roinames = ["subject_id"] + first["roi"].tolist()
    formats = ["a20"] + ["f4" for f in roinames[1:]]
    confiles = []
    for con in range(0, numcons):
        recarray = np.zeros(len(roifiles), dtype={"names": roinames, "formats": formats})
        for i, file in enumerate(roifiles):
            recfile = np.recfromcsv(file)
            recarray["subject_id"][i] = subjects[i]
            for roi in roinames[1:]:
                value = recfile["con%02d" % (con + 1)][recfile["roi"] == roi]
                if value:
                    recarray[roi][i] = value
                else:
                    recarray[roi][i] = 999
        filename = os.path.abspath("grouped_con%02d.csv" % (con + 1))
        rec2csv(recarray, filename)
        confiles.append(filename)
    return confiles
Author: INCF, Project: BrainImagingPipelines, Lines of code: 25, Source file: group_segstats.py
Example 4: get_regressors
def get_regressors(csv, ids):
    import numpy as np
    if csv == '':
        return None
    reg = {}
    design = np.recfromcsv(csv)
    design_str = np.recfromcsv(csv, dtype=str)
    names = design_str.dtype.names
    csv_ids = []
    for i in design_str["id"]:
        csv_ids.append(str(i))
    csv_ids = np.asarray(csv_ids)
    for n in names:
        if not n == "id":
            reg[n] = []
    for sub in ids:
        if sub in csv_ids:
            for key in reg.keys():
                reg[key].append(design[key][csv_ids == sub][0])
        else:
            raise Exception("%s is missing from the CSV file!" % sub)
    cov = []
    for key, item in reg.iteritems():
        cov.append({'name': key, 'vector': item, 'centering': 0})
    print cov
    return cov
Author: INCF, Project: BrainImagingPipelines, Lines of code: 26, Source file: spm_group_analysis.py
Example 5: from_paths
def from_paths(solver_names, task_paths, domain, suffix=".runs.csv"):
    """Collect run data from task paths."""
    training = RunData(solver_names)
    for path in task_paths:
        # load run records
        run_data = numpy.recfromcsv(path + suffix, usemask=True)
        rows = run_data.tolist()
        if run_data.shape == ():
            rows = [rows]
        for (run_solver, run_budget, run_cost, run_succeeded, run_answer) in rows:
            record = RunRecord(run_solver, run_budget, run_cost, run_succeeded)
            training.add_run(path, record)
        # load feature data
        feature_records = numpy.recfromcsv("{0}.features.csv".format(path))
        feature_dict = dict(zip(feature_records.dtype.names, feature_records.tolist()))
        training.add_feature_vector(path, feature_dict)
    return training
Author: smhjn, Project: borg, Lines of code: 25, Source file: storage.py
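One detail worth calling out in from_paths: when the runs CSV holds a single data row, recfromcsv can hand back a 0-d record array, so .tolist() yields one tuple rather than a list of tuples; the shape == () check above normalizes that. A small sketch of the same guard, with made-up column names:

import io
import numpy as np

# Hypothetical one-row runs file.
run_data = np.recfromcsv(io.StringIO("solver,cost\nminisat,1.5"), usemask=True)

rows = run_data.tolist()
if run_data.shape == ():   # 0-d result: tolist() returned a single tuple
    rows = [rows]

for solver, cost in rows:  # now safe to iterate whether the file had 1 or N rows
    print(solver, cost)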
Example 6: test_recfromcsv
def test_recfromcsv(self):
    #
    data = StringIO.StringIO('A,B\n0,1\n2,3')
    test = np.recfromcsv(data, missing='N/A',
                         names=True, case_sensitive=True)
    control = np.array([(0, 1), (2, 3)],
                       dtype=[('A', np.int), ('B', np.int)])
    self.failUnless(isinstance(test, np.recarray))
    assert_equal(test, control)
    #
    data = StringIO.StringIO('A,B\n0,1\n2,N/A')
    test = np.recfromcsv(data, dtype=None, missing='N/A',
                         names=True, case_sensitive=True, usemask=True)
    control = ma.array([(0, 1), (2, -1)],
                       mask=[(False, False), (False, True)],
                       dtype=[('A', np.int), ('B', np.int)])
    assert_equal(test, control)
    assert_equal(test.mask, control.mask)
    assert_equal(test.A, [0, 2])
    #
    data = StringIO.StringIO('A,B\n0,1\n2,3')
    test = np.recfromcsv(data, missing='N/A',)
    control = np.array([(0, 1), (2, 3)],
                       dtype=[('a', np.int), ('b', np.int)])
    self.failUnless(isinstance(test, np.recarray))
    assert_equal(test, control)
Author: GunioRobot, Project: numpy-refactor, Lines of code: 26, Source file: test_io.py
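The missing='N/A' keyword in this test comes from an older NumPy API; current releases spell it missing_values (optionally combined with usemask=True for a masked result, or filling_values for a substitute). A minimal sketch of the modern equivalent, assuming a NumPy version that still ships recfromcsv:

import io
import numpy as np
import numpy.ma as ma

data = io.StringIO("A,B\n0,1\n2,N/A")

# missing_values replaces the removed `missing` keyword; usemask=True returns a
# masked record array instead of substituting a fill value for the N/A cell.
test = np.recfromcsv(data, missing_values="N/A", names=True,
                     case_sensitive=True, usemask=True)

print(test["A"])                    # [0 2]
print(ma.getmaskarray(test["B"]))   # [False  True] -- the N/A cell is masked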
Example 7: get_regressors
def get_regressors(csv, ids):
    import numpy as np
    if csv == '':
        return None
    reg = {}
    design = np.recfromcsv(csv)
    design_str = np.recfromcsv(csv, dtype=str)
    names = design_str.dtype.names
    csv_ids = []
    for i in design_str["id"]:
        csv_ids.append(str(i))
    csv_ids = np.asarray(csv_ids)
    for n in names:
        if not n == "id":
            reg[n] = []
    for sub in ids:
        if sub in csv_ids:
            for key in reg.keys():
                reg[key].append(design[key][csv_ids == sub][0])
        else:
            raise Exception("%s is missing from the CSV file!" % sub)
    if 'group' in names:
        data = np.asarray(reg['group'])
        vals = np.unique(data)
        for i, v in enumerate(vals):
            data[data == v] = i + 1
        group = data.astype(int).tolist()
        reg.pop('group')
    else:
        group = [1] * len(reg[names[-1]])
    return reg, group
Author: INCF, Project: BrainImagingPipelines, Lines of code: 32, Source file: fsl_multiple_regression.py
Example 8: compare
def compare(fileA, fileB):
    mooseData = np.recfromcsv(fileA, delimiter=',')
    nrnData = np.recfromcsv(fileB, delimiter=',')
    mooseData = zip(*mooseData)
    nrnData = zip(*nrnData)
    print mooseData[0]
    pylab.plot([1e3 * x for x in mooseData[0]], [1e3 * x for x in mooseData[1]]
               , label='moose')
    pylab.plot(nrnData[0], nrnData[1],
               label='neuron')
    #pylab.plot(mooseData)
    #pylab.plot(nrnData)
    pylab.show()
Author: dilawar, Project: Scripts, Lines of code: 13, Source file: compare.py
Example 9: np_combine_csv_files
def np_combine_csv_files(csvpaths, verbose=False):
    """Combine a collection of CSV files into a single numpy record
    array. Can take a while! CSV files with different fields
    (different headers, different number of fields) are merged
    together correctly; data type inference and promotion takes a
    while.
    Treats the first line as a header and uses it to name the fields.
    Giving it files without headers will cause weird things to
    happen.
    Arguments:
        csvpaths: List of text files to read into the array
    Returns: numpy.recarray
    """
    big_csv = numpy.recfromcsv(
        open(csvpaths[0]), case_sensitive=True, deletechars='',
        replace_space=' ', autostrip=True
    )
    if 'File ID' not in big_csv.dtype.names and big_csv['Input'].size > 1:
        big_csv = numpy.lib.recfunctions.append_fields(
            big_csv, 'File ID',
            [os.path.splitext(os.path.basename(x))[0]
             for x in big_csv['Input']],
            usemask=False, asrecarray=True
        )
    for i, csvpath in enumerate(csvpaths[1:]):
        csv_arr = numpy.recfromcsv(
            open(csvpath), case_sensitive=True, deletechars='',
            replace_space=' ', autostrip=True
        )
        if 'File ID' not in csv_arr.dtype.names and csv_arr['Input'].size > 1:
            csv_arr = numpy.lib.recfunctions.append_fields(
                csv_arr, 'File ID',
                [os.path.splitext(os.path.basename(x))[0]
                 for x in csv_arr['Input']],
                usemask=False, asrecarray=True
            )
        for field_name in csv_arr.dtype.names:
            if field_name not in big_csv.dtype.names:
                big_csv = numpy.lib.recfunctions.append_fields(
                    big_csv, field_name, [], usemask=False, asrecarray=True
                )
        big_csv = numpy.lib.recfunctions.stack_arrays(
            (big_csv, csv_arr), usemask=False, asrecarray=True,
            autoconvert=True
        )
        if verbose:
            print('Loaded %d/%d files' % (i + 1, len(csvpaths)), end='\r')
    return big_csv
Author: erinaceous, Project: shadows, Lines of code: 51, Source file: graph.py
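The merging above ultimately relies on numpy.lib.recfunctions.stack_arrays, which can stack record arrays whose dtypes only partially overlap, masking (or filling) the fields an input lacks. A minimal sketch on two hypothetical arrays, independent of the project's CSV layout:

import numpy as np
from numpy.lib import recfunctions as rfn

a = np.array([(1, 2.0)], dtype=[("x", int), ("y", float)])
b = np.array([(3, "spam")], dtype=[("x", int), ("label", "U8")])

# The stacked result carries the union of the fields; entries missing from one
# input are masked.
merged = rfn.stack_arrays((a, b), usemask=True, autoconvert=True)
print(merged.dtype.names)   # ('x', 'y', 'label')
print(merged)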
Example 10: new_tables
def new_tables():
    sns.set_context("paper", font_scale=font_scale, rc={"lines.linewidth": 2.5})
    fig, ax = plt.subplots(1)
    with open('../results/sdss/query_number_num_new_tables.csv') as f:
        data = np.recfromcsv(f)
    c = data['num_new_tables'].astype(float)
    c /= sum(c)
    q = data['query_number'].astype(float)
    q /= q[-1]
    ax.plot(q, np.cumsum(c), label="SDSS", color=colors['sdss'], linewidth=2, drawstyle='steps-post')
    # ax.scatter(q[0: -1], np.cumsum(c)[0: -1], color=colors['sdss'], marker="o", s=50, alpha=.7)
    with open('../results/tpch/query_number_num_new_tables.csv') as f:
        data = np.recfromcsv(f)
    c = data['num_new_tables'].astype(float)
    c /= sum(c)
    q = data['query_number'].astype(float)
    q /= q[-1]
    ax.plot(q, np.cumsum(c), label="TPC-H", color=colors['tpch'], linewidth=2, drawstyle='steps-post')
    # ax.scatter(q[0: -1], np.cumsum(c)[0: -1], color=colors['tpch'], marker="o", s=50, alpha=.7)
    # sns.rugplot([0.1, 0.2, 10, 100], ax=ax)
    with open('../results/sqlshare/table_coverage.csv') as f:
        data = np.recfromcsv(f)
    c = data['tables'].astype(float)
    c /= c[-1]
    q = data['query_id'].astype(float)
    q /= q[-1]
    ax.plot(q, c, label="SQLShare", color=colors['sqlshare'], linewidth=2, drawstyle='steps-post')
    # ax.scatter(q[0: -1], c[0: -1], color=colors['sqlshare'], marker="o", s=20, alpha=.01)
    ax.yaxis.set_major_formatter(formatter)
    ax.xaxis.set_major_formatter(formatter)
    plt.title("CDF of new tables")
    ax.set_xlabel('\% of queries')
    ax.set_ylabel('\% of newly used table')
    ax.set_ylim(0, 1.01)
    ax.set_xlim(-0.01, 1)
    ax.title.set_position((ax.title._x, 1.04))
    plt.legend(loc=4)
    plt.tight_layout()
    plt.savefig(root_path + 'plot_table_coverage.eps', format='eps')
Author: uwescience, Project: query-workload-analysis, Lines of code: 50, Source file: new_plot.py
Example 11: sort_results_csv
def sort_results_csv(input_file='../../results/baseline_classifier_results.csv', output_file=''):
    """
    Sorts the results CSV file and writes it back (to the same file unless output_file is given).
    Sorts on classifier name first (1st column), then on features (6th column).
    """
    if output_file == '':
        output_file = input_file
    # import the header first
    with open(input_file, 'r') as f:
        header = f.readline()
    # load the csv into a table (automatically with correct datatypes)
    table = np.recfromcsv(input_file, delimiter=',')
    # only sort if we have more than one element (to prevent bugs)
    if np.size(table) > 1:
        # sort on features
        table = sorted(table, key=lambda tup: tup[5])
        # sort on classifier
        table = sorted(table, key=lambda tup: tup[0])
    # store sorted file
    with open(output_file, 'w') as fd:
        fd.write(header)
        [fd.write(settings_to_string(tup[0], tup[1], tup[2], tup[3], tup[4], tup[5], tup[6], tup[7]) + "\n") for tup in table]
Author: HarrieO, Project: Natural-Language-Processing-1, Lines of code: 27, Source file: baseline.py
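The two sorted() calls above exploit Python's stable sort to get "classifier first, then features" ordering. When the data stays in a structured array, numpy.sort can do the same in one call via its order argument; a sketch with hypothetical field names and values:

import numpy as np

table = np.array([("svm", 0.7, "tfidf"), ("nb", 0.6, "bow"), ("svm", 0.8, "bow")],
                 dtype=[("classifier", "U10"), ("accuracy", float), ("features", "U10")])

# Primary key first, tie-breaker second -- no double pass needed.
table_sorted = np.sort(table, order=["classifier", "features"])
print(table_sorted)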
Example 12: main
def main():
    filename = '../../dataset/sea_dataset/normalized_sea.csv'
    data = np.recfromcsv(filename)
    data_tuplelist = data.tolist()
    data_list = [list(i) for i in data_tuplelist]
    nop = 100
    nod = shape(data_list)[1]
    print nod
    sigmai = [0.1] * nod
    chunk_size = 50
    old_index = np.random.normal(loc=0, scale=math.pow(sigmai[1], 1), size=(nop, nod))
    old_param = np.random.normal(loc=0, scale=sigmai[1], size=(1, nod))
    # print old_param
    # print old_index
    chunk_accuracy_list = []
    for i in range(0, 60000, chunk_size):
        print i
        chunk_data = data_list[i:i + chunk_size]
        chunk_data = [[1] + x for x in chunk_data]
        [chunk_params, current_parameters] = compute_chunk_n(chunk_data, nop, sigmai, old_param, old_index)
        # print chunk_params
        # print 'gg'
        # print current_parameters
        old_param = [chunk_params]
        old_index = current_parameters
        # print old_param
        # print current_parameters
        # print chunk_params
        # print chunk_params
        chunk_accuracy_list.append(compute_accuracy(chunk_data, chunk_params))
    plot_accuracy(chunk_accuracy_list)
Author: asp188, Project: Incremental-Classification, Lines of code: 34, Source file: mainalgo.py
Example 13: compute_features
def compute_features(self, task, cpu_seconds=None):
    """Read or compute features of an instance."""
    # grab precomputed feature data
    csv_path = task + ".features.csv"
    assert os.path.exists(csv_path)
    features_array = numpy.recfromcsv(csv_path)
    features = features_array.tolist()
    names = features_array.dtype.names
    # accumulate their cost
    assert names[0] == "cpu_cost"
    cpu_cost = features[0]
    borg.get_accountant().charge_cpu(cpu_cost)
    # handle timeout logic, and we're done
    if cpu_seconds is not None:
        if cpu_cost >= cpu_seconds:
            return (["cpu_cost"], [cpu_seconds])
        else:
            assert len(names) > 1
    return (names, features)
Author: buhman, Project: borg, Lines of code: 27, Source file: run_validation.py
Example 14: yield_runs
def yield_runs():
    suite = borg.load_solvers(suite_path)
    logger.info("scanning paths under %s", tasks_root)
    paths = list(borg.util.files_under(tasks_root, suite.domain.extensions))
    if not paths:
        raise ValueError("no paths found under specified root")
    if only_solver is None:
        solver_names = suite.solvers.keys()
    else:
        solver_names = [only_solver]
    for path in paths:
        run_data = None
        if only_missing and os.path.exists(path + suffix):
            run_data = numpy.recfromcsv(path + suffix, usemask=True)
        for solver_name in solver_names:
            if only_missing and run_data is not None:
                count = max(0, runs - numpy.sum(run_data.solver == solver_name))
            else:
                count = runs
            logger.info("scheduling %i run(s) of %s on %s", count, solver_name, os.path.basename(path))
            for _ in xrange(count):
                seed = numpy.random.randint(sys.maxint)
                yield (run_solver_on, [suite_path, solver_name, path, budget, store_answers, seed])
Author: smhjn, Project: borg, Lines of code: 33, Source file: run_solvers.py
Example 15: fetch_coords_dosenbach_2010
def fetch_coords_dosenbach_2010():
    """Load the Dosenbach et al. 160 ROIs. These ROIs cover
    much of the cerebral cortex and cerebellum and are assigned to 6
    networks.
    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, contains:
        - "rois": coordinates of 160 ROIs in MNI space
        - "labels": ROIs labels
        - "networks": networks names
    References
    ----------
    Dosenbach N.U., Nardos B., et al. "Prediction of individual brain maturity
    using fMRI.", 2010, Science 329, 1358-1361.
    """
    dataset_name = 'dosenbach_2010'
    fdescr = _get_dataset_descr(dataset_name)
    package_directory = os.path.dirname(os.path.abspath(__file__))
    csv = os.path.join(package_directory, "data", "dosenbach_2010.csv")
    out_csv = np.recfromcsv(csv)
    # We add the ROI number to its name, since names are not unique
    names = out_csv['name']
    numbers = out_csv['number']
    labels = np.array(['{0} {1}'.format(name, number) for (name, number) in
                       zip(names, numbers)])
    params = dict(rois=out_csv[['x', 'y', 'z']],
                  labels=labels,
                  networks=out_csv['network'], description=fdescr)
    return Bunch(**params)
Author: hanke, Project: nilearn, Lines of code: 34, Source file: atlas.py
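The rois entry above is built with multi-field indexing: indexing a record array with a list of field names keeps just those columns (as a view onto the original data in recent NumPy versions). A small sketch with made-up ROI data:

import io
import numpy as np

rois = np.recfromcsv(io.StringIO(
    "name,x,y,z,network\nroi_a,1,2,3,default\nroi_b,4,5,6,visual"))

# Selecting a subset of fields by name keeps the record structure.
coords = rois[["x", "y", "z"]]
print(coords.dtype.names)   # ('x', 'y', 'z')
print(coords)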
Example 16: selectOnSharpeRatio
def selectOnSharpeRatio(self, ls_symbols, top_n_equities=10):
    ''' Choose the best portfolio over the stock universe,
    according to their sharpe ratio'''
    #TODO: change this to a DataAccess utility --------------
    symbols, files = getAllFromCSV()
    datalength = len(recfromcsv(files[0])['close'])
    print('Datalength: {}'.format(datalength))
    #---------------------------------------------------------
    # Initiating data arrays
    closes = np.recarray((datalength,), dtype=[(symbol, 'float') for symbol in symbols])
    daily_ret = np.recarray((datalength - 1,), dtype=[(symbol, 'float') for symbol in symbols])
    average_returns = np.zeros(len(files))
    return_stdev = np.zeros(len(files))
    sharpe_ratios = np.zeros(len(files))
    cumulative_returns = np.recarray((datalength - 1,), dtype=[(symbol, 'float') for symbol in symbols])
    # Here is the meat
    #TODO: data = dataobj.getData(ls_symbols)
    for i, symbol in enumerate(ls_symbols):
        if len(data) != datalength:
            continue
        print('Processing {} file'.format(file))
        closes[symbols[i]] = data['close'][::-1]
        daily_ret[symbols[i]] = dailyReturns()
        # We now can compute:
        average_returns[i] = daily_ret[symbols[i]].mean()
        return_stdev[i] = daily_ret[symbols[i]].stdev()
        sharpe_ratios[i] = (average_returns[i] / return_stdev[i]) * np.sqrt(datalength)  # compare to course
        print('\tavg: {}, stdev: {}, sharpe ratio: {}'.format(average_returns[i], return_stdev[i], sharpe_ratios[i]))
    sorted_sharpe_indices = np.argsort(sharpe_ratios)[::-1][0:top_n_equities]
    #TODO: return a dict as {symbol: sharpe_ratio}, or a df with all 3 components
    return sorted_sharpe_indices
Author: Mark1988huang, Project: ppQuanTrade, Lines of code: 33, Source file: portfolio.py
Example 17: q3a_pm
def q3a_pm(base_path, csv_fn):
    filename = "".join([base_path, csv_fn])
    names = ['genre', 'gender', 'movies', 'type', 'age']
    my_data = np.recfromcsv(filename, names=['genre', 'gender', 'movies', 'type', 'age'])
    vhs = get_arr_for_col_del(my_data, 'type', names, "VHS")
    dvd = get_arr_for_col_del(my_data, 'type', names, "DVD")
    bluray = get_arr_for_col_del(my_data, 'type', names, "BLURAY")
    names.pop(3)
    vhs_f = get_arr_for_col_del(vhs, 'gender', names, "F")
    vhs_m = get_arr_for_col_del(vhs, 'gender', names, "M")
    dvd_f = get_arr_for_col_del(dvd, 'gender', names, "F")
    dvd_m = get_arr_for_col_del(dvd, 'gender', names, "M")
    bluray_f = get_arr_for_col_del(bluray, 'gender', names, "F")
    bluray_m = get_arr_for_col_del(bluray, 'gender', names, "M")
    plot_q3a(vhs_f, names, 'VHS copies', 'Movie distr. F')
    plot_q3a(vhs_m, names, 'VHS copies', 'Movie distr. M')
    plot_q3a(dvd_f, names, 'DVD copies', 'Movie distr. F')
    plot_q3a(dvd_m, names, 'DVD copies', 'Movie distr. M')
    plot_q3a(bluray_f, names, 'Bluray copies', 'Movie distr. F')
    plot_q3a(bluray_m, names, 'Bluray copies', 'Movie distr. M')
    return
Author: FAB4D, Project: db-integration, Lines of code: 30, Source file: plot_csv_template.py
Example 18: show_predictions
def show_predictions(alpha="alpha", symbol="GE", xtn=".PNG"):
    if type(alpha) == str:
        print("Loading file named " + alpha + ".mat")
        a = mat.loadmat(
            alpha + ".mat", mat_dtype=False
        )  # load a matlab style set of matrices from the file named by the string alpha
        if a.has_key(alpha):
            alpha = a.get(alpha).reshape(-1)  # get the variable with the name of the string in alpha
        else:
            alpha = a.get(a.keys()[2]).reshape(-1)  # get the first non-hidden key and reshape into a 1-D array
    print("Loading financial data for stock symbol", symbol)
    r = np.recfromcsv("/home/hobs/Desktop/References/quant/lyle/data/" + symbol + "_yahoo.csv", skiprows=1)
    r.sort()
    r.high = r.high * r.adj_close / r.close  # adjust the high and low prices for stock splits
    r.low = r.low * r.adj_close / r.close  # adjust the high and low prices for stock splits
    daily_returns = r.adj_close[1:] / r.adj_close[0:-1] - 1
    predictions = lfilt(alpha, daily_returns)
    print(
        "Plotting a scatter plot of",
        len(daily_returns),
        "returns vs",
        len(predictions),
        "predictions using a filter of length",
        len(alpha),
    )
    (ax, fig) = plot(predictions, daily_returns[len(alpha):], s="bo", xtn=".PNG")
    ax.set_xlabel("Predicted Returns")
    ax.set_ylabel("Actual Returns")
    big_mask = np.abs(predictions) > np.std(predictions) * 1.2
    bigs = predictions[big_mask]
    true_bigs = daily_returns[big_mask]
    (ax, fig) = plot(bigs, true_bigs, s="r.", xtn=".PNG")
    fig.show()
    return (predictions, daily_returns, bigs, true_bigs, big_mask)
Author: hobson, Project: tagim, Lines of code: 34, Source file: finance.py
Example 19: graph
def graph():
    # parse(MY_FILE, ",")
    data = np.recfromcsv('../data/crabs.csv')
    trans = []
    itrans = []
    x = []
    i = 1
    for row in data:
        trans.append(row['trans'])
        itrans.append(row['itrans'])
        x.append(i)
        i += 1
    # create the figure
    fig = plt.figure(figsize=(7, 3))
    # create a grid of 1 row and 1 column for the plot
    # gs = mpl.gridspec.GridSpec(1, 1)
    # put a plot in the first row, first column
    # ax = fig.add_subplots(gs[0])
    plt.title('transVSitrans')
    plt.plot(x, trans, color='red')
    plt.plot(x, itrans, color='blue')
    fig.savefig('transVSitrans.png')
Author: susanjiang03, Project: Terrestrial_Hydrology_Visualization, Lines of code: 25, Source file: transVSitrans.py
Example 20: fetch_abide
def fetch_abide(data_dir=None, verbose=0, **kwargs):
    """
    """
    exclude_ids = ['UM_1_0050289', 'Yale_0050571', 'KKI_0050822',
                   'SDSU_0050204', 'CMU_a_0050664']
    strategy = 'nofilt_noglobal'
    pipeline = 'cpac'
    dataset_name = 'ABIDE_pcp'
    csv = 'Phenotypic_V1_0b_preprocessed1.csv'
    kwargs['qc_rater_1'] = b'OK'
    kwargs['qc_anat_rater_2'] = [b'OK', b'maybe']
    kwargs['qc_func_rater_2'] = [b'OK', b'maybe']
    kwargs['qc_anat_rater_3'] = b'OK'
    kwargs['qc_func_rater_3'] = b'OK'
    path_csv = os.path.join(data_dir, dataset_name, csv)
    with open(path_csv, 'r') as pheno_f:
        pheno = ['i' + pheno_f.readline()]
        for line in pheno_f:
            pheno.append(re.sub(r',(?=[^"]*"(?:[^"]*"[^"]*")*[^"]*$)', ";", line))
    # bytes (encode()) needed for python 2/3 compat with numpy
    pheno = '\n'.join(pheno).encode()
    pheno = BytesIO(pheno)
    pheno = np.recfromcsv(pheno, comments='$', case_sensitive=True)
    # First, filter subjects with no filename
    pheno = pheno[pheno['FILE_ID'] != b'no_filename']
    # Apply user defined filters
    user_filter = datasets.utils._filter_columns(pheno, kwargs)
    pheno = pheno[user_filter]
    for id_ in exclude_ids:
        pheno = pheno[pheno['FILE_ID'] != id_]
    data_dir = os.path.join(data_dir, dataset_name, pipeline, strategy)
    results = {}
    file_ids = [file_id.decode() for file_id in pheno['FILE_ID']]
    ext = '.nii.gz'
    derivative = 'func_preproc'
    files = []
    for file_id in file_ids:
        file_ = (file_id + '_' + derivative + ext)
        check_file = os.path.join(data_dir, file_)
        if os.path.isfile(check_file):
            files.append(check_file)
        else:
            print("File is missing %s" % file_)
    results['phenotypic'] = pheno
    results[derivative] = files
    return Bunch(**results)
Author: KamalakerDadi, Project: Data-Processing, Lines of code: 60, Source file: datasets.py
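A note on the b'OK' and b'no_filename' literals above: in the NumPy versions these snippets target, recfromcsv (via genfromtxt's default bytes encoding) loads text columns as byte strings under Python 3, so filters must compare against bytes; alternatively, passing encoding='utf-8' through to genfromtxt yields ordinary str fields. A small sketch with made-up phenotype rows:

import io
import numpy as np

pheno = np.recfromcsv(io.StringIO("FILE_ID,SITE\nno_filename,KKI\nKKI_0050822,KKI"),
                      case_sensitive=True)

# Text fields come back as bytes by default, so the comparison uses a bytes literal.
kept = pheno[pheno["FILE_ID"] != b"no_filename"]
print(kept["FILE_ID"])      # [b'KKI_0050822']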
Note: The numpy.recfromcsv examples in this article were compiled by 纯净天空 from open-source projects hosted on GitHub, MSDocs, and similar code and documentation platforms. The snippets were selected from projects contributed by their respective developers, and copyright remains with the original authors. Please consult each project's license before redistributing or reusing the code; reproduction without permission is prohibited.