本文整理汇总了Python中pysnptools.snpreader.Bed类的典型用法代码示例。如果您正苦于以下问题:Python Bed类的具体用法?Python Bed怎么用?Python Bed使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Bed类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self,args):
if args.window_type not in ['BP','SNP']:
raise ValueError('Window type not supported')
bed_1 = Bed(args.bfile) #
af1 = self.get_allele_frequency(bed_1,args) #
print(len(af1), "SNPs in file 1")
snps_1 = (af1>args.maf)&(af1<1-args.maf) #
print(np.sum(snps_1), "SNPs in file 1 after MAF filter")
if (args.from_bp is not None) and (args.to_bp is not None):
k = (bed_1.pos[:,2]>args.from_bp)&(bed_1.pos[:,2]<args.to_bp)
snps_1 = snps_1&k
snps_to_use = bed_1.sid[snps_1]
if args.extract is not None:
keep = np.array([l.strip() for l in open(args.extract,'r')])
snps_to_use = np.intersect1d(snps_to_use,keep)
print(len(snps_to_use),"SNPs remaining after extraction")
bed_1_index = np.sort(bed_1.sid_to_index(snps_to_use)) #
pos = bed_1.pos[bed_1_index] #
bim_1=pd.read_table(bed_1.filename+'.bim',header=None,
names=['chm','id','pos_mb','pos_bp','a1','a2'])
af = af1[bed_1_index] #
if args.afile is not None:
a1 = pd.read_table(args.afile,header=None,sep='\s*',
names=['id1','id2','theta'])
else:
a1 = None
self.af = af
self.M = len(bed_1_index) #
self.windows = self.get_windows(pos,args) #
self.chr = pos[:,0]
self.pos = pos[:,2]
self.id = bed_1.sid[bed_1_index]
self.A1 = bim_1['a1'].loc[bed_1_index]
self.A2 = bim_1['a2'].loc[bed_1_index]
self.scores = self.compute(bed_1,bed_1_index,af,a1,args) #
开发者ID:dtnchen,项目名称:Popcorn,代码行数:35,代码来源:compute.py
示例2: setUpClass
def setUpClass(self):
self.currentFolder = os.path.dirname(os.path.realpath(__file__))
#TODO: get data set with NANs!
snpreader = Bed(self.currentFolder + "/examples/toydata",count_A1=False)
self.pheno_fn = self.currentFolder + "/examples/toydata.phe"
self.snpdata = snpreader.read(order='F',force_python_only=True)
self.snps = self.snpdata.val
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:7,代码来源:test.py
示例3: setUpClass
def setUpClass(self):
currentFolder = os.path.dirname(os.path.realpath(__file__))
self.snp_fn = currentFolder + "/../../tests/datasets/mouse/alldata"
self.pheno_fn = currentFolder + "/../../tests/datasets/mouse/pheno_10_causals.txt"
#self.cov_fn = currentFolder + "/examples/toydata.cov"
# load data
###################################################################
snp_reader = Bed(self.snp_fn)
pheno = pstpheno.loadOnePhen(self.pheno_fn)
#cov = pstpheno.loadPhen(self.cov_fn)
# intersect sample ids
snp_reader, pheno = pysnptools.util.intersect_apply([snp_reader, pheno])
self.G = snp_reader.read(order='C').val
self.G = stdizer.Unit().standardize(self.G)
self.G.flags.writeable = False
self.y = pheno['vals'][:,0]
self.y.flags.writeable = False
# load pcs
#self.G_cov = cov['vals']
self.G_cov = np.ones((len(self.y), 1))
self.G_cov.flags.writeable = False
开发者ID:guokai8,项目名称:FaST-LMM,代码行数:25,代码来源:test.py
示例4: divideData
def divideData(filename,direct,num=5,mph=3,delet=True):
print "Estimating heritability using "+str(num)+" components"
[yFil,sFil]=getData(filename,mph=mph);
n=sFil.iid_count
reOrd=perm(n);
yFil=yFil[reOrd,:];
sFil=sFil[reOrd,:];
div=[int(math.ceil( i*n/float(num) )) for i in range(0,num+1)];
varEsts=[];
for i in range(0,num):
print "For component "+str(i);
sFilTemp=sFil[div[i]:div[i+1],:];
yFilTemp=yFil[div[i]:div[i+1],:];
fileTemp=direct+"/tempFile_"+str(i);
Bed.write(fileTemp,sFilTemp.read());
Pheno.write(fileTemp+".phen",yFilTemp.read())
varEsts.append(varRes(fileTemp,direct));
if delet:
os.system("rm "+direct+"/tempFile_"+str(i)+"*");
return varEsts;
开发者ID:seanken,项目名称:PrivSTRAT,代码行数:30,代码来源:estHerit.py
示例5: test_some_std
def test_some_std(self):
k0 = self.snpdata.read_kernel(standardizer=Unit()).val
from pysnptools.kernelreader import SnpKernel
k1 = self.snpdata.read_kernel(standardizer=Unit())
np.testing.assert_array_almost_equal(k0, k1.val, decimal=10)
from pysnptools.snpreader import SnpData
snpdata2 = SnpData(iid=self.snpdata.iid,sid=self.snpdata.sid,pos=self.snpdata.pos,val=np.array(self.snpdata.val))
s = str(snpdata2)
snpdata2.standardize()
s = str(snpdata2)
snpreader = Bed(self.currentFolder + "/examples/toydata",count_A1=False)
k2 = snpreader.read_kernel(standardizer=Unit(),block_size=500).val
np.testing.assert_array_almost_equal(k0, k2, decimal=10)
from pysnptools.standardizer.identity import Identity
from pysnptools.standardizer.diag_K_to_N import DiagKtoN
for dtype in [sp.float64,sp.float32]:
for std in [Unit(),Beta(1,25),Identity(),DiagKtoN()]:
s = str(std)
np.random.seed(0)
x = np.array(np.random.randint(3,size=[60,100]),dtype=dtype)
x2 = x[:,::2]
x2b = np.array(x2)
#LATER what's this about? It doesn't do non-contiguous?
#assert not x2.flags['C_CONTIGUOUS'] and not x2.flags['F_CONTIGUOUS'] #set up to test non contiguous
#assert x2b.flags['C_CONTIGUOUS'] or x2b.flags['F_CONTIGUOUS'] #set up to test non contiguous
#a,b = std.standardize(x2b),std.standardize(x2)
#np.testing.assert_array_almost_equal(a,b)
logging.info("done")
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:31,代码来源:test.py
示例6: test_match_cpp
def test_match_cpp(self):
'''
match
FaSTLMM.207\Data\DemoData>..\.cd.\bin\windows\cpp_mkl\fastlmmc -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.singlesnp.txt -logDelta 0 -verbose 100
'''
logging.info("TestSingleSnp test_match_cpp")
snps = Bed(os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"), count_A1=False)
pheno = os.path.join(self.pythonpath, "tests/datasets/selecttest/pheno.txt")
covar = os.path.join(self.pythonpath, "tests/datasets/selecttest/covariate.txt")
sim_sid = ["snp26250_m0_.19m1_.19","snp82500_m0_.28m1_.28","snp63751_m0_.23m1_.23","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp11253_m0_.2m1_.2","snp86250_m0_.33m1_.33","snp3753_m0_.23m1_.23","snp75003_m0_.32m1_.32","snp30002_m0_.25m1_.25","snp26252_m0_.19m1_.19","snp67501_m0_.15m1_.15","snp63750_m0_.28m1_.28","snp30001_m0_.28m1_.28","snp52502_m0_.35m1_.35","snp33752_m0_.31m1_.31","snp37503_m0_.37m1_.37","snp15002_m0_.11m1_.11","snp3751_m0_.34m1_.34","snp7502_m0_.18m1_.18","snp52503_m0_.3m1_.3","snp30000_m0_.39m1_.39","isnp4457_m0_.11m1_.11","isnp23145_m0_.2m1_.2","snp60001_m0_.39m1_.39","snp33753_m0_.16m1_.16","isnp60813_m0_.2m1_.2","snp82502_m0_.34m1_.34","snp11252_m0_.13m1_.13"]
sim_idx = snps.sid_to_index(sim_sid)
test_sid = ["snp26250_m0_.19m1_.19","snp63751_m0_.23m1_.23","snp82500_m0_.28m1_.28","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp86250_m0_.33m1_.33","snp15002_m0_.11m1_.11","snp33752_m0_.31m1_.31","snp26252_m0_.19m1_.19","snp30001_m0_.28m1_.28","snp11253_m0_.2m1_.2","snp67501_m0_.15m1_.15","snp3753_m0_.23m1_.23","snp52502_m0_.35m1_.35","snp30000_m0_.39m1_.39","snp30002_m0_.25m1_.25"]
test_idx = snps.sid_to_index(test_sid)
for G0,G1 in [(snps[:,sim_idx],KernelIdentity(snps.iid)),(KernelIdentity(snps.iid),snps[:,sim_idx])]:
frame_h2 = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,h2=.5,leave_out_one_chrom=False,count_A1=False)
frame_log_delta = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,log_delta=0,leave_out_one_chrom=False,count_A1=False)
for frame in [frame_h2, frame_log_delta]:
referenceOutfile = TestFeatureSelection.reference_file("single_snp/topsnps.single.txt")
reference = pd.read_table(referenceOutfile,sep="\t") # We've manually remove all comments and blank lines from this file
assert len(frame) == len(reference)
for _, row in reference.iterrows():
sid = row.SNP
pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
reldiff = abs(row.Pvalue - pvalue)/row.Pvalue
assert reldiff < .035, "'{0}' pvalue_list differ too much {4} -- {2} vs {3}".format(sid,None,row.Pvalue,pvalue,reldiff)
开发者ID:MicrosoftGenomics,项目名称:FaST-LMM,代码行数:27,代码来源:test_single_snp.py
示例7: read_plink
def read_plink(self, fn_plink = None):
"""
plink reader
"""
PL = Bed(fn_plink)
PLOB = PL.read()
self.GT = PLOB.val
self.POS = PLOB.pos[:,[0,1]]
self.SID = PLOB.iid[:,1]
self.isNormalised = False
开发者ID:kuod,项目名称:pygcta,代码行数:10,代码来源:genotypes.py
示例8: process_data
def process_data(input_path, output_path, name):
snpreader = Bed(os.path.join(input_path, name))
data = snpreader.read()
values = data.val
preproc_vals = pysnp_genpreproc(values)
assert(np.any(np.isnan(preproc_vals)) == False)
saved = os.path.join(output_path, name + ".h5py")
path, keys = h5_save(path=saved, data_obj={name:preproc_vals}, dt='f')
return {'n_subjects':data.iid_count, 'subject_ids':data.iid,
'n_snps':data.sid_count, 'snp_ids':data.sid,
'data_preprocessed_location': {'path':path, 'key':keys}}
开发者ID:YSanchezAraujo,项目名称:genus,代码行数:11,代码来源:utils.py
示例9: factory
def factory(snpreader, num_snps_in_memory, standardizer, blocksize):
if isinstance(snpreader, str):
snpreader = Bed(snpreader)
if num_snps_in_memory >= snpreader.sid_count:
in_memory = InMemory(snpreader.read(order='C').standardize(standardizer), standardizer, blocksize)
in_memory._snpreader.val.flags.writeable = False
in_memory._val = in_memory._snpreader.val
return in_memory
else:
return FromDisk(snpreader, num_snps_in_memory, standardizer, blocksize, None)
开发者ID:42binwang,项目名称:FaST-LMM,代码行数:11,代码来源:feature_selection_cv.py
示例10: test_write_x_x_cpp
def test_write_x_x_cpp(self):
snpreader = Bed(self.currentFolder + "/examples/toydata")
for order in ['C','F']:
for dtype in [np.float32,np.float64]:
snpdata = snpreader.read(order=order,dtype=dtype)
snpdata.val[-1,0] = float("NAN")
output = "tempdir/toydata.{0}{1}.cpp".format(order,"32" if dtype==np.float32 else "64")
create_directory_if_necessary(output)
Bed.write(snpdata, output)
snpdata2 = Bed(output).read()
assert TestLoader.is_same(snpdata, snpdata2) #!!!define an equality method on snpdata?
开发者ID:amcdavid,项目名称:PySnpTools,代码行数:11,代码来源:test.py
示例11: test_write_x_x_cpp
def test_write_x_x_cpp(self):
snpreader = Bed(self.currentFolder + "/examples/toydata")
for order in ['C','F']:
for dtype in [np.float32,np.float64]:
snpdata = snpreader.read(order=order,dtype=dtype)
snpdata.val[-1,0] = float("NAN")
output = "tempdir/toydata.{0}{1}.cpp".format(order,"32" if dtype==np.float32 else "64")
create_directory_if_necessary(output)
Bed.write(output, snpdata)
snpdata2 = Bed(output).read()
np.testing.assert_array_almost_equal(snpdata.val, snpdata2.val, decimal=10)
开发者ID:MMesbahU,项目名称:PySnpTools,代码行数:11,代码来源:test.py
示例12: test_subset_view
def test_subset_view(self):
snpreader2 = Bed(self.currentFolder + "/examples/toydata",count_A1=False)[:,:]
result = snpreader2.read(view_ok=True)
self.assertFalse(snpreader2 is result)
result2 = result[:,:].read()
self.assertFalse(sp.may_share_memory(result2.val,result.val))
result3 = result[:,:].read(view_ok=True)
self.assertTrue(sp.may_share_memory(result3.val,result.val))
result4 = result3.read()
self.assertFalse(sp.may_share_memory(result4.val,result3.val))
result5 = result4.read(view_ok=True)
self.assertTrue(sp.may_share_memory(result4.val,result5.val))
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:12,代码来源:test.py
示例13: test_npz
def test_npz(self):
logging.info("in test_npz")
snpreader = Bed(self.currentFolder + "/../examples/toydata",count_A1=False)
kerneldata1 = snpreader.read_kernel(standardizer=stdizer.Unit())
s = str(kerneldata1)
output = "tempdir/kernelreader/toydata.kernel.npz"
create_directory_if_necessary(output)
KernelNpz.write(output,kerneldata1)
kernelreader2 = KernelNpz(output)
kerneldata2 = kernelreader2.read()
np.testing.assert_array_almost_equal(kerneldata1.val, kerneldata2.val, decimal=10)
logging.info("done with test")
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:12,代码来源:test.py
示例14: main
def main(args):
print('reading seeed snps')
seed_snps = pd.read_csv(args.seed_snps, header=None, names=['SNP'], index_col='SNP')
seed_snps['ibs_length'] = 0
seed_snps['ibd'] = 0
print('reading typed snps')
typed_snps = pd.read_csv(args.typed_snps, header=None, names=['SNP'])
print('reading genotypes')
data = Bed(args.bfile)
X = data.read().val
typed_snps_indices = np.sort(data.sid_to_index(typed_snps.SNP))
typed_snps_bp = data.col_property[typed_snps_indices,2]
print(len(seed_snps), 'snps in list')
print(data.iid_count, data.sid_count, 'are dimensions of X')
def analyze_snp(i):
# find first typed snp after query snp
snp_bp = data.col_property[i,2]
v = np.where(typed_snps_bp > snp_bp)[0]
if len(v) > 0:
typed_i = v[0]
else:
typed_i = len(typed_snps_indices)-1
n1, n2 = np.where(X[:,i] == 1)[0]
if (X[n1,typed_snps_indices[typed_i]] - X[n2, typed_snps_indices[typed_i]])**2 == 4:
return 0, 0
typed_il, typed_ir = fis.find_boundaries(
X[n1,typed_snps_indices],
X[n2,typed_snps_indices],
typed_i)
typed_ir -= 1
il = typed_snps_indices[typed_il]
ir = typed_snps_indices[typed_ir]
cM = data.col_property[ir, 1] - \
data.col_property[il, 1]
ibd = (np.mean(X[n1,il:ir] == X[n2,il:ir]) > 0.99)
return cM, int(ibd)
for (i, snp) in iter.show_progress(
it.izip(data.sid_to_index(seed_snps.index), seed_snps.index),
total=len(seed_snps)):
# total=10):
seed_snps.ix[snp, ['ibs_length', 'ibd']] = analyze_snp(i)
print(seed_snps.iloc[:100])
seed_snps.to_csv(args.outfile, sep='\t')
开发者ID:hilaryfinucane,项目名称:ibd,代码行数:52,代码来源:analyze_snps.py
示例15: test_subset
def test_subset(self):
logging.info("in test_subset")
snpreader = Bed(self.currentFolder + "/../examples/toydata",count_A1=False)
snpkernel = SnpKernel(snpreader,stdizer.Unit())
krsub = snpkernel[::2,::2]
kerneldata1 = krsub.read()
expected = snpreader.read_kernel(stdizer.Unit())[::2].read()
np.testing.assert_array_almost_equal(kerneldata1.val, expected.val, decimal=10)
krsub2 = snpkernel[::2]
kerneldata2 = krsub2.read()
np.testing.assert_array_almost_equal(kerneldata2.val, expected.val, decimal=10)
logging.info("done with test")
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:13,代码来源:test.py
示例16: too_slow_test_write_bedbig
def too_slow_test_write_bedbig(self):
iid_count = 100000
sid_count = 50000
from pysnptools.snpreader import SnpData
iid = np.array([[str(i),str(i)] for i in range(iid_count)])
sid = np.array(["sid_{0}".format(i) for i in range(sid_count)])
pos = np.array([[i,i,i] for i in range(sid_count)])
np.random.seed(0)
snpdata = SnpData(iid,sid,np.zeros((iid_count,sid_count)),pos=pos) #random.choice((0.0,1.0,2.0,float("nan")),size=(iid_count,sid_count)))
output = "tempdir/bedbig.{0}.{1}".format(iid_count,sid_count)
create_directory_if_necessary(output)
Bed.write(output, snpdata, count_A1=False)
snpdata2 = Bed(output,count_A1=False).read()
np.testing.assert_array_almost_equal(snpdata.val, snpdata2.val, decimal=10)
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:14,代码来源:test.py
示例17: too_slow_test_write_bedbig
def too_slow_test_write_bedbig(self):
iid_count = 100000
sid_count = 50000
from pysnptools.snpreader.snpdata import SnpData #!!! promote on level up innamespace
iid = np.array([[str(i),str(i)] for i in xrange(iid_count)])
sid = np.array(["sid_{0}".format(i) for i in xrange(sid_count)])
pos = np.array([[i,i,i] for i in xrange(sid_count)])
np.random.seed = 0
snpdata = SnpData(iid,sid,pos,np.zeros((iid_count,sid_count))) #random.choice((0.0,1.0,2.0,float("nan")),size=(iid_count,sid_count)))
output = "tempdir/bedbig.{0}.{1}".format(iid_count,sid_count)
create_directory_if_necessary(output)
Bed.write(snpdata, output)
snpdata2 = Bed(output).read()
assert TestLoader.is_same(snpdata, snpdata2) #!!!define an equality method on snpdata?
开发者ID:amcdavid,项目名称:PySnpTools,代码行数:14,代码来源:test.py
示例18: main
def main():
"""
example that compares output to fastlmmc
"""
# set up data
phen_fn = "../feature_selection/examples/toydata.phe"
snp_fn = "../feature_selection/examples/toydata.5chrom"
#chrom_count = 5
# load data
###################################################################
snp_reader = Bed(snp_fn)
pheno = pstpheno.loadOnePhen(phen_fn)
cov = None
#cov = pstpheno.loadPhen(self.cov_fn)
snp_reader, pheno, cov = intersect_apply([snp_reader, pheno, cov])
G = snp_reader.read(order='C').val
G = stdizer.Unit().standardize(G)
G.flags.writeable = False
y = pheno['vals'][:,0]
y.flags.writeable
# load pcs
#G_pc = cov['vals']
#G_pc.flags.writeable = False
delta = 2.0
gwas = WindowingGwas(G, y, delta=delta)
pv = gwas.run_gwas()
from fastlmm.association.tests.test_gwas import GwasTest
REML = False
snp_pos_sim = snp_reader.sid
snp_pos_test = snp_reader.sid
os.environ["FastLmmUseAnyMklLib"] = "1"
gwas_c = GwasTest(snp_fn, phen_fn, snp_pos_sim, snp_pos_test, delta, REML=REML, excludeByPosition=0)
gwas_c.run_gwas()
import pylab
pylab.plot(np.log(pv), np.log(gwas_c.p_values), "+")
pylab.plot(np.arange(-18, 0), np.arange(-18,0), "-k")
pylab.show()
np.testing.assert_array_almost_equal(np.log(pv), np.log(gwas_c.p_values), decimal=3)
simple_manhattan_plot(pv)
开发者ID:bdepardo,项目名称:FaST-LMM,代码行数:50,代码来源:windowing_gwas.py
示例19: test_write_bed_f64cpp_5_python
def test_write_bed_f64cpp_5_python(self):
snpreader = Bed(self.currentFolder + "/examples/toydata")
iid_index = 5
logging.info("iid={0}".format(iid_index))
#if snpreader.iid_count % 4 == 0: # divisible by 4 isn't a good test
# snpreader = snpreader[0:-1,:]
#assert snpreader.iid_count % 4 != 0
snpdata = snpreader[0:iid_index,:].read(order='F',dtype=np.float64)
if snpdata.iid_count > 0:
snpdata.val[-1,0] = float("NAN")
output = "tempdir/toydata.F64python.{0}".format(iid_index)
create_directory_if_necessary(output)
Bed.write(snpdata, output,force_python_only=True)
snpdata2 = Bed(output).read()
assert TestLoader.is_same(snpdata, snpdata2) #!!!define an equality method on snpdata?
开发者ID:amcdavid,项目名称:PySnpTools,代码行数:15,代码来源:test.py
示例20: test_SNC
def test_SNC(self):
logging.info("TestSNC")
test_snps = self.bedbase
pheno = pstpheno.loadOnePhen(self.phen_fn,vectorize=True)
covar = pstpheno.loadPhen(self.cov_fn)
bed = Bed(test_snps, count_A1=False)
snc = bed.read()
snc.val[:,2] = [0] * snc.iid_count # make SNP #2 have constant values (aka a SNC)
output_file_name = self.file_name("snc")
frame = single_snp(test_snps=snc[:,:10], pheno=pheno, G0=snc, mixing=0,leave_out_one_chrom=False,
covar=covar, output_file_name=output_file_name,count_A1=False
)
self.compare_files(frame,"snc")
开发者ID:MicrosoftGenomics,项目名称:FaST-LMM,代码行数:15,代码来源:test_single_snp.py
注:本文中的pysnptools.snpreader.Bed类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论