本文整理汇总了Python中regex.match函数的典型用法代码示例。如果您正苦于以下问题:Python match函数的具体用法?Python match怎么用?Python match使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了match函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self, text, lv=None, lc=None, vc=None):
    """Initialize from an existing Ex, or from text plus a language variety.

    The variety may be given either as a single ``lv`` string of the form
    ``xxx-000`` (3-letter language code, dash, numeric variety code) or as
    separate ``lc`` (language code) and ``vc`` (variety number) values.

    Raises:
        TypeError: if text is neither a string nor an Ex, or no variety
            information was supplied.
        ValueError: if lv, lc, or vc is malformed.
    """
    if isinstance(text, Ex):
        # Copy-construct from another Ex; lv/lc/vc are ignored on this path.
        self._text = text.text
        self._lc = text.lc
        self._vc = text.vc
    elif not isinstance(text, str):
        raise TypeError("text must be string")
    else:
        self._text = text
        if lv:
            if re.match(r'^[a-z]{3}-[0-9]{3,}$', lv):
                self._lc = lv[:3]
                self._vc = int(lv[4:])
            else:
                raise ValueError("lv must be in the format xxx-000")
        elif lc and (vc is not None):  # fix: identity test for None, not !=
            lc = lc.lower()
            if re.match(r'^[a-z]{3}$', lc):
                self._lc = lc
            else:
                raise ValueError("lc must be a 3-letter ISO 639 code")
            try:
                vc = int(vc)
                if vc < 0:
                    raise ValueError("vc must be a positive integer")
                self._vc = vc
            except ValueError:
                # int() conversion failures and the negative check share
                # one user-facing message.
                raise ValueError("vc must be a positive integer")
        else:
            raise TypeError("{cls} requires lv".format(cls=self.__class__.__name__))
开发者ID:longnow,项目名称:panlex-tools,代码行数:26,代码来源:panlex.py
示例2: get_next_document
def get_next_document(h):
    """Read the next <doc ...> ... </doc> block from binary file handle h.

    Lines are decoded as UTF-8 and whitespace-stripped; empty lines are
    skipped.  The <doc ...> opening line receives several in-place
    clean-ups (_unk_ placeholders, a forum="0/1" attribute, urldomain/tld
    attributes extracted from the url attribute, and two known broken
    attribute-value patterns).

    Returns the document as a list of its lines, or None at end of input.
    """
    doc = None  # fix: was unbound -> NameError if input starts mid-document
    while True:
        l = h.readline()
        if not l:
            # EOF: no (further) complete document.
            doc = None
            break
        l = l.decode('utf-8').strip()
        if not l:
            continue
        if re.match(u'^<doc ', l, re.UNICODE):
            # Fix _unk_ placeholders.
            l = re.sub(r'_unk_', 'unknown', l)
            # Forum detection -> forum="0/1" attribute on the doc tag.
            # fix: re.UNICODE was passed positionally as sub()'s *count*
            # argument; harmless here only because the $-anchored pattern
            # matches at most once -- pass it as flags= for correctness.
            if re.match(RE_FORUM, l, re.UNICODE):
                l = re.sub(u'>$', r' forum="1">', l, flags=re.UNICODE)
            else:
                l = re.sub(u'>$', r' forum="0">', l, flags=re.UNICODE)
            # Host and tld extraction from the url="..." attribute.
            l = re.sub(r'( url="https{0,1}://)([^/]+)\.([a-z]{2,4})(|/|%)([^"]*")', r'\1\2.\3\4\5 urldomain="\2.\3" tld="\3"', l)
            # Fix some known problems in doc attr values.
            l = re.sub(r'=" +"', r'="unknown"', l)        # fix: attr=" "
            l = re.sub(r'="([^"]+)\\" ', r'="\1" ', l)    # fix: attr="val\"
            doc = [l]
        else:
            if doc is not None:  # ignore stray lines before the first <doc
                doc.append(l)
            if re.match(u'^</doc>', l, re.UNICODE):
                break
    return doc
开发者ID:rsling,项目名称:cow,代码行数:34,代码来源:cow16-finalize-es.py
示例3: guess_split
def guess_split(majiribun, reading):
    """Guess the reading of each kanji in a kanji/kana-mixed string.

    Builds two regexes over `reading`: one capturing a greedy hiragana run
    per kanji, one non-greedy.  If both agree, the split is unambiguous.

    Returns a {kanji: reading} dict on an unambiguous match, None when the
    split is ambiguous or the pattern does not match `reading` at all.

    NOTE(review): \\p{Hiragana} requires the third-party `regex` module
    (imported as `re` here); `kanji_re` is assumed to be a module-level
    compiled pattern -- confirm.
    """
    kanjis = []
    matchreg_greedy = ''
    matchreg_nongreedy = ''
    for char in majiribun:
        if kanji_re.match(char):
            kanjis.append(char)
            # fix: raw strings -- "\p" in a plain literal only works by
            # accident and raises a DeprecationWarning on modern Pythons.
            matchreg_greedy += r"(\p{Hiragana}+)"
            matchreg_nongreedy += r"(\p{Hiragana}+?)"
        else:
            matchreg_greedy += re.escape(char)
            matchreg_nongreedy += re.escape(char)
    m = re.match(matchreg_greedy + '$', reading)
    if m:
        yomis = m.groups()
        yomis_nongreedy = re.match(matchreg_nongreedy + '$', reading).groups()
        if yomis != yomis_nongreedy:
            # Greedy and non-greedy splits disagree: ambiguous.
            return None
        d = {}
        for idx in range(0, len(kanjis)):
            d[kanjis[idx]] = yomis[idx]
        return d  # fix: return(d) -> return d (return is not a function)
    # Implicit None when the greedy pattern fails to match at all.
开发者ID:leoboiko,项目名称:yomisplit,代码行数:25,代码来源:__init__.py
示例4: process_file
def process_file(file_path, tagger, idf_doc_count, idf_table, threshold, maximum_words):
    """
    Takes the uploaded file, detects its type (plain text, alto XML, zip)
    and calls a parsing function accordingly.  If everything succeeds it
    returns (keywords, 200); otherwise ({"error": ...}, 400).
    """
    file_info = magic.from_file(file_path)
    lines = []
    # fix: "(with BOM)" was an unescaped regex group, so this pattern could
    # never match libmagic's literal "UTF-8 Unicode (with BOM) text" output
    # and BOM files silently fell through to plain utf-8 decoding.
    if re.match(r"^UTF-8 Unicode \(with BOM\) text", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8-sig')
    elif re.match("^UTF-8 Unicode", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8')
    elif re.match("^ASCII text", file_info):
        # ASCII is a subset of UTF-8, so the same decoder works.
        lines = lines_from_txt_file(file_path, encoding='utf-8')
    elif re.match('^XML 1.0 document', file_info) and \
            (file_path.endswith('.alto') or file_path.endswith('.xml')):
        lines = lines_from_alto_file(file_path)
    elif re.match('^Zip archive data', file_info):
        lines = lines_from_zip_file(file_path)
    else:
        # fix: typo "eror" -> "error" so clients find the error key.
        return {"error": "Unsupported file type: {}".format(file_info)}, 400
    if not lines:
        return {"error": "Empty file"}, 400
    return keywords.get_keywords(lines, tagger, idf_doc_count, idf_table, threshold, maximum_words), 200
开发者ID:ufal,项目名称:ker,代码行数:25,代码来源:server.py
示例5: __init__
def __init__(self, room, s, negative=True):
    """Parse a room ban/filter expression.

    Accepts glux-style expressions such as '/5m jid [email protected]',
    'nick somenick', or 'exp <regexp>'.  A leading duration (consumed by
    fetch_time) and an optional '|| reason' suffix are split off first.

    Raises ValueError on an empty target, NoJID/NickNotFound when a bare
    nick cannot be resolved to a real JID, and MyRegexpError when a
    user-supplied regexp fails to compile/run.
    """
    self.room = room
    self.negative = negative
    # Split off the optional ban duration prefix.
    self.end_time, s = fetch_time(s)
    # Split off the optional '|| reason' suffix.
    if '||' in s:  # fix: s.count('||') used as a boolean
        s, self.reason = s[:s.find('||')].strip(), s[s.find('||')+2:].strip()
    else:
        s, self.reason = s.strip(), ''
    if s.lower().startswith('jid '):
        self.by_jid = True
        s = s[4:].lower()
        if not s:
            raise ValueError
    elif s.lower().startswith('nick '):
        self.by_jid = False
        s = s[5:]
        if not s:
            raise ValueError
    else:
        # Bare nick: resolve it to the occupant's real JID right away.
        self.by_jid = True
        self.regexp = False
        item = room.get(s, None)
        if item:
            if item.jid == item.realjid:
                raise NoJID(item.jid)
            else:
                self.value = item.realjid.lower()
        else:
            raise NickNotFound(s)
        return
    if s.lower().startswith('exp '):
        self.regexp = True
        s = s[4:]
        # Validate the user-supplied regexp by trying it once.
        # fix: bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        try:
            regex.match(s, '[email protected]')
        except Exception:
            raise MyRegexpError(s)
    else:
        self.regexp = False
    self.value = s
开发者ID:TLemur,项目名称:freq-bot,代码行数:34,代码来源:alists.py
示例6: testRegex
def testRegex(self):
    """Exercise the toy match() on plain, ^, $, and * patterns."""
    cases = [
        (1, "foo", "foobar"),     # basic match, beginning of string
        (1, "oba", "foobar"),     # basic match, middle of string
        (0, "obo", "foobar"),     # basic match, no match
        (1, "^fo", "foobar"),     # start qualifier
        (0, "^bar", "foobar"),    # start qualifier in body
        (1, "bar$", "foobar"),    # end qualifier
        (0, "foo$", "foobar"),    # end qualifier in body
        (1, "fo*b", "foobar"),    # optional qualifier
        (1, "fooa*b", "foobar"),  # optional qualifier 2
        (1, "a*foo", "foobar"),   # optional qualifier 3
    ]
    for expected, pattern, text in cases:
        self.assertEqual(expected, match(pattern, text))
开发者ID:heuristicfencepost,项目名称:beautiful_code,代码行数:31,代码来源:regex_test.py
示例7: parseaddr
def parseaddr(address):
    """Split an RFC 822 address field into (real-name, email-address).

    Legacy Python 1.x code: it uses the long-removed ``regex`` module,
    whose search() returns a match *position* (negative on failure) and
    whose match() returns the matched *length* -- hence the integer
    arithmetic below.  ``string.strip`` is the pre-str-method form.
    """
    # This is probably not perfect
    address = string.strip(address)
    # Case 1: part of the address is in <[email protected]> form.
    pos = regex.search('<.*>', address)
    if pos >= 0:
        # Text before '<' starts the name; text after the <...> span is
        # appended to the name, and the <...> span itself is the address.
        name = address[:pos]
        address = address[pos:]
        length = regex.match('<.*>', address)
        name = name + address[length:]
        address = address[:length]
    else:
        # Case 2: part of the address is in (comment) form
        # NOTE(review): the old regex module used emacs-style syntax, so
        # the parentheses in '(.*)' are literal characters here -- confirm
        # against the legacy module's documentation.
        pos = regex.search('(.*)', address)
        if pos >= 0:
            name = address[pos:]
            address = address[:pos]
            length = regex.match('(.*)', name)
            address = address + name[length:]
            name = name[:length]
        else:
            # Case 3: neither. Only an address
            name = ''
    name = string.strip(name)
    address = string.strip(address)
    # Strip the surrounding <...> / (...) delimiters, if present.
    if address and address[0] == '<' and address[-1] == '>':
        address = address[1:-1]
    if name and name[0] == '(' and name[-1] == ')':
        name = name[1:-1]
    return name, address
开发者ID:asottile,项目名称:ancient-pythons,代码行数:30,代码来源:rfc822.py
示例8: test_yaml
def test_yaml(md_filepath):
    """Validate a markdown file's YAML front matter against `requirements`.

    Skips when there is no '--- ... ---' header; otherwise checks required
    keys, link formatting, and YYYY-MM-DD date formatting, and rejects
    unknown metadata headers.
    """
    filestring = md_filepath.read()
    # Front matter is everything between the leading pair of '---' fences.
    reg = regex.compile(r'^---(.*?)---', flags=regex.DOTALL)
    match = regex.search(reg, filestring)
    if not match:
        pytest.skip('No YAML header')
    yaml_text = match.group(1)
    # fix: yaml.load() without an explicit Loader can construct arbitrary
    # objects and is deprecated; front matter is plain YAML, so safe_load.
    parsed_yaml = yaml.safe_load(yaml_text)
    for requirement, req in requirements.items():
        if req['required']:
            assert requirement in parsed_yaml, 'YAML metadata missing required element: ' + requirement
        if req['type'] == 'link':
            # Check external links have balanced brackets: [text](url).
            regexp = regex.compile(r'\[(.*)\]\((.*)\)')
            assert regexp.match(parsed_yaml[requirement]), 'YAML metadata formatting error: ' + requirement
        if req['type'] == 'date' and requirement in parsed_yaml:
            try:
                d = parse(str(parsed_yaml[requirement]))
            except ValueError:
                assert False, 'YAML metadata formatting error: ' + requirement + ' date parse failed.'
            regexp = regex.compile(r'20[0-9]{2}-[0-9]{2}-[0-9]{2}')
            assert regexp.match(str(parsed_yaml[requirement])), 'YAML metadata formatting error: ' + requirement + ' should use the format YYYY-MM-DD.'
    for header in parsed_yaml:
        assert header in requirements, 'YAML metadata header ' + header + ' is not a valid metadata type.'
开发者ID:Jonchun,项目名称:docs,代码行数:27,代码来源:test_yaml.py
示例9: faiordict2contigorder
def faiordict2contigorder(file_name, file_format):
    '''Takes either a .fai or .dict file, and return a contig order dictionary, i.e., chrom_seq['chr1'] == 0

    file_format must be 'fai' (contig name is the first tab-separated
    field of every line) or 'dict' (contig name is the SN: field of @SQ
    lines; other header lines are ignored).
    '''
    assert file_format in ('fai', 'dict')
    contig_sequence = []
    with open(file_name) as gfile:
        line_i = gfile.readline().rstrip('\n')
        while line_i:
            # fix: contig_match must be reset every line -- previously a
            # non-@SQ line in a .dict file reused the previous iteration's
            # match (duplicating contigs and shifting their indices), and
            # a leading non-@SQ line raised NameError.
            contig_match = None
            if file_format == 'fai':
                contig_match = re.match(r'([^\t]+)\t', line_i)
            elif file_format == 'dict':
                if line_i.startswith('@SQ'):
                    contig_match = re.match(r'@SQ\tSN:([^\t]+)\tLN:', line_i)
            if contig_match:
                # Some .fai files have a space after the contig name for
                # descriptions; keep only the first token.
                contig_i = contig_match.groups()[0].split(' ')[0]
                contig_sequence.append(contig_i)
            line_i = gfile.readline().rstrip('\n')
    # Map each contig name to its 0-based position in the reference.
    chrom_seq = {}
    for n, contig_i in enumerate(contig_sequence):
        chrom_seq[contig_i] = n
    return chrom_seq
开发者ID:razZ0r,项目名称:somaticseq,代码行数:30,代码来源:vcf.py
示例10: acroize_heading
def acroize_heading(m):
    """Regex-replacement callback: fold the text's acronym into a heading.

    m[1] is a prefix kept verbatim; m[2] is the heading text.  Reads
    `text` (a dict with an 'acronym' key) from the enclosing scope.
    """
    acro = text.get('acronym')
    if not acro:
        # No acronym known: leave the whole match unchanged.
        return m[0]
    heading = m[2]
    if not heading:
        return acro
    # Split a leading "12" or "12–14" number (optionally dot-terminated)
    # off the heading text.
    m2 = regex.match(r'(\d+(?:–(\d+))?)(?:\.)?\s*(.*)$', heading)
    if not m2:
        h_text = heading
        # NOTE(review): h_num is left unbound on this path, so the
        # `acro_num == h_num` comparison below would raise NameError.
        # Presumably headings here always start with a number -- confirm.
    else:
        h_num = m2[1]
        h_text = m2[3]
    # Split the acronym into prefix and trailing number, e.g. "AN3.1–2".
    # NOTE(review): m3 is None when the acronym has no trailing digits,
    # which would raise TypeError on the next line -- confirm inputs.
    m3 = regex.match(r'(.*?)(\d+(?:–(\d+))?)$', text['acronym'])
    acro_prefix = m3[1]
    acro_num = m3[2]
    if acro_num == h_num:
        # Heading repeats the acronym's number: drop it from the heading.
        # NOTE(review): this `heading` assignment (and the one below) is
        # never read again -- new_heading is built from h_text.
        heading = h_text
    elif '–' in acro_num and h_num:
        # Acronym covers a range: adopt the heading's specific number.
        acro = acro_prefix + h_num
        heading = h_text
    new_heading = f'<span class="acro">{acro}</span>{": " if h_text else ""}{h_text}'
    return f'{m[1]}{new_heading}'
开发者ID:suttacentral,项目名称:suttacentral,代码行数:28,代码来源:make_html.py
示例11: process_lines
def process_lines(lines, NONBREAKING_PREFIX):
    """Accumulate lines into blocks and sentence-split each block.

    Lines are buffered until a tag line (matching re_tag) or a blank line
    is hit; the buffered block is then passed to do_it_for().  A blank
    line that follows accumulated text additionally emits "<P>\n".

    Returns the concatenated processed text.
    """
    # loop text, add lines together until we get a blank line or a <p>
    out_text = ''
    text = ""
    for line in lines:
        line = line.strip()
        # fix: the blank-line regex was evaluated twice per delimiter
        # line; compute it once (and use a raw string for the pattern).
        is_blank = regex.match(r'^\s*$', line) is not None
        if re_tag.match(line) is not None or is_blank:
            # time to process this block, we've hit a blank or <p>
            out_text += do_it_for(text, line, NONBREAKING_PREFIX)
            if is_blank and text:  # if we have text followed by <P>
                out_text += "<P>\n"
            text = ""
        else:
            # append the text, with a space
            text += line + " "
    # do the leftover text
    if text:
        out_text += do_it_for(text, "", NONBREAKING_PREFIX)
    return out_text
开发者ID:xiaolanchong,项目名称:sentence_splitter,代码行数:25,代码来源:splitter.py
示例12: create_activation
def create_activation(data, labels, standard_cols, group_labels=[]):
    """Build a database.Activation record from one table row.

    data: cell values for the row; labels: column labels; standard_cols:
    per-column standard attribute name or None; group_labels: group tags
    stored on the activation.

    NOTE(review): group_labels=[] is a shared mutable default -- callers
    mutating activation.groups would leak state across calls; confirm.
    NOTE(review): this is Python 2 code (uses `unicode` below).
    """
    activation = database.Activation()
    for i, col in enumerate(data):
        # Cast to integer or float if appropriate
        # if regex.match('[-\d]+$', col):
        #     col = int(col)
        # elif regex.match('[-\d\.]+$', col):
        #     col = float(col)
        # Set standard attributes if applicable and do validation where appropriate.
        # Generally, validation will not prevent a bad value from making it into the
        # activation object, but it will flag any potential issues using the "problem" column.
        if standard_cols[i] is not None:
            sc = standard_cols[i]
            # Validate XYZ columns: Should only be integers (and possible trailing decimals).
            # If they're not, keep only leading numbers. The exception is that ScienceDirect
            # journals often follow the minus sign with a space (e.g., - 35), which we strip.
            if regex.match('[xyz]$', sc):
                m = regex.match('(-)\s+(\d+\.*\d*)$', col)
                if m:
                    # Rejoin "- 35" into "-35".
                    col = "%s%s" % (m.group(1), m.group(2))
                if not regex.match('(-*\d+)\.*\d*$', col):
                    logging.debug("Value %s in %s column is not valid" % (col, sc))
                    activation.problems.append("Value in %s column is not valid" % sc)
                    # col = regex.search('(-*\d+)', col).group(1)
                    # NOTE(review): returns immediately on the first bad
                    # coordinate, skipping all remaining columns -- confirm
                    # this early-exit is intended.
                    return activation
                col = (float(col))
            elif sc == 'region':
                if not regex.search('[a-zA-Z]', col):
                    logging.debug("Value in region column is not a string")
                    activation.problems.append("Value in region column is not a string")
            setattr(activation, sc, col)
        # Always include all columns in record
        activation.add_col(labels[i], col)
        # Handle columns with multiple coordinates (e.g., 45;12;-12).
        # Assume that any series of 3 numbers in a non-standard column
        # reflects coordinates. Will fail if there are leading numbers!!!
        # Also need to remove space between minus sign and numbers; some ScienceDirect
        # journals leave a gap.
        if not i in standard_cols:
            cs = '([\-\.\s]*\d{1,3})'
            m = regex.search('%s[,;\s]+%s[,;\s]+%s' % (cs, cs, cs), unicode(col).strip())
            if m:
                # Normalize "- 12" -> "-12" in each extracted coordinate.
                x, y, z = [regex.sub('-\s+', '-', c) for c in [m.group(1), m.group(2), m.group(3)]]
                logger.info("Found multi-coordinate column: %s\n...and extracted: %s, %s, %s" % (col, x, y, z))
                activation.set_coords(x, y, z)
    activation.groups = group_labels
    return activation
开发者ID:iceberg273,项目名称:ACE,代码行数:59,代码来源:tableparser.py
示例13: parse_line
def parse_line(line, perv_url):
    """Parse a 'Name Surname-Name Surname-tags[,url]' line.

    Names must each be two Cyrillic words; tags are dot-separated Cyrillic
    words with no leading/trailing/doubled entries; the url may be
    inherited from perv_url when omitted.

    Returns (name_1, name_2, link_types, url).
    Raises ValueError with an STR_* code on any validation failure.
    """
    if not line or len(line.strip()) == 0:
        raise ValueError("STR_EMPTY")
    line = line.strip()
    spt = line.split('-')
    if len(spt) == 3:
        name_1 = spt[0]
        name_2 = spt[1]
        attrs = spt[2]
        attrs_spt = attrs.split(',')
        # Either 'tags,url' or just 'tags' with an inherited previous url.
        if not (len(attrs_spt) == 2 or (len(attrs_spt) == 1 and perv_url)):
            raise ValueError("STR_ENTRY_EMPTY")
        if not name_1 \
                or not name_2 \
                or not regex.match("^["+_cryllic+"\s]+$", name_1)\
                or not regex.match("^["+_cryllic+"\s]+$", name_2)\
                or len(name_1.split(' ')) != 2\
                or len(name_2.split(' ')) != 2:
            raise ValueError("STR_NAME_FORMAT")
        if name_1 == name_2:
            raise ValueError("STR_SAME_NAMES")
        # An explicit url is not allowed when a previous one is inherited.
        if len(attrs_spt) == 2 and perv_url:
            raise ValueError("STR_TAG_FORMAT")
        # Tags: dot-separated, no leading or trailing dot.
        if not regex.match("^(?!\.)["+_cryllic+"\.]+(?<!\.)$", attrs_spt[0]):
            raise ValueError("STR_TAG_FORMAT")
        link_types = attrs_spt[0].split('.')
        # fix: `if filter(...)` is a Python-2 idiom -- in Python 3 a filter
        # object is always truthy, so the empty-tag check always fired.
        if any(not x for x in link_types):
            raise ValueError("STR_TAG_FORMAT")
        arr = collections.Counter(link_types)
        doubled_tags = set(i for i in arr if arr[i] > 1)
        if len(doubled_tags) != 0:
            raise ValueError("STR_TAG_DOUBLED:" + ",".join(doubled_tags))
        url = attrs_spt[1] if len(attrs_spt) == 2 else perv_url
        if not regex.match("http://[\w\.]+/[\w]+$", url):
            raise ValueError("STR_LINK_FORMAT")
        """
        sim_names = list(es.get_similar_names([name_1, name_2]))
        if isinstance(sim_names[0], basestring):
            raise ValueError(u"STR_SIMILAR_NAME:{},{}".format(name_1,sim_names[0]))
        if isinstance(sim_names[1], basestring):
            raise ValueError(u"STR_SIMILAR_NAME:{},{}".format(name_2,sim_names[1]))
        tags = filter(lambda x: not x[1], zip(link_types, es.check_tags(link_types)))
        if len(tags) != 0:
            raise ValueError(u"STR_TAG_NOT_FOUND:{}".format(",".join(map(lambda x: x[0], tags))))
        """
        return (name_1, name_2, link_types, url)
    else:
        raise ValueError("STR_FORMAT")
开发者ID:baio,项目名称:knit-service,代码行数:59,代码来源:line2bucket.py
示例14: test_zero_or_one
def test_zero_or_one(self):
    """'ba?' must accept 'ba' and 'b' but reject 'aa'."""
    pattern = regex.build_regex("ba?")
    for candidate, should_match in (("ba", True), ("b", True), ("aa", False)):
        outcome = regex.match(pattern, candidate)
        if should_match:
            self.assertTrue(outcome)
        else:
            self.assertFalse(outcome)
开发者ID:Felttrip,项目名称:PythonRegex,代码行数:8,代码来源:test_regex.py
示例15: process_marked_lines
def process_marked_lines(lines, markers, return_flags=[False, -1, -1]):
    """Run regexes against message's marked lines to strip quotations.

    `markers` is a per-line marker sequence (t=text, m=marked/quote-like,
    s=splitter, e=empty, f=forward line).  Return all but the last quoted
    segment if one is found, otherwise return `lines` unchanged.

    return_flags is mutated in place to
    [were_lines_deleted, first_deleted_line, last_deleted_line].

    NOTE(review): the default return_flags=[False, -1, -1] is a shared
    mutable default; it is always overwritten via [:] here, but callers
    holding a reference see state from the latest call -- confirm intent.
    """
    # Pre-process marker sequence
    # if there are no splitter there should be no markers. However, allow markers if more than 3!
    if 's' not in markers and not re.search('(me*){3}', markers):
        markers = markers.replace('m', 't')
    # Look for forwards (don't remove anything on a forward)
    # if there is an f before the first split, then it's a forward.
    if re.match('[te]*f', markers):
        return_flags[:] = [False, -1, -1]
        return lines
    # Remove last quoted segment
    # match from the end of the markers list
    # NOTE(review): reverse() implies `markers` is list-like here, yet
    # replace()/re.match treat it as a string above -- confirm its type.
    markers.reverse()
    # match for unmarked quote following split
    quotation = re.match(r'e*(te*)+(se*)+', markers)
    if not quotation:
        # match for inline replies
        if re_orig.match(r'e*[mfts]*((te*)+(me*)+)+[mfts]*((se*)+|(me*){2,})', markers):
            return_flags[:] = [False, -1, -1]
            return lines
        # match for normal reply with quote
        quotation = re_orig.match(r'e*(me*)+[mefts]*((se*)+|(me*){2,})', markers)
        if not quotation:
            # match for normal reply with quote and signature below quote
            if re.match(r'e*(te*)+(me*)+.*(s)+e*(te*)+', markers):
                quotation = re.match(r'e*(te*)+(me*)+.*(s)+', markers)
    markers.reverse()
    # If quotation, return it
    if quotation:
        # Translate match offsets in the reversed sequence back into
        # forward line indices, then drop the quoted span.
        start = len(markers) - quotation.end() + 1
        end = len(markers) - quotation.start() - 1
        return_flags[:] = True, start, end
        return lines[:start] + lines[end:]
    return_flags[:] = [False, -1, -1]
    return lines
开发者ID:mgontav,项目名称:talon,代码行数:58,代码来源:quotations.py
示例16: printDiseaseClass
def printDiseaseClass(outStream,line):
    # Emit one PharmGKB disease row as RDF/Turtle on outStream.
    # line is a TSV row: line[0]=accession id, line[1]=name,
    # line[2]=alternate names (comma separated, possibly quoted),
    # line[4]=external cross references ("DB:ID(descr)" entries).
    outStream.write('<%s/%s> \n\ta %s;\n' % (Namespace, line[0], visumpointGene.VP_PharmGKBDisease))
    outStream.write('\trdfs:label "%s"^^xsd:string ;\n' % (visumpointGene.strip(line[1])))
    outStream.write('\tskos:prefLabel "%s"^^xsd:string ;\n' % (visumpointGene.strip(line[1])))
    trip = ""
    useName = ""
    if len(line[2]) > 0 :
        # Alternate names are comma separated, but a quoted name may itself
        # contain commas: `trip` holds the opening piece until its closing
        # quote arrives and the halves are stitched back together.
        for name in line[2].split(','):
            if name.startswith("\"") and name.endswith("\""):
                useName = name
            elif name.startswith("\""):
                trip = name
            elif name.endswith("\""):
                useName = (trip + "," +name).strip("\"")
                trip = ""
            else :
                useName = name
            if len(useName) > 0:
                visumpointGene.printAlternativeName(outStream, "", visumpointGene.strip(useName), visumpointGene.VP_PharmGKB,"\t\t")
                useName = ""
    # Three progressively looser patterns for "DB:ID(...)" references:
    # with a [x/y] bracket part, with nested parentheses (possessive ++
    # requires the third-party `regex` module), and a bare fallback.
    db_id_type_strB = "(.+):(.+)\((.+)\[(.+)/(.+)\]"
    db_id_PP = r'(.+):(.+)\(([^()]++)\((.*)\)+'
    strExp = "(.+):(.+)\((.+)"
    if len(line[4]) > 0 :
        for name in line[4].split('),'):
            #db_id_type_strB = "(.+):(.+)\((.+)\[(.+)/(.+)\]"
            # NOTE(review): regex.M|regex.I here vs re.M|re.I below --
            # the flag values are numerically identical, but the mix is
            # accidental, not meaningful.
            try:
                match = regex.match(db_id_type_strB, name, regex.M|regex.I)
                if (match):
                    visumpointGene.printCrossReference(outStream, dbReference(match.group(1)), visumpointGene.strip(match.group(2)), visumpointGene.VP_PharmGKB,"\t\t")
                else :
                    #db_id_PP = r'(.+):(.+)\(([^()]++)\((.*)\)+'
                    try:
                        match1 = regex.match( db_id_PP, name, re.M|re.I)
                        if (match1):
                            visumpointGene.printCrossReference(outStream, dbReference(match1.group(1)), visumpointGene.strip(match1.group(2)), visumpointGene.VP_PharmGKB,"\t\t")
                        else :
                            #strExp = "(.+):(.+)\((.+)"
                            try:
                                match2 = regex.match( strExp, name, re.M|re.I)
                                if(match2):
                                    visumpointGene.printCrossReference(outStream, dbReference(match2.group(1)), visumpointGene.strip(match2.group(2)), visumpointGene.VP_PharmGKB,"\t\t")
                                else :
                                    # No pattern matched: record the raw value as a comment.
                                    outStream.write("#ERROR : processing External References : %s\n" % name)
                                    outStream.write("#\t\t %s\n" % line[4])
                            # NOTE(review): bare except blocks swallow all
                            # errors (incl. KeyboardInterrupt) and only
                            # print the exception *type* -- flagged, kept
                            # as-is to preserve behavior.
                            except:
                                e = sys.exc_info()[0]
                                print( "Error: %s" % e )
                    except:
                        e = sys.exc_info()[0]
                        print( "Error: %s" % e )
            except:
                e = sys.exc_info()[0]
                print( "Error: %s" % e )
    # Close the Turtle statement, with the row identity as a comment.
    outStream.write('\t. # %s %s\n\n' % (line[1], line[0]))
开发者ID:robertlario,项目名称:Allegrograph,代码行数:57,代码来源:PharmGKB.py
示例17: test_match_many
def test_match_many(self):
    """'ab[cde]fg' matches every class member and rejects the empty case."""
    pattern = regex.build_regex("ab[cde]fg")
    for candidate in ("abcfg", "abdfg", "abefg"):
        self.assertTrue(regex.match(pattern, candidate))
    self.assertFalse(regex.match(pattern, "abfg"))
开发者ID:Felttrip,项目名称:PythonRegex,代码行数:10,代码来源:test_regex.py
示例18: hey
def hey(what):
    """Return Bob's reply: silence, shouting, a question, or anything else."""
    what = what.strip()
    # Drop whitespace and punctuation so only letters/digits are judged.
    letters = regex.sub(u'[\s{}]+'.format(regex.escape(punctuation)), '', what)
    if not regex.match(u'^[\d\s]+$', letters):
        if regex.match(u'^\s*$', what):
            return 'Fine. Be that way!'   # said nothing at all
        if regex.match(u'^[\d\s\p{Lu}]+$', letters):
            return 'Whoa, chill out!'     # all-caps shouting
    return 'Sure.' if what.endswith('?') else 'Whatever.'
开发者ID:itsolutionscorp,项目名称:AutoStyle-Clustering,代码行数:12,代码来源:7450de32a1174f55afc8726fe24b9588.py
示例19: catchup
def catchup(coordinate_i, line_j, filehandle_j, chrom_sequence):
    '''
    Keep reading the j_th vcf file until it hits (or goes past) the i_th coordinate, at which time the function stops reading and you can do stuff.
    Returns (True, Vcf_line_j) if the j_th vcf file contains an entry that matches the i_th coordinate.
    Returns (False, Vcf_line_j) if the j_th vcf file does not contain such an entry, and therefore the function has run past the i_th coordinate, by which time the programmer can decide to move into the next i_th coordinate.
    '''
    def _coord_of(line):
        # chrom+position prefix of a vcf line, or '' when absent.
        m = re.match(pattern_chr_position, line)
        return m.group() if m else ''

    # fix: the match/group/'' extraction was duplicated inline twice;
    # factored into _coord_of above.
    coordinate_j = _coord_of(line_j)
    # Which coordinate is behind? (0: j ahead, 10: equal, 1: j behind)
    is_behind = whoisbehind(coordinate_i, coordinate_j, chrom_sequence)
    # The file_j is already ahead, return the same line_j, but tag it "False"
    if is_behind == 0:
        reporter = (False, line_j)
    # The two coordinates are the same, return the same line_j, but tag it "True"
    elif is_behind == 10:
        reporter = (True, line_j)
    # If file_j is behind, then needs to catch up:
    elif is_behind == 1:
        # Keep at it until line_j is no longer behind:
        while is_behind == 1:
            # Catch up
            line_j = filehandle_j.readline().rstrip()
            coordinate_j = _coord_of(line_j)
            is_behind = whoisbehind(coordinate_i, coordinate_j, chrom_sequence)
        # If file_j has caught up exactly to the position of coordinate_i:
        if is_behind == 10:
            reporter = (True, line_j)
        # If file_j has run past coordinate_i:
        elif is_behind == 0:
            reporter = (False, line_j)
    return reporter
开发者ID:razZ0r,项目名称:somaticseq,代码行数:53,代码来源:vcf.py
示例20: readDrugLabelsFile
def readDrugLabelsFile(infilename):
    """Parse the PharmGKB drugLabels TSV into {drug: {gene: info}}.

    Row titles look like 'FDA Label for <drug> and <genes>' or the EMA
    equivalent; the gene list is optional and comma separated.  Only rows
    with index in the global (START, END] window are processed.

    Returns the nested table; each info value keeps columns 2-4 of the row.
    """
    drug = ""
    genes = ""
    x = 0
    table = {}
    # fix: the old try/finally called f.close() -- redundant with the
    # `with` block, and a NameError (masking the real error) if open()
    # itself failed.
    with open(infilename, 'r') as f:
        lineRaw = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        for line in lineRaw:
            if x > START:
                print("Drug Labels : %s - %s" % (x, line[1]))
                try:
                    # 'FDA Label for <drug> and <genes>' (or EMA variant).
                    match = regex.match(r'FDA Label for ([a-zA-Z0-9 \-,]+) and (.+)', line[1], regex.M|regex.I)
                    if not match:
                        match = regex.match(r'European Medicines Agency \(EMA\) Label for ([a-zA-Z0-9 \-,]+) and (.+)', line[1], regex.M|regex.I)
                    if match:
                        drug = match.group(1)
                        genes = match.group(2)
                    else:
                        # No gene list: 'FDA Label for <drug>' (or EMA variant).
                        match = regex.match(r'FDA Label for ([a-zA-Z0-9 \-,]+)', line[1], regex.M|regex.I)
                        if not match:
                            match = regex.match(r'European Medicines Agency \(EMA\) Label for ([a-zA-Z0-9 \-,]+)', line[1], regex.M|regex.I)
                        if match:
                            drug = match.group(1)
                            genes = ""
                        else:
                            # fix: "%s - %s" was given a single value, not a
                            # tuple, so this line raised TypeError instead
                            # of printing (and was masked by the except).
                            print("ERROR ON ROW - readDrugLabelsFile : %s - %s" % (x, line[1]))
                    if len(drug) > 0:
                        if drug in table:
                            row = table[drug]
                        else:
                            row = {}
                            table[drug] = row
                        # NOTE(review): these are *set* literals, so column
                        # order (and duplicates) is lost -- a tuple was
                        # probably intended; kept as-is to preserve behavior.
                        if len(genes) > 0:
                            for gene in genes.split(","):
                                row[gene] = {gene, line[2], line[3], line[4]}
                        else:
                            row[NO_GENE] = {NO_GENE, line[2], line[3], line[4]}
                    drug = ""
                    genes = ""
                except Exception:
                    # fix: bare `except:` also caught SystemExit/KeyboardInterrupt.
                    print("ERROR ON ROW - readDrugLabelsFile 2: %s - %s" % (x, line[0]))
            if x > END:
                return table
            x = x + 1
    return table
开发者ID:robertlario,项目名称:Allegrograph,代码行数:52,代码来源:PharmGKB.py
注:本文中的regex.match函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论