本文整理汇总了Python中re.split函数的典型用法代码示例。如果您正苦于以下问题:Python split函数的具体用法?Python split怎么用?Python split使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了split函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: parse_dmapdumpstring
def parse_dmapdumpstring(dumpstring):
    """Parse the text output of a dmapdump run into a dict of variables.

    :param dumpstring: raw dmapdump text containing a 'scalars:' section
        followed by an 'arrays:' section.
    :returns: dict mapping variable names to scalar values or numpy arrays.
    """
    scandata = {}
    # Everything after 'scalars:' holds the data; 'arrays:' separates the
    # scalar assignments from the vector assignments.
    scan = dumpstring.split('scalars:')[-1].split('arrays:')
    scalars = scan[0].split('\n')
    vectors = re.split(VECTOR_SPLITTER, scan[1])
    for scalar in scalars:
        if scalar == '':
            continue
        # Lines look like '<type>\t"name" = value'.
        assignment = scalar.split('\t')[-1].split(' = ')
        var = assignment[0].lstrip('"').rstrip('"')
        # NOTE(review): eval on file-derived text is unsafe for untrusted
        # input; kept because dmapdump emits arbitrary Python-like literals
        # (ints, floats, quoted strings).
        value = eval(assignment[1])
        scandata[var] = value
    for vector in vectors:
        vector = vector.split('=')
        if len(vector) <= 1:
            continue
        var = vector[0].split('"')[1]
        vecvalue = []
        for v in re.split(ELEM_SPLITTER, vector[1]):
            v = v.rstrip(',')
            if v == '':
                continue
            if v in ('inf', 'nan', '-nan'):
                # Map non-finite markers straight to NaN (inf included, as
                # in the original) instead of eval('float("NaN")').
                vecvalue.append(float('nan'))
                continue
            try:
                vecvalue.append(eval(v))
            except Exception as err:  # was a bare except hiding all errors
                print('error parsing vector element %r: %s' % (v, err))
        scandata[var] = np.array(vecvalue)
    return scandata
开发者ID:loxodes,项目名称:SuperDARN_pydmap_write,代码行数:32,代码来源:superdarn_tools.py
示例2: __init__
def __init__(self, filename, myopen=open, swapYZ=False):
    """Load an ASCII PLY mesh: parse the header, then the declared
    vertex and face elements."""
    super(MeshPLY, self).__init__()
    with myopen(filename, "r") as handle:
        assert handle.readline().strip() == "ply"
        assert handle.readline().strip().startswith("format ascii")
        # Collect (element-name, count) pairs declared in the header.
        declared = []
        header_line = handle.readline().strip()
        while header_line != "end_header":
            fields = re.split("\\s+", header_line)
            if len(fields) >= 3 and fields[0] == 'element':
                declared.append((fields[1], int(fields[2])))
            header_line = handle.readline().strip()
        assert len(declared) >= 2
        for element_name, element_count in declared:
            for _ in range(element_count):
                record = handle.readline().strip()
                if element_name == 'vertex':
                    fields = re.split("\\s+", record)
                    if swapYZ:
                        # Swap Y/Z (negating the new Z) for alternate
                        # coordinate conventions.
                        vertex = V3(float(fields[0]), float(fields[2]), -float(fields[1]))
                    else:
                        vertex = V3(float(fields[0]), float(fields[1]), float(fields[2]))
                    self.vertices.append(vertex)
                elif element_name == 'face':
                    fields = re.split("\\s+", record)
                    # First token is the vertex count of the polygon.
                    arity = int(fields.pop(0))
                    self.faces.append((0, tuple(int(fields[j]) for j in range(arity))))
    assert self.vertices
    assert self.faces
开发者ID:mdgunn,项目名称:raspberryjammod,代码行数:33,代码来源:render.py
示例3: get_head_words
def get_head_words(s, nwords, ctype):
    """Extract the head phrase of a patent-claim string.

    :param s: claim text.
    :param nwords: maximum word count forwarded to the chunkers.
    :param ctype: claim type, 'device' or 'method'.
    :returns: result of first_JN_chunk / first_V_chunk on the trimmed text.
    """
    # First limit to the text before any comma or semicolon.
    # BUGFIX: r';,' only matched the literal two-character sequence ';,';
    # a character class is required to split on either delimiter.
    s = re.split(r'[;,]', s)[0]
    # Strip boilerplate patent phrases and enumeration markers.
    remove_list = r'(a\splurality\sof\s|at\sleast|composition\sof|the\ssteps\sof|wherein\s*(?:said)?|first|second|third|(?:[a-z]|\d+)?(?:\)|\.))'
    s = re.sub(remove_list, '', s)
    if ctype == 'device':
        # Devices: first ~ <JJ>*<NN>+ noun chunk.
        return first_JN_chunk(s, nwords)
    elif ctype == 'method':
        # Methods: prefer the text after "method of/for/to" ...
        msplit1 = re.split(r'method\s(of|for|to)', s)
        if len(msplit1) > 1:
            # Index 2 is the text after the matched preposition
            # (index 1 is the captured group itself).
            return first_V_chunk(msplit1[2], nwords)
        # ... else the text before a bare "method" ...
        msplit2 = re.split(r'method', s)
        if len(msplit2) > 1:
            return first_V_chunk(msplit2[0], nwords)
        # ... else fall back to the first verb chunk of the whole string.
        return first_V_chunk(s, nwords)
开发者ID:ajratner,项目名称:patent-similarity,代码行数:26,代码来源:SRHM.py
示例4: scrape_and_look_for_next_link
def scrape_and_look_for_next_link(url):
    """Scrape one results page, then recurse into the 'Next page' link if any.

    NOTE(review): Python 2 code (print statements); relies on module-level
    scraperwiki, lxml.html, BeautifulSoup, re, scrape_table and nextlink_url.
    """
    html = scraperwiki.scrape(url)
    #print html
    root = lxml.html.fromstring(html)
    soup = BeautifulSoup(html) #using BeautifulSoup to find next page links
    scrape_table(root) #before carrying on scrape the hrefs using the scrape_table function
    #print soup
    items = soup.findAll('a',title="Next page") # findAll "next page" links
    if items: # if there is a next page link continue
        next_link = root.cssselect("div.srch-Page.srch-Page-bg a")
        #print next_link
        if next_link:
            # The third anchor holds a javascript postback href; carve the
            # target-page argument out of it by splitting on ')', '=' and
            # quote runs — presumably ...('<arg>')... ; TODO confirm format.
            next_link2 = next_link[2].attrib['href']
            #print next_link2
            split_link = re.split("\)+",next_link2)
            split_link2 = re.split("\=+",split_link[0])
            split_link3 = re.split("\'+",split_link2[2])
            #print split_link3[0]
            #print split_link2
            #if split_link ==11:
            next_url = nextlink_url+split_link3[0]
            if next_url:
                print next_url
                # Tail-recurse into the next results page.
                scrape_and_look_for_next_link(next_url)
开发者ID:carriercomm,项目名称:scraperwiki-scraper-vault,代码行数:26,代码来源:aqp_nhs_contracts_scraper_resting_findall_function.py
示例5: tokenize
def tokenize(lines):
    """Split source lines into tokens, extracting string literals and
    function definitions along the way.

    Returns [tokens, strings, functions] where strings holds the literal
    texts (referenced from the token stream as 'string <index>') and
    functions maps a function name to its body tokens.
    """
    strings = []
    functions = {}
    flattened = ''
    for raw in lines:
        # Drop comments, fold newlines into spaces, discard tabs.
        cleaned = re.sub(r'#.*$', "", raw)
        cleaned = re.sub('\n', ' ', cleaned)
        cleaned = re.sub('\t', '', cleaned)
        # Odd-indexed chunks (between single quotes) are string literals;
        # replace each with a 'string N' placeholder.
        for idx, chunk in enumerate(re.split('\'', cleaned)):
            if idx % 2 == 0:
                flattened += chunk
            else:
                strings.append(chunk)
                flattened += 'string ' + str(len(strings) - 1)
    tokens = []
    for idx, segment in enumerate(re.split(';', flattened)):
        if segment != '' and segment != ' ' and segment != '\t':
            words = re.split(' ', segment.strip())
            if idx % 2 != 0:
                # Odd segments define a function: name followed by body.
                functions[words[0]] = words[1:]
            else:
                tokens += words
    tokens = substitute_tokens(tokens)
    return [tokens, strings, functions]
开发者ID:jake100,项目名称:interpreter,代码行数:27,代码来源:loader.py
示例6: update_index_html
def update_index_html(dest_dir, sectnum):
    """Post-process Sphinx's index.html: inject CSS, strip stub/TOC noise,
    and prefix table-of-contents titles with section numbers.

    :param dest_dir: directory (with trailing separator) containing index.html.
    :param sectnum: starting section number; incremented per section div.
    """
    # Process index.html separately from the modules files.
    with open(dest_dir + 'index.html', 'r') as index_html_file:
        index_html = index_html_file.readlines()
    for line_num, line in enumerate(index_html):
        # Inject a css rule to remove haiku's orange bullets.
        if '</head>' in line:
            index_html[line_num] = line.replace('</head>', '<style>\nul li {\n\tbackground: none;\n\tlist-style-type: none;\n}\n</style>\n</head>')
        elif 'class="section"' in line:
            sectnum += 1
        elif 'RegisterBook' in line:
            # Remove registerbook page from TOC.
            index_html[line_num] = ''
        elif 'hide-from-toc' in line:
            # Remove stub chapter title.
            if '<h1>' in index_html[line_num - 1]:
                index_html[line_num - 1] = ''
        elif 'class="toctree-l' in line and 'Gradebook' not in line and 'TODO List' not in line:
            # BUGFIX: re.split's third positional argument is maxsplit, not
            # flags — the original passed re.IGNORECASE (== 2) as maxsplit.
            title = re.split('>', re.split('</a>', line, flags=re.IGNORECASE)[0], flags=re.IGNORECASE)[-1]
            new_title = '%s.' % sectnum + title
            index_html[line_num] = line.replace(title, new_title)
    # Write the modified contents back to index.html.
    # BUGFIX: 'wb' requires bytes in Python 3 but the lines are str.
    with open(dest_dir + 'index.html', 'w') as index_html_file:
        index_html_file.writelines(index_html)
开发者ID:tfq,项目名称:OpenDSA,代码行数:26,代码来源:postprocessor.py
示例7: verify
def verify(self, data, chans, botops):
    """Verify a configuration, and make changes if needed.

    :param data: existing config tuple (name, nick, realname, ident, ...).
    :param chans: list of channel names.
    :param botops: list of bot-operator nicks.
    """
    verify = input('Is this configuration correct? [y/n]: ').lower()
    if verify == 'y':
        # Existing configuration accepted unchanged.
        return
    else:
        verify = ''
        # Re-prompt for every field until the user confirms.
        while verify != 'y':
            print('\n')
            name = data[0]
            nick = self.prompt("Nick", data[1])
            # NOTE(review): the "Ident" prompt fills realname and the
            # "Realname" prompt fills ident; the same swap exists in
            # create_new, so save_config receives a consistent tuple —
            # confirm whether the labels are intentionally crossed.
            realname = self.prompt("Ident", data[2])
            ident = self.prompt("Realname", data[3])
            chans = self.prompt("Chans", ", ".join(chans))
            botop = self.prompt("Bot operator(s)", ", ".join(botops))
            password = self.prompt("Server password (optional)", hidden=True)
            youtube = self.prompt("YouTube API key (optional)", hidden=True)
            # Accept comma- and/or space-separated lists.
            chans = re.split(',? ', chans)
            botop = re.split(',? ', botop)
            self.display((name, nick, realname, ident, password, youtube), chans, botop)
            verify = input('Is this configuration correct? [y/n]: ').lower()
    # Replace the stored configuration: delete old rows, then save anew.
    self.delete(name)
    cursor = self.db_conn.cursor()
    cursor.execute('''DELETE FROM channels WHERE config = ?''', (name,))
    cursor.execute('''DELETE FROM users WHERE config = ?''', (name,))
    self.db_conn.commit()
    cursor.close()
    self.save_config((name, nick, realname, ident, chans, botop, password, youtube))
开发者ID:stoye,项目名称:GorillaBot,代码行数:28,代码来源:configure.py
示例8: extractValues
def extractValues (self, line):
    """Return the cleaned values from a 'label: v1,v2,...' line."""
    # Text after the first colon holds the comma-separated raw values.
    payload = re.split(':', line)[1]
    return [self.cleanValue(chunk) for chunk in re.split(',', payload)]
开发者ID:MediaPreneur,项目名称:schemaorg,代码行数:7,代码来源:parsers.py
示例9: find_time_interval
def find_time_interval(fits):
    """
    find time interval of the fits file
    input: fits --- fits file name
    output: [tmin, tmax] --- start and stop time in seconds from 1998.1.1
    """
    # Run dmstat over the time column, redirecting output to the zspace
    # scratch file (module-level path), then parse that output.
    # NOTE(review): relies on module-level scf helpers; assumes the dmstat
    # output always contains a 'min' and a 'max' line, otherwise tmin/tmax
    # would be unbound (NameError) — confirm with callers.
    cmd = 'dmstat "' + fits + '[cols time]" centroid=no >' + zspace
    scf.run_ascds(cmd)
    out = scf.read_file(zspace, remove=1)
    chk = 0
    for val in out:
        mc1 = re.search('min', val)
        mc2 = re.search('max', val)
        if mc1 is not None:
            # Line looks like 'min <value> ...'; take the second field.
            atemp = re.split('\s+', val)
            tmin = int(float(atemp[1]))
            chk += 1
        elif mc2 is not None:
            atemp = re.split('\s+', val)
            tmax = int(float(atemp[1]))
            chk += 1
        if chk > 1:
            # Both bounds found; stop scanning.
            break
    return [tmin, tmax]
开发者ID:tisobe,项目名称:Sib_corr,代码行数:30,代码来源:create_sib_data.py
示例10: epg_list
def epg_list(self):
    """Populate self.epg with the currently-airing programme per channel.

    Parses the XMLTV file at the module-level addonEPG path and keeps only
    programmes whose start/stop window contains the current local time.
    NOTE(review): Python 2 code (str.decode/.encode); relies on module-level
    common helpers, re and datetime.
    """
    try:
        now = datetime.datetime.now()
        # Timestamp as YYYYMMDDHHMMSS so windows compare as plain integers.
        now = '%04d' % now.year + '%02d' % now.month + '%02d' % now.day + '%02d' % now.hour + '%02d' % now.minute + '%02d' % now.second
        file = open(addonEPG,'r')
        read = file.read()
        file.close()
        programmes = re.compile('(<programme.+?</programme>)').findall(read)
    except:
        # Missing/unreadable EPG file: silently leave self.epg untouched.
        return
    for programme in programmes:
        try:
            # start/stop attributes may carry a timezone suffix after a
            # space; keep only the leading timestamp.
            start = re.compile('start="(.+?)"').findall(programme)[0]
            start = re.split('\s+', start)[0]
            stop = re.compile('stop="(.+?)"').findall(programme)[0]
            stop = re.split('\s+', stop)[0]
            # Skip programmes not currently on air (raise jumps to the
            # per-programme except which just continues).
            if not int(start) <= int(now) <= int(stop): raise Exception()
            channel = common.parseDOM(programme, "programme", ret="channel")[0]
            title = common.parseDOM(programme, "title")[0]
            title = common.replaceHTMLCodes(title).encode('utf-8')
            desc = common.parseDOM(programme, "desc")[0]
            desc = common.replaceHTMLCodes(desc).encode('utf-8')
            # Label prefix is a Greek word stored as iso-8859-7 bytes,
            # re-encoded to utf-8 for display.
            epg = "[B][%s] - %s[/B]\n%s" % ('ÔÙÑÁ'.decode('iso-8859-7').encode('utf-8'), title, desc)
            self.epg.update({channel: epg})
        except:
            # Malformed programme entries are skipped (best-effort parse).
            pass
开发者ID:1c0n,项目名称:lambda-xbmc-addons,代码行数:34,代码来源:default.py
示例11: __init__
def __init__(self, gtf_line):
    """Build a GTF record from the nine standard tab-separated fields."""
    self.gtf_list = gtf_line
    # Field order fixed by the GTF spec.
    (self.seqname, self.source, self.feature, self.start, self.end,
     self.score, self.strand, self.frame, self.attribute) = gtf_line
    # Parse 'key "value"; key "value";' attributes into a dict, keeping
    # only well-formed two-token pairs.
    pairs = [re.split('\s+', chunk.replace('"', ''))
             for chunk in re.split('\s*;\s*', self.attribute.strip().strip(';'))]
    self.attribute = dict(pair for pair in pairs if len(pair) == 2)
    # Convert to 0-based half-open coordinates.
    self.start, self.end = int(self.start) - 1, int(self.end)
开发者ID:ctokheim,项目名称:PrimerSeq,代码行数:7,代码来源:gtf.py
示例12: ReadCropAttrs
def ReadCropAttrs(cropFile):
    """Read the crop attribute table into {field: {crop_id: value}}.

    :param cropFile: path to a tab-separated crop table; falls back to the
        default table under TXT_DB_DIR when the path does not exist.
    :returns: dict mapping each header field name to a {crop id: value}
        dict, with values converted to float where possible.
    """
    if not os.path.exists(cropFile):
        cropFile = TXT_DB_DIR + os.sep + CROP_FILE
    # was an unclosed open/readlines/close triple
    with open(cropFile) as f:
        lines = f.readlines()
    attrDic = {}
    # Header row: strip quotes and drop empty cells.
    # BUGFIX: the original compared strings with 'is not' (identity test,
    # a SyntaxWarning on modern Python); use != instead.
    fields = [item.replace('"', '')
              for item in re.split('\t|\n', lines[0]) if item != '']
    n = len(fields)
    for i in range(n):  # xrange -> range (Python 3)
        attrDic[fields[i]] = {}
    # Data rows start at line 3 (line 2 holds units/descriptions).
    for line in lines[2:]:
        items = [item.replace('"', '')
                 for item in re.split('\t', line) if item != '']
        crop_id = int(items[0])  # renamed from 'id' (shadowed the builtin)
        for i in range(n):
            dic = attrDic[fields[i]]
            try:
                dic[crop_id] = float(items[i])
            except ValueError:  # non-numeric cells are kept as strings
                dic[crop_id] = items[i]
    return attrDic
开发者ID:Shenfang1993,项目名称:SEIMS,代码行数:30,代码来源:reclass_crop.py
示例13: GetRestaurantGrid
def GetRestaurantGrid(d, zip):
    """Fetch one restaurant's detail page via an ASP.NET postback and save
    the scraped record to the scraperwiki sqlite store.

    :param d: grid-row index used to build the __EVENTTARGET control name.
    :param zip: zip code string, used for labelling the saved row.
    NOTE(review): Python 2 code; relies on module-level mechanize browsers
    br/br1, lxml, re, datetime, scraperwiki and parseViolations.
    """
    br.select_form("Form1")
    br.set_all_readonly(False)
    # Simulate clicking the grid row's link button via a manual postback.
    dt = 'dgResults$ctl' + str(d) + '$ctl00'
    # print dt
    br["__EVENTTARGET"] = dt
    br["__EVENTARGUMENT"] = ''
    request = br.click()
    response1 = br1.open(request)
    # find the window open hidden in the script
    html1 = response1.read()
    # print html1
    root1 = lxml.html.fromstring(html1)
    rest_name = root1.cssselect("span#lblName")[0].text
    rest_address = root1.cssselect("span#lblAddress")[0].text
    cityStateZip = root1.cssselect("span#lblCityStateZip")[0].text
    # City is the text before the first comma of 'City, State Zip'.
    city = re.split(",", cityStateZip)[0]
    rest_inspectionDate = root1.cssselect("span#lblLastInspection")[0].text
    if rest_inspectionDate == " ":
        date = ""
    else:
        # Label text presumably looks like 'Last inspection: <date>' —
        # take everything after the colon. TODO confirm format.
        date = re.split(":", rest_inspectionDate)[1].strip()
    violations = parseViolations(html1)
    # print violations
    scraperwiki.sqlite.save(unique_keys=["dt"], data={"dt": dt + "_" + zip + "_" + str(datetime.date.today()), "name": rest_name, "address": rest_address, "city": city, "state":"NY", "zip": zip, "inspection_date": date, "violations": violations, "time_scraped":datetime.datetime.now(), "page_id" : dt})
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:28,代码来源:suffolk_restaurants.py
示例14: split_string_with_lines
def split_string_with_lines(string, indentation = "", chars_per_line = 100):
    """Wrap a long comma-separated string onto multiple indented lines.

    Splits on commas (and, for oversize pieces, on spaces) that sit outside
    single- or double-quoted substrings, then re-packs the pieces into lines
    of roughly chars_per_line characters joined by newline + indentation.

    :returns: tuple (wrapped_text, number_of_lines).
    """
    # expert splitting mode
    # Split on commas not inside quotes: the lookahead only matches when an
    # even number of quote pairs remains to the right of the comma.
    matches = re.split(''',(?=(?:[^'"]|'[^']*'|"[^"]*")*$)''', string)
    splitted = []
    for s in matches:
        # Re-attach the comma to every piece except the last.
        splitted.append( s + ("," if s != matches[-1] else ""))
    res = []
    buf = ""
    for s in splitted:
        if len(s) > chars_per_line:
            # Piece is itself too long: split further on unquoted spaces.
            splitted2 = re.split(''' (?=(?:[^'"]|'[^']*'|"[^"]*")*$)''', s)
            for s2 in splitted2:
                ext = s2
                if s2 == splitted2[-2]:
                    # Glue a very short trailing fragment onto the
                    # second-to-last word rather than wrapping it alone.
                    if len(splitted2[-1]) <= 5:
                        ext += " " + splitted2[-1]
                buf += ext + (" " if s2 != splitted2[-1] and ext == s2 else "")
                if len(buf) >= chars_per_line or s2 == splitted2[-1]:
                    res.append(buf)
                    buf = ""
                if ext != s2:
                    # The last fragment was already consumed above; stop.
                    break
        else:
            buf += s
            # Flush the buffer when full or when this was the last piece.
            if len(buf) >= chars_per_line or s == splitted[-1]:
                res.append(buf)
                buf = ""
    return ("\n%s"%indentation).join( res ), len( res )
开发者ID:timofurrer,项目名称:lettuce,代码行数:32,代码来源:strings.py
示例15: compare_time
def compare_time(start, end):
    """
    <Purpose>
      Manually compares two times.
      Returns True if end time is at least as recent as start time.
      Returns False otherwise.
    <Arguments>
      start, end: timestamp strings with zero-padded fields separated by
      '-', '+', ':' or spaces (e.g. 'YYYY-MM-DD HH:MM:SS').
    <Exceptions>
      None
    <Returns>
      Bool
    """
    s = re.split('-|\+|:| ', start)
    e = re.split('-|\+|:| ', end)
    # BUGFIX: the original compared each field independently and returned
    # False whenever ANY start field exceeded the matching end field, so
    # e.g. start=2020-12-01, end=2021-01-01 was wrongly rejected (12 > 01).
    # Lexicographic list comparison of the zero-padded fields compares the
    # timestamps most-significant-field first.
    return s[:6] <= e[:6]
开发者ID:kellender,项目名称:Totolly-Secure,代码行数:32,代码来源:ctrl.py
示例16: cmpAlphaNum
def cmpAlphaNum(str1, str2):
    """Natural-order comparison of two strings, case-insensitively.

    Digit runs compare numerically, other runs lexicographically; numeric
    tokens sort before alphabetic ones (matching the Python 2 int-vs-str
    ordering the original relied on, which raises TypeError on Python 3).

    :returns: -1, 0 or 1 (cmp-style).
    """
    def _tokens(s):
        # Split into alternating text/digit runs, dropping empty edges.
        parts = re.split('(\d+)', s.lower())
        if parts and parts[0] == '':
            parts.pop(0)
        if parts and parts[-1] == '':
            parts.pop()
        # BUGFIX: the original's 'except:ValueError' was a bare except
        # followed by a no-op expression; convert digit runs explicitly.
        return [int(p) if p.isdigit() else p for p in parts]

    t1 = _tokens(str1)
    t2 = _tokens(str2)
    for a, b in zip(t1, t2):
        if a == b:
            continue
        # Mixed types: ints sort before strings (Python 2 semantics).
        if isinstance(a, int) != isinstance(b, int):
            return -1 if isinstance(a, int) else 1
        return 1 if a > b else -1
    # All shared tokens equal: fall back to py2 cmp() on lengths
    # (cmp() no longer exists in Python 3).
    return (len(t1) > len(t2)) - (len(t1) < len(t2))
开发者ID:briner,项目名称:script-solaris,代码行数:30,代码来源:tsm_zpool.py
示例17: _parse_meta
def _parse_meta(fname):
    """Get the metadata as a dict out of the mitGCM mds .meta file."""
    flds = {}
    # File basename without its extension, e.g. 'T.0000000000'.
    flds["basename"] = re.match(r"(^.+?)\..+", os.path.basename(fname)).groups()[0]
    with open(fname) as f:
        text = f.read()
    # Each assignment is a ';'-terminated "key = [ ... ]" (or "{ ... }").
    for item in re.split(";", text):
        item = re.sub(r"^\s+", "", item)
        match = re.match(r"(\w+) = (\[|\{)(.*)(\]|\})", item, re.DOTALL)
        if not match:
            continue
        key, _, value, _ = match.groups()
        # Trim surrounding whitespace from the bracketed value.
        value = re.sub(r"^\s+", "", value)
        value = re.sub(r"\s+$", "", value)
        flds[key] = value
    # now check the needed things are there
    for needed in ["dimList", "nDims", "nrecords", "dataprec"]:
        assert needed in flds
    # transform datatypes
    flds["nDims"] = int(flds["nDims"])
    flds["nrecords"] = int(flds["nrecords"])
    # use big endian always
    flds["dataprec"] = np.dtype(re.sub("'", "", flds["dataprec"])).newbyteorder(">")
    # dimList rows are separated by ',\n', columns by ','.
    flds["dimList"] = [
        [int(h) for h in re.split(",", g)]
        for g in re.split(",\n", flds["dimList"])
    ]
    if "fldList" in flds:
        # Field names are quoted, whitespace-separated tokens.
        flds["fldList"] = [
            re.match(r"'*(\w+)", g).groups()[0]
            for g in re.split(r"'\s+'", flds["fldList"])
        ]
        assert flds["nrecords"] == len(flds["fldList"])
    return flds
开发者ID:rabernat,项目名称:xgcm,代码行数:35,代码来源:mdsxray.py
示例18: parse_relationship
def parse_relationship(expression):
    """
    Parse a relationship expression containing a package name and (optionally)
    a version relation of the form ``python (>= 2.6)``. Raises
    :py:exc:`ValueError` when parsing fails. An example:
    >>> from deb_pkg_tools.deps import parse_relationship
    >>> parse_relationship('python')
    Relationship(name='python')
    >>> parse_relationship('python (<< 3)')
    VersionedRelationship(name='python', operator='<<', version='3')
    :param expression: A relationship expression (a string).
    :returns: A :py:class:`Relationship` object.
    """
    # Break "name (op version)" on the parentheses.
    tokens = [part.strip() for part in re.split('[()]', expression)
              if part and not part.isspace()]
    if len(tokens) == 1:
        # Just a package name (no version information).
        return Relationship(tokens[0])
    if len(tokens) != 2:
        # Encountered something unexpected!
        msg = "Corrupt package relationship expression: Splitting name from relationship resulted in more than two tokens! (expression: %r, tokens: %r)"
        raise ValueError(msg % (expression, tokens))
    # Package name followed by relationship to specific version(s) of package.
    name, relationship = tokens
    # Separate the comparison operator from the version string.
    tokens = [part.strip() for part in re.split('([<>=]+)', relationship)
              if part and not part.isspace()]
    if len(tokens) != 2:
        # Encountered something unexpected!
        msg = "Corrupt package relationship expression: Splitting operator from version resulted in more than two tokens! (expression: %r, tokens: %r)"
        raise ValueError(msg % (relationship, tokens))
    return VersionedRelationship(name, *tokens)
开发者ID:sankalp-khare,项目名称:python-deb-pkg-tools,代码行数:32,代码来源:deps.py
示例19: create_new
def create_new(self):
    """Create a new configuration.

    Interactively prompts for all fields, loops until confirmed, saves via
    self.save_config, and returns the new configuration's name.
    """
    verify = ''
    # Re-prompt the whole form until the user confirms it.
    while verify != 'y':
        print('\n')
        # Require a configuration name not already present in the database.
        name = ""
        while name == "":
            name = input("Unique name for this configuration: ")
            cursor = self.db_conn.cursor()
            cursor.execute('''SELECT * FROM configs WHERE name = ?''', (name,))
            data = cursor.fetchone()
            cursor.close()
            if data:
                print('The name "{0}" is not unique.'.format(name))
                name = ""
        nick = self.prompt("Nick", "GorillaBot")
        # NOTE(review): the "Ident" prompt fills realname and the "Realname"
        # prompt fills ident; the same swap exists in verify(), so the tuple
        # handed to save_config is internally consistent — confirm whether
        # the labels are intentionally crossed.
        realname = self.prompt("Ident", "GorillaBot")
        ident = self.prompt("Realname", "GorillaBot")
        chans = self.prompt("Channel(s)")
        botop = self.prompt("Bot operator(s)", '')
        password = self.prompt("Server password (optional)", hidden=True)
        youtube = self.prompt("YouTube API key (optional)", hidden=True)
        # Accept comma- and/or space-separated lists.
        chans = re.split(',? ', chans)
        botop = re.split(',? ', botop)
        self.display((name, nick, realname, ident, password, youtube), chans, botop)
        verify = input('Is this configuration correct? [y/n]: ').lower()
    self.save_config((name, nick, realname, ident, chans, botop, password, youtube))
    return name
开发者ID:stoye,项目名称:GorillaBot,代码行数:28,代码来源:configure.py
示例20: main
def main():
    """Copy instance data to an output directory, expanding known duplicates.

    Reads a duplicates file whose rows are '<shasum> <count> <instance...>',
    copies every file from --data-directory into --output-directory, and for
    each duplicated instance also writes a path-adjusted copy under each
    duplicate's own name.
    """
    parser = ArgumentParser(description="", formatter_class=RawDescriptionHelpFormatter, add_help=True)
    parser.add_argument("--data-directory", dest="data_directory", default=None, help="path to directory containing the source instance data to use")
    parser.add_argument("--output-directory", dest="output_directory", default=None, help="path to directory for all of the output instance data")
    parser.add_argument("--duplicates-file", dest="duplicates_file", default=None, help="path to file containing list of duplicate instance data, rows of <shasum> <count> <instance1> <instance2> ...")
    args = parser.parse_args()
    # organize all of the duplicate information
    # rows are in the format <shasum> <instance count> <instance 1> <instance 2>
    instances_to_duplicates = {}   # instance key -> list of its duplicate keys
    instance_keys_to_paths = {}    # instance key -> path from duplicates file
    with open(args.duplicates_file, 'r') as f:
        for line in f:
            line = line.lstrip().rstrip()
            components = line.split(' ')
            shasum = components[0]
            count = int(components[1])
            instances = components[2:]
            # Key is the basename with its lowercase-letter extension removed.
            instance_keys = [re.split('\.[a-z]+$', os.path.basename(x))[0] for x in instances]
            for key,path in zip(instance_keys, instances):
                instance_keys_to_paths[key] = path
            # Every key in the row maps to all of the row's other keys.
            for key in instance_keys:
                remaining = list(instance_keys)
                remaining.remove(key)
                instances_to_duplicates[key] = remaining
    for instance_data in os.listdir(args.data_directory):
        # Split 'name.ext' into key and extension (capture keeps the ext).
        instance_components = re.split('\.([a-z]+)$', instance_data)
        instance_key = instance_components[0]
        instance_extension = instance_components[1]
        # copy the instance data, then copy it to its duplicate keys if needed
        instance_path = "{}/{}".format(args.data_directory, instance_data)
        shutil.copy(instance_path, args.output_directory)
        if instance_key in instances_to_duplicates:
            for dupe in instances_to_duplicates[instance_key]:
                dupe_filename = "{}.{}".format(dupe, instance_extension)
                source = instance_keys_to_paths[instance_key]
                dest = instance_keys_to_paths[dupe]
                # Compute the differing path suffixes so any self-reference
                # inside the file can be rewritten to the duplicate's name.
                prefix = os.path.commonprefix([source, dest])
                source_suffix = source.replace(prefix, '')
                dest_suffix = dest.replace(prefix, '')
                # modify the content to contain the right file.
                with open(instance_path, 'r') as source_file:
                    with open("{}/{}".format(args.output_directory, dupe_filename), 'w') as dest_file:
                        for line in source_file:
                            modified_line = line.rstrip().replace(source_suffix, dest_suffix)
                            print(modified_line, file=dest_file)
开发者ID:fawcettc,项目名称:planning-instances,代码行数:60,代码来源:expand_duplicate_instance_data.py
注:本文中的re.split函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论