本文整理汇总了Python中xml.etree.cElementTree.iterparse函数的典型用法代码示例。如果您正苦于以下问题:Python iterparse函数的具体用法?Python iterparse怎么用?Python iterparse使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了iterparse函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: audit
def audit(osmfile, audit_value=''):
    """Audit the address tags of an OSM XML file.

    Every ``tag`` child of each ``node`` and ``way`` element is inspected.

    :param osmfile: path of the OSM XML file to read.
    :param audit_value: ``'postal'`` collects ``audit_postalcode(value)`` for
        tags accepted by ``is_postal``; ``'city'`` collects
        ``audit_city(value)`` for tags accepted by ``is_city``; any other
        value (the default) passes the values of tags accepted by
        ``is_street_name`` to ``audit_street_type``.
    :return: a ``set`` for ``'postal'`` and ``'city'``, otherwise the
        ``defaultdict(set)`` handed to ``audit_street_type``.
    """
    def _address_tags(osm_file):
        # "end" events are required here: iterparse only guarantees that
        # an element's children are present once its end tag was parsed.
        for _, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag == "node" or elem.tag == "way":
                for tag in elem.iter("tag"):
                    yield tag

    # The context manager closes the file on every exit path.
    with open(osmfile, "r") as osm_file:
        if audit_value == 'postal':
            zip_fixed = set()
            for tag in _address_tags(osm_file):
                if is_postal(tag):
                    zip_fixed.add(audit_postalcode(tag.attrib['v']))
            return zip_fixed

        if audit_value == 'city':
            city_fixed = set()
            for tag in _address_tags(osm_file):
                if is_city(tag):
                    city_fixed.add(audit_city(tag.attrib['v']))
            return city_fixed

        street_types = defaultdict(set)
        for tag in _address_tags(osm_file):
            if is_street_name(tag):
                audit_street_type(street_types, tag.attrib['v'])
        return street_types
开发者ID:medwards147,项目名称:Udacity_Data_Analyst_Nanodegree,代码行数:31,代码来源:audit.py
示例2: test_getAllNestedElementInformation
def test_getAllNestedElementInformation(self):
    """Check getAllNestedElementInformation on the first element of three test files."""
    expectedResultPeaks = {'fullName': 'Proteomics Standards Initiative Mass Spectrometry Ontology', 'id': 'MS', 'tagName': '{http://psi.hupo.org/ms/mzml}cv',
                           'URI': 'http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo'}
    expectedResultMzml = {'fullName': 'Proteomics Standards Initiative Mass Spectrometry Ontology', 'id': 'MS', 'tagName': '{http://psi.hupo.org/ms/mzml}cv',
                          'URI': 'http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo', 'version':'2.26.0'}
    expectedResultFeatureXML = {'name': 'FeatureFinder', 'tagName': 'software', 'version': '1.8.0'}

    def firstElementInformation(fileName):
        # Only the first element reported by iterparse is inspected. The
        # ``with`` block closes the file as soon as that element was read.
        with open(testFolder+fileName) as elementFile:
            for event, element in cElementTree.iterparse(elementFile):
                return elementFunctions.getAllNestedElementInformation(element)
        # No element was reported at all: same default as an empty result.
        return {}

    actualResultPeaks = firstElementInformation('peaksMzmlTestfile.peaks.mzML')
    actualResultMzml = firstElementInformation('mzml_test_file_1.mzML')
    actualResultFeatureXML = firstElementInformation('featurexmlTestFile_1.featureXML')

    self.assertDictEqual(expectedResultPeaks, actualResultPeaks)
    self.assertDictEqual(expectedResultMzml, actualResultMzml)
    self.assertDictEqual(expectedResultFeatureXML, actualResultFeatureXML)
开发者ID:davidmam,项目名称:pyMSA,代码行数:32,代码来源:test_elementFunctions.py
示例3: parse_blast_xml
def parse_blast_xml(args, cur):
    """Parse every BLAST XML input, or standard input when none is given.

    Each source is wrapped in an incremental parser reporting 'end' and
    'start' events and handed to ``_parse_blast_xml`` together with *args*
    and *cur*.
    """
    # Fall back to stdin when args.input is empty or unset.
    sources = args.input if args.input else [sys.stdin]
    for source in sources:
        events = et.iterparse(source, events=('end', 'start'))
        _parse_blast_xml(args, cur, events)
开发者ID:arendsee,项目名称:blastdbm,代码行数:8,代码来源:blastin.py
示例4: __init__
def __init__(self):
    """Load the ATC table and collect calculated properties of every drug.

    Reads ``all_level.json`` into ``self.ATC_dict`` and then streams
    ``drugbank.xml``; for each DrugBank ``drug`` element the results of
    ``self.calculated_properties`` on its ``calculated-properties`` children
    are appended to ``self.result``.
    """
    # json.load reads from the file; json.loads on the file *name* would
    # try to parse the literal text 'all_level.json' and always fail.
    with open('all_level.json') as atc_file:
        self.ATC_dict = json.load(atc_file)
    self.result = []
    # Default iterparse events are "end", so each drug is fully parsed
    # (children included) by the time it is seen here.
    for event, drug in ET.iterparse('drugbank.xml'):
        if drug.tag != '{http://www.drugbank.ca}drug':
            continue
        for first_level in drug:
            if first_level.tag == '{http://www.drugbank.ca}calculated-properties':
                self.result += self.calculated_properties(first_level)
开发者ID:novking,项目名称:Deep-Learning-Project,代码行数:9,代码来源:Class_Build.py
示例5: _iterparse
def _iterparse(xmlfile):
    """
    Return an iterparse object reporting only namespace declarations.

    Works around a bug in python 3.{2,3}, see
    http://bugs.python.org/issue9257: when the text event name is rejected
    with a TypeError, the bytes spelling is used instead.

    :param xmlfile: XML file or file-like object
    """
    ns_events = ("start-ns", )
    try:
        parser = ET.iterparse(xmlfile, events=ns_events)
    except TypeError:
        parser = ET.iterparse(xmlfile, events=(b"start-ns", ))
    return parser
开发者ID:ssato,项目名称:python-anyconfig,代码行数:10,代码来源:xml.py
示例6: main
def main():
    """Parse the command line and print the records of a DMARC XML report.

    The parsed arguments are stored in the module-level ``args``. The
    process exits with status 1 when ``get_meta`` returns nothing for the
    report.
    """
    global args
    options = argparse.ArgumentParser(
        epilog="Example: %(prog)s dmarc-xml-file 1> outfile.log")
    options.add_argument("dmarcfile", help="dmarc file in XML format")
    args = options.parse_args()

    # The report is parsed twice: the first pass collects the metadata,
    # the second pass prints the records using that metadata.
    meta_fields = get_meta(iter(etree.iterparse(args.dmarcfile, events=("start", "end"))))
    if not meta_fields:
        # sys.stderr.write behaves the same on Python 2 and 3, unlike the
        # Python 2-only ``print >> sys.stderr`` statement.
        sys.stderr.write("Error: No valid 'policy_published' and 'report_metadata' xml tags found; File: " + args.dmarcfile + "\n")
        sys.exit(1)
    print_record(iter(etree.iterparse(args.dmarcfile, events=("start", "end"))), meta_fields, args)
开发者ID:1234max,项目名称:dmarc-report-processor,代码行数:14,代码来源:dmarc-parser.py
示例7: count_tags
def count_tags(filename, limit=-1, verbose=False):
    """
    Parses the OSM file and counts the tags by type.

    :param filename: OSM XML file (path or file object) to parse.
    :param limit: when greater than 0, parsing stops once the running
        counter reaches this value, i.e. after ``limit + 1`` elements were
        counted; any other value means no limit.
    :param verbose: print the counter and tag name of every element.
    :return: ``(tag_count, tag_keys)`` where ``tag_count`` is the dict
        filled by ``add_tag`` with element names and ``tag_keys`` is a list
        of ``(key, count)`` pairs from the ``k`` attributes of ``tag``
        elements, sorted by decreasing count.
    """
    # initialize dict objects and counter
    tag_count = {}
    tag_keys = {}
    counter = 0

    # iterate through elements
    for _, element in ET.iterparse(filename, events=("start",)):
        # add to tag count
        add_tag(element.tag, tag_count)

        # if tag and has key, add the tag key to tag_keys dict
        if element.tag == 'tag' and 'k' in element.attrib:
            add_tag(element.get('k'), tag_keys)

        # print if verbose output enabled; the parenthesised single
        # argument prints the same text on Python 2 and Python 3
        if verbose:
            print("{0}: {1}".format(counter, element.tag))

        # break if exceed limit
        if limit > 0 and counter >= limit:
            break
        counter += 1

    # produces a sorted-by-decreasing list of tag key-count pairs; the
    # ascending sort is reversed (rather than reverse=True) so that the
    # order of equal counts stays exactly as before
    tag_keys = sorted(tag_keys.items(), key=operator.itemgetter(1))[::-1]

    # return values
    return tag_count, tag_keys
开发者ID:aarthyv,项目名称:Udacity-Data-analyst-nanodegree,代码行数:32,代码来源:Count+tags+for+Seattle.py
示例8: parse_and_write
def parse_and_write(xml_file, outfile, fields, tag, n, interval=1):
    """Stream *xml_file* and write selected attributes of matching rows.

    :param xml_file: XML source handed to ``ET.iterparse``.
    :param outfile: path of the text file to write, one line per row.
    :param fields: attribute names that must all be present on a row for
        it to be written; ``'Tags'`` goes through ``parse_tags``, every
        other field through ``clean``.
    :param tag: element name identifying a row.
    :param n: number of rows after which parsing stops; also used to
        compute the progress percentage.
    :param interval: only rows whose index is a multiple of this value are
        written (1 writes every row).
    """
    # get an iterable and turn it into an iterator
    context = iter(ET.iterparse(xml_file, events=("start", "end")))

    # get the root element; the next() builtin works on Python 2.6+ and 3,
    # unlike the Python 2-only ``context.next()`` method
    event, root = next(context)

    i = 0
    with open(outfile, 'w') as f:
        for event, row in context:
            if event == "end" and row.tag == tag:
                if i % 100000 == 0:
                    pct = round((i * 1.0 / n) * 100, 1)
                    Printer("Processed {0} records. ~ {1}\\% complete.".format(i, pct))

                if interval == 1 or i % interval == 0:
                    if all(fd in row.attrib for fd in fields):
                        field_data = []
                        for fd in fields:
                            if fd == 'Tags':
                                field_data.extend(parse_tags(row.attrib[fd].encode('ascii', 'ignore')))
                            else:
                                field_data.append(clean(row.attrib[fd].encode('ascii', 'ignore')))
                        text = " ".join(field_data) + "\n"
                        f.write(text)

                i += 1
                # drop the rows parsed so far so memory use stays flat
                root.clear()

                if i >= n:
                    break
开发者ID:chenjz,项目名称:online_lda_python,代码行数:28,代码来源:xml_parse.py
示例9: parseOsm
def parseOsm(source, handler):
    """Feed *source* to *handler* as SAX-like start/end notifications.

    ``handler.startElement(tag, attrib)`` is called when an element opens
    and ``handler.endElement(tag)`` when it closes; the element is cleared
    after each notification.
    """
    parser = ElementTree.iterparse(source, events=('start', 'end'))
    for kind, node in parser:
        if kind == 'end':
            handler.endElement(node.tag)
        elif kind == 'start':
            handler.startElement(node.tag, node.attrib)
        node.clear()
开发者ID:AndrewBuck,项目名称:supybot-plugin-osm,代码行数:7,代码来源:plugin.py
示例10: process_map
def process_map(filename):
    """Return the set of distinct ``user`` attribute values in the file."""
    return {
        node.attrib['user']
        for _, node in ET.iterparse(filename)
        if 'user' in node.attrib
    }
开发者ID:j-bennet,项目名称:udacity-nano-da,代码行数:7,代码来源:users.py
示例11: audit
def audit(osmfile):
    """Group the street names of an OSM XML file by street type.

    Nodes contribute every non-empty tag accepted by ``is_street_name``.
    Ways contribute their non-empty tags accepted by ``is_name``, but only
    when the way carries a ``highway`` tag whose value is in
    ``highway_types``.

    :param osmfile: path of the OSM XML file to read.
    :return: the ``defaultdict(set)`` filled through ``add_street_type``.
    """
    street_types = defaultdict(set)
    # The context manager closes the file even when a helper raises.
    with open(osmfile, "r") as osm_file:
        # "end" events are required: iterparse only guarantees that an
        # element's children are present once its end tag was parsed.
        for event, elem in ET.iterparse(osm_file, events=("end",)):
            # audit street names in "nodes" containing "addr:street"
            if elem.tag == "node":
                for tag in elem.iter("tag"):
                    if is_street_name(tag) and tag.attrib['v'] != "":
                        street_type, street_name = audit_street_type(
                            tag.attrib['v'])
                        add_street_type(street_types, street_type, street_name)
            # audit street names in "ways" containing "highway"
            elif elem.tag == "way":
                way_tags = list(elem.iter("tag"))
                # check if way matches an included highway type
                is_highway = any(
                    tag.attrib['k'] == "highway"
                    and tag.attrib['v'] in highway_types
                    for tag in way_tags)
                if not is_highway:
                    continue
                for tag in way_tags:
                    if is_name(tag) and tag.attrib['v'] != "":
                        street_type, street_name = audit_street_type(
                            tag.attrib['v'])
                        if street_type is not None:
                            add_street_type(street_types, street_type,
                                            street_name)
    return street_types
开发者ID:rbmayer,项目名称:Udacity,代码行数:29,代码来源:audit_street_names.py
示例12: process_osm
def process_osm(file_in):
    """Shape every element of the OSM file and store the non-empty results.

    Each parsed element goes through ``shape_data``; truthy results are
    inserted into ``way_node_collection``.
    """
    with open(file_in) as osm_source:
        # Lazy generator: each element is shaped right before it is stored.
        shaped = (shape_data(node) for _, node in ET.iterparse(osm_source))
        for document in shaped:
            if document:
                way_node_collection.insert(document)
开发者ID:kshannon,项目名称:Udacity_Data_Analyst,代码行数:7,代码来源:project3_dataWrangling.py
示例13: iterparse
def iterparse(text, interested_path_handlers):
    '''
    Incrementally parse *text* and dispatch start/end events to handlers.

    interested_path_handlers => {'start': ((interested_path, handler), (interested_path, handler), ...),
    'end':((interested_path, handler), (interested_path, handler), ...)}
    interested_path => (tag1, tag2, tag3, ...)

    The handlers registered for an event are passed to ``_do_handlers``
    together with the event name, the element and the list of tags leading
    to it. Elements are cleared once they close. Used instead of
    ElementTree.findall, whose CPU/Memory footprint is too high when the
    data set is big.
    '''
    buf = StringIO()
    buf.write(text)
    buf.seek(0)

    start_handlers = interested_path_handlers.get('start', ())
    end_handlers = interested_path_handlers.get('end', ())

    path = []
    for kind, node in iter(ElementTree.iterparse(buf, events=('start', 'end'))):
        if kind == 'start':
            path.append(node.tag)
            if start_handlers:
                _do_handlers(kind, node, path, start_handlers)
            continue
        if kind != 'end':
            continue
        if end_handlers:
            _do_handlers(kind, node, path, end_handlers)
        # The element is closed: leave its path and release its content.
        path.pop()
        node.clear()
开发者ID:chenziliang,项目名称:src,代码行数:28,代码来源:z_xml_iterparser.py
示例14: iterArticles
def iterArticles(f):
    """Yield one tuple per ``PubmedArticle`` element found in *f*.

    :param f: XML file name or file object handed to ``ET.iterparse``.
    :return: generator of ``(pmid, title, abstract, journal, mesh_list)``.
        The first four items are the text of the last ``PMID``,
        ``ArticleTitle``, ``AbstractText`` and ``Title`` element closed
        inside the article (``None`` when absent). ``mesh_list`` collects
        the ``Keyword`` texts of each ``KeywordList`` and, for every
        ``MeshHeading`` of a ``MeshHeadingList``, its non-empty
        ``DescriptorName`` and ``QualifierName`` texts.
    """
    pmid = None
    title = None
    abstract = None
    journal = None
    mesh_list = []
    for event, elem in ET.iterparse(f, events=("start", "end")):
        if event == 'start':
            if elem.tag == 'PubmedArticle':
                # A new article begins: forget the previous one.
                pmid, title, abstract, journal, mesh_list = None, None, None, None, []
        elif event == 'end':
            if elem.tag == 'PubmedArticle':
                yield pmid, title, abstract, journal, mesh_list
                # Everything needed was copied out above; release the
                # subtree so memory does not grow with the whole file.
                elem.clear()
            elif elem.tag == 'PMID':
                pmid = elem.text
            elif elem.tag == 'ArticleTitle':
                title = elem.text
            elif elem.tag == 'AbstractText':
                abstract = elem.text
            elif elem.tag == 'Title':
                journal = elem.text
            elif elem.tag == 'KeywordList':
                for keyword in elem.findall("Keyword"):
                    mesh_list.append(keyword.text)
            elif elem.tag == 'MeshHeadingList':
                for heading in elem.findall("MeshHeading"):
                    descriptor = heading.findtext('DescriptorName')
                    if descriptor:
                        mesh_list.append(descriptor)
                    qualifier = heading.findtext('QualifierName')
                    if qualifier:
                        mesh_list.append(qualifier)
开发者ID:Priya70,项目名称:Parsing-pubmed,代码行数:32,代码来源:parse_pubmed_journal.py
示例15: _xml_namespaces
def _xml_namespaces(self):
    """Yield a Resource and a Namespace for each namespace not stored yet.

    Namespace declarations are read from ``self._path``. A declaration is
    skipped when a ``Namespace`` whose resource name equals its URI already
    exists.
    """
    for _, (lcode, uri) in iterparse(self._path, events=('start-ns',)):
        known = Namespace.objects.filter(resource__name=uri).count()
        if known >= 1:
            continue
        resource = Resource(name=uri)
        yield resource
        yield Namespace(code=lcode, resource=resource)
开发者ID:pombredanne,项目名称:django-rdf,代码行数:7,代码来源:__init__.py
示例16: parse
def parse(stream):
    """Yield one dict per ``row`` element of *stream*.

    Each dict maps the ``name`` attribute of a child of the row to that
    child's stripped text, or to ``None`` when the child has no text.
    """
    for _event, node in et.iterparse(stream):
        if node.tag == 'row':
            record = {}
            for child in node:
                value = child.text.strip() if child.text else None
                record[child.get('name')] = value
            yield record
开发者ID:ad-m,项目名称:django-teryt,代码行数:7,代码来源:utils.py
示例17: _parse_results
def _parse_results(self, stream):
    """Parse results and messages out of *stream*.

    This is a generator driven by the 'start' and 'end' events of the XML
    in *stream*. It yields one ``OrderedDict`` per ``<result>`` element,
    mapping each field's UTF-8 encoded ``k`` attribute to its value (a
    single encoded string when the field has exactly one value, otherwise
    a list), and one ``Message`` per ``<msg>`` element. As a side effect
    ``self.is_preview`` is set from the ``preview`` attribute of the
    ``<results>`` wrapper element.
    """
    # State shared between events: the result currently being built (None
    # outside of a <result>) and the values of the field being read.
    result = None
    values = None
    try:
        for event, elem in et.iterparse(stream, events=('start', 'end')):
            if elem.tag == 'results' and event == 'start':
                # The wrapper element is a <results preview="0|1">. We
                # don't care about it except to tell us whether these
                # are preview results, or the final results from the
                # search.
                is_preview = elem.attrib['preview'] == '1'
                self.is_preview = is_preview
            if elem.tag == 'result':
                if event == 'start':
                    result = OrderedDict()
                elif event == 'end':
                    yield result
                    result = None
                    elem.clear()

            elif elem.tag == 'field' and result is not None:
                # We need the 'result is not None' check because
                # 'field' is also the element name in the <meta>
                # header that gives field order, which is not what we
                # want at all.
                if event == 'start':
                    values = []
                elif event == 'end':
                    field_name = elem.attrib['k'].encode('utf8')
                    if len(values) == 1:
                        result[field_name] = values[0]
                    else:
                        result[field_name] = values
                    # Calling .clear() is necessary to let the
                    # element be garbage collected. Otherwise
                    # arbitrarily large results sets will use
                    # arbitrarily large memory instead of
                    # streaming.
                    elem.clear()

            elif elem.tag in ('text', 'v') and event == 'end':
                # Collect the full text of the value, including the text
                # of any nested elements.
                text = "".join(elem.itertext())
                values.append(text.encode('utf8'))
                elem.clear()

            elif elem.tag == 'msg':
                # The message type is read when the element opens and the
                # message text when it closes.
                if event == 'start':
                    msg_type = elem.attrib['type']
                elif event == 'end':
                    text = elem.text if elem.text is not None else ""
                    yield Message(msg_type, text.encode('utf8'))
                    elem.clear()
    except SyntaxError as pe:
        # This is here to handle the same incorrect return from
        # splunk that is described in __init__.
        if 'no element found' in pe.msg:
            return
        else:
            raise
开发者ID:martindurant,项目名称:splunk-sdk-python,代码行数:60,代码来源:results.py
示例18: elements
def elements(self):
    """Yield each element that closes at nesting level 0 below the root.

    On first use an incremental parser is created on top of
    ``self.stream.reader`` and the root element is stored in
    ``self.root``; later calls continue with the same parser.
    """
    if not self.parser:
        reader = self.stream.reader

        class f(object):
            """Minimal file-like adapter over the stream reader."""
            def read(self, n):
                if reader.buffer.remaining == 0:
                    #read more data into buffer
                    reader._read_more()
                return reader.buffer.read_bytes(min(n, reader.buffer.remaining))

        self.parser = iter(iterparse(f(), events=("start", "end")))
        # The first event is the start of the root element. The next()
        # builtin works on Python 2.6+ and 3, unlike the ``.next()`` method.
        event, self.root = next(self.parser)

    level = 0
    for event, element in self.parser:
        if event == 'start':
            level += 1
        elif event == 'end':
            level -= 1
            if level == 0:
                yield element
                #TODO clear root
        else:
            assert False, "unexpected event"
开发者ID:bradjasper,项目名称:concurrence,代码行数:25,代码来源:stream.py
示例19: list_country
def list_country(filename):
    """Return the set of values of tags whose key contains 'governorate'.

    Every ``tag`` element below each parsed element is inspected.
    """
    return {
        tag.attrib['v']
        for _, element in ET.iterparse(filename)
        for tag in element.iter('tag')
        if 'governorate' in tag.attrib['k']
    }
开发者ID:akhoufi,项目名称:OpenStreetMap-Sample-Project-Data-Wrangling-with-MongoDB,代码行数:7,代码来源:AuditingMap.py
示例20: _get_annotations
def _get_annotations(self, source, start_offset=0):
    """It returns the annotations found in the document.

    The XML in *source* is parsed incrementally while a running character
    offset is kept, starting at *start_offset*. Elements whose tag is in
    ``self.tags_to_spot`` are reported with the offsets spanned by their
    text.

    It follows the following format:
    [
    ('TAG', {ATTRIBUTES}, (start_offset, end_offset)),
    ('TAG', {ATTRIBUTES}, (start_offset, end_offset)),
    ...
    ('TAG', {ATTRIBUTES}, (start_offset, end_offset))
    ]

    """
    annotations = []
    for event, element in etree.iterparse(
            StringIO(source), events=('start', 'end')):
        if event == 'start':
            if element.tag in self.tags_to_spot:
                try:
                    end_offset = start_offset + len(element.text)
                except TypeError:
                    # The element has no text (len(None) fails): it is
                    # skipped and the offset is left untouched.
                    continue
                annotations.append((element.tag, element.attrib,
                                    (start_offset, end_offset)))
            # NOTE(review): this runs for every element, so an element
            # outside tags_to_spot whose text is None raises TypeError
            # here - confirm that this cannot happen with real input.
            # NOTE(review): the text is read on the 'start' event, where
            # iterparse does not promise it is populated yet - confirm.
            start_offset += len(element.text)
        elif event == 'end':
            # The tail (text following the closing tag) only advances the
            # offset when the element had text as well.
            if element.text is not None and element.tail is not None:
                start_offset += len(element.tail)
    return annotations
开发者ID:filannim,项目名称:ManTIME,代码行数:28,代码来源:readers.py
注:本文中的xml.etree.cElementTree.iterparse函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论