本文整理汇总了Python中regparser.citations.internal_citations函数的典型用法代码示例。如果您正苦于以下问题：Python internal_citations函数的具体用法？Python internal_citations怎么用？Python internal_citations使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了internal_citations函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: is_child_of
def is_child_of(child_xml, header_xml):
    """Decide whether ``child_xml`` belongs beneath ``header_xml``.

    The result is truthy when any of the following holds:

    * the child is not a header (its tag is not 'HD');
    * the child's 'SOURCE' attribute sorts after the header's;
    * the header text contains citations and the child text does not.

    A missing 'SOURCE' attribute (``get`` returning ``None``) sorts before
    every present value. That is how Python 2 ordered ``None`` against
    strings; spelling it out keeps the comparison from raising
    ``TypeError`` on Python 3.
    """
    if child_xml.tag != 'HD':
        return True

    child_source = child_xml.get('SOURCE')
    header_source = header_xml.get('SOURCE')
    # (is-present, value) pairs: absent sorts before present, then by value
    child_key = (child_source is not None, child_source or '')
    header_key = (header_source is not None, header_source or '')
    if child_key > header_key:
        return True

    return (internal_citations(header_xml.text, Label())
            and not internal_citations(child_xml.text, Label()))
开发者ID:jposi,项目名称:regulations-parser,代码行数:7,代码来源:sxs.py
示例2: test_section_ref_in_appendix
def test_section_ref_in_appendix(self):
    # The initial label points inside appendix A, yet the "§ 1005.7(b)(1)"
    # reference in the text is expected to come back as a section label.
    text = u"""(a) Something something § 1005.7(b)(1)."""
    appendix_label = Label(
        part='1005', appendix='A', appendix_section='2', p1='a')
    found = internal_citations(text, appendix_label)
    first_label = found[0].label.to_list()
    self.assertEqual(first_label, ['1005', '7', 'b', '1'])
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:7,代码来源:citations_tests.py
示例3: scope_of_text
def scope_of_text(self, text, label_struct, verify_prefix=True):
    """Work out which definition scope(s) a piece of text points at.

    Candidate scopes come from citations found in the text and from the
    part/section/paragraph/subpart keyword patterns on ``Terms``. A
    candidate is kept when ``verify_prefix`` is false, or when the text
    before it matches ``Terms.scope_re`` or ``Terms.scope_used_re``.

    Returns a list of label tuples (each kept scope, followed by the same
    scopes with the interpretation marker appended), or None when nothing
    was kept.
    """
    # Citations are located before the text is lower-cased
    candidates = [
        (cit.full_start, cit.label.to_list())
        for cit in internal_citations(text, label_struct,
                                      require_marker=True)]

    text = text.lower()
    label_list = label_struct.to_list()

    # Keyword pattern -> the slice of the current label it refers to
    keyword_scopes = (
        (Terms.part_re, label_list[:1]),
        (Terms.sect_re, label_list[:2]),
        (Terms.par_re, label_list),
    )
    for regex, scope in keyword_scopes:
        for found in regex.finditer(text):
            candidates.append((found.start(), scope))

    # A subpart keyword expands into one candidate per label returned
    # by subpart_scope
    for found in Terms.subpart_re.finditer(text):
        for subpart_label in self.subpart_scope(label_list):
            candidates.append((found.start(), subpart_label))

    # Keep the candidates whose prefix checks out
    verified = []
    for start, label in candidates:
        prefix = text[:start]
        if (not verify_prefix
                or Terms.scope_re.match(prefix)
                or Terms.scope_used_re.match(prefix)):
            verified.append(label)

    # Every kept scope also gets an interpretation counterpart
    with_interps = verified + [
        label + [struct.Node.INTERP_MARK] for label in verified]
    if with_interps:
        return [tuple(label) for label in with_interps]
    return None
开发者ID:cfpb,项目名称:regulations-parser,代码行数:34,代码来源:terms.py
示例4: segment_tree
def segment_tree(text, part, parent_label):
    """Parse one interpretation segment into a tree.

    The text is split into title and body. Citation spans found in the
    body are passed to the parser as exclusions, and the tree's label is
    derived from the title.
    """
    title, body = utils.title_body(text)

    body_citations = internal_citations(body, Label(part=parent_label[0]))
    exclude = [(cit.full_start, cit.full_end) for cit in body_citations]

    title_labels = text_to_labels(title, Label(part=part, comment=True))
    label = merge_labels(title_labels)

    return interpParser.build_tree(body, 1, exclude, label, title)
开发者ID:tadhg-ohiggins,项目名称:regulations-parser,代码行数:9,代码来源:interpretation.py
示例5: test_single_match_multiple_paragraphs4
def test_single_match_multiple_paragraphs4(self):
    # Two fully-qualified section references, the second with a space
    # before its paragraph markers
    text = "Listing sections 11.55(d) and 321.11 (h)(4)"
    citations = internal_citations(text, Label(part='222', section='5'))
    self.assertEqual(2, len(citations))

    expectations = [
        (['11', '55', 'd'], '11.55(d)'),
        (['321', '11', 'h', '4'], '321.11 (h)(4)'),
    ]
    for citation, (label, matched) in zip(citations, expectations):
        self.assertEqual(label, citation.label.to_list())
        self.assertEqual(to_text(citation, text), matched)
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:10,代码来源:citations_tests.py
示例6: add_spaces_to_title
def add_spaces_to_title(title):
    """Insert a space after a citation that runs straight into a word.

    The first citation that is immediately followed by a letter, and
    whose own last character is not a space, gets a single space added
    after it. At most one space is ever inserted; a title with no such
    citation is returned unchanged.
    """
    for cit in internal_citations(title, Label()):
        cut = cit.end
        if cut >= len(title):
            continue
        if title[cut].isalpha() and title[cut - 1] != ' ':
            # Only the first qualifying citation is repaired
            return title[:cut] + ' ' + title[cut:]
    return title
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:10,代码来源:sxs.py
示例7: parse_into_labels
def parse_into_labels(txt, part):
    """Return the dash-joined labels of every citation found in ``txt``.

    If any cited label has an 'appendix' setting, only those labels are
    returned; otherwise all cited labels are returned.
    """
    cited = [cit.label for cit in internal_citations(txt, Label(part=part))]
    # Headers shouldn't mix appendix and regtext labels: appendix wins
    appendix_only = [label for label in cited
                     if 'appendix' in label.settings]
    chosen = appendix_only or cited
    return ['-'.join(label.to_list()) for label in chosen]
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:10,代码来源:sxs.py
示例8: test_single_match_multiple_paragraphs2
def test_single_match_multiple_paragraphs2(self):
    # One leading reference followed by sibling paragraphs/sections that
    # inherit its prefix ("(a) and (d)", ".4, and .5", ...)
    def cite(text):
        # A fresh initial label for every lookup
        return internal_citations(text, Label(part='222', section='5'))

    def assert_labels(text, expected_labels):
        citations = cite(text)
        self.assertEqual(len(expected_labels), len(citations))
        for expected, citation in zip(expected_labels, citations):
            self.assertEqual(expected, citation.label.to_list())

    # The first case also checks which slice of text each citation covers
    text = u'§ 1005.10(a) and (d)'
    citations = cite(text)
    self.assertEqual(2, len(citations))
    first, second = citations
    self.assertEqual(['1005', '10', 'a'], first.label.to_list())
    self.assertEqual(to_text(first, text), '1005.10(a)')
    self.assertEqual(['1005', '10', 'd'], second.label.to_list())
    self.assertEqual(to_text(second, text), '(d)')

    assert_labels(
        u'§ 1005.7(b)(1), (2) and (3)',
        [['1005', '7', 'b', '1'],
         ['1005', '7', 'b', '2'],
         ['1005', '7', 'b', '3']])

    assert_labels(
        u'§ 1005.15(d)(1)(i) and (ii)',
        [['1005', '15', 'd', '1', 'i'],
         ['1005', '15', 'd', '1', 'ii']])

    assert_labels(
        u'§ 1005.9(a)(5) (i), (ii), or (iii)',
        [['1005', '9', 'a', '5', 'i'],
         ['1005', '9', 'a', '5', 'ii'],
         ['1005', '9', 'a', '5', 'iii']])

    assert_labels(
        u'§ 1005.11(a)(1)(vi) or (vii).',
        [['1005', '11', 'a', '1', 'vi'],
         ['1005', '11', 'a', '1', 'vii']])

    # Only the number of citations is checked for the long list
    text = u'§§ 1005.3(b)(2) and (3), 1005.10(b), (d), and (e), 1005.13, '
    text += 'and 1005.20'
    self.assertEqual(7, len(cite(text)))

    assert_labels(
        'Sections 1005.3, .4, and .5',
        [['1005', '3'], ['1005', '4'], ['1005', '5']])
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:58,代码来源:citations_tests.py
示例9: generate_keyterm
def generate_keyterm(node):
    """Record the key term (if any) for ``node`` in ``layer``.

    Relies on names defined outside this function: ``layer``,
    ``real_key_terms_layer``, ``exclude_parser``, ``period`` and
    ``part_end``.

    If the node already has an entry in ``real_key_terms_layer`` it is
    copied over as-is. Otherwise the node's first "sentence" (up to the
    first period that is not inside an excluded span or a citation, or
    the whole text when it ends with an em dash) becomes the key term,
    provided it is at most 15 words, is not the entire text of a leaf
    node, does not end with ``part_end`` and does not start with '!['.
    Nothing is returned; the result is written into ``layer``.
    """
    label_id = node.label_id()
    if label_id in real_key_terms_layer:
        layer[label_id] = real_key_terms_layer[label_id]
        return

    node_text = key_terms.KeyTerms.process_node_text(node)
    if not node_text:
        return
    # Our Appendix parsing isn't particularly accurate -- avoid keyterms
    if node.node_type == struct.Node.APPENDIX:
        return

    exclude = [(start, end) for _, start, end in
               exclude_parser.scanString(node_text)]
    exclude.extend((pc.full_start, pc.full_end) for pc in
                   internal_citations(node_text, Label()))

    # Drop any periods which are part of an excluded span or citation.
    # This has to be a real list: a lazy ``filter`` object is always
    # truthy and cannot be indexed on Python 3.
    periods = [
        pos for pos in (m.start() for m in period.finditer(node_text))
        if all(pos < start or pos > end for start, end in exclude)]

    if periods:
        first_p = periods[0]
        # When the period sits "inside" a closing paren/quote, keep that
        # closing character as part of the sentence
        next_char = node_text[first_p + 1: first_p + 2]
        if next_char in (')', u'”'):
            first_sentence = node_text[:first_p + 2]
        else:
            first_sentence = node_text[:first_p + 1]
    elif node_text[-1] == u'—':
        # No full sentence, but text ending with a dash still counts
        first_sentence = node_text
    else:
        return

    # Key terms can't be the entire text of a leaf node
    if first_sentence == node_text and not node.children:
        return

    words = first_sentence.split()
    if words[-1] == part_end or first_sentence.startswith('!['):
        return
    # key terms are short
    if len(words) <= 15:
        layer[label_id] = [{
            "key_term": first_sentence,
            "locations": [0]
        }]
开发者ID:cfpb,项目名称:regulations-parser,代码行数:55,代码来源:plaintext_keyterms.py
示例10: is_child_of
def is_child_of(child_xml, header_xml, header_citations=None):
    """Decide whether ``child_xml`` belongs beneath ``header_xml``.

    Non-header nodes (tag other than 'HD') are always children. A header
    node is a child when:

    * its 'SOURCE' attribute sorts after the parent header's;
    * the parent header has citations and the node has none;
    * both have identical citations; or
    * both have citations and ``is_backtrack`` says so for the header's
      last citation versus the node's first.

    ``header_citations`` may be passed in to avoid re-parsing the header
    text; when None it is computed from ``header_xml.text``.

    A missing 'SOURCE' attribute (``get`` returning ``None``) sorts before
    every present value. That is how Python 2 ordered ``None`` against
    strings; spelling it out keeps the comparison from raising
    ``TypeError`` on Python 3.
    """
    if child_xml.tag != 'HD':
        return True

    if header_citations is None:
        header_citations = [c.label for c in
                            internal_citations(header_xml.text, Label())]
    child_citations = [c.label for c in
                       internal_citations(child_xml.text, Label())]

    child_source = child_xml.get('SOURCE')
    header_source = header_xml.get('SOURCE')
    # (is-present, value) pairs: absent sorts before present, then by value
    child_key = (child_source is not None, child_source or '')
    header_key = (header_source is not None, header_source or '')

    if (child_key > header_key
            or (header_citations and not child_citations)
            or (header_citations and header_citations == child_citations)):
        return True
    if header_citations and child_citations:
        return is_backtrack(header_citations[-1].to_list(),
                            child_citations[0].to_list())
    return False
开发者ID:khandelwal,项目名称:regulations-parser,代码行数:21,代码来源:sxs.py
示例11: build_section_tree
def build_section_tree(text, part):
    """Build the paragraph tree for one whole section.

    The text is split into title and body; the section number is read
    from the "<part>.<section>" identifier in the title. Citation spans
    in the body are handed to the parser as exclusions.
    """
    title, body = utils.title_body(text)

    body_citations = internal_citations(body, Label(part=part))
    exclude = [(cit.full_start, cit.full_end) for cit in body_citations]

    # NOTE(review): a title without "<part>.<digits>" makes re.search
    # return None, so .group raises AttributeError; '%d' needs an int part
    section_match = re.search(r'%d\.(\d+)\b' % part, title)
    section = section_match.group(1)

    return regParser.build_tree(
        body, exclude=exclude, label=[str(part), section], title=title)
开发者ID:ascott1,项目名称:regulations-parser,代码行数:12,代码来源:reg_text.py
示例12: test_single_match_multiple_paragraphs5
def test_single_match_multiple_paragraphs5(self):
    # Two comment references; both take the part ('222') from the
    # initial label and carry an 'Interp' marker before the comment number
    text = "See, e.g., comments 31(b)(1)(iv)-1 and 31(b)(1)(vi)-1"
    citations = internal_citations(text, Label(part='222', section='5'))
    self.assertEqual(2, len(citations))

    expectations = [
        (['222', '31', 'b', '1', 'iv', 'Interp', '1'], '31(b)(1)(iv)-1'),
        (['222', '31', 'b', '1', 'vi', 'Interp', '1'], '31(b)(1)(vi)-1'),
    ]
    for citation, (label, matched) in zip(citations, expectations):
        self.assertEqual(label, citation.label.to_list())
        self.assertEqual(to_text(citation, text), matched)
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:12,代码来源:citations_tests.py
示例13: test_single_match_multiple_paragraphs6
def test_single_match_multiple_paragraphs6(self):
    # "comments X-1 through -3" should yield one citation for each end
    # of the range
    text = "comments 5(b)(3)-1 through -3"
    citations = internal_citations(text, Label(part='100', section='5'))
    # Check the count before indexing, so a wrong number of citations is
    # reported as an assertion failure rather than an IndexError
    self.assertEqual(2, len(citations))

    citation = citations[0]
    self.assertEqual(['100', '5', 'b', '3', 'Interp', '1'],
                     citation.label.to_list())
    self.assertEqual(to_text(citation, text), '5(b)(3)-1')

    citation = citations[1]
    self.assertEqual(['100', '5', 'b', '3', 'Interp', '3'],
                     citation.label.to_list())
    self.assertEqual(to_text(citation, text), '-3')
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:12,代码来源:citations_tests.py
示例14: test_multiple_matches
def test_multiple_matches(self):
    # Appendix-style references ("A-5", "Q-2(r)", ...) scattered in text
    text = "Please see A-5 and Q-2(r) and Z-12(g)(2)(ii) then more text"
    citations = internal_citations(text, Label(part='102', section='1'))
    self.assertEqual(3, len(citations))

    expectations = [
        (['102', 'A', '5'], 'A-5'),
        (['102', 'Q', '2(r)'], 'Q-2(r)'),
        (['102', 'Z', '12(g)(2)(ii)'], 'Z-12(g)(2)(ii)'),
    ]
    for citation, (label, matched) in zip(citations, expectations):
        self.assertEqual(citation.label.to_list(), label)
        self.assertEqual(to_text(citation, text), matched)

    # Plural "Appendices" introducing two letters
    text = u"Appendices G and H—Yadda yadda"
    citations = internal_citations(text, Label(part='102'))
    self.assertEqual(2, len(citations))
    for citation, letter in zip(citations, ['G', 'H']):
        self.assertEqual(citation.label.to_list(), ['102', letter])
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:21,代码来源:citations_tests.py
示例15: parse
def parse(self, text, label):
    """Build layer entries for the internal citations found in ``text``.

    Only citations carrying a marker are requested. When
    ``self.verify_citations`` is set, the citations first pass through
    ``remove_missing_citations``. Each remaining citation becomes a dict
    with its offsets and label list, and the collection is returned via
    ``strip_whitespace``.
    """
    found = internal_citations(text, label, require_marker=True)
    if self.verify_citations:
        found = self.remove_missing_citations(found, text)
    layer_entries = [
        {'offsets': [(cit.start, cit.end)],
         'citation': cit.label.to_list()}
        for cit in found]
    return self.strip_whitespace(text, layer_entries)
开发者ID:jposi,项目名称:regulations-parser,代码行数:12,代码来源:internal_citations.py
示例16: split_into_ttsr
def split_into_ttsr(sxs):
    """Split a list of xml nodes into (title, text, sub sections, rest).

    The first node is the title. The nodes after it which ``is_child_of``
    accepts, up to the first rejection, form this header's section. That
    section is divided at its first 'HD' node into leading text nodes and
    sub-section nodes. Everything after the section is returned as the
    remainder.
    """
    title = sxs[0]
    title_citations = [cit.label for cit in
                       internal_citations(title.text, Label())]

    # Leading run of nodes that belong to this title
    section = []
    for element in sxs[1:]:
        if not is_child_of(element, title, title_citations):
            break
        section.append(element)

    # Index of the first header inside the section (or its end)
    first_header = next(
        (idx for idx, element in enumerate(section) if element.tag == 'HD'),
        len(section))
    text_elements = section[:first_header]
    sub_sections = section[first_header:]

    remaining = sxs[1 + len(section):]
    return (title, text_elements, sub_sections, remaining)
开发者ID:khandelwal,项目名称:regulations-parser,代码行数:13,代码来源:sxs.py
示例17: parse
def parse(self, text, label, title=None):
    """Build layer entries for the internal citations found in ``text``.

    ``title`` is forwarded to ``internal_citations`` and only citations
    carrying a marker are requested. When ``self.verify_citations`` is
    set, the citations first pass through ``remove_missing_citations``.
    Each remaining citation becomes a dict with its offsets and label
    list, and the collection is returned via ``strip_whitespace``.
    """
    found = internal_citations(
        text, label, require_marker=True, title=title)
    if self.verify_citations:
        found = self.remove_missing_citations(found, text)
    layer_entries = [
        {'offsets': [(cit.start, cit.end)],
         'citation': cit.label.to_list()}
        for cit in found]
    return self.strip_whitespace(text, layer_entries)
开发者ID:anthonygarvan,项目名称:regulations-parser,代码行数:15,代码来源:internal_citations.py
示例18: test_interp_headers
def test_interp_headers(self):
    # Each header text should produce exactly one citation with the
    # paired label; references without their own part use '101' from
    # the initial label
    cases = (
        ("Section 102.22Stuff", ['102', '22']),
        ("22(d) Content", ['101', '22', 'd']),
        ("22(d)(5) Content", ['101', '22', 'd', '5']),
        ("22(d)(5)(x) Content", ['101', '22', 'd', '5', 'x']),
        (u"§ 102.22(d)(5)(x) Content", ['102', '22', 'd', '5', 'x']),
        ("22(d)(5)(x)(Q) Content", ['101', '22', 'd', '5', 'x', 'Q']),
        ("Appendix A Heading", ['101', 'A']),
        ("Comment 21(c)-1 Heading", ['101', '21', 'c', 'Interp', '1']),
        ("Paragraph 38(l)(7)(i)(A)(2).",
         ['101', '38', 'l', '7', 'i', 'A', '2']),
        (u'Official Interpretations of § 102.33(c)(2)',
         ['102', '33', 'c', '2', 'Interp']),
    )
    for header_text, expected_label in cases:
        found = internal_citations(header_text, Label(part='101'))
        self.assertEqual(1, len(found))
        self.assertEqual(found[0].label.to_list(), expected_label)
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:18,代码来源:citations_tests.py
示例19: paragraph_tree
def paragraph_tree(appendix_letter, sections, text, label, title=None):
    """Build an appendix node whose children are its parsed sections.

    ``sections`` is a sequence of (begin, end) offsets into ``text``.
    With no sections, the whole text becomes a single appendix node.
    Otherwise each section is parsed by the paragraph parser (with its
    citation spans excluded), and the text before the first section
    becomes the text of the returned appendix node.
    """
    if not sections:
        return Node(text, label=label, title=title, node_type=Node.APPENDIX)

    def section_node(begin, end):
        # Parse a single section of the appendix into a child tree
        seg_title, section_text = utils.title_body(text[begin:end])
        sec_num = carving.get_appendix_section_number(
            seg_title, appendix_letter)
        citations = internal_citations(section_text, Label(part=label[0]))
        return parParser.build_tree(
            section_text,
            exclude=[(cit.full_start, cit.full_end) for cit in citations],
            label=label + [sec_num],
            title=seg_title)

    children = [section_node(begin, end) for begin, end in sections]
    intro_text = text[:sections[0][0]]
    return Node(intro_text, children, label, title, Node.APPENDIX)
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:19,代码来源:tree.py
示例20: split_paragraph_text
def split_paragraph_text(self, text, next_text=''):
    """Cut ``text`` at its paragraph markers and append ``next_text``.

    A break point is the first '(' at or after each ``_first_markers``
    match, unless that position falls inside a citation or is zero. The
    returned list holds the consecutive pieces of ``text`` between break
    points, followed by ``next_text``.
    """
    # Searching for '(' from the match start skips over the periods,
    # spaces, etc. that precede the marker itself
    paren_positions = [
        text.index('(', found.start())
        for marker in _first_markers
        for found in marker.finditer(text)]

    citations = internal_citations(text, require_marker=True)

    def inside_citation(pos):
        return any(cit.start <= pos <= cit.end for cit in citations)

    # Drop zeros and anything that is part of a citation; add the end
    break_points = [pos for pos in paren_positions
                    if pos and not inside_citation(pos)]
    break_points.append(len(text))

    # Each piece runs from the previous break point to the next one
    piece_starts = [0] + break_points[:-1]
    texts = [text[begin:end]
             for begin, end in zip(piece_starts, break_points)]
    texts.append(next_text)
    return texts
开发者ID:khandelwal,项目名称:regulations-parser,代码行数:20,代码来源:appendices.py
注：本文中的regparser.citations.internal_citations函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论