本文整理汇总了Python中regparser.layer.key_terms.KeyTerms类的典型用法代码示例。如果您正苦于以下问题:Python KeyTerms类的具体用法?Python KeyTerms怎么用?Python KeyTerms使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了KeyTerms类的17个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_keyterm_is_first_not_first
def test_keyterm_is_first_not_first(self):
    """An emphasized phrase that occurs later in the paragraph must not be
    reported as a leading key term."""
    para = Node('(a) This has a list: apples et seq.',
                label=['101', '22', 'a'])
    para.tagged_text = '(a) This has a list: apples <E T="03">et seq.</E>'
    layer = KeyTerms(None)
    self.assertFalse(layer.keyterm_is_first(para, 'et seq.'))
开发者ID:cfpb,项目名称:regulations-parser,代码行数:7,代码来源:layer_keyterms_tests.py
示例2: process
def process(self, node):
    """Build layer data (first/last word of the model-form span) for
    *node* when its label is registered in ``self.model_forms_nodes``.

    Returns a one-element list of layer dicts, or None when the node is
    not a model-form node or yields no usable words."""
    label = node.label_id()
    if label in self.model_forms_nodes and self.model_forms_nodes[label]:
        keyterm = KeyTerms.get_keyterm(node)
        if keyterm:
            # Skip past the closing emphasis tag that ends the keyterm
            close_tag = '</E>'
            remainder = node.text[node.text.find(close_tag)
                                  + len(close_tag):]
            words = remainder.split(' ')
        else:
            words = KeyTerms.process_node_text(node).split(' ')
        first_word = words[0]
        last_word = words[-1]
        if first_word and last_word:
            # The end word may repeat in the text; point at its final
            # occurrence by counting how many times it appears
            repeats = [w for w in words if w == last_word]
            return [{
                'start_word': first_word,
                'start_locations': [0],
                'end_word': last_word,
                'end_locations': [len(repeats) - 1],
            }]
开发者ID:EricSchles,项目名称:regulations-parser,代码行数:26,代码来源:model_forms_text.py
示例3: test_no_keyterm
def test_no_keyterm(self):
    """A paragraph with no emphasized leading phrase produces no layer
    data — process() returns None."""
    node = Node('(a) Apples are grown in New Zealand.',
                label=['101', '22', 'a'])
    node.tagged_text = '(a) Apples are grown in New Zealand.'
    kt = KeyTerms(None)
    results = kt.process(node)
    # assertEquals is a deprecated alias; use assertEqual
    self.assertEqual(results, None)
开发者ID:cfpb,项目名称:regulations-parser,代码行数:7,代码来源:layer_keyterms_tests.py
示例4: test_keyterm_and_emphasis
def test_keyterm_and_emphasis(self):
    """Only the leading emphasized phrase counts as the key term, even
    when another emphasized phrase appears later in the paragraph."""
    para = Node('(a) <E T="03">Apples.</E> Apples are grown in '
                + 'New <E T="03">Zealand.</E>', label=['101', '22', 'a'])
    layer = KeyTerms(None)
    found = layer.process(para)
    self.assertNotEqual(found, None)
    self.assertEqual(found[0]['key_term'], 'Apples.')
    self.assertEqual(found[0]['locations'], [0])
开发者ID:dclegalhackers,项目名称:regulations-parser,代码行数:8,代码来源:layer_keyterms.py
示例5: test_interpretation_markers
def test_interpretation_markers(self):
    """Key terms are detected in interpretation paragraphs (nodes whose
    label contains the INTERP marker) just as in regulation text."""
    para = Node('3. <E T="03">et seq.</E> has a list: apples',
                label=['101', 'c', Node.INTERP_MARK, '3'])
    layer = KeyTerms(None)
    found = layer.process(para)
    self.assertNotEqual(found, None)
    self.assertEqual(found[0]['key_term'], 'et seq.')
    self.assertEqual(found[0]['locations'], [0])
开发者ID:dclegalhackers,项目名称:regulations-parser,代码行数:8,代码来源:layer_keyterms.py
示例6: test_emphasis_later
def test_emphasis_later(self):
    """ Don't pick up something that is emphasized later in a paragraph as a key-term. """
    para = Node('(a) This has a list: apples <E T="03">et seq.</E>',
                label=['101', '22', 'a'])
    layer = KeyTerms(None)
    self.assertEqual(layer.process(para), None)
开发者ID:dclegalhackers,项目名称:regulations-parser,代码行数:8,代码来源:layer_keyterms.py
示例7: test_emphasis_close_to_front
def test_emphasis_close_to_front(self):
    """ An emphasized word is close to the front, but is not a key term.
    """
    para = Node('(a) T et seq. has a list: apples',
                label=['101', '22', 'a'])
    para.tagged_text = '(a) T <E T="03">et seq.</E> has a list: apples'
    layer = KeyTerms(None)
    self.assertFalse(layer.keyterm_is_first(para, 'et seq.'))
开发者ID:cfpb,项目名称:regulations-parser,代码行数:10,代码来源:layer_keyterms_tests.py
示例8: test_find_keyterm
def test_find_keyterm(self):
    """A leading emphasized phrase in the tagged text is reported as the
    key term at location 0."""
    para = Node(
        '(a) Apples. Apples are grown in New Zealand.',
        label=['101', '22', 'a'])
    para.tagged_text = ('(a) <E T="03">Apples.</E> Apples are grown in '
                        'New Zealand.')
    layer = KeyTerms(None)
    found = layer.process(para)
    self.assertNotEqual(found, None)
    self.assertEqual(found[0]['key_term'], 'Apples.')
    self.assertEqual(found[0]['locations'], [0])
开发者ID:cfpb,项目名称:regulations-parser,代码行数:11,代码来源:layer_keyterms_tests.py
示例9: test_keyterm_see
def test_keyterm_see(self):
    """ Keyterm tags sometimes enclose phrases such as 'See also' because
    those tags are also used for emphasis. """
    para = Node('(a) Apples. See Section 101.2',
                label=['101', '22', 'a'])
    para.tagged_text = '(a) <E T="03">Apples. See also</E>'
    layer = KeyTerms(None)
    found = layer.process(para)
    self.assertEqual(found[0]['key_term'], 'Apples.')
开发者ID:cfpb,项目名称:regulations-parser,代码行数:11,代码来源:layer_keyterms_tests.py
示例10: paragraph_with_marker
def paragraph_with_marker(self, text, tagged_text):
    """The paragraph has a marker, like (a) or a. etc.

    Splits *text* into its collapsed sub-paragraphs and appends one
    APPENDIX Node per piece to ``self.nodes``. Any leading keyterm is
    masked with same-length filler (';') before splitting so punctuation
    inside the keyterm cannot be mistaken for a paragraph boundary, then
    restored afterwards."""
    # To aid in determining collapsed paragraphs, replace any
    # keyterms present
    node_for_keyterms = Node(text, node_type=Node.APPENDIX)
    node_for_keyterms.tagged_text = tagged_text
    node_for_keyterms.label = [initial_marker(text)[0]]
    keyterm = KeyTerms.get_keyterm(node_for_keyterms)
    if keyterm:
        mtext = text.replace(keyterm, ';' * len(keyterm))
    else:
        mtext = text
    # NOTE: a large block of commented-out duplicate-label handling was
    # removed here — dead code; see version control if it is ever needed
    for mtext in split_paragraph_text(mtext):
        if keyterm:  # still need the original text
            mtext = mtext.replace(';' * len(keyterm), keyterm)
        node = Node(mtext, node_type=Node.APPENDIX,
                    label=[initial_marker(mtext)[0]])
        node.tagged_text = tagged_text
        self.nodes.append(node)
开发者ID:phildini,项目名称:regulations-parser,代码行数:31,代码来源:appendices.py
示例11: collapsed_markers_matches
def collapsed_markers_matches(node_text, tagged_text):
    """Find collapsed markers, i.e. tree node paragraphs that begin within a
    single XML node, within this text. Remove citations and other false
    positives. This is pretty hacky right now -- it focuses on the plain
    text but takes cues from the tagged text. @todo: streamline logic"""
    # In addition to the regex above, keyterms are an acceptable prefix. We
    # therefore convert keyterms to satisfy the above regex
    stub = Node(
        node_text, node_type=Node.INTERP, tagged_text=tagged_text,
        label=[get_first_interp_marker(node_text)]
    )
    keyterm = KeyTerms.keyterm_in_node(stub)
    if keyterm:
        node_text = node_text.replace(keyterm, '.' * len(keyterm))
    matches = []
    for marker in _first_markers:
        candidates = [(m, m.start(), m.end())
                      for m in marker.finditer(node_text)]
        candidates = remove_citation_overlaps(node_text, candidates)
        for triplet in candidates:
            match = triplet[0]
            if not false_collapsed_marker(match, node_text, tagged_text):
                matches.append(match)
    return matches
开发者ID:eregs,项目名称:regulations-parser,代码行数:26,代码来源:gpo_cfr.py
示例12: test_keyterm_definition
def test_keyterm_definition(self):
    """Emphasized terms that are being *defined* ("X means ...") must not
    be reported as key terms — process() returns None for each."""
    node = Node("(a) Terminator means I'll be back",
                label=['101', '22', 'a'])
    node.tagged_text = """(a) <E T="03">Terminator</E> means I'll be """
    node.tagged_text += 'back'
    kt = KeyTerms(None)
    results = kt.process(node)
    self.assertEqual(results, None)

    # Fix: this node was previously constructed but immediately
    # overwritten without ever being processed — exercise it now
    node = Node("(1) Act means pretend", label=['101', '22', 'a', '1'])
    node.tagged_text = """(1) <E T="03">Act</E> means pretend"""
    results = kt.process(node)
    self.assertEqual(results, None)

    node = Node("(1) Act means the Truth in Lending Act (15 U.S.C. 1601 et seq.).", label=['1026', '2', 'a', '1'])
    node.tagged_text = """(1) <E T="03">Act</E> means the Truth in Lending Act (15 U.S.C. 1601 <E T="03">et seq.</E>)."""
    results = kt.process(node)
    self.assertEqual(results, None)
开发者ID:jposi,项目名称:regulations-parser,代码行数:16,代码来源:layer_keyterms_tests.py
示例13: collapsed_markers_matches
def collapsed_markers_matches(node_text, tagged_text):
    """Find collapsed markers, i.e. tree node paragraphs that begin within a
    single XML node, within this text. Remove citations and other false
    positives. This is pretty hacky right now -- it focuses on the plain
    text but takes cues from the tagged text. @todo: streamline logic"""
    # In addition to the regex above, keyterms are an acceptable prefix. We
    # therefore convert keyterms to satisfy the above regex
    stub = Node(node_text, node_type=Node.INTERP,
                label=[get_first_interp_marker(node_text)])
    stub.tagged_text = tagged_text
    keyterm = KeyTerms.get_keyterm(stub)
    if keyterm:
        node_text = node_text.replace(keyterm, '.' * len(keyterm))
    matches = []
    # Characters which, if they immediately follow a match, disqualify it
    disqualifiers = ("e.", ")", u"”", '"', "'")
    for marker in _first_markers:
        candidates = ((m, m.start(), m.end())
                      for m in marker.finditer(node_text)
                      if m.start() > 0)
        candidates = remove_citation_overlaps(node_text, candidates)
        kept = [m for m, _, end in candidates
                if not node_text[end:].startswith(disqualifiers)]
        # As all "1." collapsed markers must be emphasized, run a quick
        # check to weed out some false positives
        if '<E T="03">1' not in tagged_text:
            kept = [m for m in kept if m.group(1) != '1']
        matches.extend(kept)
    return matches
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:30,代码来源:interpretations.py
示例14: test_emphasis_later
def test_emphasis_later(self):
    """ Don't pick up something that is emphasized later in a paragraph as
    a key-term. """
    para = Node(
        '(a) This has a list: apples et seq.',
        label=['101', '22', 'a'],
        tagged_text='(a) This has a list: apples <E T="03">et seq.</E>')
    assert KeyTerms.keyterm_in_node(para) is None
开发者ID:anthonygarvan,项目名称:regulations-parser,代码行数:9,代码来源:layer_keyterms_tests.py
示例15: test_emphasis_close_to_front
def test_emphasis_close_to_front(self):
    """ An emphasized word is close to the front, but is not a key term.
    """
    para = Node(
        '(a) T et seq. has a list: apples',
        label=['101', '22', 'a'],
        tagged_text='(a) T <E T="03">et seq.</E> has a list: apples')
    assert KeyTerms.keyterm_in_node(para) is None
开发者ID:anthonygarvan,项目名称:regulations-parser,代码行数:10,代码来源:layer_keyterms_tests.py
示例16: replace_markerless
def replace_markerless(self, stack, node, depth):
    """Assign a unique index to all of the MARKERLESS paragraphs"""
    if node.label[-1] != mtypes.MARKERLESS:
        return
    keyterm = KeyTerms.get_keyterm(node, ignore_definitions=False)
    if keyterm:
        index = keyterm_to_int(keyterm)
    else:
        # len(label[-1]) < 6 filters out keyterm-derived nodes when
        # counting existing markerless siblings at this depth
        index = 1 + sum(1 for sibling in stack.peek_level(depth)
                        if sibling.is_markerless()
                        and len(sibling.label[-1]) < 6)
    node.label[-1] = 'p{}'.format(index)
开发者ID:vrajmohan,项目名称:regulations-parser,代码行数:11,代码来源:paragraph_processor.py
示例17: paragraph_with_marker
def paragraph_with_marker(self, text, tagged_text):
    """The paragraph has a marker, like (a) or a. etc."""
    # Mask any leading keyterm with same-length filler so punctuation
    # inside the keyterm is not mistaken for a paragraph boundary
    stub = Node(text, node_type=Node.APPENDIX)
    stub.tagged_text = tagged_text
    stub.label = [initial_marker(text)[0]]
    keyterm = KeyTerms.get_keyterm(stub)
    if keyterm:
        masked = text.replace(keyterm, '.' * len(keyterm))
    else:
        masked = text
    for piece in split_paragraph_text(masked):
        if keyterm:  # still need the original text
            piece = piece.replace('.' * len(keyterm), keyterm)
        self.nodes.append(Node(piece, node_type=Node.APPENDIX,
                               label=[initial_marker(piece)[0]]))
开发者ID:EricSchles,项目名称:regulations-parser,代码行数:19,代码来源:appendices.py
注:本文中的regparser.layer.key_terms.KeyTerms类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论