本文整理汇总了Python中slybot.plugins.scrapely_annotations.extraction.SlybotIBLExtractor类的典型用法代码示例。如果您正苦于以下问题:Python SlybotIBLExtractor类的具体用法?Python SlybotIBLExtractor怎么用?Python SlybotIBLExtractor使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SlybotIBLExtractor类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_extractor_w_empty_string_extraction
def test_extractor_w_empty_string_extraction(self):
    """Check ``name`` extraction when ``gender`` carries a digits-only regex.

    The schema declares an optional ``gender`` and a required ``name`` text
    field.  Extractor ``1`` (regex ``([0-9]+)``) is applied to ``gender``;
    the test then asserts that the first item extracted from
    ``self.target2`` has ``name == [u'Name Olivia']``.
    """
    # NOTE(review): presumably the regex yields an empty string for the
    # gender text in the fixture (per the test name) -- confirm against
    # self.template2 / self.target2.
    item_descriptor = create_slybot_item_descriptor({
        'fields': {
            'gender': {'required': False, 'type': 'text', 'vary': False},
            'name': {'required': True, 'type': 'text', 'vary': False},
        }
    })
    digits_only = {1: {"regular_expression": "([0-9]+)"}}
    apply_extractors(item_descriptor, {"gender": [1]}, digits_only)

    templates = [(self.template2, {'#default': item_descriptor}, '0.12.0')]
    items = SlybotIBLExtractor(templates).extract(self.target2)[0]
    self.assertEqual(items[0]['name'], [u'Name Olivia'])
开发者ID:FFFFFurry,项目名称:portia,代码行数:26,代码来源:test_extractors.py
示例2: test_extract_missing_schema
def test_extract_missing_schema(self):
    """Extract with an empty descriptor mapping and compare raw HTML.

    With no descriptors supplied, the second extracted item is expected to
    carry the same raw ``<span>`` markup for ``full_name``, ``first_name``
    and ``last_name``.
    """
    templates = [(sample_411, {}, '0.13.0')]
    second_item = SlybotIBLExtractor(templates).extract(page_411)[0][1]
    name_markup = (
        '<span itemprop="name"><span itemprop="givenName">Joe'
        '</span> <span itemprop="familyName">Smith</span></span>'
    )
    # All three fields are checked against the identical markup, in the
    # same order as before.
    for field in ('full_name', 'first_name', 'last_name'):
        self.assertEqual(second_item[field], [name_markup])
开发者ID:daqv,项目名称:portia-dashboard,代码行数:8,代码来源:test_multiple_item_extraction.py
示例3: test_per_annotation_extractors
def test_per_annotation_extractors(self):
    """Check extraction when extractors are registered per annotation.

    Eight extractors (type extractors and regular expressions) are added
    to the default descriptor via ``add_extractors_to_descriptors``.  The
    first item extracted from ``self.target3`` -- after dropping its
    ``_template`` key -- must equal the expected ``result`` mapping.
    """
    schema = {
        'fields': {
            'url': {
                'required': False,
                'type': 'text',
                'vary': False,
            },
            'name': {
                'required': True,
                'type': 'text',
                'vary': False,
            }
        }
    }
    # Regex patterns are raw strings so that ``\.`` and ``\d`` reach the
    # regex engine verbatim without relying on Python's deprecated
    # pass-through of invalid string escapes.
    extractors = {
        '1': {
            'type_extractor': 'url'
        },
        '2': {
            'regular_expression': r'(.*)\.html'
        },
        '3': {
            'regular_expression': 'Name: (.*)'
        },
        '4': {
            'type_extractor': 'text'
        },
        '5': {
            'type_extractor': 'price'
        },
        '6': {
            'type_extractor': 'number'
        },
        '7': {
            'type_extractor': 'date'
        },
        '8': {
            'regular_expression': r'(\d+)-'
        }
    }
    descriptors = {'#default': create_slybot_item_descriptor(schema)}
    add_extractors_to_descriptors(descriptors, extractors)
    ibl_extractor = SlybotIBLExtractor([
        (self.template3, descriptors, '0.13.0')
    ])
    # NOTE(review): 'title', 'price' and 'date' are not declared in the
    # schema above; presumably they come from annotations in
    # self.template3 -- confirm against the fixture.
    result = {'name': [u'Olivia'], 'url': [u'http://www.test.com/olivia'],
              'title': [u'Name: Olivia'], 'price': [u'2016'],
              'date': [datetime(2016, 3, 17, 20, 25)]}
    data = ibl_extractor.extract(self.target3)[0][0]
    # '_template' is removed before comparison; it is not part of the
    # expected mapping.
    del data['_template']
    self.assertEqual(data, result)
开发者ID:01-,项目名称:portia,代码行数:52,代码来源:test_extractors.py
示例4: test_default_type_extractor
def test_default_type_extractor(self):
    """Apply a regex extractor to a field that the schema does not declare.

    The schema has no fields at all; extractor ``1`` is still applied to
    ``gender`` and the first extracted item must yield
    ``gender == [u'Male']``.
    """
    empty_descriptor = create_slybot_item_descriptor({'fields': {}})
    gender_regex = {
        1: {"regular_expression": "Gender\\s+(Male|Female)"}
    }
    apply_extractors(empty_descriptor, {"gender": [1]}, gender_regex)

    templates = [(self.template, {'#default': empty_descriptor}, '0.12.0')]
    items = SlybotIBLExtractor(templates).extract(self.target)[0]
    self.assertEqual(items[0]['gender'], [u'Male'])
开发者ID:FFFFFurry,项目名称:portia,代码行数:13,代码来源:test_extractors.py
示例5: test_required_annotation
def test_required_annotation(self):
    """Compare extraction with and without required annotations.

    With ``sample_daft`` the page must yield 5 items, each containing
    ``ber``, ``address`` and ``price_change``.  With
    ``sample_daft_no_requireds`` the same page must yield 8 items, each
    containing ``ber`` and ``address``, and at least one item lacking
    ``price_change``.
    """
    # unittest assertion methods are used throughout instead of bare
    # ``assert`` so the checks still run under ``python -O`` and match the
    # ``self.assertEqual`` calls in this method.
    extractor = SlybotIBLExtractor([(sample_daft, {}, '0.13.0')])
    data = extractor.extract(page_daft)[0]
    self.assertEqual(len(data), 5)
    self.assertTrue(all('ber' in house for house in data))
    self.assertTrue(all('address' in house for house in data))
    self.assertTrue(all('price_change' in house for house in data))

    extractor = SlybotIBLExtractor([(sample_daft_no_requireds, {},
                                     '0.13.0')])
    data = extractor.extract(page_daft)[0]
    self.assertEqual(len(data), 8)
    self.assertTrue(all('ber' in house for house in data))
    self.assertTrue(all('address' in house for house in data))
    self.assertTrue(any('price_change' not in house for house in data))
开发者ID:FrankieChan885,项目名称:portia,代码行数:14,代码来源:test_multiple_item_extraction.py
示例6: test_required_annotation
def test_required_annotation(self):
    """Check item counts and non-empty required fields on two target pages.

    One extractor built from ``simple_template`` is run against
    ``target1`` (10 items expected) and then ``target2`` (5 items
    expected).  Every item must contain truthy ``rank`` and
    ``description`` values.
    """
    templates = [(simple_template, simple_descriptors, '0.13.0')]
    ibl_extractor = SlybotIBLExtractor(templates)

    for page, expected_count in ((target1, 10), (target2, 5)):
        # extract() is unpacked as a pair; only the first element is used.
        items, _ = ibl_extractor.extract(page)
        self.assertEqual(len(items), expected_count)
        for field in ('rank', 'description'):
            self.assertTrue(
                all(field in item and item[field] for item in items))
开发者ID:daqv,项目名称:portia-dashboard,代码行数:14,代码来源:test_multiple_item_extraction.py
示例7: test_extract_single_attribute_to_multiple_fields
def test_extract_single_attribute_to_multiple_fields(self):
    """Check that the expected values appear under the fields' ``name`` keys.

    Two regex extractors are registered: ``1`` captures the text before a
    whitespace character and ``2`` the text after one.  ``first_name`` and
    ``last_name`` declare display names (``prénom`` / ``nom``), and the
    second extracted item is expected to expose values under those names.
    """
    # Raw strings keep ``\s`` intact for the regex engine without relying
    # on Python's deprecated pass-through of invalid string escapes.
    extractors = {'1': {'regular_expression': r'(.*)\s'},
                  '2': {'regular_expression': r'\s(.*)'}}
    descriptors = {'#default': create_slybot_item_descriptor({'fields': {
        'full_name': {'type': 'text', 'required': False, 'vary': False},
        'first_name': {'type': 'text', 'required': False, 'vary': False,
                       'name': u'prénom'},
        'last_name': {'type': 'text', 'required': False, 'vary': False,
                      'name': 'nom'},
        'address': {'type': 'text', 'required': False, 'vary': False}}})}
    add_extractors_to_descriptors(descriptors, extractors)
    extractor = SlybotIBLExtractor([(sample_411, descriptors, '0.13.0')])
    data = extractor.extract(page_411)[0]
    # NOTE(review): which annotation uses which extractor id is defined in
    # sample_411, not here -- confirm against the fixture.
    self.assertEqual(data[1]['full_name'], [u'Joe Smith'])
    self.assertEqual(data[1][u'prénom'], [u'Joe'])
    self.assertEqual(data[1]['nom'], [u'Smith'])
开发者ID:FrankieChan885,项目名称:portia,代码行数:16,代码来源:test_multiple_item_extraction.py
示例8: test_text_type_w_regex
def test_text_type_w_regex(self):
    """Apply a regex extractor to a ``text``-typed ``gender`` field.

    The first item extracted from ``self.target`` must have
    ``gender == [u'Male']``.
    """
    gender_descriptor = create_slybot_item_descriptor({
        "fields": {
            'gender': {'required': False, 'type': 'text', 'vary': False},
        }
    })
    gender_regex = {1: {"regular_expression": "Gender\\s+(Male|Female)"}}
    apply_extractors(gender_descriptor, {"gender": [1]}, gender_regex)

    templates = [(self.template, {'#default': gender_descriptor}, '0.12.0')]
    items = SlybotIBLExtractor(templates).extract(self.target)[0]
    self.assertEqual(items[0]['gender'], [u'Male'])
开发者ID:FFFFFurry,项目名称:portia,代码行数:17,代码来源:test_extractors.py
示例9: test_raw_type_w_regex
def test_raw_type_w_regex(self):
    """Apply a regex extractor to a ``raw``-typed ``gender`` field.

    The regex captures the whole ``<td>`` element that follows "Gender",
    and the first extracted item must have
    ``gender == [u'<td >Male</td>']`` (markup included).
    """
    schema = {
        'fields': {
            'gender': {
                'required': False,
                'type': 'raw',
                'vary': False,
            }
        }
    }
    descriptor = create_slybot_item_descriptor(schema)
    # Raw string so ``\s`` reaches the regex engine verbatim without
    # relying on Python's deprecated pass-through of invalid escapes.
    extractors = {1: {
        "regular_expression": r"Gender.*(<td\s*>(?:Male|Female)</td>)"
    }}
    apply_extractors(descriptor, {"gender": [1]}, extractors)
    # NOTE(review): this template tuple has two elements (no version
    # string), unlike the three-element tuples used by the other examples;
    # presumably it targets a SlybotIBLExtractor revision that accepts
    # that form -- confirm before reusing.
    ibl_extractor = SlybotIBLExtractor([(self.template, {'#default': descriptor})])
    self.assertEqual(ibl_extractor.extract(self.target)[0][0]['gender'], [u'<td >Male</td>'])
开发者ID:TimoC1982,项目名称:portia,代码行数:18,代码来源:test_extractors.py
示例10: test_extract_missing_schema
def test_extract_missing_schema(self):
    """Extract with an empty descriptor mapping and compare text values.

    With no descriptors supplied, the second extracted item is expected to
    report ``[u'Joe Smith']`` for ``full_name``, ``first_name`` and
    ``last_name`` alike.
    """
    templates = [(sample_411, {}, '0.13.0')]
    items = SlybotIBLExtractor(templates).extract(page_411)[0]
    # The item is re-indexed on each check, as in the original assertions.
    for field in ('full_name', 'first_name', 'last_name'):
        self.assertEqual(items[1][field], [u'Joe Smith'])
开发者ID:FrankieChan885,项目名称:portia,代码行数:6,代码来源:test_multiple_item_extraction.py
注:本文中的slybot.plugins.scrapely_annotations.extraction.SlybotIBLExtractor类示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论