本文整理汇总了Python中utils.CanadianPerson类的典型用法代码示例。如果您正苦于以下问题:Python CanadianPerson类的具体用法?Python CanadianPerson怎么用?Python CanadianPerson使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了CanadianPerson类的8个代码示例,这些例子默认根据受欢迎程度排序。(注意:示例代码中使用的Person应为CanadianPerson的导入别名,例如 from utils import CanadianPerson as Person。)您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: scrape
def scrape(self):
    """Yield the mayor, then one ``Person`` per listed councillor.

    The mayor is produced by ``self.scrape_mayor(page)``. Councillors are
    read from the ``<p>`` siblings that follow the "Councillors" ``<h3>``
    heading; the last such paragraph is ignored, as are paragraphs whose
    bold text is empty or contains "Vacant".
    """
    page = self.lxmlize(COUNCIL_PAGE)
    yield self.scrape_mayor(page)

    paragraphs = page.xpath('//h3[contains(text(), "Councillors")]/following-sibling::p')
    next_regional_seat = 1
    for node in paragraphs[:-1]:
        label = ' '.join(node.xpath('./strong/text()'))
        if not label:
            continue
        if 'Vacant' in label:
            continue

        # The bold text reads "<name>, <role>[, <district>]".
        name, remainder = label.split(', ', 1)
        if 'Regional Councillor' in remainder:
            # No district is parsed for regional councillors; each one is
            # assigned the next numbered seat instead.
            role = remainder
            district = 'Whitby (seat {})'.format(next_regional_seat)
            next_regional_seat += 1
        else:
            role, ward = remainder.strip().split(', ')
            # Keep only the text before any " (" suffix.
            district = ward.partition(' (')[0]

        email = self.get_email(node)
        image = node.xpath('./img/@src')[0]

        person = Person(
            primary_org='legislature',
            name=name,
            district=district,
            role=role,
            image=image,
        )
        person.add_source(COUNCIL_PAGE)
        person.add_contact('email', email)
        yield person
开发者ID:ppival,项目名称:scrapers-ca,代码行数:28,代码来源:people.py
示例2: scrape
def scrape(self):
    """Yield the mayor, then one ``Person`` per councillor cell.

    The first matching table cell is handed to ``self.scrape_mayor``; each
    remaining cell that contains a link is treated as a councillor whose
    detail page (the first link in the cell) supplies phone numbers, links
    and the e-mail address.

    Raises:
        AssertionError: if the council page contains no matching cells.
    """
    page = self.lxmlize(COUNCIL_PAGE)
    councillors = page.xpath('//div[@class="article-content"]//td[@class="ms-rteTableOddCol-0"]')
    # Validate before indexing: previously this check ran after
    # ``councillors[0]``, so an empty result surfaced as a bare IndexError
    # and the explanatory message was never shown.
    assert len(councillors), 'No councillors found'

    yield self.scrape_mayor(councillors[0])

    for councillor in councillors[1:]:
        # Cells without a link have no detail page to follow.
        if not councillor.xpath('.//a'):
            continue

        texts = [text for text in councillor.xpath('.//text()') if clean_string(text)]
        name = texts[0]
        district = texts[1]
        url = councillor.xpath('.//a/@href')[0]
        # Separate name so the council page binding is not shadowed.
        councillor_page = self.lxmlize(url)

        p = Person(primary_org='legislature', name=name, district=district, role='Conseiller')
        p.add_source(COUNCIL_PAGE)
        p.add_source(url)
        # The portrait is the last image in the cells preceding this one.
        p.image = councillor.xpath('./preceding-sibling::td//img/@src')[-1]

        contacts = councillor_page.xpath('.//td[@class="ms-rteTableOddCol-0"]//text()')
        for contact in contacts:
            # Any text node containing four consecutive digits is taken to
            # be a phone number.
            if re.findall(r'[0-9]{4}', contact):
                phone = contact.strip().replace(' ', '-')
                p.add_contact('voice', phone, 'legislature')

        get_links(p, councillor_page.xpath('.//td[@class="ms-rteTableOddCol-0"]')[0])

        email = self.get_email(councillor_page)
        p.add_contact('email', email)
        yield p
开发者ID:opencivicdata,项目名称:scrapers-ca,代码行数:32,代码来源:people.py
示例3: scrape
def scrape(self):
    """Yield one ``Person`` per link found in the ``c2087`` block.

    Every link is followed to a profile page. A profile whose first
    ``<h2>`` mentions "Maire" becomes the mayor of Sherbrooke; any other
    profile becomes a "Conseiller" whose district is read from the page.
    """
    # Districts that get an explicit boundary URL on their membership.
    borough_boundaries = {
        'Brompton': '/boundaries/sherbrooke-boroughs/brompton/',
        'Lennoxville': '/boundaries/sherbrooke-boroughs/lennoxville/',
    }

    index_page = self.lxmlize(COUNCIL_PAGE)
    for link in index_page.xpath('//div[@id="c2087"]//a'):
        name = link.text_content()
        url = link.attrib['href']
        profile = self.lxmlize(url)

        heading = profile.xpath('//h2/text()')[0]
        if 'Maire' in heading:
            role = 'Maire'
            district = 'Sherbrooke'
        else:
            role = 'Conseiller'
            raw_district = profile.xpath('//div[@class="csc-default"]//a[@target="_blank"]/text()')[0]
            district = raw_district.replace('district', '')
            district = district.replace('Domaine Howard', 'Domaine-Howard')
            district = district.strip()

        # Drop the leading "de " from the two borough names.
        if district == 'de Brompton' or district == 'de Lennoxville':
            district = district.replace('de ', '')

        person = Person(primary_org='legislature', name=name, district=district, role=role)
        person.add_source(COUNCIL_PAGE)
        person.add_source(url)
        person.image = profile.xpath('//div[@class="csc-textpic-image csc-textpic-last"]//img/@src')[0]

        # The list item is split on ":"; the first piece is used both as
        # the contact type and as its note, the second as the value.
        pieces = profile.xpath('//li[contains(text(), "phone")]/text()')[0].split(':')
        note, phone = pieces[0], pieces[1]
        person.add_contact(note, phone, note)

        email = self.get_email(profile)
        if email:
            person.add_contact('email', email)

        if district in borough_boundaries:
            person._related[0].extras['boundary_url'] = borough_boundaries[district]

        yield person
开发者ID:anukat2015,项目名称:scrapers-ca,代码行数:32,代码来源:people.py
示例4: scrape
def scrape(self):
    """Yield the mayor (when truthy), then one ``Person`` per councillor row.

    The council page supplies the mayor and councillor URLs. Councillor
    rows are the ``<tr>`` elements with an image in a cell; the last such
    row is ignored, as are rows whose text contains "Vacant".
    """
    index_page = self.lxmlize(COUNCIL_PAGE)

    mayor_url = index_page.xpath('//a[contains(text(), "Mayor")]/@href')[0]
    mayor = self.scrape_mayor(mayor_url)
    if mayor:
        yield mayor

    councillors_url = index_page.xpath('//a[contains(text(), "Councillors")]/@href')[0]
    listing = self.lxmlize(councillors_url)

    for row in listing.xpath("//tr[td//img]")[:-1]:
        # Each row must have exactly two children: image cell, info cell.
        img_cell, info_cell = tuple(row)

        vacant = info_cell.xpath('.//p//text()[contains(., "Vacant")]')
        if vacant:
            continue

        titled_name = info_cell.xpath('.//p//text()[contains(., "Councillor")]')[0]
        name = titled_name.replace("Councillor ", "")
        district = info_cell.xpath('.//p[contains(text(), "District")]//text()')[0]
        email = self.get_email(info_cell)
        phone = self.get_phone(info_cell, area_codes=[438, 514])
        # Image paths are resolved against the councillors page URL.
        img_url = urljoin(councillors_url, img_cell.xpath(".//img/@src")[0])

        person = Person(primary_org="legislature", name=name, district=district, role="Conseiller")
        for source in (COUNCIL_PAGE, councillors_url):
            person.add_source(source)
        person.add_contact("email", email)
        person.add_contact("voice", phone, "legislature")
        person.image = img_url
        yield person
开发者ID:tor-councilmatic,项目名称:scrapers-ca,代码行数:30,代码来源:people.py
示例5: scrape_mayor
def scrape_mayor(self, url):
    """Yield a single ``Person`` for the mayor of Markham.

    The name is taken from the ``alt`` text of the first image whose
    ``alt`` contains "Mayor", dropping everything up to the first space.
    This is a generator: callers receive an iterable, not a ``Person``.
    """
    mayor_page = self.lxmlize(url)
    alt_text = mayor_page.xpath('//img/@alt[contains(., "Mayor")]')[0]
    # Discard the first word of the alt text; the remainder is the name.
    mayor_name = alt_text.split(' ', 1)[1]

    mayor = Person(primary_org='legislature', name=mayor_name, district='Markham', role='Mayor')
    mayor.add_source(url)
    yield mayor
开发者ID:opencivicdata,项目名称:scrapers-ca,代码行数:8,代码来源:people.py
示例6: scrape
def scrape(self):
    """Yield an ``Organization`` and a ``Person`` for each listed mayor/warden.

    The council listing is a PDF: it is downloaded, converted to text with
    the external ``pdftotext`` command, and split into one section per
    "Mayor " / "Warden " marker. Each section is consumed line by line as
    name, district, address, phone and (optionally) fax.

    Yields:
        For every section except the one named "Jim Smith": first the
        municipal council ``Organization``, then the ``Person`` holding a
        'Mayor' membership in it.
    """
    # Local import: the file's top-level import block is outside this block.
    import tempfile

    response = urlopen(COUNCIL_PAGE).read()
    # The download is bytes, so the file must be binary. A private temp
    # file replaces the fixed /tmp/ns.pdf path and is removed automatically,
    # even when pdftotext fails.
    with tempfile.NamedTemporaryFile(suffix='.pdf') as pdf:
        pdf.write(response)
        pdf.flush()
        raw_text = subprocess.check_output(['pdftotext', pdf.name, '-'])
    # check_output returns bytes; the str patterns below need text.
    data = raw_text.decode('utf-8')

    emails = re.findall(r'(?<=E-mail: ).+', data)
    sections = re.split(r'Mayor |Warden ', data)[1:]
    for section in sections:
        lines = section.splitlines(True)
        name = lines.pop(0).strip()
        if name == "Jim Smith":
            continue

        district = lines.pop(0).strip()
        # A following line without digits is treated as the rest of the
        # district name rather than the start of the address.
        if not re.findall(r'[0-9]', lines[0]):
            district = district + ' ' + lines.pop(0).strip()

        org = Organization(name=district + ' Municipal Council', classification='legislature', jurisdiction_id=self.jurisdiction.jurisdiction_id)
        org.add_source(COUNCIL_PAGE)
        yield org

        p = Person(primary_org='legislature', name=name, district=district)
        p.add_source(COUNCIL_PAGE)
        membership = p.add_membership(org, role='Mayor', district=district)

        # The address spans two to four lines, ending at the "Phone" line.
        address = lines.pop(0).strip() + ', ' + lines.pop(0).strip()
        if 'Phone' not in lines[0]:
            address = address + ', ' + lines.pop(0).strip()
        if 'Phone' not in lines[0]:
            address = address + ', ' + lines.pop(0).strip()

        phone = lines.pop(0).split(':')[1].strip()
        # Reset per section so a missing Fax line neither raises NameError
        # nor reuses the previous section's number.
        fax = None
        if 'Fax' in lines.pop(0):
            fax = lines.pop(0).strip()

        membership.add_contact_detail('address', address, 'legislature')
        membership.add_contact_detail('voice', phone, 'legislature')
        if fax:
            membership.add_contact_detail('fax', fax, 'legislature')

        # @todo emails are being assigned incorrectly, e.g. Town of Berwick picks
        # up Cape Breton Regional Municipality and Region of Queens Municipality
        # Match on the surname or either of the last two district words
        # (with "of" removed). Terms are escaped and empty ones dropped so
        # the pattern cannot degenerate into one that matches every email.
        terms = [name.split()[-1]]
        terms.extend(word.replace('of', '') for word in district.split()[-2:])
        pattern = re.compile('|'.join(re.escape(term.lower()) for term in terms if term))

        # Build the leftover list instead of popping from ``emails`` while
        # iterating it, which skipped the entry after every match.
        unclaimed = []
        for email in emails:
            if pattern.search(email):
                membership.add_contact_detail('email', email)
            else:
                unclaimed.append(email)
        emails = unclaimed

        yield p
开发者ID:opencivicdata,项目名称:scrapers-ca,代码行数:51,代码来源:people.py
示例7: scrape
def scrape(self):
    """Yield one ``Person`` per non-empty cell of the council table.

    The council page is parsed as ISO-8859-1. The first cell of the table
    is the mayor ("Maire" of Kirkland); every other non-empty cell is a
    "Conseiller" whose district follows the "- " in the cell's ``<h2>``.
    """
    page = self.lxmlize(COUNCIL_PAGE, 'iso-8859-1')
    cells = page.xpath('//div[@id="PageContent"]/table/tbody/tr/td')
    for cell in cells:
        has_text = cell.text_content().strip()
        if not has_text:
            continue

        is_mayor = cell == cells[0]
        if is_mayor:
            role = 'Maire'
            district = 'Kirkland'
        else:
            role = 'Conseiller'
            heading = cell.xpath('.//h2')[0].text_content()
            district = re.search('- (.+)', heading).group(1).strip()
            # Lower-case the compass suffixes.
            district = district.replace(' Ouest', ' ouest')
            district = district.replace(' Est', ' est')

        name = cell.xpath('.//strong/text()')[0]

        # Drop the "T " prefix, hyphenate, and turn ",-#-" into an
        # " x" extension marker.
        phone = cell.xpath('.//div[contains(text(), "#")]/text()')[0]
        phone = phone.replace('T ', '')
        phone = phone.replace(' ', '-')
        phone = phone.replace(',-#-', ' x')

        email = self.get_email(cell)

        person = Person(primary_org='legislature', name=name, district=district, role=role)
        person.add_source(COUNCIL_PAGE)
        person.add_contact('voice', phone, 'legislature')
        person.add_contact('email', email)
        person.image = cell.xpath('.//img/@src')[0]
        yield person
开发者ID:anukat2015,项目名称:scrapers-ca,代码行数:27,代码来源:people.py
示例8: scrape_mayor
def scrape_mayor(self, url):
    """Return a ``Person`` for the mayor of Caledon, scraped from ``url``.

    The name comes from the page heading with "Mayor" removed. Address and
    phone come from the second text node of the paragraphs whose bold
    label contains "mail" and "phone" respectively.
    """
    page = self.lxmlize(url)

    heading = page.xpath('//div[@id="printAreaContent"]/h1/strong/text()')[0]
    name = heading.replace('Mayor', '').strip()

    mail_text = page.xpath('//strong[contains(text(), "mail")]/parent::p/text()')[1]
    address = mail_text.replace(':', '').strip()

    phone_text = page.xpath('//strong[contains(text(), "phone")]/parent::p/text()')[1]
    # The number is the second whitespace-separated token.
    phone = phone_text.split()[1]

    mayor = Person(primary_org='legislature', name=name, district='Caledon', role='Mayor')
    for source in (COUNCIL_PAGE, url):
        mayor.add_source(source)
    mayor.image = page.xpath('//h2[contains(text(), "About me")]/img/@src')[0]
    for kind, value in (('address', address), ('voice', phone)):
        mayor.add_contact(kind, value, 'legislature')
    return mayor
开发者ID:anukat2015,项目名称:scrapers-ca,代码行数:14,代码来源:people.py
注:本文中的utils.CanadianPerson类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论