• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python legislation.Bill类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中pyutils.legislation.Bill的典型用法代码示例。如果您正苦于以下问题:Python Bill类的具体用法?Python Bill怎么用?Python Bill使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Bill类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: scrape_bills

    def scrape_bills(self, chamber, year):
        """Scrape all bills for the given chamber and year.

        Iterates over the numeric bill-summary ids configured for the
        chamber, fetching each summary page and extracting the bill's id,
        title, documents, sponsors and votes.
        """
        self.log("Getting bill list for %s %s" % (chamber, year))

        if chamber == 'upper':
            min_id = self.upper_min_id
            max_id = self.upper_max_id
        elif chamber == 'lower':
            min_id = self.lower_min_id
            max_id = self.lower_max_id
        else:
            # Previously an unknown chamber fell through and raised an
            # opaque UnboundLocalError on min_id below; fail fast instead.
            raise ValueError("unknown chamber: %r" % chamber)

        # NOTE(review): range() excludes max_id itself -- confirm the
        # configured max ids are meant as exclusive upper bounds.
        for bill_num in range(min_id, max_id):
            bill_info_url = ('http://dlr.leg.wa.gov/billsummary/default.aspx'
                             '?year=%s&bill=%s' % (year, bill_num))
            with self.soup_context(bill_info_url) as soup:
                # (Debug prints used logging-style '%s' args and so printed
                # the literal placeholder; route through self.log instead.)
                self.log('opened %s' % bill_num)
                bill_id = soup.find(
                    'span', id='ctl00_contentRegion_lblShortBillID').string
                bill_title = soup.find(
                    'span', id='ctl00_contentRegion_lblBriefDescription').string

                session_name = self._session_dict[year]

                bill = Bill(session_name, chamber, bill_id, bill_title)
                bill.add_source(bill_info_url)

                self._scrape_bill_docs(soup, bill)
                self._scrape_bill_sponsors(soup, bill)
                self._scrape_bill_votes(soup, bill, chamber)

                self.add_bill(bill)
开发者ID:katpet,项目名称:fiftystates,代码行数:30,代码来源:get_legislation.py


示例2: parse_senate_billpage

    def parse_senate_billpage(self, bill_url, year):
        """Scrape a single Senate bill detail page and save the Bill."""
        with self.soup_context(bill_url) as bill_page:
            # Core identifying fields for the bill.
            bill_id = bill_page.find(id="lblBillNum").b.font.contents[0]
            bill_title = bill_page.find(id="lblBillTitle").font.string
            bill_desc = bill_page.find(id="lblBriefDesc").font.contents[0]
            bill_lr = bill_page.find(id="lblLRNum").font.string

            bill = Bill(year, 'upper', bill_id, bill_desc, bill_url=bill_url,
                        bill_lr=bill_lr, official_title=bill_title)
            bill.add_source(bill_url)

            # Primary sponsor.  The original read ``.href`` on the anchor,
            # which in BeautifulSoup looks up a child *tag* named "href" and
            # always yields None; the hyperlink is a tag attribute.
            sponsor_tag = bill_page.find(id="hlSponsor")
            bill_sponsor = sponsor_tag.i.font.contents[0]
            bill_sponsor_link = sponsor_tag.get('href')
            bill.add_sponsor('primary', bill_sponsor,
                             sponsor_link=bill_sponsor_link)

            # Cosponsors show up on their own page, if they exist.
            # ``'href' in tag`` tests the tag's contents, not its
            # attributes, so the old check never matched.
            cosponsor_tag = bill_page.find(id="hlCoSponsors")
            if cosponsor_tag and cosponsor_tag.get('href'):
                self.parse_senate_cosponsors(bill, cosponsor_tag['href'])

            # Get the actions.
            action_url = bill_page.find(id="hlAllActions")['href']
            self.parse_senate_actions(bill, action_url)

            # Full-text versions are stored on a separate page.
            versions_url = bill_page.find(id="hlFullBillText")
            if versions_url:
                self.parse_senate_bill_versions(bill, versions_url['href'])

        self.save_bill(bill)
开发者ID:rcadby,项目名称:fiftystates,代码行数:33,代码来源:get_legislation.py


示例3: scrape2009

    def scrape2009(self, url, year, chamberName, session, number):
        "e.g. http://www.legis.ga.gov/legis/2009_10/sum/sum/sb1.htm"
        page = parse(url).getroot()

        # Bill
        name = page.cssselect('#legislation h1')[0].text_content().strip()
        bill = Bill(session, chamberName, number, name)

        # Sponsorships
        for a in page.cssselect("#sponsors a"):
            bill.add_sponsor('', a.text_content().strip())

        # Actions
        for row in page.cssselect('#history tr')[1:]:
            date = row[0].text_content().strip()
            action_text = row[1].text_content().strip()

            # Rows without a date are headers/continuations; skip them.
            if '/' not in date:
                continue

            if action_text.startswith('Senate'):
                bill.add_action('upper', action_text, date)
            elif action_text.startswith('House'):
                bill.add_action('lower', action_text, date)

        # Versions.  The original looped with ``row`` but referenced ``a``,
        # which still pointed at the last sponsor anchor -- every version
        # got that anchor's text and href.
        for a in page.cssselect('#versions a'):
            bill.add_version(a.text_content(),
                             urlparse.urljoin(url, a.get('href')))

        self.add_bill(bill)
开发者ID:HughP,项目名称:fiftystates,代码行数:31,代码来源:get_legislation.py


示例4: scrape2003

    def scrape2003(self, url, year, chamberName, session, number):
        "e.g. http://www.legis.ga.gov/legis/2003_04/sum/sum/sb1.htm"
        page = parse(url).getroot()

        # Grab the interesting tables on the page.
        tables = page.cssselect('center table')

        # Bill: the title follows the "SB 1 -" prefix.
        name = tables[0].text_content().split('-', 1)[1]
        bill = Bill(session, chamberName, number, name)

        # Sponsorships
        for a in tables[1].cssselect('a'):
            bill.add_sponsor('', a.text_content().strip())

        # Actions: date / action rows, skipping two header rows.
        center = page.cssselect('center table center')[0]

        for row in center.cssselect('table')[-2].cssselect('tr')[2:]:
            date = row[0].text_content().strip()
            action_text = row[1].text_content().strip()
            if '/' not in date:
                continue
            if action_text.startswith('Senate'):
                bill.add_action('upper', action_text, date)
            elif action_text.startswith('House'):
                bill.add_action('lower', action_text, date)

        # Versions.  The original looped with ``row`` but referenced ``a``,
        # which still held the last sponsor anchor -- every version got
        # that anchor's text and href.
        for a in center.cssselect('table')[-1].cssselect('a'):
            bill.add_version(a.text_content(),
                             urlparse.urljoin(url, a.get('href')))

        self.add_bill(bill)
开发者ID:HughP,项目名称:fiftystates,代码行数:34,代码来源:get_legislation.py


示例5: scrape1999

    def scrape1999(self, url, year, chamberName, session, number):
        "e.g. http://www.legis.ga.gov/legis/1999_00/leg/sum/sb1.htm"
        page = parse(url).getroot()

        # The page's data lives in a handful of bare <table> elements.
        tables = page.cssselect('table')

        # Bill: the title follows the "SB 1 -" prefix in the second table.
        title = tables[1].cssselect('a')[0].text_content().split('-', 1)[1]
        bill = Bill(session, chamberName, number, title)

        # Versions: only the current full text is linked.
        bill.add_version('Current', url.replace('/sum/', '/fulltext/'))

        # Sponsorships: every anchor in the third table is a sponsor.
        for anchor in tables[2].cssselect('a'):
            bill.add_sponsor('', anchor.text_content().strip())

        # Actions: senate-date / action / house-date rows in the last table.
        for row in tables[-1].cssselect('tr'):
            senate_date = row[0].text_content().strip()
            action_text = row[1].text_content().strip()
            house_date = row[2].text_content().strip()
            if '/' not in senate_date and '/' not in house_date:
                continue
            if senate_date:
                bill.add_action('upper', action_text, senate_date)
            if house_date:
                bill.add_action('lower', action_text, house_date)

        self.add_bill(bill)
开发者ID:HughP,项目名称:fiftystates,代码行数:31,代码来源:get_legislation.py


示例6: scrape2009

    def scrape2009(self, url, year, chamberName, session, number):
        "e.g. http://www.legis.ga.gov/legis/2009_10/sum/sb1.htm"
        page = parse(url).getroot()

        # Bill
        try:
            name = page.cssselect("#legislation h1")[0].text_content().strip()
        except Exception:
            # Best-effort: some pages lack the heading; keep going.
            # (Narrowed from a bare except so ^C / SystemExit propagate.)
            name = "Unknown"
        bill = Bill(session, chamberName, number, name)

        # Sponsorships
        for a in page.cssselect("#sponsors a"):
            bill.add_sponsor("", a.text_content().strip())

        self.parse_votes(url, page, chamberName, bill)

        # Actions
        for row in page.cssselect("#history tr")[1:]:
            date = row[0].text_content().strip()
            action_text = row[1].text_content().strip()

            # Rows without a date are headers/continuations; skip them.
            if "/" not in date:
                continue

            if action_text.startswith("Senate"):
                bill.add_action("upper", action_text, date)
            elif action_text.startswith("House"):
                bill.add_action("lower", action_text, date)

        # Versions.  The original looped with ``row`` but referenced ``a``,
        # which still pointed at the last sponsor anchor -- every version
        # got that anchor's text and href.
        for a in page.cssselect("#versions a"):
            bill.add_version(a.text_content(), urlparse.urljoin(url, a.get("href")))

        self.add_bill(bill)
开发者ID:airportyh,项目名称:fiftystates,代码行数:35,代码来源:get_legislation.py


示例7: scrape2003

    def scrape2003(self, url, year, chamberName, session, number):
        "e.g. http://www.legis.ga.gov/legis/2003_04/sum/sb1.htm"
        page = parse(url).getroot()

        # Grab the interesting tables on the page.
        tables = page.cssselect("center table")

        # Bill: the title follows the "SB 1 -" prefix.
        name = tables[0].text_content().split("-", 1)[1]
        bill = Bill(session, chamberName, number, name)

        # Sponsorships
        for a in tables[1].cssselect("a"):
            bill.add_sponsor("", a.text_content().strip())

        self.parse_votes_2001_2004(url, page, chamberName, bill)

        # Actions: date / action rows, skipping two header rows.
        center = page.cssselect("center table center")[0]

        for row in center.cssselect("table")[-2].cssselect("tr")[2:]:
            date = row[0].text_content().strip()
            action_text = row[1].text_content().strip()
            if "/" not in date:
                continue
            if action_text.startswith("Senate"):
                bill.add_action("upper", action_text, date)
            elif action_text.startswith("House"):
                bill.add_action("lower", action_text, date)

        # Versions.  The original looped with ``row`` but referenced ``a``,
        # which still held the last sponsor anchor -- every version got
        # that anchor's text and href.
        for a in center.cssselect("table")[-1].cssselect("a"):
            bill.add_version(a.text_content(), urlparse.urljoin(url, a.get("href")))

        self.add_bill(bill)
开发者ID:airportyh,项目名称:fiftystates,代码行数:35,代码来源:get_legislation.py


示例8: scrape1999

    def scrape1999(self, url, year, chamberName, session, number):
        "e.g. http://www.legis.ga.gov/legis/1999_00/leg/sum/sb1.htm"
        page = parse(url).getroot()

        # The page's data lives in a handful of bare <table> elements.
        tables = page.cssselect("table")

        # Bill: the title follows the "SB 1 -" prefix in the second table.
        title = tables[1].cssselect("a")[0].text_content().split("-", 1)[1]
        bill = Bill(session, chamberName, number, title)

        # Versions: only the current full text is linked.
        bill.add_version("Current", url.replace("/sum/", "/fulltext/"))

        # Sponsorships: every anchor in the third table is a sponsor.
        for anchor in tables[2].cssselect("a"):
            bill.add_sponsor("", anchor.text_content().strip())

        self.parse_votes_1999(url, page, chamberName, bill)

        # Actions: senate-date / action / house-date rows in the last table.
        for row in tables[-1].cssselect("tr"):
            senate_date = row[0].text_content().strip()
            action_text = row[1].text_content().strip()
            house_date = row[2].text_content().strip()
            if "/" not in senate_date and "/" not in house_date:
                continue
            if senate_date:
                bill.add_action("upper", action_text, senate_date)
            if house_date:
                bill.add_action("lower", action_text, house_date)

        self.add_bill(bill)
开发者ID:airportyh,项目名称:fiftystates,代码行数:33,代码来源:get_legislation.py


示例9: get_bill_info

    def get_bill_info(self, chamber, session, bill_detail_url):
        """Extract all the requested info for a given bill.

        Calls the parent's methods to enter the results into CSV files.
        (The original docstring/continuation lines were indented with
        tabs while the code used spaces -- a TabError under Python 3.)
        """
        # The search results link relatively; resolve against the site base.
        bill_detail_url_base = 'https://www.revisor.leg.state.mn.us/revisor/pages/search_status/'
        bill_detail_url = urlparse.urljoin(bill_detail_url_base,
                                           bill_detail_url)

        # Normalize the site's chamber names to the common upper/lower form.
        if chamber == "House":
            chamber = 'lower'
        else:
            chamber = 'upper'

        with self.soup_context(bill_detail_url) as bill_soup:
            bill_id = self.extract_bill_id(bill_soup)
            bill_title = self.extract_bill_title(bill_soup)
            bill = Bill(session, chamber, bill_id, bill_title)

            # Versions of a bill are on a separate page, linked to from the
            # bill details page in a link titled "Bill Text".
            version_url_base = 'https://www.revisor.leg.state.mn.us'
            bill_version_link = self.extract_bill_version_link(bill_soup)

        version_detail_url = urlparse.urljoin(version_url_base,
                                              bill_version_link)

        with self.soup_context(version_detail_url) as version_soup:
            # MN bills can have multiple versions; add each one.
            bill_versions = self.extract_bill_versions(version_soup)
            for version in bill_versions:
                version_name = version['name']
                version_url = urlparse.urljoin(version_url_base,
                                               version['url'])
                bill.add_version(version_name, version_url)

            # MN uses "Primary Author" for a bill's primary sponsor;
            # everyone else listed is recorded as a cosponsor.
            sponsors = self.extract_bill_sponsors(bill_soup)
            primary_sponsor = sponsors[0]
            cosponsors = sponsors[1:]
            bill.add_sponsor('primary', primary_sponsor)
            for leg in cosponsors:
                bill.add_sponsor('cosponsor', leg)

            # Add Actions performed on the bill.
            bill_actions = self.extract_bill_actions(bill_soup, chamber)
            for action in bill_actions:
                bill.add_action(action['action_chamber'],
                                action['action_text'],
                                action['action_date'])

        self.add_bill(bill)
开发者ID:airportyh,项目名称:fiftystates,代码行数:56,代码来源:get_legislation.py


示例10: get_bill_info

    def get_bill_info(self, chamber, session, bill_detail_url, version_list_url):
        """Extract all the requested info for a given bill.

        Calls the parent's methods to enter the results into JSON files.
        (The original docstring/continuation lines were indented with
        tabs while the code used spaces -- a TabError under Python 3.)
        """
        # Normalize the site's chamber names to the common upper/lower form.
        if chamber == "House":
            chamber = 'lower'
        else:
            chamber = 'upper'

        with self.soup_context(bill_detail_url) as bill_soup:
            bill_id = self.extract_bill_id(bill_soup)
            bill_title = self.extract_bill_title(bill_soup)
            bill = Bill(session, chamber, bill_id, bill_title)

        # Versions of a bill are on a separate page, linked to from the
        # column labeled "Bill Text" on the search results page.
        with self.soup_context(version_list_url) as version_soup:
            # MN bills can have multiple versions; add each one.
            self.debug("Extracting bill versions from: " + version_list_url)
            bill_versions = self.extract_bill_versions(version_soup)
            for version in bill_versions:
                version_name = version['name']
                version_url = urlparse.urljoin(VERSION_URL_BASE,
                                               version['url'])
                bill.add_version(version_name, version_url)

            # MN uses "Primary Author" for a bill's primary sponsor;
            # everyone else listed is recorded as a cosponsor.
            sponsors = self.extract_bill_sponsors(bill_soup)
            primary_sponsor = sponsors[0]
            cosponsors = sponsors[1:]
            bill.add_sponsor('primary', primary_sponsor)
            for leg in cosponsors:
                bill.add_sponsor('cosponsor', leg)

            # Add Actions performed on the bill.
            bill_actions = self.extract_bill_actions(bill_soup, chamber)
            for action in bill_actions:
                bill.add_action(action['action_chamber'],
                                action['action_text'],
                                action['action_date'])

        self.add_bill(bill)
开发者ID:HughP,项目名称:fiftystates,代码行数:50,代码来源:get_legislation.py


示例11: scrape1995

    def scrape1995(self, url, year, chamberName, session, number):
        "e.g. http://www.legis.ga.gov/legis/1995_96/leg/sum/sb1.htm"
        page = parse(url).getroot()

        # Bill: the title follows the "SB 1 -" prefix in the <h3>.
        title = page.cssselect('h3 br')[0].tail.split('-', 1)[1].strip()
        bill = Bill(session, chamberName, number, title)

        # Versions: only the current full text is linked.
        bill.add_version('Current', url.replace('/sum/', '/fulltext/'))

        # Sponsorships: anchors between the "Sponsor and CoSponsors"
        # header row and the "Links / Committees / Status" row.
        for row in page.cssselect('center table tr'):
            label = row.text_content().strip()
            if label == 'Sponsor and CoSponsors':
                continue
            if label == 'Links / Committees / Status':
                break
            for anchor in row.cssselect('a'):
                bill.add_sponsor('', anchor.text_content().strip())

        # Actions sit in a fixed-width <pre> table that looks like:
        """    SENATE                         HOUSE
               -------------------------------------
             1/13/95   Read 1st time          2/6/95
             1/31/95   Favorably Reported
             2/1/95    Read 2nd Time          2/7/95
             2/3/95    Read 3rd Time
             2/3/95    Passed/Adopted                   """

        # Skip the two header lines, then slice each row by column.
        for line in page.cssselect('pre')[0].text_content().split('\n')[2:]:
            senate_date = line[:22].strip()
            action_text = line[23:46].strip()
            house_date = line[46:].strip()

            if '/' not in senate_date and '/' not in house_date:
                continue

            if senate_date:
                bill.add_action('upper', action_text, senate_date)

            if house_date:
                bill.add_action('lower', action_text, house_date)

        self.add_bill(bill)
开发者ID:HughP,项目名称:fiftystates,代码行数:48,代码来源:get_legislation.py


示例12: scrape1995

    def scrape1995(self, url, year, chamberName, session, number):
        "e.g. http://www.legis.ga.gov/legis/1995_96/leg/sum/sb1.htm"
        page = parse(url).getroot()

        # Bill: the title follows the "SB 1 -" prefix in the <h3>.
        title = page.cssselect("h3 br")[0].tail.split("-", 1)[1].strip()
        bill = Bill(session, chamberName, number, title)

        # Versions: only the current full text is linked.
        bill.add_version("Current", url.replace("/sum/", "/fulltext/"))

        # Sponsorships: anchors between the "Sponsor and CoSponsors"
        # header row and the "Links / Committees / Status" row.
        for row in page.cssselect("center table tr"):
            label = row.text_content().strip()
            if label == "Sponsor and CoSponsors":
                continue
            if label == "Links / Committees / Status":
                break
            for anchor in row.cssselect("a"):
                bill.add_sponsor("", anchor.text_content().strip())

        # Actions sit in a fixed-width <pre> table that looks like:
        """    SENATE                         HOUSE
               -------------------------------------
             1/13/95   Read 1st time          2/6/95
             1/31/95   Favorably Reported
             2/1/95    Read 2nd Time          2/7/95
             2/3/95    Read 3rd Time
             2/3/95    Passed/Adopted                   """

        # Skip the two header lines, then slice each row by column.
        for line in page.cssselect("pre")[0].text_content().split("\n")[2:]:
            senate_date = line[:22].strip()
            action_text = line[23:46].strip()
            house_date = line[46:].strip()

            if "/" not in senate_date and "/" not in house_date:
                continue

            if senate_date:
                bill.add_action("upper", action_text, senate_date)

            if house_date:
                bill.add_action("lower", action_text, house_date)

        self.add_bill(bill)
开发者ID:airportyh,项目名称:fiftystates,代码行数:48,代码来源:get_legislation.py


示例13: scrape_bill

    def scrape_bill(self, chamber, session, billid, histurl, year):
        """Scrape one bill's history page: title, sponsor, versions and
        actions."""
        if year[0] != 'R':
            session = year
        else:
            # NOTE(review): this branch only runs when year[0] == 'R', so
            # int(year[0]) must raise ValueError here -- a different index
            # into ``year`` was probably intended.  Confirm with callers.
            session = self.metadata['session_details'][year][
                'sub_sessions'][int(year[0]) - 1]

        with self.urlopen_context(histurl) as data:
            soup = BS(cleansource(data))
            basicinfo = soup.findAll('div', id='bhistleft')[0]
            hist = basicinfo.table

            # Title and primary sponsor are labeled by <b> headers.
            sponsor = None
            title = None
            for b in basicinfo.findAll('b'):
                if b.next.startswith('SUMMARY'):
                    title = b.findNextSiblings(text=True)[0].strip()
                elif b.next.startswith('SPONSOR'):
                    for a in b.findNextSiblings('a'):
                        if not issponsorlink(a):
                            break
                        sponsor = cleansponsor(a.contents[0])

            bill = Bill(session, chamber, billid, title)

            if sponsor:
                bill.add_sponsor('primary', sponsor)

            # Each row of the history table links one bill version.
            for row in hist.findAll('tr'):
                link = row.td.a
                vlink = urlbase % link['href']
                vname = link.contents[0].strip()
                bill.add_version(vname, vlink)

            # Action history lives in a second table; skip its header row.
            history = soup.findAll('div', id='bhisttab')[0].table
            rows = history.findAll('tr')[1:]
            for row in rows:
                tds = row.findAll('td')
                if len(tds) < 2:
                    # This is not actually an action
                    continue
                date, action = row.findAll('td')[:2]
                date = dt.datetime.strptime(date.contents[0], '%m/%d/%y')
                action = action.contents[0].strip()
                # Infer the acting chamber from the action text.
                if 'House' in action:
                    actor = 'lower'
                elif 'Senate' in action:
                    actor = 'upper'
                else:  # for lack of a better
                    actor = chamber

                bill.add_action(actor, action, date)

        self.add_bill(bill)
开发者ID:HughP,项目名称:fiftystates,代码行数:54,代码来源:get_legislation.py


示例14: scrape_session

    def scrape_session(self, chamber, session):
        """Scrape every bill for one chamber of a Utah session.

        Follows the session's bill index to per-range bill lists, then to
        each bill page, collecting title, sponsor, status and versions.
        """
        if chamber == "lower":
            bill_abbr = "HB"
        else:
            bill_abbr = "SB"

        bill_list_url = "http://www.le.state.ut.us/~%s/bills.htm" % (
            session.replace(' ', ''))
        self.log("Getting bill list for %s, %s" % (session, chamber))

        try:
            base_bill_list = self.soup_parser(self.urlopen(bill_list_url))
        except Exception:
            # This session doesn't exist for this year.  (Narrowed from a
            # bare except so KeyboardInterrupt/SystemExit still propagate.)
            return

        # Raw strings: '\d' in a plain literal is an invalid escape.
        bill_list_link_re = re.compile(r'.*%s\d+ht.htm$' % bill_abbr)

        for link in base_bill_list.findAll('a', href=bill_list_link_re):
            bill_list = self.soup_parser(self.urlopen(link['href']))
            bill_link_re = re.compile(r'.*billhtm/%s.*.htm' % bill_abbr)

            for bill_link in bill_list.findAll('a', href=bill_link_re):
                bill_id = bill_link.find(text=True).strip()

                bill_info_url = bill_link['href']
                bill_info = self.soup_parser(self.urlopen(bill_info_url))

                # The <h3> holds "TITLE -- SPONSOR".
                bill_title, primary_sponsor = bill_info.h3.contents[2].replace(
                    '&nbsp;', ' ').strip().split(' -- ')

                bill = Bill(session, chamber, bill_id, bill_title)
                bill.add_source(bill_info_url)
                bill.add_sponsor('primary', primary_sponsor)

                status_re = re.compile(r'.*billsta/%s.*.htm' %
                                       bill_abbr.lower())
                status_link = bill_info.find('a', href=status_re)

                if status_link:
                    self.parse_status(bill, status_link['href'])

                text_find = bill_info.find(
                    text="Bill Text (If you are having trouble viewing")

                # The first matched anchor is a help link; skip it.
                if text_find:
                    text_link_re = re.compile(r'.*\.htm')
                    for text_link in text_find.parent.parent.findAll(
                        'a', href=text_link_re)[1:]:
                        version_name = text_link.previous.strip()
                        bill.add_version(version_name, text_link['href'])

                self.add_bill(bill)
开发者ID:HughP,项目名称:fiftystates,代码行数:53,代码来源:get_legislation.py


示例15: parse_bill

    def parse_bill(self, chamber, session, special, link):
        """Scrape one bill (versions, history, votes) from its info page."""
        bill_number = link.contents[0]
        # Bill type is encoded in the link's query string (B, R, or empty).
        bill_type = re.search('type=(B|R|)', link['href']).group(1)
        bill_id = "%s%s %s" % (bill_abbr(chamber), bill_type, bill_number)

        bill_info_url = info_url(chamber, session, special, bill_type,
                                 bill_number)

        with self.soup_context(bill_info_url) as info_page:
            title = info_page.find(
                text='Short Title:').findNext().contents[0]

            bill = Bill(session, chamber, bill_id, title)
            bill.add_source(bill_info_url)

            self.parse_bill_versions(bill, info_page)

            self.parse_history(
                bill,
                history_url(chamber, session, special, bill_type,
                            bill_number))

            self.parse_votes(
                bill,
                vote_url(chamber, session, special, bill_type, bill_number))

            self.add_bill(bill)
开发者ID:HughP,项目名称:fiftystates,代码行数:23,代码来源:get_legislation.py


示例16: scrape1997

    def scrape1997(self, url, year, chamberName, session, number):
        "e.g. http://www.legis.ga.gov/legis/1997_98/leg/sum/sb1.htm"
        with self.lxml_context(url) as page:
            # Only the border=5 tables on the page hold bill data.
            tables = [table for table in page.cssselect('center table')
                      if table.get('border') == '5']

            # Bill: the title follows the "SB 1 -" prefix.
            name = page.cssselect(
                'tr > td > font > b')[0].text_content().split('-', 1)[1]
            bill = Bill(session, chamberName, number, name)

            # Versions: only the current full text is linked.
            bill.add_version('Current', url.replace('/sum/', '/fulltext/'))

            # Sponsorships: anchors up to (not including) "Current".
            for anchor in tables[0].cssselect('a'):
                if anchor.text_content().strip() == 'Current':
                    break
                bill.add_sponsor('', anchor.text_content().strip())

            # Actions: senate-date / action / house-date rows.
            for row in tables[1].cssselect('tr'):
                senate_date = row[0].text_content().strip()
                action_text = row[1].text_content().strip()
                house_date = row[2].text_content().strip()
                if '/' not in senate_date and '/' not in house_date:
                    continue
                if senate_date:
                    bill.add_action('upper', action_text, senate_date)
                if house_date:
                    bill.add_action('lower', action_text, house_date)

            self.save_bill(bill)
开发者ID:rcadby,项目名称:fiftystates,代码行数:36,代码来源:get_legislation.py


示例17: parse_bill

def parse_bill(scraper, url):
    """Given a bill status URL, return a fully loaded Bill object, except for votes, which
       are expected to be handled externally.
    """
    session = extract_session(url)
    chamber = chamber_for_doctype(extract_doctype(url))
    soup = get_soup(scraper, url)
    bill_id = extract_bill_id(soup)

    # The bill name sits in the sibling of the "Short Description" label.
    landmark = soup(text=re.compile(".*Short Description.*"))
    name_span = landmark[0].findParent().findNextSibling()
    bill_name = get_text(name_span)

    bill = Bill(session, chamber, bill_id, bill_name.strip(), status_url=url)

    actions = extract_actions(soup)
    for act_chamber, act_text, act_date in actions:
        bill.add_action(act_chamber, act_text, act_date)  # kwargs are permitted if we have 'em.

    # Sponsorships are recorded inside action text rather than a
    # dedicated field, so derive them from the action strings.
    sponsor_dict = extract_sponsors_from_actions(
        [action[1] for action in actions])
    for sponsor_type, name_list in sponsor_dict.iteritems():
        for sponsor_name in name_list:
            bill.add_sponsor(sponsor_type, sponsor_name)

    for version_name, version_link in extract_versions(scraper, soup):
        bill.add_version(version_name, version_link)

    return bill
开发者ID:HughP,项目名称:fiftystates,代码行数:22,代码来源:bills.py


示例18: scrape_bills

    def scrape_bills(self, chamber, year):
        """Emit a single fixture bill (with votes and actions) for 2009."""
        # Only the 2009 session has fixture data.
        if year != "2009":
            raise NoDataForYear

        # Each chamber gets its own bill type; record the opposite
        # chamber so we can attach a cross-chamber action below.
        if chamber == "upper":
            other_chamber, bill_id = "lower", "SB 1"
        else:
            other_chamber, bill_id = "upper", "HB 1"

        bill = Bill("2009-2010", chamber, bill_id, "A super bill")
        bill.add_source("http://example.com")
        bill.add_version("As Introduced", "http://example.com/SB1.html")
        bill.add_document("Google", "http://google.com")
        bill.add_sponsor("primary", "Bob Smith")
        bill.add_sponsor("secondary", "Johnson, Sally")

        first_day = datetime.datetime.strptime("1/29/2010", "%m/%d/%Y")
        passage_vote = Vote("upper", first_day, "Final passage", True, 2, 0, 0)
        passage_vote.yes("Bob Smith")
        passage_vote.yes("Sally Johnson")

        second_day = datetime.datetime.strptime("1/30/2010", "%m/%d/%Y")
        failed_vote = Vote("lower", second_day, "Final passage", False, 0, 1, 1)
        failed_vote.no("B. Smith")
        failed_vote.other("Sally Johnson")

        bill.add_vote(passage_vote)
        bill.add_vote(failed_vote)

        bill.add_action(chamber, "introduced", first_day)
        bill.add_action(chamber, "read first time", first_day)
        bill.add_action(other_chamber, "introduced", second_day)

        self.save_bill(bill)
开发者ID:rcadby,项目名称:fiftystates,代码行数:36,代码来源:get_legislation.py


示例19: scrape_session

    def scrape_session(self, chamber, year):
        """Scrape every bill for one chamber of a two-year Alaska session.

        For each bill found via the BASIS range query, attaches sponsors,
        actions, votes, subjects and text-version links, then registers
        the bill with ``self.add_bill()``.
        """
        if chamber == "upper":
            bill_abbr = "SB|SCR|SJR"
        elif chamber == "lower":
            bill_abbr = "HB|HCR|HJR"

        # Sessions last 2 years, 1993-1994 was the 18th
        session = str(18 + ((int(year) - 1993) / 2))
        year2 = str(int(year) + 1)

        # Full calendar span of the two-year session (MMDDYY).
        date1 = "0101" + year[2:]
        date2 = "1231" + year2[2:]

        # Get bill list
        bill_list_url = "http://www.legis.state.ak.us/" "basis/range_multi.asp?session=%s&date1=%s&date2=%s" % (
            session,
            date1,
            date2,
        )
        self.log("Getting bill list for %s %s (this may take a long time)." % (chamber, session))
        bill_list = self.soup_parser(self.urlopen(bill_list_url))

        # Find bill links
        re_str = r"bill=%s\d+" % bill_abbr
        links = bill_list.findAll(href=re.compile(re_str))

        for link in links:
            bill_id = link.contents[0].replace(" ", "")
            bill_name = link.parent.parent.findNext("td").find("font").contents[0].strip()
            bill = Bill(session, chamber, bill_id, bill_name.strip())

            # Fetch the bill's detail page for the remaining metadata.
            info_url = "http://www.legis.state.ak.us/basis/%s" % link["href"]
            info_page = self.soup_parser(self.urlopen(info_url))
            bill.add_source(info_url)

            # Get sponsors; the first listed legislator is primary.
            spons_str = info_page.find(text="SPONSOR(s):").parent.parent.contents[1]
            sponsors_match = re.match(r" (SENATOR|REPRESENTATIVE)\([Ss]\) ([^,]+(,[^,]+){0,})", spons_str)
            if sponsors_match:
                sponsors = sponsors_match.group(2).split(",")
                bill.add_sponsor("primary", sponsors[0].strip())

                for sponsor in sponsors[1:]:
                    bill.add_sponsor("cosponsor", sponsor.strip())
            else:
                # Committee sponsorship
                bill.add_sponsor("committee", spons_str.strip())

            # Get actions; column 2 flags the acting chamber.
            act_rows = info_page.findAll("table", "myth")[1].findAll("tr")[1:]
            for row in act_rows:
                cols = row.findAll("td")
                act_date = cols[0].font.contents[0]
                act_date = dt.datetime.strptime(act_date, "%m/%d/%y")

                if cols[2].font.string == "(H)":
                    act_chamber = "lower"
                elif cols[2].font.string == "(S)":
                    act_chamber = "upper"
                else:
                    act_chamber = chamber

                action = cols[3].font.contents[0].strip()
                # Actions like "Passed Y30 N5" carry an embedded vote tally.
                if re.match(r"\w+ Y(\d+) N(\d+)", action):
                    # A malformed vote page must not abort the whole
                    # session scrape; log the failure and keep going so
                    # the action itself is still recorded.
                    try:
                        vote = self.parse_vote(bill, action, act_chamber, act_date, cols[1].a["href"])
                        bill.add_vote(vote)
                    except Exception:
                        self.log("Failed parsing vote at %s" % cols[1].a["href"])

                bill.add_action(act_chamber, action, act_date)

            # Get subjects
            bill["subjects"] = []
            subject_link_re = re.compile(r".*subject=\w+$")
            for subject_link in info_page.findAll("a", href=subject_link_re):
                subject = subject_link.contents[0].strip()
                bill["subjects"].append(subject)

            # Get versions
            text_list_url = "http://www.legis.state.ak.us/" "basis/get_fulltext.asp?session=%s&bill=%s" % (
                session,
                bill_id,
            )
            text_list = self.soup_parser(self.urlopen(text_list_url))
            bill.add_source(text_list_url)

            text_link_re = re.compile("^get_bill_text?")
            for text_link in text_list.findAll("a", href=text_link_re):
                text_name = text_link.parent.previousSibling.contents[0]
                text_name = text_name.strip()

                text_url = "http://www.legis.state.ak.us/basis/%s" % (text_link["href"])

                bill.add_version(text_name, text_url)

            self.add_bill(bill)
开发者ID:katpet,项目名称:fiftystates,代码行数:96,代码来源:get_legislation.py


示例20: scrape_session

    def scrape_session(self, chamber, year):
        if chamber == 'upper':
            bill_abbr = 'SB|SCR|SJR'
        elif chamber == 'lower':
            bill_abbr = 'HB|HCR|HJR'

        # Sessions last 2 years, 1993-1994 was the 18th
        session = str(18 + ((int(year) - 1993) / 2))
        year2 = str(int(year) + 1)

        # Full calendar year
        date1 = '0101' + year[2:]
        date2 = '1231' + year2[2:]

        # Get bill list
        bill_list_url = 'http://www.legis.state.ak.us/'\
            'basis/range_multi.asp?session=%s&date1=%s&date2=%s' % (
            session, date1, date2)
        self.log("Getting bill list for %s %s (this may take a long time)." %
                 (chamber, session))
        bill_list = self.soup_parser(self.urlopen(bill_list_url))

        # Find bill links
        re_str = "bill=%s\d+" % bill_abbr
        links = bill_list.findAll(href=re.compile(re_str))

        for link in links:
            bill_id = link.contents[0].replace(' ', '')
            bill_name = link.parent.parent.findNext('td').find(
                'font').contents[0].strip()
            bill = Bill(session, chamber, bill_id, bill_name.strip())

            # Get the bill info page and strip malformed t
            info_url = "http://www.legis.state.ak.us/basis/%s" % link['href']
            info_page = self.soup_parser(self.urlopen(info_url))
            bill.add_source(info_url)

            # Get sponsors
            spons_str = info_page.find(
                text="SPONSOR(s):").parent.parent.contents[1]
            sponsors_match = re.match(
                ' (SENATOR|REPRESENTATIVE)\([Ss]\) ([^,]+(,[^,]+){0,})',
                spons_str)
            if sponsors_match:
                sponsors = sponsors_match.group(2).split(',')
                bill.add_sponsor('primary', sponsors[0].strip())

                for sponsor in sponsors[1:]:
                    bill.add_sponsor('cosponsor', sponsor.strip())
            else:
                # Committee sponsorship
                bill.add_sponsor('committee', spons_str.strip())

            # Get actions
            act_rows = info_page.findAll('table', 'myth')[1].findAll('tr')[1:]
            for row in act_rows:
                cols = row.findAll('td')
                act_date = cols[0].font.contents[0]
                act_date = dt.datetime.strptime(act_date, '%m/%d/%y')

                if cols[2].font.string == "(H)":
                    act_chamber = "lower"
                elif cols[2].font.string == "(S)":
                    act_chamber = "upper"
                else:
                    act_chamber = chamber

                action = cols[3].font.contents[0].strip()
                if re.match("\w+ Y(\d+) N(\d+)", action):
                    try:
                        vote = self.parse_vote(bill, action,
                                               act_chamber, act_date,
                                               cols[1].a['href'])
                        bill.add_vote(vote)
                    except:
                        self.log("Failed parsing vote at %s" %
                                 cols[1].a['href'])

                bill.add_action(act_chamber, action, act_date)

            # Get subjects
            bill['subjects'] = []
            subject_link_re = re.compile('.*subject=\w+$')
            for subject_link in info_page.findAll('a', href=subject_link_re):
                subject = subject_link.contents[0].strip 

鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python models.Request类代码示例发布时间:2022-05-27
下一篇:
Python th.main函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap