This article collects typical usage examples of the PHP scraperWiki class: what the class is for, how it is called, and what real-world code that uses it looks like. The hand-picked examples below should answer those questions.
Twenty code examples of the scraperWiki class are shown, ordered by popularity by default.
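Most of the examples follow the same three-step pattern: fetch a page with scraperWiki::scrape(), parse it with the bundled Simple HTML DOM parser, and store rows with scraperwiki::save_sqlite(). The sketch below shows that pattern in isolation; it assumes the ScraperWiki PHP environment, and the URL, selector, and column names are placeholders invented for illustration, not taken from any example that follows.
// Minimal sketch of the common scraperWiki workflow, assuming the ScraperWiki PHP
// environment (scraperwiki::scrape(), scraperwiki::save_sqlite(), Simple HTML DOM).
// The URL, selector, and column names are placeholders, not from the examples below.
require_once 'scraperwiki/simple_html_dom.php';
$html = scraperWiki::scrape("http://example.com/listing");
$dom = new simple_html_dom();
$dom->load($html);
foreach ($dom->find("table tr") as $row) {
$tds = $row->find("td");
// Skip header rows and rows without enough cells.
if (count($tds) < 2) {
continue;
}
$record = array("name" => trim($tds[0]->plaintext), "value" => trim($tds[1]->plaintext));
// The first argument lists the column(s) that act as the unique key.
scraperwiki::save_sqlite(array("name"), $record);
}
With that shape in mind, the individual examples mainly differ in which site they scrape, which selectors they use, and how they post-process the extracted cells.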
Example 1: scrape_page
function scrape_page()
{
$row = 0;
$html = scraperWiki::scrape("http://asuntojen.hintatiedot.fi/haku/?c=" . $GLOBALS['c'] . "&s=" . $GLOBALS['s'] . "&r=" . $GLOBALS['r'] . "&amin=" . $GLOBALS['amin'] . "&amax=" . $GLOBALS['amax'] . "&z=" . $GLOBALS['z']);
$dom = new simple_html_dom();
$dom->load($html);
foreach ($dom->find("tr") as $data) {
$tds = $data->find("td");
if (count($tds) > 8) {
$row++;
$GLOBALS['rowTotal']++;
$apt = array("Uniikkiavain" => $GLOBALS['rowTotal'], "Kaupunginosa" => $tds[0]->plaintext, "Myyntihinta" => $tds[3]->plaintext, "Neliohinta" => $tds[4]->plaintext, "Tyyppi" => $tds[1]->plaintext, "Koko" => $tds[2]->plaintext);
// Save into a table named after the selected city and the scrape time; null = no unique-key columns.
scraperwiki::save_sqlite(null, $apt, $GLOBALS['c'] . " " . $GLOBALS['time']);
print $GLOBALS['rowTotal'] . "\n";
print $row . ". Sijainti: " . $tds[0]->plaintext . " Hinta: " . $tds[3]->plaintext . " Tyyppi: " . $tds[1]->plaintext . " Koko: " . $tds[2]->plaintext . " Neliöhinta: " . $tds[4]->plaintext . "€" . "\n";
}
}
if ($row == 50) {
print "Vielä jatkuu, haetaan seuraava sivu..." . "\n";
$GLOBALS['z']++;
scrape_page();
} else {
print "Skrääpiminen suoritettu." . "\n";
print "Sivuja yhteensä: " . $GLOBALS['z'] . "\n";
print "Rivejä yhteensä: " . $GLOBALS['rowTotal'] . "\n";
}
}
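For context, scrape_page() reads all of its search parameters from $GLOBALS, so a driver script presumably seeds those values before the first call, along the lines of the sketch below. The values and their meanings are assumptions for illustration only; they are not part of the original scraper.
// Hypothetical driver for scrape_page(); every value below is an illustrative guess.
$GLOBALS['c'] = 'Helsinki';       // assumed: municipality used in the query and the table name
$GLOBALS['s'] = '';               // assumed: further search filters passed straight to the site
$GLOBALS['r'] = '';
$GLOBALS['amin'] = '';
$GLOBALS['amax'] = '';
$GLOBALS['z'] = 1;                // page number; scrape_page() increments this while recursing
$GLOBALS['rowTotal'] = 0;         // running row counter used as the record key
$GLOBALS['time'] = date('Y-m-d'); // scrape timestamp appended to the table name
scrape_page();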
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 27 | Source file: asuntojen_hintatiedot_1.php
Example 2: scrap_yp
function scrap_yp($last_alphabet = '', $last_page = '')
{
$alphabet = range('a', 'z');
if (is_null($last_alphabet) || $last_alphabet == '') {
$temp_alphabet = scraperwiki::get_var('last_alphabet_loaded');
if (!is_null($temp_alphabet)) {
$last_alphabet = $temp_alphabet;
} else {
$last_alphabet = 'a';
}
}
if (is_null($last_page) || $last_page == '') {
$temp_page = scraperwiki::get_var('last_page_loaded');
if (!is_null($temp_page)) {
$last_page = $temp_page;
} else {
$last_page = 1;
}
}
$yp_base_url = 'http://www.yellowpages.co.id/browse/letter/' . $last_alphabet . '?page=' . $last_page;
$html = scraperWiki::scrape($yp_base_url);
$dom = new simple_html_dom();
$dom->load($html);
foreach ($dom->find("ul.directory-list") as $data) {
echo $data;
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 27 | Source file: yellowpage_id.php
Example 3: run_ml
function run_ml($q_num = 0)
{
$html = scraperWiki::scrape("http://musiklegal.com/search/result/a/" . $q_num);
$dom = new simple_html_dom();
$dom->load($html);
foreach ($dom->find("tr") as $data) {
$tds = $data->find("td");
$temp_data = explode('">', str_replace('</a>', '', str_replace('<a href="http://musiklegal.com/song/detail/', '', $tds[1]->plaintext)));
$record = array('No' => str_replace('.', '', $tds[0]->plaintext), 'Code' => $temp_data[0], 'Song Title' => $temp_data[1], 'Artist' => $tds[2]->plaintext, 'Album' => $tds[3]->plaintext);
/*
* Stores results
*/
scraperwiki::save_sqlite(array("No"), $record);
unset($temp_data);
}
foreach ($dom->find("a") as $a) {
if ($a->plaintext == 'Next') {
$tmp_a = $a->href;
$tmp_a = str_replace('http://musiklegal.com/search/result/a/', '', $tmp_a);
if ($tmp_a > 0) {
continue;
}
}
}
if ((int) $tmp_a != 0) {
run_ml($tmp_a);
} else {
exit;
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 30 | Source file: other_scraper_ml.php
Example 4: get_dom
function get_dom($url)
{
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
return $dom;
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 7 | Source file: testing_scraper.php
Example 5: scrapeHTML
function scrapeHTML($param, $type)
{
// Note: $param is unused in this variant; the search URL is hard-coded (compare Example 10, which builds the URL from BASE_URL and $param).
$html = scraperWiki::scrape("http://www.norwegian.no/fly/lavpris/?D_City=CPH&A_City=DUB&TripType=2&D_Day=1&D_Month=201104&R_Day=1&R_Month=201104&AdultCount=1&ChildCount=0&InfantCount=0");
$dom = new simple_html_dom();
$dom->load($html);
// Iterate over table rows and get flight details.
foreach ($dom->find("TR[@HEIGHT='25']") as $data) {
// Flight details.
$tds = $data->find("div");
$airline = removeSpaces($tds[0]->plaintext);
$flight_type = $type;
$flight_num = removeSpaces($tds[1]->plaintext);
$destination = removeSpaces($tds[2]->plaintext);
$time = removeSpaces($tds[3]->plaintext);
$gate = removeSpaces($tds[4]->plaintext);
$remarks = removeSpaces($tds[5]->plaintext);
// Skip header row. Cheesy, but effective.
if ($airline == "Airline") {
continue;
}
// Set the date.
$date = date("m.d.y");
// Build up record to store.
$flight_data = array("date" => $date, "airline" => $airline, "flight_type" => $flight_type, "flight_num" => $flight_num, "destination" => $destination, "time" => $time, "gate" => $gate, "remarks" => $remarks);
// Save the record.
saveData(array("date", "airline", "flight_type", "flight_num"), $flight_data);
}
$dom->clear();
}
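Example 5 (and Example 10 below) relies on helper functions removeSpaces() and saveData() that are not shown in these snippets. Plausible stand-ins, consistent with how the helpers are called but purely an assumption about the original project, might look like this:
// Hypothetical stand-ins for helpers used in Examples 5 and 10; the originals are not shown.
function removeSpaces($text)
{
// Collapse runs of whitespace and trim the ends of the extracted cell text.
return trim(preg_replace('/\s+/', ' ', $text));
}
function saveData($unique_keys, $data)
{
// Persist one record, keyed on the given columns, via the ScraperWiki datastore.
scraperwiki::save_sqlite($unique_keys, $data);
}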
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 29 | Source file: phl-flight-scraperphp.php
Example 6: scrapeMarketGroup
function scrapeMarketGroup($url)
{
global $visitedIds;
$html = scraperWiki::scrape($url);
$html = str_replace("\n", "", $html);
preg_match_all("|<a href=\"/importing/61000746/marketgroup/(\\d+?)/\">(.+?)</a>|s", $html, $matches, PREG_SET_ORDER);
foreach ($matches as $match) {
$groupId = $match[1];
$groupName = html_entity_decode($match[2]);
//echo $groupName."\n";
if (!in_array($groupId, $visitedIds)) {
$visitedIds[] = $groupId;
scrapeMarketGroup("http://goonmetrics.com/importing/61000746/marketgroup/" . $groupId . "/");
}
}
preg_match_all("|<tr(.*?)>(.*?)<td(.*?)><a href=\"http://games.chruker.dk/eve_online/item.php\\?type_id=(.+?)\" target=\"_blank\">(.*?)<span class=\"dot\" onclick=\"CCPEVE.showMarketDetails\\((.*?)\\)\">(.+?)</span>(.*?)</td>(.*?)<td(.*?)>(.+?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.+?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)<td(.*?)>(.*?)</td>(.*?)</tr>|s", $html, $matches, PREG_SET_ORDER);
foreach ($matches as $match) {
$item = array("itemId" => trim($match[4]), "name" => trim(mb_check_encoding($match[7], 'UTF-8') ? $match[7] : utf8_encode($match[7])), "weekVol" => trim(mb_check_encoding($match[11], 'UTF-8') ? $match[11] : utf8_encode($match[11])), "k6Stock" => trim(mb_check_encoding($match[17], 'UTF-8') ? $match[17] : utf8_encode($match[17])));
$item['weekVol'] = str_replace(",", "", $item['weekVol']);
$item['k6Stock'] = str_replace(",", "", $item['k6Stock']);
$saved = false;
$delay = 0;
// Retry the save (10 s between attempts, up to 600 attempts) in case the SQLite store is temporarily locked.
while (!$saved && $delay < 600) {
try {
@scraperwiki::save_sqlite(array('itemId'), $item, 'eve_goonmetrics');
$saved = true;
} catch (Exception $e) {
sleep(10);
$delay++;
}
}
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 33 | Source file: goonmetrics.php
Example 7: grep_munich
function grep_munich($url, $table_name)
{
$html = scraperWiki::scrape($url);
$count = 0;
# Use the PHP Simple HTML DOM Parser to extract <td> tags
$dom = new simple_html_dom();
$dom->load($html);
//Drop all old informations by dropping the table
scraperwiki::sqliteexecute("drop table if exists " . $table_name);
scraperwiki::sqlitecommit();
$table = $dom->getElementById('flight_info_area');
foreach ($table->find('tr') as $data) {
// Flight details. Read tds or ths
$tds = $data->find("td");
//if there are less then 7 columns continue to next loop
if (sizeof($tds) < 7) {
continue;
}
//print $data->plaintext . "\n";
$flightnr = $tds[1]->plaintext;
$from = $tds[2]->plaintext;
$time = $tds[3]->plaintext;
$expected_time = $tds[4]->plaintext;
//Create date
$date = date("Y-m-d");
//Build array of flight informations
$flight_data = array("date" => $date, "count" => $count, "flightnr" => $flightnr, "from" => $from, "time" => $time, "expected_time" => $expected_time);
//Save the informations of one flight
scraperwiki::save_sqlite(array("date", "count"), $flight_data, $table_name);
$count = $count + 1;
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 32 | Source file: munich_airport.php
Example 8: getCardInfo
function getCardInfo($url)
{
$baseURL = 'http://gatherer.wizards.com/Pages/Card/';
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
$cardImage = $dom->find('img[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_cardImage]', 0)->src;
$cardImage = str_replace("amp;", "", $cardImage);
$imgURL = $baseURL . $cardImage;
$name = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_nameRow] div[class=value]', 0)->plaintext;
$name = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $name);
$mana = "";
$manaImages = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_manaRow] div[class=value] img');
foreach ($manaImages as $manaItem) {
$mana .= substr($manaItem->alt, 0, 1);
}
$mana = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $mana);
// Use ->plaintext on each field so only the text content (not the surrounding markup) is converted and stored.
$cmc = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_cmcRow] div[class=value]', 0)->plaintext;
$cmc = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $cmc);
$type = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_typeRow] div[class=value]', 0)->plaintext;
$type = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $type);
$text = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_textRow] div[class=value]', 0)->plaintext;
$text = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $text);
$flavor = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_flavorRow] div[class=value]', 0)->plaintext;
$flavor = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $flavor);
$cardNumber = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_numberRow] div[class=value]', 0)->plaintext;
$cardNumber = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $cardNumber);
$artist = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_artistRow] div[class=value]', 0)->plaintext;
$artist = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $artist);
$rarity = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_rarityRow] div[class=value]', 0)->plaintext;
$rarity = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $rarity);
$set = $dom->find('div[id=ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_setRow] div[class=value]', 0)->plaintext;
$set = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $set);
scraperwiki::save_sqlite(array("card"), array("Card" => trim($name), "Image" => $imgURL, "Mana" => trim($mana), "CMC" => trim($cmc), "Type" => trim($type), "Card Text" => trim($text), "Flavor Text" => trim($flavor), "Artist" => trim($artist), "Card Number" => trim($cardNumber), "Rarity" => trim($rarity), "Expansion" => trim($set)));
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 35 | Source file: test_93.php
Example 9: read_listing
function read_listing($params, $url = 'http://www.auto24.ee/kasutatud/nimekiri.php')
{
$endpoint = build_query($url, $params);
$html = scraperWiki::scrape($endpoint);
$dom = new simple_html_dom();
$dom->load($html);
$totalResultsEl = $dom->find('.paginator .current-range strong');
$totalResults = $totalResultsEl[0]->plaintext;
$medianItem = ($totalResults + 1) / 2;
if ($medianItem > RESULTS_PER_PAGE) {
$listingOffset = floor($medianItem / RESULTS_PER_PAGE) * RESULTS_PER_PAGE;
$params['ak'] = $listingOffset;
$medianItem -= $listingOffset;
$endpoint = build_query($url, $params);
$html = scraperWiki::scrape($endpoint);
$dom = new simple_html_dom();
$dom->load($html);
}
$rows = $dom->find("[@id=usedVehiclesSearchResult] .result-row");
$lPoint = floor($medianItem) - 1;
$hPoint = ceil($medianItem) - 1;
$a24ksi = 0;
if ($lPoint == $hPoint) {
$rowData = get_row_data($rows[$lPoint]);
$a24ksi = $rowData['price'];
} else {
$lRowData = get_row_data($rows[$lPoint]);
$hRowData = get_row_data($rows[$hPoint]);
$a24ksi = round(($lRowData['price'] + $hRowData['price']) / 2);
}
return array('n' => $totalResults, 'val' => $a24ksi);
}
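read_listing() depends on a RESULTS_PER_PAGE constant and on helpers build_query() and get_row_data() defined elsewhere in the original scraper. As an assumption about their shape, build_query() could be as simple as the sketch below; get_row_data() would need to return at least a 'price' field parsed from a result row.
// Hypothetical build_query() consistent with how read_listing() calls it; not the original helper.
define('RESULTS_PER_PAGE', 25); // assumed page size of the auto24.ee listing
function build_query($url, $params)
{
return $url . '?' . http_build_query($params);
}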
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 32 | Source file: test_403.php
Example 10: scrapeHTML
function scrapeHTML($param, $type)
{
$html = scraperWiki::scrape(BASE_URL . "?type={$param}");
$dom = new simple_html_dom();
$dom->load($html);
// Iterate over table rows and get flight details.
foreach ($dom->find("TR[@HEIGHT='25']") as $data) {
// Flight details.
$tds = $data->find("td");
$airline = removeSpaces($tds[0]->plaintext);
$flight_type = $type;
$flight_num = removeSpaces($tds[1]->plaintext);
$destination = removeSpaces($tds[2]->plaintext);
$time = removeSpaces($tds[3]->plaintext);
$gate = removeSpaces($tds[4]->plaintext);
$remarks = removeSpaces($tds[5]->plaintext);
// Skip header row. Cheesy, but effective.
if ($airline == "Airline") {
continue;
}
// Set the date.
$date = date("m.d.y");
// Build up record to store.
$flight_data = array("date" => $date, "airline" => $airline, "flight_type" => $flight_type, "flight_num" => $flight_num, "destination" => $destination, "time" => $time, "gate" => $gate, "remarks" => $remarks);
// Save the record.
saveData(array("date", "airline", "flight_type", "flight_num"), $flight_data);
}
$dom->clear();
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 29 | Source file: php.php
Example 11: scrape_NG_news_article
function scrape_NG_news_article($art_url)
{
$html = scraperWiki::scrape($art_url);
require_once 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
$dom->load($html);
foreach ($dom->find("div#page_head h1") as $data) {
$art_title = $data->innertext;
}
foreach ($dom->find("div#page_head h2") as $data) {
$art_subtitle = $data->innertext;
}
$art_text_array = array();
$art_paragraph_count = 0;
$art_text_full = "";
$art_teaser50 = "";
$art_teaser100 = "";
foreach ($dom->find("div#content div.article_text p") as $data) {
$art_paragraph_count++;
$tmp = str_get_html($data)->plaintext;
// $art_text_array[$art_paragraph_count] = $tmp;
$art_text_full .= $tmp . " #" . $art_paragraph_count . "# ";
//if ($art_paragraph_count == 1) $art_teaser = $tmp;
}
$art_teaserS = word_teaser($art_text_full, 60);
$art_teaserM = word_teaser($art_text_full, 120);
/* print $art_text_full; show_article($art_title,$art_subtitle,$art_text_array);
for($i=0;$i<count($art_text_array);$i++) { $art_text_full .= $art_text_array[$i]." #".$i."# "; }
$art_text_full = $art_text_full->plaintext; $art_teaser = $art_text_array[0]->plaintext; */
// $record = array("Title" => $art_title, "Subtitle" => $art_subtitle, "TeaserS" => $art_teaserS, "TeaserM" => $art_teaserM, "Text" => $art_text_full, "URL" => $art_url);
$record = array("TeaserM" => $art_teaserM, "URL" => $art_url);
scraperwiki::save(array('URL'), $record);
return $record;
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 34 | Source file: test_scraper_35.php
Example 12: populateDOM
function populateDOM($htmlDOM, $src_link, $upd_flag = false)
{
scraperwiki::sqliteexecute("CREATE TABLE IF NOT EXISTS sources (src_link TEXT PRIMARY KEY, timestamp DATETIME, src_dump TEXT)");
echo "Checking local cache...<br>\n";
$result = scraperwiki::sqliteexecute("SELECT src_link, timestamp, src_dump FROM sources WHERE src_link = :slnk", array("slnk" => $src_link));
if (empty($result->data[0][2]) || $upd_flag == true) {
echo "No Cache for this site (or force-update flag given), scraping live site for local cache...<br>\n";
// Load the site and save it locally so that we dont end up crawling their site a million times during development
$source = scraperWiki::scrape($src_link);
$htmlDOM->load($source);
$save_source = $htmlDOM->save();
echo "Scrape complete, storing into cache...<br>\n";
scraperwiki::sqliteexecute("INSERT OR REPLACE INTO sources VALUES (:slnk, :stime, :sdmp)", array("slnk" => $src_link, "stime" => time(), "sdmp" => $save_source));
scraperwiki::sqlitecommit();
echo "Cache saved.<br>\n";
echo "Populate DOM Complete.";
return $htmlDOM;
} else {
echo "Using local cache, as cached data exists from '" . date(DATE_RFC822, $result->data[0][1]) . ".'<br>\n";
echo "Loading...<br>\n";
$htmlDOM->load($result->data[0][2]);
echo "Populate DOM Complete.";
return $htmlDOM;
}
}
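populateDOM() expects an already-constructed simple_html_dom instance and returns it populated, either from the local SQLite cache or from a fresh scrape. A call presumably looks like the following sketch; the URL is a placeholder.
// Hypothetical usage of populateDOM(); the URL is a placeholder.
require_once 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
$dom = populateDOM($dom, 'http://example.com/page-to-cache');
// Pass true as the third argument to bypass the cache and force a fresh scrape.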
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 25 | Source file: local_cache_scraper.php
Example 13: scrapepage
function scrapepage($url)
{
$html = scraperWiki::scrape($url);
// Load the scraped HTML (not the URL string) into the parser, and keep the loop variable consistent.
$dom = new simple_html_dom();
$dom->load($html);
foreach ($dom->find("table[@class='products-list'] tr td h2 a") as $product_link) {
echo "Link to Details: " . $product_link->href . "<br>";
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 10 | Source file: ebi2_1.php
Example 14: getDetails
function getDetails($url, $team)
{
global $teams;
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
foreach ($dom->find("tr.active") as $data) {
$tds = $data->find("td.title a");
$teams[$team]['nextOpponent'] = $tds[0]->plaintext;
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 11 | Source file: wdei_scraper_nfl.php
Example 15: scrapeTeams
function scrapeTeams($url)
{
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
$cells = $dom->find('td.cw a');
foreach ($cells as $cell) {
$name = $cell->plaintext;
$team = array('club' => $name);
scraperWiki::save_sqlite(array('club'), $team);
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 12 | Source file: english_football_teams_1.php
Example 16: grab
function grab($url)
{
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
foreach ($dom->find("#tbl_proxy_list tr") as $data) {
$tds = $data->find("td");
if (count($tds) == 7) {
$input = decode_ip((string) $tds[0]);
$record = array('ip' => $input);
scraperwiki::save(array('ip'), $record);
}
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 14 | Source file: proxyparser2.php
Example 17: gazelangs
function gazelangs($url, $lang)
{
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
$michi = "strong";
$michi = $michi . " hope";
foreach ($dom->find("ul[@class='trans_sent']") as $data) {
$tds = $data->find("li");
$record = array('user_input' => $tds[0]->plaintext, 'babelfish_output' => $tds[1]->plaintext, 'timestamp_scrape' => date("Y-m-d H:i:s"), 'page' => $url, 'language' => $lang);
// print json_encode($record) . "\n";
scraperwiki::save(array('user_input', 'babelfish_output', 'timestamp_scrape', 'page', 'language'), $record);
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 14 | Source file: test_77.php
Example 18: scrapePage
function scrapePage($url)
{
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
$cells = $dom->find('td.nom');
foreach ($cells as $cell) {
$name = $cell->find('a', 0)->plaintext;
$parent = $cell->parent();
$count = $parent->find('td.compte', 0)->plaintext;
if ($count) {
$payload = array('name' => $name, 'count' => $count);
scraperWiki::save_sqlite(array('name'), $payload);
}
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 16 | Source file: common_surnames.php
Example 19: getLinks
function getLinks($page)
{
global $destination, $id, $from_city, $pisah;
// Note: resetting $id on every call means the stored id is always 1; drop this line if a running counter is wanted.
$id = 0;
$source = scraperWiki::scrape($page);
$html = new simple_html_dom();
$html->load($source);
$id = $id + 1;
$ticketvalues = $html->find("td[@class='ticketvalue']");
$from_city = $ticketvalues[0]->plaintext;
$destination = $ticketvalues[5]->plaintext;
$pisah = $ticketvalues[2]->plaintext;
$railway = array("id" => $id, "from_city" => $from_city, "destination" => $destination, "pisah" => $pisah);
// Save the record.
saveData(array("from_city", "destination", "pisah"), $railway);
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 16 | Source file: testing_31.php
Example 20: scrapepage
function scrapepage($url)
{
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
$table = $dom->find("table");
$rows = $table[15]->find("tr");
foreach ($rows as $row) {
$tds = $row->find('td');
//print "\nnewrow\n";
//print $row->plaintext;
//print "\n";
//print $tds[0]->height;
//print "\nendrow\n";
if (isset($tds[1])) {
if ($tds[1]->height == 30) {
//print $row->plaintext;
$document = array();
$document['date'] = get_date(date_create($tds[1]->plaintext));
//if ($tds[1]->plaintext=='-') $document['date']='';
//else $document['date']=get_date(date_create($tds[2]->plaintext));
$document['house'] = $tds[3]->plaintext;
//$document['language']=$tds[6]->plaintext;
$link = $tds[5]->find('a');
$img = $tds[5]->find('img');
$document['url'] = 'http://www.parliament.gov.za/live/' . $link[0]->href;
// Initialise $type so a row with an unrecognised icon does not inherit the previous row's type.
$type = '';
if ($img[0]->src == 'images/icon_word.gif') {
$type = '.doc';
}
if ($img[0]->src == 'images/icon_pdf.gif') {
$type = '.pdf';
}
$document['type'] = $type;
scraperwiki::save(array('url'), $document);
//print_r($document);
//print $row->plaintext;
}
}
}
//find next page to scrape
$links = $dom->find("table[style=height:26px] a");
foreach ($links as $link) {
if ($link->plaintext == 'Next') {
scrapepage('http://www.parliament.gov.za/live/' . $link->href);
}
}
}
Author: flyeven | Project: scraperwiki-scraper-vault | Lines: 47 | Source file: south_africa_parliament_hansards.php
Note: the scraperWiki class examples in this article were collected from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets come from open-source projects contributed by their respective authors; copyright remains with the original authors, and any redistribution or use should follow the corresponding project's license. Do not reproduce without permission.