本文整理汇总了PHP中Crawler类的典型用法代码示例。如果您正苦于以下问题:PHP Crawler类的具体用法?PHP Crawler怎么用?PHP Crawler使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Crawler类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的PHP代码示例。
示例1: go
/**
 * Prints every link found on one line of the start page as an HTML anchor.
 *
 * The crawler is moved with go2linewhere('<p><a href="') (presumably to the
 * first line containing that marker -- confirm against Crawler), and that line
 * is split on '<a href="'. Each href has the site's redirect prefix removed,
 * is echoed as a link, and output is flushed after every link.
 *
 * A disabled second pass (following each link to locate the image tagged
 * id="thepic") used to live here as commented-out code and was removed.
 *
 * @return void
 */
public function go()
{
    $c = new Crawler($this->url);
    $c->go2linewhere('<p><a href="');
    $c->close();

    // Element 0 is whatever precedes the first anchor on the line, so skip it.
    $chunks = array_slice(explode('<a href="', $c->curline), 1);
    foreach ($chunks as $chunk) {
        $aurl = Crawler::cutuntil($chunk, '"');
        $aurl = str_replace('http://hentaifromhell.net/redirect.html?', '', $aurl);
        // The href was cut out of a double-quoted attribute on a remote page, so
        // it may contain a single quote that would close the attribute below.
        // double_encode=false keeps entities already present (e.g. &amp;) intact.
        $safe = htmlspecialchars($aurl, ENT_QUOTES, 'UTF-8', false);
        echo "<a href='{$safe}'>{$safe}</a><br />\n";
        flush();
    }
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:27,代码来源:spider_hfhgallery3.php
示例2: getHotSpots
/**
 * Scans the image column by column, crawls an outline from every pixel that is
 * neither white nor already inside a known outline, then slices the image by
 * each outline found.
 *
 * @return array Two elements: the ImageCollection of slices, followed by the
 *               CrawlerOutlineCollection they were cut from.
 */
public function getHotSpots()
{
    $crawler = new Crawler($this);
    $outlines = new CrawlerOutlineCollection();
    $dimensions = $this->image->size();
    $width = $dimensions[0];
    $height = $dimensions[1];

    for ($col = 0; $col < $width; $col++) {
        for ($row = 0; $row < $height; $row++) {
            $pixel = $this->pixel($col, $row);
            // Nothing to do for white pixels (compared with a tolerance argument of 5)
            // or for pixels that an earlier outline already contains.
            if ($pixel->color()->compare(ImageColor::white(), 5) || $outlines->contains($pixel)) {
                continue;
            }
            $outlines->push($crawler->crawl($col, $row));
        }
    }

    $hotspots = new ImageCollection();
    foreach ($outlines as $shape) {
        $slice = $this->image->sliceByOutline($shape);
        $hotspots->push($slice);
    }

    return [$hotspots, $outlines];
}
开发者ID:passbolt,项目名称:passbolt_selenium,代码行数:27,代码来源:imagepixelmatrix.php
示例3: crawl_1_page
/**
 * Prints one HTML link per href found after the page's '<div class="entry">' marker.
 *
 * Lines are read until one contains '</div>' (and no href) or readline()
 * returns a falsy value. Single-quoted hrefs take precedence over
 * double-quoted ones on the same line. Every link is labelled with a name
 * derived from the page URL.
 *
 * @param string $url Page to crawl; its final character is dropped before the
 *                    label is derived (presumably a trailing slash -- confirm).
 *
 * @return void
 */
function crawl_1_page($url)
{
    echo "URL2 {$url} <br/>\n";
    flush();

    // The label is entity-decoded first, so it must be re-escaped before it is
    // written back into HTML.
    $dirname = html_entity_decode(Crawler::cutfromlast1(substr($url, 0, -1), '/'));
    $label = htmlspecialchars($dirname, ENT_QUOTES, 'UTF-8');

    $c = new Crawler($url);
    $c->go_to('<div class="entry">');
    while ($line = $c->readline()) {
        if (Crawler::is_there($line, "href='")) {
            $img = Crawler::extract($line, "href='", "'");
        } elseif (Crawler::is_there($line, 'href="')) {
            $img = Crawler::extract($line, 'href="', '"');
        } elseif (Crawler::is_there($line, '</div>')) {
            break;
        } else {
            continue;
        }
        // A href taken from a double-quoted attribute may contain a single quote,
        // which would end the single-quoted attribute emitted here.
        // double_encode=false leaves entities already in the markup untouched.
        $href = htmlspecialchars($img, ENT_QUOTES, 'UTF-8', false);
        echo "<a href='{$href}'>{$label}</a><br/>\n";
        flush();
    }
    $c->close();
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:27,代码来源:reallycuteasians.php
示例4: login
/**
 * Fills in and submits the login form found on the given page.
 *
 * @param Crawler $crawler Page containing the form whose button is selected by '_submit'
 *
 * @return mixed Whatever $this->client->submit() returns for the filled form
 */
private function login($crawler)
{
    // Fixed test-account credentials, keyed by form field name
    $credentials = array(
        '_username' => 'kwizer',
        '_password' => 'sslover',
    );

    $loginForm = $crawler->selectButton('_submit')->form();
    foreach ($credentials as $field => $value) {
        $loginForm[$field] = $value;
    }

    return $this->client->submit($loginForm);
}
开发者ID:jlm-entreprise,项目名称:fee-bundle,代码行数:12,代码来源:FeeControllerTest.php
示例5: testCrawl
/**
 * Smoke test: logs in, builds a crawler for the account's first collection,
 * sets its pageLimit to 10 and starts it. Makes no assertions.
 */
public function testCrawl()
{
    $session = Account::login("searchzen.org", "test");
    $firstCollection = $session->collections[0];

    $spider = new Crawler($firstCollection);
    $spider->pageLimit = 10;
    $spider->start();
}
开发者ID:jacobandresen,项目名称:now,代码行数:8,代码来源:CrawlerTest.php
示例6: crawlForNews
/**
 * Crawls a news website and reports how many pages were crawled.
 *
 * @param mixed      $nrOfDaysBack Number of days the crawler should go back
 *                                 (presumably an int -- confirm against Crawler::crawl)
 * @param mixed      $newsSiteUrl  Root URL of the news site, used as the crawler's seed
 * @param mixed      $timeToLive   Passed straight to the Crawler constructor as its second argument
 * @param mixed|null $startDate    When truthy, forwarded to crawl() as its second argument;
 *                                 otherwise crawl() is called with the day count only
 *
 * @return int count() of the crawler's getCrawled() result
 */
public function crawlForNews($nrOfDaysBack, $newsSiteUrl, $timeToLive, $startDate = null)
{
    $spider = new Crawler($newsSiteUrl, $timeToLive);

    if (!$startDate) {
        $spider->crawl($nrOfDaysBack);
    } else {
        $spider->crawl($nrOfDaysBack, $startDate);
    }

    $crawledPages = $spider->getCrawled();

    return count($crawledPages);
}
开发者ID:Bram9205,项目名称:WebInfo,代码行数:16,代码来源:Main.php
示例7: run
/**
 * Builds sitemap.xml under ABSPATH for the requested site, then ends the request.
 *
 * Does nothing unless the query string carries site_url, sitemap_url and a
 * CSRF_token accepted by CSRF::valid(). sitemap_url is only required to be
 * present; its value is not used here.
 *
 * @return void Returns normally only when the request is not handled; otherwise calls exit.
 */
public static function run()
{
    // CSRF_token is part of the isset() check so a request without it is
    // ignored quietly instead of raising an undefined-index notice.
    if (!isset($_GET['site_url'], $_GET['sitemap_url'], $_GET['CSRF_token'])) {
        return;
    }
    if (!CODOF\Access\CSRF::valid($_GET['CSRF_token'])) {
        return;
    }

    $sitemapObject = new Crawler($_GET['site_url']);
    $sitemapObject->createSitemap(ABSPATH . 'sitemap.xml');
    exit;
}
开发者ID:KopjeKoffie,项目名称:codoforum-sitemap-generator,代码行数:11,代码来源:sitemap.php
示例8: crawl_indowebster
/**
 * Builds an HTML download link from the redirect target found on a page.
 *
 * The target is the text between "location.href='" and the next single quote
 * on the line the crawler is moved to. The link points at the target's
 * directory plus its 'path=' value and is labelled with the URL-decoded
 * 'file_orig=' value.
 *
 * @param string $url Page to crawl
 *
 * @return string An '<a href="...">label</a>' fragment, HTML-escaped
 */
function crawl_indowebster($url)
{
    $craw = new Crawler($url);
    $craw->go2lineregexor('/(<\\/div><\\/a><\\/div><\\/div>)/', 1, 'href="#idws7"');
    $setring = $craw->getbetween('location.href=\'', '\'');
    $craw->close();

    $path = Crawler::extract($setring, 'path=', '&');
    $file_orig = Crawler::cutafter($setring, 'file_orig=');

    // Both pieces come from a remote page. The label is URL-decoded, so it can
    // contain <, & or quotes and must be escaped before it goes into markup.
    // double_encode=false on the href leaves existing entities as they are.
    $href = htmlspecialchars(dirname($setring) . '/' . $path, ENT_QUOTES, 'UTF-8', false);
    $label = htmlspecialchars(rawurldecode($file_orig), ENT_QUOTES, 'UTF-8');

    return '<a href="' . $href . '">' . $label . '</a>';
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:11,代码来源:indowebster.php
示例9: mangareader_1_page
/**
 * Finds the page image in a chapter page and derives a download name for it.
 *
 * The image URL is the src="..." value read after moving the crawler to the
 * 'width="800"' marker. The name is "<prefix>-<padded chapter>-<image name>",
 * where the image name is the trailing "<digits>.<ext>" of the URL's basename.
 *
 * @param mixed  $fil     Source handed to the Crawler constructor
 * @param string $url     Unused here; kept for interface compatibility
 * @param string $prefix  Leading part of the generated name
 * @param mixed  $chapter Chapter identifier, passed through Crawler::pad($chapter, 3)
 *
 * @return array Single-element map of generated name => image URL
 */
public function mangareader_1_page($fil, $url, $prefix, $chapter)
{
    $chapter = Crawler::pad($chapter, 3);

    $c = new Crawler($fil);
    $c->go_to('width="800"');
    $img = $c->getbetween('src="', '"');
    $c->close();

    // Previously $m[1] was read unconditionally, giving an undefined-offset
    // notice and an empty image name whenever the basename did not end in
    // "<digits>.<ext>". Fall back to the whole basename in that case.
    $basename = basename($img);
    $iname = preg_match('/(\\d+\\.\\w+)$/', $basename, $m) ? $m[1] : $basename;

    return array($prefix . '-' . $chapter . '-' . $iname => $img);
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:12,代码来源:Mangareader_Crawler.php
示例10: addCrawlMap
/**
 * Registers a map entry for every path the crawler reports under a source directory.
 *
 * The crawl root is $this->_src_root, extended by "/$src" when $src is not
 * empty. Each reported path is mapped from "$src/<path>" to the same path with
 * every 'site/' occurrence removed. When $src is empty the first addMap()
 * argument therefore begins with '/'.
 *
 * @param string $src      Sub-directory below the source root, or an empty value for the root itself
 * @param mixed  $patterns Passed to the Crawler constructor unchanged
 *
 * @return void
 */
public function addCrawlMap($src, $patterns)
{
    $root = empty($src) ? $this->_src_root : $this->_src_root . '/' . $src;

    $finder = new Crawler($root, $patterns);
    foreach ($finder->getPaths() as $relative) {
        $target = str_replace('site/', '', $relative);
        $this->addMap($src . '/' . $relative, $target);
    }
}
开发者ID:walteraries,项目名称:anahita,代码行数:13,代码来源:class.php
示例11: mangareader_1_page
/**
 * Prints a list item linking to the page image found in a chapter page.
 *
 * The image URL is the src="..." value read after moving the crawler to the
 * 'width="800"' marker. The link text is "<prefix>-<padded chapter>-<image name>",
 * using $this->prefix and the trailing "<digits>.<ext>" of the URL's basename.
 *
 * @param mixed  $fil     Source handed to the Crawler constructor
 * @param string $url     Unused here; kept for interface compatibility
 * @param mixed  $chapter Chapter identifier, passed through Crawler::pad($chapter, 3)
 *
 * @return void
 */
public function mangareader_1_page($fil, $url, $chapter)
{
    $prefix = $this->prefix;
    $chapter = Crawler::pad($chapter, 3);

    $c = new Crawler($fil);
    $c->go_to('width="800"');
    $img = $c->getbetween('src="', '"');
    $c->close();

    // Previously $m[1] was read unconditionally, giving an undefined-offset
    // notice and an empty image name whenever the basename did not end in
    // "<digits>.<ext>". Fall back to the whole basename in that case.
    $basename = basename($img);
    $iname = preg_match('/(\\d+\\.\\w+)$/', $basename, $m) ? $m[1] : $basename;

    // The URL was lifted from remote markup: escape it for the attribute, and
    // keep entities it already contains (double_encode=false).
    $href = htmlspecialchars($img, ENT_QUOTES, 'UTF-8', false);
    $label = htmlspecialchars($prefix . '-' . $chapter . '-' . $iname, ENT_QUOTES, 'UTF-8', false);
    echo '<li><a href="' . $href . '">' . $label . '</a>' . "</li>\n";
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:13,代码来源:mangareader.php
示例12: handleCrawling
/**
 * Drives the booking flow and returns the view to render.
 *
 * A crawler is created up front only when a URL is stored in the session.
 * Exactly one of four paths then runs, checked in this order:
 *   1. booking parameters present and a crawler exists: make the reservation,
 *      output the result and tear the session down;
 *   2. time, day and movie parameters present and a crawler exists: output the
 *      dinner alternatives;
 *   3. the user has just submitted a URL: crawl it, look for a common date,
 *      output the movies for it (or a no-match message) and store the URL in
 *      the session;
 *   4. otherwise: replace the result view with the plain form view.
 *
 * @return mixed The CrawlerResultViewSwe that was filled in, or a formView on path 4
 */
public function handleCrawling()
{
    $view = new CrawlerResultViewSwe();

    if (isset($_SESSION['url'])) {
        $url = $_SESSION['url'];
        $crawler = new Crawler(new Curl(), $url);
    }

    if ($view->bookParamExists() && isset($crawler)) {
        // The user wants to book
        $bookingRequest = $view->getBookInfo();
        $view->outPutReservationResult($crawler->getReservations($bookingRequest));

        // Destroy the session
        $_SESSION = array();
        session_destroy();
        session_unset();
    } elseif ($view->timeParamExists() && $view->dayParamExists() && $view->movieParamExists() && isset($crawler)) {
        $day = $view->getDay();
        $time = $view->getTime();
        $movie = $view->getMovie();
        $view->outPutDinnerAlts($crawler->getDinnerInfo($day, $time), $time, $movie);
    } elseif ($view->userHasSubmittedURL()) {
        $url = $view->getURL();
        $crawler = new Crawler(new Curl(), $url);
        $crawler->getLinks();
        $dates = $crawler->getCalendarInfo();

        $comparer = new compareData();
        $matchingDay = $comparer->compareCommonDates($dates);
        if (is_null($matchingDay)) {
            $view->noMatchingDays();
        } else {
            $view->outPutMovieResult($crawler->getFilmInfo($matchingDay), $matchingDay);
        }

        $_SESSION['url'] = $url;
    } else {
        $view = new formView();
    }

    return $view;
}
开发者ID:henceee,项目名称:hg222dv-1DV449-Webteknik-ii,代码行数:48,代码来源:webBookingController.php
示例13: testFlickrCrawl
/**
 * Runs a crawl with a dummy Flickr API key configured for plugin id 2 and
 * checks the expanded URL and error recorded for links 43, 42 and 41.
 */
public function testFlickrCrawl()
{
    // Fixture handles stay assigned for the whole test.
    $data_fixtures = $this->buildData();
    $crawler = Crawler::getInstance();
    $config = Config::getInstance();

    // Use a fake Flickr API key
    $plugin_fixture = FixtureBuilder::build('plugins', ['id' => '2', 'folder_name' => 'flickrthumbnails']);
    $option_fixture = FixtureBuilder::build('options', [
        'namespace' => OptionDAO::PLUGIN_OPTIONS . '-2',
        'option_name' => 'flickr_api_key',
        'option_value' => 'dummykey',
    ]);

    // NOTE(review): '[email protected]' looks like an e-mail obfuscation placeholder
    // left by the page this snippet was copied from -- confirm the real
    // address against the upstream test before relying on this login.
    $this->simulateLogin('[email protected]', true);
    $crawler->crawl();

    // link id => [expected expanded_url, expected error], checked in this order
    $expectations = [
        43 => ['http://farm3.static.flickr.com/2755/4488149974_04d9558212_m.jpg', ''],
        42 => ['', 'No response from Flickr API'],
        41 => ['', 'No response from Flickr API'],
    ];

    $link_dao = DAOFactory::getDAO('LinkDAO');
    foreach ($expectations as $link_id => $expected) {
        $link = $link_dao->getLinkById($link_id);
        $this->assertEqual($link->expanded_url, $expected[0]);
        $this->assertEqual($link->error, $expected[1]);
    }
}
开发者ID:rkabir,项目名称:ThinkUp,代码行数:31,代码来源:TestOfFlickrThumbnailsPlugin.php
示例14: tearDown
/**
 * Resets global state after each test.
 *
 * Destroys the Config, Webapp and Crawler instances, empties $_SESSION (when
 * set), $_POST, $_GET, $_REQUEST, $_SERVER and $_FILES, unregisters the
 * loader, then removes the backup directory and the compiled_view directory
 * under the data path when they exist.
 */
public function tearDown()
{
    Config::destroyInstance();
    Webapp::destroyInstance();
    Crawler::destroyInstance();

    if (isset($_SESSION)) {
        $this->unsetArray($_SESSION);
    }
    $this->unsetArray($_POST);
    $this->unsetArray($_GET);
    $this->unsetArray($_REQUEST);
    $this->unsetArray($_SERVER);
    $this->unsetArray($_FILES);
    Loader::unregister();

    $backup_dir = FileDataManager::getBackupPath();
    if (file_exists($backup_dir)) {
        try {
            // '&&' instead of ';': if cd fails, 'rm -rf *' must NOT run in
            // whatever directory the test process happens to be in. The path
            // is shell-quoted so spaces and metacharacters cannot split or
            // extend the command.
            @exec('cd ' . escapeshellarg($backup_dir) . ' && rm -rf *');
            // rmdir() will not delete the directory if it still has files
            rmdir($backup_dir);
        } catch (Exception $e) {
            // Best-effort cleanup: a failure here must not fail the test.
        }
    }

    $data_dir = FileDataManager::getDataPath();
    if (file_exists($data_dir . 'compiled_view')) {
        try {
            @exec('cd ' . escapeshellarg($data_dir) . ' && rm -rf compiled_view');
        } catch (Exception $e) {
            // Best-effort cleanup: a failure here must not fail the test.
        }
    }

    parent::tearDown();
}
开发者ID:ravi-modria,项目名称:ThinkUp,代码行数:35,代码来源:class.ThinkUpBasicUnitTestCase.php
示例15: control
/**
 * Authorizes the caller and, when authorized, runs the crawler.
 *
 * With more than one command-line argument the first is taken as the username
 * and the second as the password (falling back to the THINKUP_PASSWORD
 * environment variable when absent); otherwise the web login state decides.
 *
 * @return string Empty on success, or an "ERROR: ..." message when the caller is not authorized
 */
public function control()
{
    $authorized = false;
    $output = "";
    $has_cli_args = isset($this->argc) && $this->argc > 1;

    if ($has_cli_args) {
        // Check for CLI credentials
        $session = new Session();
        $username = $this->argv[1];
        $pw = $this->argc > 2 ? $this->argv[2] : getenv('THINKUP_PASSWORD');

        $owner_dao = DAOFactory::getDAO('OwnerDAO');
        $owner = $owner_dao->getByEmail($username);
        if (!$owner_dao->isOwnerAuthorized($username, $pw)) {
            $output = "ERROR: Incorrect username and password.";
        } else {
            $authorized = true;
            Session::completeLogin($owner);
        }
    } elseif ($this->isLoggedIn()) {
        // The user is logged in on the web
        $authorized = true;
    } else {
        $output = "ERROR: Invalid or missing username and password.";
    }

    if (!$authorized) {
        return $output;
    }

    Crawler::getInstance()->crawl();

    return $output;
}
开发者ID:rgroves,项目名称:ThinkUp,代码行数:35,代码来源:class.CrawlerAuthController.php
示例16: perform
/**
 * Scrapes three New York Craigslist housing sections and inserts one
 * `listings` row per result.
 *
 * Each row stores the section code (last path segment of the section URL),
 * title, link, posting date as Y-m-d, digits-only price and neighborhood.
 * Price and neighborhood are null when the row has no matching element. A
 * failure on one row is logged and does not stop the rest.
 *
 * @return void
 */
function perform()
{
    $ps = DB::prepare('INSERT INTO listings SET scraped=FALSE, code=:code, title=:title, link=:link, date=:date, price=:price, neighborhood=:neighborhood');

    $urls = [
        'http://newyork.craigslist.org/nfa/',
        'http://newyork.craigslist.org/roo/',
        'http://newyork.craigslist.org/sub/',
    ];

    // Text of the nodes matching $selector below $node, or null when none match.
    $textOrNull = function ($node, $selector) {
        $found = $node->filter($selector);

        return $found->count() ? $found->text() : null;
    };

    foreach ($urls as $url) {
        // Take the code from the URL path rather than a fixed character offset,
        // so a different host name does not silently yield the wrong code.
        $code = basename(parse_url($url, PHP_URL_PATH));
        $crawler = new Crawler(Guzzle::get($url)->getBody());

        $crawler->filter('.row > .txt')->each(function ($node) use ($ps, $code, $textOrNull) {
            try {
                $a = $node->filter('.pl > a.hdrlnk');
                $price = $textOrNull($node, '.l2 > .price');
                $ps->execute([
                    ':code' => $code,
                    ':title' => $a->text(),
                    ':link' => $a->attr('href'),
                    // date() replaces the deprecated strftime(); output is identical for this format
                    ':date' => date('Y-m-d', strtotime($node->filter('.pl > .date')->text())),
                    ':price' => $price === null ? null : preg_replace('/\\D/', '', $price),
                    ':neighborhood' => $textOrNull($node, '.l2 > .pnr > small'),
                ]);
            } catch (Exception $e) {
                Logger::error($e->getMessage(), $ps->errorInfo());
            }
        });
    }
}
开发者ID:dthtvwls,项目名称:padscraper,代码行数:17,代码来源:Crawl.php
示例17: testDevices
/**
 * Every user agent listed in devices.txt (one per line, next to this test)
 * must not be detected as a crawler.
 */
public function testDevices()
{
    // Strip line endings so they are not passed as part of the user agent,
    // and skip blank lines, which are not user agents at all.
    $lines = file(__DIR__ . '/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

    // file() returns false on failure; without this guard the loop below would
    // run zero times and the test would pass without checking anything.
    $this->assertNotEmpty($lines, 'devices.txt could not be read or is empty');

    foreach ($lines as $line) {
        // assertFalse replaces assertEquals($test, false), which had expected
        // and actual swapped and compared loosely.
        $this->assertFalse(Crawler::isCrawler($line), $line);
    }
}
开发者ID:halenharper,项目名称:Laravel-Crawler-Detect,代码行数:8,代码来源:UATests.php
示例18: testDevices
/**
 * Every user agent in the upstream Crawler-Detect devices list (one per line,
 * fetched over HTTPS) must not be detected as a crawler.
 */
public function testDevices()
{
    // Strip line endings so they are not passed as part of the user agent,
    // and skip blank lines, which are not user agents at all.
    $lines = file(
        'https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/tests/devices.txt',
        FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
    );

    // The list is fetched over the network and file() returns false on
    // failure; without this guard an unreachable host would make the loop run
    // zero times and the test pass without checking anything.
    $this->assertNotEmpty($lines, 'Remote devices.txt could not be fetched or is empty');

    foreach ($lines as $line) {
        // assertFalse replaces assertEquals($test, false), which had expected
        // and actual swapped and compared loosely.
        $this->assertFalse(Crawler::isCrawler($line), $line);
    }
}
开发者ID:jaybizzle,项目名称:laravel-crawler-detect,代码行数:8,代码来源:UATests.php
示例19: crawl_1_chapter
/**
 * Prints one image link per page of a chapter.
 *
 * Page identifiers are the value="..." attributes of the '<option' lines read
 * after the 'name="pagejump"' marker, up to the line containing '</select>'.
 * The image directory is taken from the src="..." value read after the
 * 'id="nextpage"' marker. Every link is "<image dir>/<page>.jpg" and is
 * labelled "<prefix>-<padded chapter>-<page>.jpg", with $prefix read from
 * global scope.
 *
 * @param string $url     Chapter page to crawl
 * @param mixed  $chapter Chapter identifier, passed through Crawler::pad($chapter, 3)
 *
 * @return void
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename;
    global $prefix;

    $reader = new Crawler($url);
    $reader->go_to('name="pagejump"');

    $pageIds = array();
    while ($row = $reader->readline()) {
        if (Crawler::is_there($row, '<option')) {
            $pageIds[] = Crawler::extract($row, 'value="', '"');
            continue;
        }
        if (Crawler::is_there($row, '</select>')) {
            break;
        }
    }

    $reader->go_to('id="nextpage"');
    $reader->readline();
    $sampleImage = $reader->getbetween('src="', '"');
    $reader->close();

    $imageDir = dirname($sampleImage);
    $ext = '.jpg';
    $chapter = Crawler::pad($chapter, 3);

    foreach ($pageIds as $pageId) {
        echo "<a href='{$imageDir}/{$pageId}{$ext}'>{$prefix}-{$chapter}-{$pageId}{$ext}</a><br/>\n";
        flush();
    }
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:29,代码来源:mangashare.php
示例20: crawl_page
/**
 * Prints a link to the comic image on a page, labelled
 * "<year>_<month>_<day>_<title><ext>".
 *
 * The title is the text between 'PHD Comics: ' and '</title>' with every
 * non-word character replaced by '_'. The date is the first
 * "<digits>/<digits>/<digits>" group on the line reached after the
 * 'date_left.gif' marker and readline(2), read as month/day/year; month and
 * day are left-padded to two digits. The image is the first <img src=...> on
 * the line reached via go2linewhere('<td bgcolor=#FFFFFF').
 *
 * When the date or the image cannot be found, the corresponding parts are left
 * empty (month and day become '00') instead of raising notices.
 *
 * @param string $url Comic page to crawl
 *
 * @return void
 */
public function crawl_page($url)
{
    $c = new Crawler($url);

    // Title
    $c->go_to('<title>');
    $title = Crawler::extract($c->curline, 'PHD Comics: ', '</title>');
    $title = preg_replace('/\\W/', '_', $title);

    // Date
    $c->go_to('date_left.gif');
    $c->readline(2);
    $month = $date = $year = '';
    if (preg_match('/([0-9]+)\\/([0-9]+)\\/([0-9]+)/', $c->curline, $matches)) {
        list(, $month, $date, $year) = $matches;
    }
    $month = str_pad($month, 2, '0', STR_PAD_LEFT);
    $date = str_pad($date, 2, '0', STR_PAD_LEFT);
    $fileprefix = "{$year}_{$month}_{$date}_{$title}";

    // Image URL
    $c->go2linewhere('<td bgcolor=#FFFFFF');
    $line = $c->curline;
    $c->close();
    unset($c);

    // The old pattern ([^ ]+) ran on to the next space, so it swallowed the
    // closing quote (and a following '>') into the URL and the extension.
    // Stop at whitespace, either quote character, or '>'.
    $img = preg_match('/<img src=["\']?([^\\s"\'>]+)/i', $line, $matches) ? $matches[1] : '';

    // strrpos() returns false when there is no dot; passing that to substr()
    // would return the whole file name as the "extension".
    $filename = basename($img);
    $dot = strrpos($filename, '.');
    $ext = $dot === false ? '' : substr($filename, $dot);

    // The URL comes from a remote page: escape it, keeping entities it already contains.
    $href = htmlspecialchars($img, ENT_QUOTES, 'UTF-8', false);
    $label = htmlspecialchars($fileprefix . $ext, ENT_QUOTES, 'UTF-8', false);
    echo "<a href='{$href}'>" . $label . "</a><br/>";
    flush();
}
开发者ID:JerryMaheswara,项目名称:crawler,代码行数:34,代码来源:phdcomics.php
注:本文中的Crawler类示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论