本文整理汇总了PHP中str_get_html函数的典型用法代码示例。如果您正苦于以下问题:PHP str_get_html函数的具体用法?PHP str_get_html怎么用?PHP str_get_html使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了str_get_html函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的PHP代码示例。
示例1: parseModelsPage
/**
 * Scrapes one page of a brand's model listing and stores every model found.
 *
 * Each "div.makers a" anchor becomes one row of the "cell_model" table. When
 * the pager's last child is titled "Next page", the method recurses into it.
 *
 * @param mixed  $brandId   Brand identifier stored as brand_id on every row.
 * @param string $brandName Brand name prepended to each model name.
 * @param string $page      URL of the listing page to scrape.
 */
function parseModelsPage($brandId, $brandName, $page)
{
    $html_content = scraperwiki::scrape($page);
    // Keep a local handle: the recursive call below reassigns $this->html, so
    // releasing "$this->html" at the end would free the child page a second
    // time and never free this page's document.
    $html = str_get_html($html_content);
    $this->html = $html;
    if (!$html) {
        // Nothing could be parsed from the response (str_get_html returned a falsy value).
        return;
    }
    foreach ($html->find("div.makers a") as $el) {
        $img = $el->find('img', 0);
        // Start from a clean row so no value can leak over from the previous model.
        $m = array();
        $m['name'] = $brandName . ' ' . $el->find('strong', 0)->innertext;
        $m['img'] = $img->src;
        $m['link'] = 'http://www.gsmarena.com/' . $el->href;
        $m['desc'] = $img->title;
        // The numeric id is the part after the first "-" with the last four characters cut off.
        $temp = explode('-', $el->href);
        $m['id'] = isset($temp[1]) ? (int) substr($temp[1], 0, -4) : 0;
        $m['brand_id'] = $brandId;
        scraperwiki::save_sqlite(array("id" => $m['id']), $m, "cell_model");
        $this->models++;
    }
    $pagination = $html->find("div.nav-pages", 0);
    if ($pagination) {
        $nextPageLink = $pagination->lastChild();
        if ($nextPageLink && $nextPageLink->title == "Next page") {
            $this->parseModelsPage($brandId, $brandName, 'http://www.gsmarena.com/' . $nextPageLink->href);
        }
    }
    // Release this page's DOM (not whatever $this->html points to after the recursion).
    $html->clear();
}
开发者ID:trngltrngl,项目名称:gsmarena,代码行数:25,代码来源:scraper.php
示例2: getHypem
/**
 * Fetches a Hype Machine item page and pulls track metadata out of its second <script> tag.
 *
 * The script text is scanned for up to 15 single-quoted literals; selected
 * positions of that list are mapped onto the returned fields.
 *
 * @param string $item Item identifier appended to http://hypem.com/item/.
 * @return array Map with the keys id, blog, secret, artist, song, songurl and duration.
 */
function getHypem($item)
{
    $ch = curl_init("http://hypem.com/item/{$item}?ax=1&ts=1295726809");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_COOKIESESSION, true);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
    curl_setopt($ch, CURLOPT_FORBID_REUSE, true);
    // NOTE(review): the hash comes straight from the query string and ends up in a request header.
    $hash = isset($_GET["hash"]) ? $_GET["hash"] : '';
    curl_setopt($ch, CURLOPT_COOKIE, "AUTH=" . $hash . ";");
    $output = curl_exec($ch);
    // The handle was previously never released.
    curl_close($ch);

    $html = str_get_html($output);
    $scripts = $html ? $html->find('script') : array();
    // The metadata lives in the second script tag; fall back to an empty text when it is missing.
    $value = isset($scripts[1]) ? $scripts[1]->innertext : '';

    $informatie = array();
    // Matches one single-quoted literal without quotes inside; loop-invariant, so built once.
    $pat = "/\\'([^\\']*?)\\'/";
    for ($i = 0; $i < 15; $i++) {
        // The original line contained ''' which does not parse. Escaped quotes have to be
        // neutralised so they cannot terminate a literal early.
        // NOTE(review): '&#039;' is assumed to be the intended replacement (the snippet looks
        // entity-decoded); the "&" removal below also strips the ampersand of that entity —
        // confirm both literals against the upstream file.
        $value = str_replace("\\'", '&#039;', $value);
        $value = str_replace("&", "", $value);
        if (preg_match($pat, $value, $matches)) {
            // Consume the literal so the next iteration finds the following one.
            $value = str_replace($matches[0], '', $value);
            $str = $matches[1];
        } else {
            $str = '';
        }
        $str = str_replace("…", "...", $str);
        $informatie[] = $str;
    }
    $result = array("id" => "http://hypem.com/item/" . $informatie[1], "blog" => $informatie[3], "secret" => $informatie[7], "artist" => $informatie[9], "song" => $informatie[10], "songurl" => 'http://hypem.com/serve/play/' . $informatie[1] . '/' . $informatie[7] . '.mp3', "duration" => $informatie[4]);
    return $result;
}
开发者ID:happylinks,项目名称:Hypem-Boxee,代码行数:27,代码来源:hypem.class.php
示例3: get_product
/**
 * Downloads a product page, merges its AJAX-loaded fragment into it and saves the result.
 *
 * The page is written to "<save_folder>product--<page>.html". curl_get() and
 * get_ajax() are helpers defined outside this block.
 *
 * @param string $url  Address of the product page.
 * @param mixed  $page Value used in the output file name.
 */
function get_product($url, $page = 0)
{
    global $save_folder;
    // Load the product page.
    $html = curl_get($url);
    $dom = str_get_html($html);
    if (!$dom) {
        // Nothing parseable came back, so there is nothing to save.
        return;
    }
    // Read the article number from the <article id="post-N"> attribute.
    $art = null;
    $article = $dom->find('article', 0);
    if ($article && isset($article->attr['id'])) {
        sscanf($article->attr['id'], 'post-%d', $art);
    }
    // Locate the script tag that loads script.js. A dedicated variable is used so that,
    // when no tag matches, the article id is not mistaken for a selector.
    $scriptSelector = null;
    foreach ($dom->find('script') as $script) {
        // strpos() returns 0 for a match at the very start, so compare against false.
        if (strpos((string) $script->src, "script.js") !== false) {
            $scriptSelector = "script[src='" . $script->src . "']";
        }
    }
    if ($scriptSelector !== null) {
        $scriptNode = $dom->find($scriptSelector, 0);
        if ($scriptNode) {
            // Remove the tag from the saved page.
            $scriptNode->outertext = '';
        }
    }
    // AJAX request for the article's variable data.
    $html = get_ajax($art);
    $dom2 = str_get_html($html);
    // Insert the AJAX fragment into the placeholder div of the first page.
    $target = $dom->find('div[id=order-variables]', 0);
    if ($target) {
        $target->innertext = $dom2;
    }
    // Save the HTML.
    file_put_contents($save_folder . 'product--' . $page . '.html', $dom);
}
开发者ID:Lyoshka,项目名称:evseeva.loc,代码行数:26,代码来源:index.php
示例4: dispatchLoopShutdown
/**
 * Post-processes the response body once the dispatch loop has finished.
 *
 * Nothing happens for non-HTML responses, when frontend output filters are off
 * (unless "pimcore_preview" is set), in debug mode, or when the plugin is
 * disabled. Otherwise the body is parsed, passed through the script-src and
 * inline-script replacement steps and written back to the response.
 */
public function dispatchLoopShutdown()
{
    // Short-circuit evaluation keeps the checks in their original order.
    $skip = !Tool::isHtmlResponse($this->getResponse())
        || (!Tool::useFrontendOutputFilters($this->getRequest()) && !$this->getRequest()->getParam("pimcore_preview"))
        || \Pimcore::inDebugMode()
        || !$this->enabled;
    if ($skip) {
        return;
    }

    include_once "simple_html_dom.php";
    $body = $this->getResponse()->getBody();
    $dom = str_get_html($body);
    if ($dom) {
        $dom = $this->searchForScriptSrcAndReplace($dom);
        $dom = $this->searchForInlineScriptAndReplace($dom);
        $body = $dom->save();
        // Free the parser's node graph explicitly.
        $dom->clear();
        unset($dom);
    }
    // When parsing failed the untouched body is written back.
    $this->getResponse()->setBody($body);
}
开发者ID:Cruiser13,项目名称:pimcore-minify,代码行数:25,代码来源:MinifyJs.php
示例5: getDom
/**
 * Requests a URL through cURL and parses the response with str_get_html().
 *
 * The effective URL after redirects is stored in $this->lastUrl, a previously
 * held DOM is cleared, and cURL's verbose output plus the POST payload are
 * appended to CURL_LOG_FILE.
 *
 * @param string $url  Address to request.
 * @param mixed  $post POST payload; when truthy the request is sent as POST.
 * @return mixed Whatever str_get_html() returns for the response body.
 */
public function getDom($url, $post = false)
{
    // cURL session log file
    $logHandle = fopen(CURL_LOG_FILE, 'a+');
    if ($this->lastUrl) {
        // NOTE(review): $header is filled here but never handed to cURL below —
        // confirm whether a Referer header was meant to be sent.
        $header[] = "Referer: {$this->lastUrl}";
    }
    $curlOptions = array(
        CURLOPT_ENCODING => 'gzip,deflate',
        CURLOPT_AUTOREFERER => 1,
        CURLOPT_CONNECTTIMEOUT => 120,
        CURLOPT_TIMEOUT => 120,
        CURLOPT_URL => $url,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 9,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_HEADER => 0,
        CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        CURLOPT_COOKIEFILE => COOKIE_FILE,
        CURLOPT_COOKIEJAR => COOKIE_FILE,
        CURLOPT_STDERR => $logHandle,
        CURLOPT_VERBOSE => true,
    );
    if ($post) {
        // add post options
        $curlOptions[CURLOPT_POSTFIELDS] = $post;
        $curlOptions[CURLOPT_POST] = true;
    }

    $session = curl_init();
    curl_setopt_array($session, $curlOptions);
    $markup = curl_exec($session);
    // get url we've been redirected to
    $this->lastUrl = curl_getinfo($session, CURLINFO_EFFECTIVE_URL);
    curl_close($session);

    // Drop the previous document before replacing it.
    if ($this->dom) {
        $this->dom->clear();
        $this->dom = false;
    }
    $dom = str_get_html($markup);
    $this->dom = $dom;

    fwrite($logHandle, "{$post}\n\n");
    fwrite($logHandle, "-----------------------------------------------------------\n\n");
    fclose($logHandle);
    return $dom;
}
开发者ID:vermaslal,项目名称:ASPBrowser,代码行数:29,代码来源:ASPBrowser.php
示例6: parsePageDynamicContent
/**
 * Turns rendered page HTML into a PHP template in which cacheable widgets are re-rendered.
 *
 * Every ".dm_widget" element with a numeric id suffix and a ".dm_widget_cacheable"
 * child gets that child's content replaced by a call to
 * $helper->renderWidgetInner() for the current page and widget id.
 *
 * @param string $content Rendered page HTML.
 * @return string|null The rewritten markup, or null when no such widget was found.
 */
protected function parsePageDynamicContent($content)
{
    $dom = str_get_html($content);
    $found = 0;
    foreach ($dom->find('.dm_widget') as $node) {
        $id = intval(str_replace('dm_widget_', '', $node->id));
        if (!$id) {
            continue;
        }
        $slot = $node->find('.dm_widget_cacheable', 0);
        if (!$slot) {
            continue;
        }
        // A dynamic widget: leave a placeholder that is expanded into PHP below.
        $found++;
        $slot->innertext = sprintf('{#page#%s#page#}{#widget#%s#widget#}', $this->getPage()->getId(), $id);
    }
    if ($found == 0) {
        // The page has no dynamic content.
        return null;
    }
    // str_replace() applies the pairs one after another, in the order listed.
    return str_replace(
        array('{#page#', '#page#}{#widget#', '#widget#}'),
        array('<?php echo $helper->renderWidgetInner(array(\'page_id\'=>', ', \'widget_id\'=>', ')); ?>'),
        $dom->innertext
    );
}
开发者ID:runopencode,项目名称:diem-extended,代码行数:26,代码来源:dmPageCache.class.php
示例7: scrape
/**
 * Downloads a page, purifies it with HTMLPurifier and queries it with a selector.
 *
 * @param string $url   Address of the page to download.
 * @param string $path  simple_html_dom selector.
 * @param string $parse 'tag', 'outertext', 'innertext' or 'plaintext' to read that
 *                      property of the first match; any other value returns the
 *                      list of matching elements.
 * @return mixed The requested property (null when nothing matches or the page could
 *               not be loaded), or the array of matched elements.
 */
function scrape($url, $path, $parse)
{
    $config = HTMLPurifier_Config::createDefault();
    // encoding of output
    $config->set('Core.Encoding', 'UTF-8');
    // doctype of output
    $config->set('HTML.Doctype', 'XHTML 1.1');
    $purifier = new HTMLPurifier($config);

    $dirty_html = file_get_contents($url);
    if ($dirty_html === false) {
        return null;
    }
    $clean_html = $purifier->purify($dirty_html);
    $html = str_get_html($clean_html);
    if (!$html) {
        return null;
    }

    if (in_array($parse, array('tag', 'outertext', 'innertext', 'plaintext'), true)) {
        // find() without an index returns an array, which has none of these properties;
        // ask for the first match explicitly so the property can actually be read.
        $node = $html->find($path, 0);
        $ret = $node ? $node->{$parse} : null;
    } else {
        // NOTE(review): these elements belong to the document that is cleared just below —
        // confirm callers only need data that survives clear().
        $ret = $html->find($path);
    }

    // clean up memory
    $html->clear();
    unset($dirty_html);
    unset($clean_html);
    unset($html);
    return $ret;
}
开发者ID:anubhaBhargava,项目名称:OpenRecommender,代码行数:35,代码来源:index.php
示例8: testRadioInput
/**
 * Renders a form with a two-option radio field and checks the generated inputs and labels.
 */
public function testRadioInput()
{
    $name = $this->input_default_name;

    // Build the radio field with two options and attach it to the form.
    $radio = magic_form_field_radio::factory($name, $this->input_default_label);
    $radio->add_options(array(1 => 'Test Value 1', 2 => 'Test Value 2'));
    $this->magic_form->add_fields($radio);

    // Parse the rendered form.
    $markup = $this->magic_form->__toString();
    $dom = str_get_html($markup);
    $form = $dom->find("//form")[0];

    // Look up both radio inputs first, then assert on them.
    $inputs = array();
    foreach (array(1, 2) as $n) {
        $inputs[$n] = $form->find("input[id=" . $name . "-test-value-" . $n . "]")[0];
    }
    foreach ($inputs as $n => $input) {
        $this->assertEquals($name, $input->attr['name'], "Radio " . $n . " Name Equals " . $name);
    }

    // Each label must point at its input and carry the option text.
    foreach (array(1, 2) as $n) {
        $id = $name . "-test-value-" . $n;
        $label = $form->find("label[for=" . $id . "]")[0];
        $this->assertEquals($id, $label->attr['for'], "Check Label Radio " . $n);
        $this->assertEquals("Test Value " . $n, $label->innertext(), "Check Label Text Radio " . $n);
    }
}
开发者ID:matthewbaggett,项目名称:drupal_magic_forms,代码行数:26,代码来源:RadioInputTest.php
示例9: clean_discourse
/**
 * Simplifies the markup of a discourse's content and saves it back.
 *
 * Page and column breaks become <span> elements holding a named anchor and a
 * "pdf-image" link, paragraph divs become <p> without a class, and hyphen divs
 * become <span>.
 *
 * @param array $discourse Record with ['Discourse']['content'] and ['Discourse']['id'].
 */
private function clean_discourse($discourse)
{
    App::import('Vendor', 'simple_html_dom');
    $dom = str_get_html($discourse['Discourse']['content']);

    // Page breaks first, then column breaks — both are rewritten the same way.
    foreach (array('div.pagebreak', 'div.columnbreak') as $selector) {
        foreach ($dom->find($selector) as $break) {
            $anchor_name = $break->find('a[name]', 0)->name;
            // Looked up like in the original even though the value is not used afterwards.
            $pdf_text = $break->find('a[target="_pdfwin"]', 0)->href;
            $pdf_image = $break->find('a[target="_pdfwin2"]', 0)->href;
            $break->innertext = sprintf('<a name="%s"></a><a class="pdf-image" href="%s"></a>', $anchor_name, $pdf_image);
            $break->tag = 'span';
        }
    }

    foreach ($dom->find('div.paragraph') as $node) {
        $node->tag = "p";
        $node->class = null;
    }
    foreach ($dom->find('div.hyphen') as $node) {
        $node->tag = "span";
    }

    $this->Discourse->id = $discourse['Discourse']['id'];
    $this->Discourse->saveField('content', $dom);
}
开发者ID:aaronshaf,项目名称:jod,代码行数:28,代码来源:import_controller.php
示例10: dados_telefone
/**
 * Looks up carrier data for a phone number on consultanumero.info.
 *
 * @param string $num Phone number; every non-digit character is removed first.
 * @return array|false Map with the keys operadora, tipo, portabilidade, estado and
 *                     cidade, or false when fewer than 10 digits remain or the
 *                     result block is empty.
 */
function dados_telefone($num)
{
    $num = preg_replace("/[^\\d]/", "", $num);
    if (strlen($num) < 10) {
        return false;
    }

    include_once 'src/simple_html_dom.php';
    $query = http_build_query(array('tel' => $num));
    $headerLines = "Content-Type: application/x-www-form-urlencoded\r\n" . "Content-Length: " . strlen($query) . "\r\n" . "User-Agent:Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36\r\n";
    $context = stream_context_create(array('http' => array('header' => $headerLines, 'method' => "POST", 'content' => $query)));
    $result = file_get_html("http://consultanumero.info/consulta", false, $context);

    $resultado = @$result->find('div[class=resultado]', 0)->children(1)->outertext;
    if (empty($resultado)) {
        return false;
    }
    $resultado = str_get_html($resultado);

    // Tag-stripped text of the N-th <p>, cut at the position strrchr() reports, minus one character.
    $tail = function ($index) use ($resultado) {
        return substr(strrchr(strip_tags($resultado->find('p', $index)->outertext), ' » '), 1);
    };

    // The carrier name is taken from the title attribute of the logo image.
    $img = @$result->find('div[class=a]', 0)->children(0)->outertext;
    preg_match('%<img.*?title=["\'](.*?)["\'].*?/>%i', $img, $operadora);

    $data['operadora'] = $operadora[1];
    $data['tipo'] = $tail(0);
    $data['portabilidade'] = strtolower($tail(1)) == 'sim' ? "Sim" : "Não";
    $data['estado'] = str_replace(array('(', ')'), '', $tail(2));
    $cidade = explode(' » ', strip_tags($resultado->find('p', 3)->outertext));
    $data['cidade'] = $cidade[1];
    return $data;
}
开发者ID:carlosjuchoa,项目名称:consultaOperadora,代码行数:28,代码来源:operadora.php
示例11: processPage
/**
 * Extracts the text of a page and collects the href of every link in its body.
 *
 * @param string $pageContent Raw HTML of the page.
 * @return array|null List of href values; nothing is returned when an Exception
 *                    was caught (it is logged through Applog instead).
 */
function processPage($pageContent)
{
    try {
        $dom = str_get_html($pageContent);
        // Repair broken HTML when no body element can be found.
        if (!$dom->find('body', 0, true)) {
            $dom = $this->fixHtml($dom);
        }
        $body = $dom->find('body', 0, true);
        $this->extractText($body);

        $links = array();
        foreach ($body->find("a") as $anchor) {
            $links[] = $anchor->href;
        }

        // Report memory consumption around releasing the parser's references.
        $dom->clear();
        MemoryManagement::showUsage('before cleaning', true, 'KB');
        MemoryManagement::clean(true);
        MemoryManagement::showUsage('after cleaning', true, 'KB');
        return $links;
    } catch (Exception $ex) {
        Applog::exceptionLog($ex);
    }
}
开发者ID:florinp,项目名称:dexonline,代码行数:25,代码来源:Crawler.php
示例12: exec
/**
 * Removes the configured HTML attributes from every content area.
 *
 * Runs only while publishing: each entry of the bowl is parsed, every attribute
 * named in $options->attrs is removed from all elements carrying it, and the
 * result is written back to the bowl.
 *
 * @param object      $px      Pickles object.
 * @param object|null $options Options; "attrs" is the list of attribute names to remove.
 * @return boolean Always true.
 */
public static function exec($px, $options = null)
{
    require_once __DIR__ . '/simple_html_dom.php';
    if (!$px->is_publish_tool()) {
        // Only active while publishing.
        return true;
    }

    // Read the attribute list without writing to a null $options: assigning a property
    // on null raises an Error on PHP 8 that the "@" operator cannot silence.
    $attrs = array();
    if (is_object($options)) {
        if (!isset($options->attrs) || !is_array($options->attrs)) {
            // Same normalisation the original applied to the caller's options object.
            $options->attrs = array();
        }
        $attrs = $options->attrs;
    }

    foreach ($px->bowl()->get_keys() as $key) {
        $src = $px->bowl()->pull($key);
        $html = str_get_html($src, true, true, DEFAULT_TARGET_CHARSET, false, DEFAULT_BR_TEXT, DEFAULT_SPAN_TEXT);
        if (!$html) {
            // Content could not be parsed; leave it untouched instead of overwriting it.
            continue;
        }
        // Remove the HTML attributes.
        foreach ($attrs as $attr) {
            foreach ($html->find('*[' . $attr . ']') as $retRow) {
                $retRow->{$attr} = null;
            }
        }
        $src = $html->outertext;
        $px->bowl()->replace($src, $key);
    }
    return true;
}
开发者ID:pickles2,项目名称:px2-remove-attr,代码行数:39,代码来源:main.php
示例13: exec
/**
 * Parses an HTML document and runs the content extraction on its body.
 *
 * @param string $html
 */
public function exec($html)
{
    mb_language('Japanese');

    // 1. Pre-processing: empty every <script>.
    // When a script contains a closing tag inside a string literal, DomDocument treats
    // the script source as #text, so the script contents are removed beforehand.
    // Doing this with a regular expression caused a segmentation fault (stack overflow?),
    // hence simple_html_dom is used for the job.
    // The MAX_FILE_SIZE limit was hit, so the library source was edited to three times the default.
    $stripper = str_get_html($html);
    foreach ($stripper->find('script') as $scriptTag) {
        $scriptTag->innertext = '';
    }
    $scriptFree = $stripper->outertext;
    // Trim
    // $html = preg_replace('/(\s| )+/mi', ' ', $html);

    // 2. Build the DOM.
    $document = new DomDocument("1.0", "utf-8");
    @$document->loadHTML(mb_convert_encoding($scriptFree, 'HTML-ENTITIES', 'UTF-8'));
    $bodyNode = $document->getElementsByTagName('body')->item(0);
    $this->preProcessedInput = $bodyNode->textContent;

    // 3. Initialise the properties.
    $this->domXPath = new DomXPath($document);
    $this->title = @$document->getElementsByTagName('title')->item(0)->textContent;
    $scanned = $this->scan($bodyNode);
    $this->textAll = $scanned;
    $this->domCountAll = $this->domCount;
    $this->pancutuationCountAll = $this->calcKutenScore($scanned) + $this->calcTotenScore($scanned);
    $this->textLengthAll = mb_strlen($scanned);
    $this->highScore = -1000000;
    $this->extracedNode = null;

    // 4. Run.
    $this->extract($bodyNode);
}
开发者ID:gammodoking,项目名称:kindle.server,代码行数:39,代码来源:ContentExtractor.php
示例14: parse
/**
 * Crawls a page: records its URL, title and plain text, then follows every link on it.
 *
 * Each visited URL is remembered in $this->cacheurl and each call consumes one
 * unit of $this->_allcount. readUrl() and request() are defined outside this block.
 *
 * @param string $url Address to crawl.
 * @return false|null False when the URL is rejected, was already visited, the page
 *                    budget is exhausted or the page could not be parsed; nothing otherwise.
 */
function parse($url)
{
    // Leftover debug output ("var_dump(...); die;") used to stop the script right here,
    // which made the rest of this method unreachable.
    $url = $this->readUrl($url);
    // Skip empty URLs and pages already visited, with or without a trailing slash.
    if (!$url or !empty($this->cacheurl[$url]) or !empty($this->cacheurl[preg_replace('#/$#', '', $url)])) {
        return false;
    }
    $this->_allcount--;
    if ($this->_allcount <= 0) {
        return false;
    }
    $this->cacheurl[$url] = true;

    $data = str_get_html(request($url));
    if (!$data) {
        return false;
    }
    $titleNode = $data->find('title', 0);
    $item = array();
    $item['url'] = $url;
    $item['title'] = $titleNode ? $titleNode->plaintext : '';
    $item['text'] = $data->plaintext;
    $this->result[] = $item;

    foreach ($data->find('a') as $a) {
        $this->parse($a->href);
    }
    $data->clear();
    unset($data);
}
开发者ID:kd-brinex,项目名称:kd,代码行数:29,代码来源:Parser.php
示例15: translate
/**
 * Translates the buffered output and prints it, optionally reduced to one element.
 *
 * The current output buffer is taken and discarded. When translation is active,
 * every token matched by LOCALE_TEMPLATE that exists in the locale table is
 * replaced. If an "html_element" selector arrives via POST or GET, only the inner
 * HTML of its first match is printed; without a match the full content is printed.
 *
 * @param string $lang Language code handed to getAppLocale().
 */
function translate($lang = 'ru')
{
    // Not used afterwards; kept because the original resolves this constant here.
    $config = App;
    $content = ob_get_contents();
    ob_end_clean();

    if (self::$active) {
        self::$locale = self::getAppLocale($lang);
        preg_match_all(LOCALE_TEMPLATE, $content, $found);
        if (count($found[1]) > 0) {
            foreach ($found[1] as $token) {
                if (!array_key_exists($token, self::$locale)) {
                    continue;
                }
                $content = str_replace(sprintf(LOCALE_TEMPLATE_CHANGE, $token), self::$locale[$token], $content);
            }
        }
    }

    // POST takes precedence; GET is only consulted when POST yields nothing.
    $selector = Request::post('html_element');
    if (!$selector) {
        $selector = Request::get('html_element');
    }

    $output = $content;
    if ($selector) {
        Load::dependence('simple_html_dom');
        $dom = str_get_html($content);
        $hits = $dom->find($selector);
        if (count($hits) > 0) {
            $output = $hits[0]->innertext;
        }
    }
    echo $output;
}
开发者ID:Yogurt933,项目名称:Made-Easy,代码行数:29,代码来源:Language.class.php
示例16: parse_links
/**
 * Walks the product navigation of the start page and paginates through every category.
 *
 * Links containing "produkte-a-bis-z.htm" are skipped. A category page with a
 * "#product-wrapper" is paginated directly; otherwise each ".category-list-entry"
 * is opened and paginated.
 */
protected function parse_links()
{
    $this->get(self::URL);
    $response = $this->getResponse();
    $content = str_get_html($response);
    $box = $content->find('#navi-products', 0);

    foreach ($box->find('a') as $anchor) {
        $link = trim($anchor->getAttribute('href'));
        if (strpos($link, 'produkte-a-bis-z.htm') !== false) {
            continue;
        }
        $cat_con = $this->get_content($link, 'body');
        $this->position = 1;

        if ($cat_con->find('#product-wrapper')) {
            var_dump('from wroapper: ' . $link);
            $this->parse_pagination($cat_con->find('#site', 0), $link);
            continue;
        }

        $entries = $cat_con->find('.category-list-entry');
        if (!$entries) {
            continue;
        }
        foreach ($entries as $entry) {
            $link_entry = $entry->find('.category-name-number', 0)->find('a', 0)->getAttribute('href');
            var_dump("from entry: " . $link_entry);
            $entry_con = $this->get_content($link_entry);
            $this->parse_pagination($entry_con->find('#site', 0), $link_entry);
        }
    }
    // $this->save_data();
}
开发者ID:AlexBGD,项目名称:DrKaske,代码行数:29,代码来源:EuropaApotheekScraper.php
示例17: handle_products
/**
 * Scrapes one product page and stores name, description, price, image and category.
 *
 * The category is derived from the directory part of the product URL. The row is
 * saved to the "products_support" table under the running global $total, which is
 * incremented afterwards. An empty link is ignored.
 *
 * @param string $product_link Address of the product page.
 */
function handle_products($product_link)
{
    global $base_url_host, $base_url_scheme, $total;
    if (empty($product_link)) {
        return;
    }

    // URL path segment => human-readable category name.
    $categoryNames = array(
        "Base-Racks/Feet" => "Base Racks/Feet",
        "Base-Racks/LoadAccessories" => "Base Racks/Load Accessories",
        "Base-Racks/LoadBars" => "Base Racks/Load Bars",
        "Bike-Carriers/Accessories" => "Bike Carriers/Accessories",
        "Bike-Carriers/Hitch" => "Bike Carriers/Hitch",
        "Bike-Carriers/RearDoor" => "Bike Carriers/Rear Door",
        "Bike-Carriers/RoofCarriers" => "Bike Carriers/Roof Carriers",
        "Bike-Carriers/SpareTire" => "Bike Carriers/Spare Tire",
        "Bike-Carriers/TruckBed" => "Bike Carriers/Truck Bed",
        "Cargo-Carriers/Bags" => "Cargo Carriers/Bags",
        "Cargo-Carriers/Baskets" => "Cargo Carriers/Baskets",
        "Cargo-Carriers/Boxes" => "Cargo Carriers/Boxes",
        "Cargo-Carriers/HitchCargo" => "Cargo Carriers/Hitch Cargo",
        "Luggage/DaypacksAndMessengers" => "Luggage/Daypacks And Messengers",
        "Luggage/LaptopAndTablet" => "Luggage/Laptop And Tablet",
        "Luggage/LuggageAndDuffels" => "Luggage/Luggage And Duffels",
        "Snow-Chains/SnowChains" => "Snow Chains/Snow Chains",
        "Snowsports/Accessories" => "Snowsports/Accessories",
        "Snowsports/HitchSki" => "Snowsports/Hitch Ski",
        "Snowsports/SkiBoxes" => "Snowsports/Ski Boxes",
        "Snowsports/SkiCarriers" => "Snowsports/Ski Carriers",
        "Watersports/Accessories" => "Watersports/Accessories",
        "Watersports/WatersportCarriers" => "Watersports/Watersport Carriers",
    );
    $cats = dirname(str_replace("http://www.thule.com/en-US/US/Products/", "", $product_link));
    $cat = str_replace(array_keys($categoryNames), array_values($categoryNames), $cats);

    $html_content = scraperwiki::scrape($product_link);
    $html = str_get_html($html_content);

    // Tag-stripped text of the first match, or "" when the trimmed markup is empty.
    $textOf = function ($selector) use ($html) {
        $raw = trim($html->find($selector, 0));
        return !empty($raw) ? strip_tags($raw) : "";
    };
    $name = $textOf("div[@class='column details_overview'] h2 span");
    $desc = $textOf("div[@class='column details_overview'] h3 span");

    // Reduce the price to the bare amount by removing the label and the currency suffix.
    $price_raw = trim($html->find("div[@class='pricing'] span[@id='phcontent_0_ctl00_lblPriceText']", 0));
    $price = trim(str_replace(array("MSRP \$", " (USD)"), "", strip_tags($price_raw)));

    $image = $html->find("img[@id='imgProductBomImage_0']", 0)->src;
    echo "{$name}: {$image}\n";

    // Add it to the table.
    $record = array('id' => $total, 'product_name' => trim($name), 'desciption' => trim($desc), 'price' => $price, 'img' => $image, 'category' => $cat);
    scraperwiki::save_sqlite(array('id'), array($record), "products_support", 2);
    // Increment the 'id' counter.
    $total++;
}
开发者ID:flyeven,项目名称:scraperwiki-scraper-vault,代码行数:30,代码来源:thule_main_site_support_products_1.php
示例18: parseCategoryData
/**
 * Loads a category edit page and reads the category's data from its form.
 *
 * @param resource $curl   Prepared cURL handle; only its URL is set here.
 * @param string   $url    Address of the category page.
 * @param array    $catIds Map with the keys 'cPath' and 'cID'.
 * @return array Map with the keys parent_id, category_id, category_name,
 *               category_image, sort_order and link.
 */
function parseCategoryData($curl, $url, $catIds)
{
    curl_setopt($curl, CURLOPT_URL, $url);
    $response = curl_exec($curl);
    $categoryDom = str_get_html($response);
    $form = $categoryDom->find('form[name="categories"]', 0);

    /**
     * Table cells of the form:
     * 0 -> Text notice
     * 1 -> Category name
     * 2 -> Category image
     * 3 -> Upload image
     * 4 -> Sort Order
     * 5 -> Buttons
     */
    $cells = $form->find('td');

    // For a path containing "_" the second segment is used as the parent id.
    $parentId = $catIds['cPath'];
    if (strpos($parentId, '_')) {
        $segments = explode('_', $parentId);
        $parentId = $segments[1];
    }
    $categoryId = $catIds['cID'];
    $categoryName = $cells[1]->find('input', 0)->value;
    // Usually parent categories don't have an image assigned.
    $image = $cells[2]->find('img', 0)->src;
    $sortOrder = $cells[4]->find('input[name="sort_order"]', 0)->value;

    return array(
        'parent_id' => $parentId,
        'category_id' => $categoryId,
        'category_name' => $categoryName,
        'category_image' => $image === '/images/' ? null : $image,
        'sort_order' => $sortOrder,
        'link' => $url,
    );
}
开发者ID:vladhorecica,项目名称:curl-script,代码行数:31,代码来源:categories.php
示例19: parse_links
/**
 * Parses offers and brands, then walks every category, sub-category and result page.
 *
 * For each sub-category link the first page is handed to parse_pagination(); the
 * "next" link inside ".paging" is then followed until none is left.
 */
protected function parse_links()
{
    $this->parse_angelbote();
    $this->parse_marken();
    $get = $this->get(self::URL);
    $response = $this->getResponse();
    $content = str_get_html($response);
    $box = $content->find('.kategorien', 0);

    foreach ($box->find('a') as $category) {
        $kats = $category->getAttribute('href');
        $this->position = 1;
        $group = $this->get_content($kats, '.kat_gruppe');

        foreach ($group->find('a') as $subCategory) {
            $kats_1 = $subCategory->getAttribute('href');
            $this->position = 1;
            $con = $this->parse_pagination($kats_1, $kats_1);

            // Follow the pager until no further page is offered.
            for (;;) {
                $pagination = $con->find('.paging', 0);
                if (!$pagination) {
                    break;
                }
                $rightText = $pagination->find('.paginierung', 0)->find('.text_rechts', 0);
                if (!$rightText) {
                    break;
                }
                $next = $rightText->find('span', 0)->find('a', 0);
                if (!$next) {
                    break;
                }
                $con = $this->parse_pagination($next->getAttribute('href'), $kats_1);
            }
        }
    }
    // $this->save_data();
}
开发者ID:AlexBGD,项目名称:DrKaske,代码行数:34,代码来源:AponeoScraper.php
示例20: url2html
/**
 * Fetches the page a URL points to and parses it (url2html).
 *
 * Progress and cURL errors are echoed.
 *
 * @author hani <[email]>
 * @param string     $url    Address to download.
 * @param array|null $header Request header lines; a built-in default set is used
 *                           when the argument is omitted or null.
 * @return mixed Whatever str_get_html() returns for the downloaded body.
 */
function url2html($url = '', $header = null)
{
    // header("Content-Type:text/html; charset=gbk2333");
    import("Org.Net.simple_html_dom");
    $timeout = 15;
    // Build the request headers. $header now defaults to null: a required parameter
    // after an optional one is deprecated and made this fallback reachable only by
    // passing null explicitly.
    if (!isset($header)) {
        $header = array("User-Agent : Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language: zh,zh-cn;q=0.8,en-us;q=0.5,en;q=0.3", "Cookie: Hm_lvt_aedd3972ca50f4fd67b4d7e025fa000c=1421985654,1422084096,1422084097,1422176563; bdshare_firstime=1421560690892; PHPSESSID=hu43skm3rnkof8qdvdngqmpnq7; Hm_lpvt_aedd3972ca50f4fd67b4d7e025fa000c=1422176873; sso_back_url=%7B%220%22%3A%22index%5C%2Fintern%22%2C%22id%22%3A8395%7D");
    }
    // 1. Initialise
    $ch = curl_init();
    // 2. Set the options
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // 3. Execute and fetch the HTML document
    $output = curl_exec($ch);
    if ($output === FALSE) {
        echo "curl error: " . curl_error($ch);
    }
    $info = curl_getinfo($ch);
    echo '获取' . $info['url'] . '耗时' . $info['total_time'] . '秒';
    //dump($info);
    // 4. Release the cURL handle
    curl_close($ch);
    $html = str_get_html($output);
    return $html;
}
开发者ID:hani1990,项目名称:phpspider,代码行数:35,代码来源:function.php
注:本文中的str_get_html函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论