本文整理汇总了PHP中tidy_clean_repair函数的典型用法代码示例。如果您正苦于以下问题：PHP tidy_clean_repair函数的具体用法？PHP tidy_clean_repair怎么用？PHP tidy_clean_repair使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了tidy_clean_repair函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的PHP代码示例。
示例1: load_html
/**
 * Repair an HTML string with tidy and load the result into phpQuery.
 *
 * @param string $html Raw markup.
 * @return mixed Whatever phpQuery::newDocumentHTML() returns for the repaired markup.
 */
function load_html($html)
{
    // Let tidy parse and repair the markup first.
    $document = tidy_parse_string($html);
    tidy_clean_repair($document);
    $repaired = tidy_get_html($document);

    // unloadDocuments() is called without arguments before the new document
    // is created (presumably discarding previously loaded documents).
    phpQuery::unloadDocuments();

    return phpQuery::newDocumentHTML($repaired);
}
开发者ID:richthegeek,项目名称:Misc,代码行数:8,代码来源:tidyPQ.php
示例2: afterRender
/**
 * Post-process the rendered view content with tidy.
 *
 * Does nothing when the tidy extension is not loaded. Otherwise the view
 * content is parsed and repaired, two string substitutions are applied to the
 * result, and the final markup is written back to the view.
 *
 * @param mixed $event Not used by this method.
 * @param mixed $view  Object providing getContent() and setContent().
 * @return void
 */
public function afterRender($event, $view)
{
    if (!extension_loaded('tidy')) {
        return;
    }

    $tidyOptions = array(
        'hide-comments' => true,
        'tidy-mark' => false,
        'indent' => true,
        'indent-spaces' => 4,
        'new-blocklevel-tags' => 'article,header,footer,section,nav',
        'new-inline-tags' => 'video,audio,canvas,ruby,rt,rp',
        'doctype' => '<!DOCTYPE HTML>',
        'sort-attributes' => 'alpha',
        'vertical-space' => false,
        'output-xhtml' => true,
        'wrap' => 150,
        'wrap-attributes' => false,
        'break-before-br' => false,
    );

    $document = tidy_parse_string($view->getContent(), $tidyOptions, 'utf8');
    tidy_clean_repair($document);

    // The opening <html ...> tags tidy emits in XHTML mode are swapped for a
    // plain HTML5 doctype string.
    $xhtmlRootTags = array(
        '<html lang="en" xmlns="http://www.w3.org/1999/xhtml">',
        '<html xmlns="http://www.w3.org/1999/xhtml">',
    );
    $markup = str_replace($xhtmlRootTags, '<!DOCTYPE html>', $document);

    // Pull a closing </script> that tidy moved onto its own line back up.
    $markup = str_replace(">\n</script>", "></script>", $markup);

    $view->setContent((string) $markup);
}
开发者ID:niden,项目名称:kolibri,代码行数:12,代码来源:Tidy.php
示例3: output
/**
 * Assemble the complete page from the themed parts and end the request by
 * printing it with die().
 *
 * The page is built from theme('head'), an optional maintenance banner, the
 * top/links sections, the panels, any collected errors, the body and the
 * footer. If the tidy extension is loaded the result is parsed and repaired
 * before being printed.
 *
 * @param string $title Page title; " - Page N" is appended when $_GET['page'] > 1.
 * @param string $body  Page body; replaced by theme('output_error') when that is not false.
 * @param string $head  Passed to theme('head').
 * @return void This function never returns; it always ends in die().
 */
function output($title = '', $body = '', $head = '')
{
// NOTE(review): $head is both a parameter and listed in this global
// statement; the global statement rebinds the name to the global variable,
// so the argument passed by the caller appears to be ignored - confirm intent.
global $settings, $authid, $checkleft, $checkright, $head, $error, $error_die;
// An error page replaces the body and title and suppresses the panels.
if (theme('output_error') != false) {
$body = theme('output_error');
$title = 'Error';
$panels = false;
$lowerpanel = false;
$panel = '';
unset($error_die);
} else {
$panels = true;
}
// Render the panels unless an error page is being shown.
if ($panels != false) {
$panel = theme('displaypanels');
$lowerpanel = theme('displaylowerpanel');
}
// Turn the collected error messages into a themed bullet list.
if (isset($error) && !empty($error)) {
$errors = '<br />' . theme('title', 'Error') . theme('start_content') . '<div class="errors"><ul>';
foreach ($error as $error1) {
$errors .= '<li>' . $error1 . '</li>';
}
$errors .= '</ul></div>' . theme('end_content');
unset($error);
} else {
$errors = '';
}
// Pages after the first get their page number appended to the title.
if (isset($_GET['page']) && $_GET['page'] > 1) {
$title = $title . ' - Page ' . $_GET['page'];
}
$output = theme('head', stripslashes($title), $head) . '<body>';
if ($settings['maintenance_mode'] == 'on') {
$output .= '<div class="titlebg">WARNING: Maintenance Mode is on</div>';
}
$output .= '<div id="container">
' . theme('top') . theme('links');
$output .= $panel;
// Append errors followed by the page body.
$output .= $errors . '<br />' . stripslashes($body);
$output .= $lowerpanel . theme('footer');
// SEO friendly links: the included file runs in this function's scope, so it
// can see $output (presumably it rewrites the links in it - verify in that file).
include IN_PATH . '/functions/seofriendlyurls.php';
// Only tidy the markup when the tidy extension is available.
if (extension_loaded('tidy')) {
// Re-indent the HTML without line wrapping so "View Source" is readable.
$options = array("indent" => true, 'wrap' => 0);
$output = tidy_parse_string($output, $options);
tidy_clean_repair($output);
}
// Print the page and stop script execution.
die($output);
}
开发者ID:eodivision,项目名称:eoCMS,代码行数:52,代码来源:output.php
示例4: tidy
/**
 * Turn a string or array into valid, standards-compliant (X)HTML.
 *
 * Uses the configuration options in tidy.conf (located under SETTINGS_INC),
 * which should minimally have show-body-only set to yes.
 *
 * Three back ends are tried in this order: the tidy 2.x extension (no
 * tidy_setopt() function), the tidy 1.x extension (tidy_setopt() present) and
 * the command line executable named by TIDY_EXE. If none is available a
 * notice is triggered and the wrapped, untidied text is returned.
 *
 * @param mixed $text The data to be tidied up; arrays are processed element by element
 * @return mixed $result Tidied data (an array with the same keys when an array was given)
 */
function tidy($text)
{
    static $tidy_funcs;
    static $tidy_conf;
    if (!isset($tidy_conf)) {
        $tidy_conf = SETTINGS_INC . 'tidy.conf';
    }

    // Arrays are tidied recursively, preserving their keys.
    if (is_array($text)) {
        $result = array();
        foreach ($text as $key => $value) {
            $result[$key] = tidy($value);
        }
        return $result;
    }

    // determine what tidy libraries are available
    if (empty($tidy_funcs)) {
        $tidy_funcs = get_extension_funcs('tidy');
    }
    $tidy_ext_available = !empty($tidy_funcs);
    $has_setopt = $tidy_ext_available && in_array('tidy_setopt', $tidy_funcs, true);
    $tidy_1_lib_available = $tidy_ext_available && $has_setopt;
    $tidy_2_lib_available = $tidy_ext_available && !$has_setopt;
    $tidy_command_line_available = TIDY_EXE ? file_exists(TIDY_EXE) : false;

    $text = protect_string_from_tidy($text);
    $text = '<html><body>' . $text . '</body></html>';

    if ($tidy_2_lib_available) {
        $tidy = new tidy();
        $tidy->parseString($text, $tidy_conf, 'utf8');
        $tidy->cleanRepair();
        $result = $tidy;
    } elseif ($tidy_1_lib_available) {
        tidy_load_config($tidy_conf);
        tidy_set_encoding('utf8');
        tidy_parse_string($text);
        tidy_clean_repair();
        $result = tidy_get_output();
    } elseif ($tidy_command_line_available) {
        // Every dynamic part of the command is shell-escaped so that paths
        // containing spaces or shell metacharacters cannot break the pipeline.
        $cmd = 'echo ' . escapeshellarg($text)
            . ' | ' . escapeshellarg(TIDY_EXE)
            . ' -q -config ' . escapeshellarg($tidy_conf)
            . ' 2> /dev/null';
        // Pipe the text into tidy and capture what it prints on stdout.
        $result = shell_exec($cmd);
    } else {
        trigger_error('tidy does not appear to be available within php or at the command line - no tidying is taking place.');
        $result = $text;
    }

    // The cast covers both the tidy object and a null from a failed shell_exec().
    return trim((string) $result);
}
开发者ID:hunter2814,项目名称:reason_package,代码行数:55,代码来源:tidy.php
示例5: internal
/**
 * Tidy the given text in-process through the tidy extension instead of
 * spawning an external process.
 *
 * Written against the PHP 4.3.x (tidy 1.0) procedural API, which keeps one
 * implicit document; it may not work on PHP 5.
 *
 * 'pear install tidy' should be able to compile the extension module.
 *
 * @param string $text Markup to clean.
 * @return string|null Cleaned markup, or null when tidy reports status 2.
 */
private static function internal($text)
{
    global $wgTidyConf;

    $profileSection = 'Parser::internalTidy';
    wfProfileIn($profileSection);

    tidy_load_config($wgTidyConf);
    tidy_set_encoding('utf8');
    tidy_parse_string($text);
    tidy_clean_repair();

    // 2 is the magic number for a fatal error, see
    // http://www.php.net/manual/en/function.tidy-get-status.php
    $cleansource = tidy_get_status() == 2 ? null : tidy_get_output();

    wfProfileOut($profileSection);

    return $cleansource;
}
开发者ID:Jobava,项目名称:diacritice-meta-repo,代码行数:26,代码来源:Tidy.php
示例6: apply
/**
 * Run the data through tidy.
 *
 * Returns the data untouched when tidy_parse_string() does not exist. When
 * tidy_setopt() exists and the parameters are an array, the tidy 1.0 API is
 * used; otherwise the parameters are handed to tidy_parse_string() directly.
 *
 * @access public
 * @param string data
 * @return string tidied data
 */
function apply($data)
{
    if (!function_exists('tidy_parse_string')) {
        return $data;
    }

    $useTidyOne = function_exists('tidy_setopt') && is_array($this->_params);

    if (!$useTidyOne) {
        // tidy 2.0: options travel with the parse call, document is explicit
        $document = tidy_parse_string($data, $this->_params);
        tidy_clean_repair($document);

        return tidy_get_output($document);
    }

    // tidy 1.0: options are set one by one, the document is implicit
    foreach ($this->_params as $option => $setting) {
        tidy_setopt($option, $setting);
    }
    tidy_parse_string($data);
    tidy_clean_repair();

    return tidy_get_output();
}
开发者ID:jwest00724,项目名称:Joomla-1.0,代码行数:29,代码来源:Tidy.php
示例7: return_parsed_bbcode
/**
 * Convert a small BBCode dialect to HTML and, when tidy is available, run
 * the result through tidy (XHTML output, body only).
 *
 * Supported tags: [b] [i] [u] [center] [left] [right] [ol] [ul] [li] [br]
 * plus [img]...[/img] and [url]http(s)://...[/url] (both case-insensitive).
 *
 * The message is NOT escaped or stripped here, so callers are responsible
 * for handling untrusted input.
 *
 * @param string $message Text containing BBCode.
 * @param bool   $nowrap  When false, tidy wraps lines at 80 columns.
 * @return string The converted (and possibly tidied) markup.
 */
function return_parsed_bbcode($message, $nowrap = false)
{
    // never strip_tags here, see Page.Talks for details
    $simple_tags = array(
        '[b]' => '<b>',
        '[/b]' => '</b>',
        '[i]' => '<i>',
        '[/i]' => '</i>',
        '[u]' => '<u>',
        '[/u]' => '</u>',
        '[center]' => '<div align="center">',
        '[/center]' => '</div>',
        '[left]' => '<div align="left">',
        '[/left]' => '</div>',
        '[right]' => '<div align="right">',
        '[/right]' => '</div>',
        '[ol]' => '<ol>',
        '[ul]' => '<ul>',
        '[li]' => '<li>',
        '[/ol]' => '</ol>',
        '[/ul]' => '</ul>',
        '[br]' => '<br>',
    );
    $message = str_replace(array_keys($simple_tags), array_values($simple_tags), $message);

    // eregi_replace() was removed in PHP 7; preg_replace() with the /i flag is
    // the replacement. POSIX bracket expressions treat the backslash literally,
    // so the original class excluded both "\" and "[" - that is kept here.
    $patterns = array(
        '~\[img\]([^\\\\\[]*)\[/img\]~i',
        '~\[url\](https?://[^\\\\\[]*)\[/url\]~i',
    );
    $replacements = array(
        '<img src="$1" border="0">',
        '<a href="$1">$1</a>',
    );
    $converted = preg_replace($patterns, $replacements, $message);
    if ($converted !== null) {
        // preg_replace() yields null on a PCRE error; keep the message then.
        $message = $converted;
    }

    if (function_exists('tidy_get_output')) {
        $config = array('indent' => false, 'output-xhtml' => true, 'show-body-only' => true);
        if (!$nowrap) {
            $config['wrap'] = 80;
        }

        if (function_exists('tidy_setopt')) {
            // tidy 1.0: global options and an implicit document
            tidy_set_encoding('UTF8');
            foreach ($config as $key => $value) {
                tidy_setopt($key, $value);
            }
            tidy_parse_string($message);
            tidy_clean_repair();
            $message = tidy_get_output();
        } else {
            // tidy 2.0: options are passed to the parser, document is explicit
            $document = tidy_parse_string($message, $config, 'utf8');
            if ($document !== false) {
                tidy_clean_repair($document);
                $message = tidy_get_output($document);
            }
        }
    }

    return $message;
}
开发者ID:esokullu,项目名称:grou.ps,代码行数:39,代码来源:bbcode.php
示例8: TidyClean
/**
 * Clean $this->html with tidy, using $this->TidyConfig and $this->Encoding.
 *
 * The object API is used when the tidy class exists; otherwise the tidy 1.0
 * procedural API is used when tidy_parse_string() exists; otherwise a notice
 * is printed and $this->html is left alone.
 */
function TidyClean()
{
    if (class_exists('tidy')) {
        //PHP 5 only !!!
        $cleaner = new tidy();
        $cleaner->parseString($this->html, $this->TidyConfig, $this->Encoding);
        $cleaner->cleanRepair();
        // The tidy object itself is stored, not a string cast of it.
        $this->html = $cleaner;

        return;
    }

    if (!function_exists('tidy_parse_string')) {
        print "<b>No tidy support. Please enable it in your php.ini.\r\nOnly basic cleaning is beeing applied\r\n</b>";

        return;
    }

    //use procedural style for compatibility with PHP 4.3
    tidy_set_encoding($this->Encoding);
    foreach ($this->TidyConfig as $option => $setting) {
        tidy_setopt($option, $setting);
    }
    tidy_parse_string($this->html);
    tidy_clean_repair();
    $this->html = tidy_get_output();
}
开发者ID:ergun805,项目名称:eOgr,代码行数:23,代码来源:HTMLCleaner.php
示例9: tidy_output
/**
 * Run the application, tidy its output and print it with a Content-Length header.
 *
 * Two output buffers are opened: the inner one is flushed into the outer one
 * so that the outer buffer's length can be sent as Content-Length before the
 * outer buffer itself is flushed.
 *
 * @param object $init Object providing process() and run().
 * @return void
 */
function tidy_output($init)
{
    $init->process();
    ob_start();
    ob_start();
    $rendered = $init->run();

    // Output flavour; hard-wired to XHTML here.
    $mode = 'xhtml';
    $profiles = array(
        'xhtml' => array(
            'output-xhtml' => true,
            'indent' => true,
            'input-encoding' => 'utf8',
            'output-encoding' => 'utf8',
        ),
        'html' => array(
            'output-html' => true,
            'indent' => true,
            'input-encoding' => 'utf8',
            'output-encoding' => 'utf8',
            'clean' => true,
        ),
    );

    if (isset($profiles[$mode])) {
        $rendered = tidy_parse_string($rendered, $profiles[$mode]);
        tidy_clean_repair($rendered);
    }

    echo $rendered;
    ob_end_flush();
    header('Content-Length: ' . ob_get_length());
    ob_end_flush();
}
开发者ID:savonix,项目名称:nexista,代码行数:22,代码来源:tidy.php
示例10: tidyThis
/**
 * Tidy the given markup according to $this->tidy_mode.
 *
 *  - 'exec': write the source to a temporary file under $this->tmp_dir and
 *            run the command line tidy on it, returning its output lines.
 *  - 'php' : use the tidy extension and return the repaired markup.
 *  - other : return the source unchanged.
 *
 * @param string $source Markup to tidy.
 * @return string Tidied markup (or the untouched source).
 */
private function tidyThis($source)
{
    switch ($this->tidy_mode) {
        case 'exec':
            $tmp_file = $this->tmp_dir . md5($source) . '.txt';
            file_put_contents($tmp_file, $source);
            $lines = array();
            // The path is escaped so a tmp_dir containing spaces or shell
            // metacharacters cannot break or alter the command.
            exec('tidy -utf8 -indent -asxhtml -numeric -bare -quiet ' . escapeshellarg($tmp_file), $lines);
            unlink($tmp_file);
            return implode("\n", $lines);

        case 'php':
            $document = tidy_parse_string($source);
            // tidy_clean_repair() only reports success as a boolean; the
            // repaired markup has to be read from the document afterwards.
            tidy_clean_repair($document);
            return tidy_get_output($document);

        default:
            return $source;
    }
}
开发者ID:rhertzog,项目名称:lcs,代码行数:19,代码来源:hkit.class.php
示例11: tidyFix
/**
 * Use HTML Tidy to repair the $text in place.
 *
 * Unless $ignore_config is true, tidy is skipped (and true returned) when
 * $config['HTML_Tidy'] is empty or 'off'.
 *
 * @param string $text The html content to be checked. Passed by reference
 *                     and overwritten with the tidied markup on success.
 * @param bool $ignore_config Run tidy regardless of $config['HTML_Tidy'].
 * @return bool False when tidy is unavailable or reports a fatal error
 *              (status 2); true otherwise.
 */
static function tidyFix(&$text, $ignore_config = false)
{
    global $config;

    if (!$ignore_config && (empty($config['HTML_Tidy']) || $config['HTML_Tidy'] == 'off')) {
        return true;
    }
    if (!function_exists('tidy_parse_string')) {
        return false;
    }

    $options = array();
    //keep tidy from wrapping: change as little whitespace as possible
    $options['wrap'] = 0;
    //omit, auto, strict, transitional, user
    $options['doctype'] = 'omit';
    //drop empty paragraphs
    $options['drop-empty-paras'] = true;
    //needed so that <br> becomes <br/> etc.
    $options['output-xhtml'] = true;
    $options['show-body-only'] = true;
    $options['hide-comments'] = false;
    //'anchor-as-name' is left at its default (true) because the option is not always available

    //
    // php4 (tidy 1.0 API: global options, implicit document)
    //
    if (function_exists('tidy_setopt')) {
        $options['char-encoding'] = 'utf8';
        gp_edit::tidyOptions($options);
        tidy_parse_string($text);
        tidy_clean_repair();
        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        if (tidy_get_status() === 2) {
            return false;
        }
        $text = tidy_get_output();

        return true;
    }

    //
    // php5 (tidy 2.0 API: explicit document)
    //
    $tidy = tidy_parse_string($text, $options, 'utf8');
    tidy_clean_repair($tidy);
    if (tidy_get_status($tidy) === 2) {
        return false;
    }
    $text = tidy_get_output($tidy);

    return true;
}
开发者ID:GedionChang,项目名称:gpEasy-CMS,代码行数:60,代码来源:editing.php
示例12: cleanXHTML
/**
 * Clean a fragment into XHTML (body only, no doctype) using tidy.
 *
 * Falls back to strip_tags() keeping only <br> and <a> when the tidy
 * extension cannot be used or produces no output.
 *
 * @param string $string Markup to clean.
 * @return string Cleaned markup or the strip_tags() fallback.
 */
function cleanXHTML($string)
{
    if (!extension_loaded('tidy')) {
        // dl() is not available in every SAPI, so check before calling it.
        if (!function_exists('dl') || !dl('tidy.so')) {
            return strip_tags($string, '<br><a>');
        }
    }

    $options = array(
        'output-xhtml' => true,
        'doctype' => 'omit',
        'show-body-only' => true,
    );

    if (function_exists('tidy_setopt')) {
        // tidy 1.0: global options and an implicit document
        foreach ($options as $name => $value) {
            tidy_setopt($name, $value);
        }
        tidy_parse_string($string);
        tidy_clean_repair();
        $clean = tidy_get_output();
    } else {
        // tidy 2.0: tidy_setopt() no longer exists; options go to the parser
        $clean = '';
        $document = tidy_parse_string($string, $options);
        if ($document !== false) {
            tidy_clean_repair($document);
            $clean = tidy_get_output($document);
        }
    }

    if (!$clean) {
        return strip_tags($string, '<br><a>');
    }

    return $clean;
}
开发者ID:radicalsuz,项目名称:amp,代码行数:18,代码来源:RSS.inc.php
示例13: process
public function process($html, $url, $smart_tidy = true)
{
$this->reset();
// use user submitted config and merge it with regular one
if (isset($this->userSubmittedConfig)) {
$this->debug('Using user-submitted site config');
$this->config = $this->userSubmittedConfig;
if ($this->config->autodetect_on_failure()) {
$this->debug('Merging user-submitted site config with site config files associated with this URL and/or content');
$this->config->append($this->buildSiteConfig($url, $html));
}
} else {
$this->config = $this->buildSiteConfig($url, $html);
}
// do string replacements
if (!empty($this->config->find_string)) {
if (count($this->config->find_string) == count($this->config->replace_string)) {
$html = str_replace($this->config->find_string, $this->config->replace_string, $html, $_count);
$this->debug("Strings replaced: {$_count} (find_string and/or replace_string)");
} else {
$this->debug('Skipped string replacement - incorrect number of find-replace strings in site config');
}
unset($_count);
}
// use tidy (if it exists)?
// This fixes problems with some sites which would otherwise
// trouble DOMDocument's HTML parsing. (Although sometimes it
// makes matters worse, which is why you can override it in site config files.)
$tidied = false;
if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) {
$this->debug('Using Tidy');
$tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
if (tidy_clean_repair($tidy)) {
$original_html = $html;
$tidied = true;
$html = $tidy->value;
}
unset($tidy);
}
// load and parse html
if ($this->parserOverride) {
// from querystring: &parser=xxx
$_parser = $this->parserOverride;
} else {
// from site config file: parser: xxx
$_parser = $this->config->parser();
}
// for backword compatibility...
if ($_parser == 'html5lib') {
$_parser = 'html5php';
}
if (!in_array($_parser, $this->allowedParsers)) {
$this->debug("HTML parser {$_parser} not listed, using " . $this->defaultParser . " instead");
$_parser = $this->defaultParser;
}
$this->debug("Attempting to parse HTML with {$_parser}");
$this->readability = new Readability($html, $url, $_parser);
// we use xpath to find elements in the given HTML document
// see http://en.wikipedia.org/wiki/XPath_1.0
$xpath = new DOMXPath($this->readability->dom);
// try to get next page link
foreach ($this->config->next_page_link as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->nextPageUrl = trim($elems);
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $item) {
if ($item instanceof DOMElement && $item->hasAttribute('href')) {
$this->nextPageUrl = $item->getAttribute('href');
break 2;
} elseif ($item instanceof DOMAttr && $item->value) {
$this->nextPageUrl = $item->value;
break 2;
}
}
}
}
// check if this is a native ad
foreach ($this->config->native_ad_clue as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if ($elems instanceof DOMNodeList && $elems->length > 0) {
$this->nativeAd = true;
break;
}
}
// try to get title
foreach ($this->config->title as $pattern) {
// $this->debug("Trying $pattern");
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->title = trim($elems);
$this->debug('Title expression evaluated as string: ' . $this->title);
$this->debug("...XPath match: {$pattern}");
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
$this->title = $elems->item(0)->textContent;
$this->debug('Title matched: ' . $this->title);
$this->debug("...XPath match: {$pattern}");
// remove title from document
//.........这里部分代码省略.........
开发者ID:oxmcvusd,项目名称:full-text-rss-3.4,代码行数:101,代码来源:ContentExtractor.php
示例14: __construct
/**
 * Create instance of Readability
 *
 * Runs the configured pre-filters over the HTML, optionally repairs it with
 * tidy, converts it to HTML entities and parses it into $this->dom.
 *
 * @param string UTF-8 encoded string
 * @param string (optional) URL associated with HTML (for footnotes)
 * @param string (optional) Which parser to use for turning raw HTML into a DOMDocument
 * @param boolean (optional) Use tidy
 */
function __construct($html, $url = null, $parser = 'libxml', $use_tidy = true)
{
    $this->url = $url;
    $this->debugText = 'Parsing URL: ' . $url . "\n";
    if ($url) {
        // parse_url() yields null/false when the URL has no host or is
        // malformed; the cast keeps the expression free of warnings and
        // produces the same (empty) pattern as before in that case.
        $host = (string) parse_url($url, PHP_URL_HOST);
        $this->domainRegExp = '/' . strtr(preg_replace('/www\\d*\\./', '', $host), array('.' => '\\.')) . '/';
    }
    mb_internal_encoding("UTF-8");
    mb_http_output("UTF-8");
    mb_regex_encoding("UTF-8");
    $this->imageCache = new ImageCaching();

    // HACK: dirty cleanup to replace some stuff; shouldn't use regexps with HTML but well...
    if (!$this->flagIsActive(self::FLAG_DISABLE_PREFILTER)) {
        try {
            foreach ($this->pre_filters as $search => $replace) {
                $filtered = preg_replace($search, $replace, $html);
                // preg_replace() signals failure with null; applying that
                // would discard the whole document, so the filter is skipped.
                if ($filtered !== null) {
                    $html = $filtered;
                }
            }
            unset($search, $replace, $filtered);
        } catch (Exception $e) {
            $this->debugText .= "Cleaning raw HTML failed. Ignoring: " . $e->getMessage();
        }
    }
    if (trim($html) === '') {
        $html = '<html></html>';
    }

    /**
     * Use tidy (if it exists).
     * This fixes problems with some sites which would otherwise trouble DOMDocument's HTML parsing.
     * Although sometimes it makes matters worse, which is why there is an option to disable it.
     *
     **/
    if ($use_tidy && function_exists('tidy_parse_string')) {
        $this->debugText .= 'Tidying document' . "\n";
        $tidy = tidy_parse_string($html, $this->tidy_config, 'UTF8');
        if (tidy_clean_repair($tidy)) {
            $this->tidied = true;
            $html = $tidy->value;
            // Strip attributes from the <html> tag and collapse line breaks.
            $html = preg_replace('/<html[^>]+>/i', '<html>', $html);
            $html = preg_replace('/[\\r\\n]+/is', "\n", $html);
        }
        unset($tidy);
    }

    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    if ($parser == 'html5lib' && ($this->dom = HTML5_Parser::parse($html))) {
        // all good
    } else {
        // Default path: libxml via DOMDocument, with parse errors suppressed.
        libxml_use_internal_errors(true);
        $this->dom = new DOMDocument();
        $this->dom->preserveWhiteSpace = false;
        @$this->dom->loadHTML($html, LIBXML_NOBLANKS | LIBXML_COMPACT | LIBXML_NOERROR);
    }
    $this->dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
}
开发者ID:yangchenghu,项目名称:full-text-rss,代码行数:61,代码来源:Readability.php
示例15: nexista_devBuffer
/**
 * Development output buffer: run the application, append the flow viewport
 * and the final notices before the closing body/html tags, and print the
 * result with no-cache and Content-Length headers.
 *
 * The "client_view_flow" query parameter ('true'/'false') is remembered in
 * the session and controls whether the flow viewport is appended.
 *
 * @param object $init Object providing process() and run().
 * @return void
 */
function nexista_devBuffer($init)
{
    $init->process();
    ob_start();
    ob_start();
    header('Cache-Control: no-cache, must-revalidate');
    header('Last-Modified: ' . gmdate("D, d M Y H:i:s") . ' GMT');
    nexista_development_console();
    $output = $init->run();

    if (isset($_GET['view_flow']) && $_GET['view_flow'] == 'true') {
        nexista_view_flow();
    }

    // Persist the client's choice; missing parameters no longer raise notices.
    $client_view_flow = isset($_GET['client_view_flow']) ? $_GET['client_view_flow'] : null;
    if ($client_view_flow == 'true') {
        $_SESSION['client_view_flow'] = 'true';
    } elseif ($client_view_flow == 'false') {
        $_SESSION['client_view_flow'] = 'false';
    }

    // Default to an empty viewport so the concatenation below is always defined.
    $flow_viewport = '';
    if (isset($_SESSION['client_view_flow']) && $_SESSION['client_view_flow'] == 'true') {
        $flow_viewport = nexista_view_flow();
    }

    // NOTE(review): $cache_type was never assigned in this function, so null
    // has always been passed on; confirm whether a real value was intended.
    $cache_type = null;

    // Re-open the document so the extra markup lands inside <body>.
    $output = str_replace('</body>', '', $output);
    $output = str_replace('</html>', '', $output);
    $output .= $flow_viewport;
    $output .= nexista_final_notices($cache_type, 'dev');
    $output .= '</body></html>';

    // Tidying is switched off for the development buffer.
    $tidy = false;
    if ($tidy == 'xhtml') {
        $options = array('output-xhtml' => true, 'indent' => true, 'input-encoding' => 'utf8', 'output-encoding' => 'utf8', 'clean' => true);
        $output = tidy_parse_string($output, $options);
        tidy_clean_repair($output);
    }
    if ($tidy == 'html') {
        $options = array('output-html' => true, 'indent' => true, 'input-encoding' => 'utf8', 'output-encoding' => 'utf8', 'clean' => true);
        $output = tidy_parse_string($output, $options);
        tidy_clean_repair($output);
    }

    echo $output;
    // Flush the inner buffer into the outer one, measure it, then send it.
    ob_end_flush();
    header('Content-Length: ' . ob_get_length());
    ob_end_flush();
}
开发者ID:savonix,项目名称:nexista,代码行数:45,代码来源:dev_buffer.php
示例16: nv_valid_html
/**
 * nv_valid_html()
 *
 * Repair HTML with tidy, using whichever API $sys_info['supports_tidy']
 * announces ("class" for the tidy object API, "func" for the procedural
 * API). Any other value returns the HTML unchanged.
 *
 * @param string $html     Markup to repair
 * @param mixed $config    Tidy configuration (option array or config file name)
 * @param string $encoding Tidy encoding name
 * @return mixed The repaired tidy document (castable to string), or the
 *               original $html when tidy is not supported
 */
function nv_valid_html($html, $config, $encoding = 'utf8')
{
    global $sys_info;

    if ($sys_info['supports_tidy'] == "class") {
        $tidy = new tidy();
        $tidy->parseString($html, $config, $encoding);
        $tidy->cleanRepair();
        return $tidy;
    }

    if ($sys_info['supports_tidy'] == "func") {
        $tidy = tidy_parse_string($html, $config, $encoding);
        // The document returned by tidy_parse_string() must be passed on
        // explicitly; without it nothing is repaired (and PHP 8 throws).
        tidy_clean_repair($tidy);
        return $tidy;
    }

    return $html;
}
开发者ID:atarubi,项目名称:nuke-viet,代码行数:24,代码来源:functions.php
示例17: viewAction
/**
 * Load a problem's index.html into the view, optionally normalising it with
 * tidy, and optionally return it without the layout.
 *
 * Request parameters read here: "probid" (problem directory), "tidy"
 * (anything but "false" enables tidying when the extension exists) and
 * "plain" ("true" sends the bare HTML as the response body).
 */
function viewAction()
{
    if (!$this->validateProblemAccess()) {
        return;
    }

    $request = $this->_request;
    $prob = $this->view->prob;

    $problemFile = get_file_name("data/problems/" . $request->get("probid") . "/index.html");
    $this->view->content_html = file_get_contents($problemFile);

    if (function_exists("tidy_parse_string") && $request->get("tidy") != "false") {
        /* first pass: tidy to XHTML strict */
        $strictOptions = array(
            "output-xhtml" => true,
            "add-xml-decl" => true,
            "bare" => true,
            "clean" => true,
            "quote-ampersand" => true,
            "doctype" => "strict",
        );
        $document = tidy_parse_string($this->view->content_html, $strictOptions);
        tidy_clean_repair($document);
        $this->view->content_html = tidy_get_output($document);

        $this->fixImages();

        /* second pass after fixImages(): slow, but the easy way out */
        $bodyOnlyOptions = array(
            "output-xhtml" => true,
            "doctype" => "strict",
            "show-body-only" => true,
        );
        $document = tidy_parse_string($this->view->content_html, $bodyOnlyOptions);
        tidy_clean_repair($document);
        $this->view->content_html = tidy_get_output($document);
    }

    if ($request->get("plain") == "true") {
        // Bypass layout and view rendering; send the HTML as-is.
        $this->_helper->layout->disableLayout();
        $this->_helper->viewRenderer->setNoRender();
        $this->getResponse()->setBody($this->view->content_html);
    }
}
开发者ID:rajatkhanduja,项目名称:opc,代码行数:26,代码来源:ProblemsController.php
示例18: process
public function process($html, $url, $smart_tidy = true)
{
$this->reset();
// extract host name
$host = @parse_url($url, PHP_URL_HOST);
if (!($this->config = SiteConfig::build($host))) {
// no match, check HTML for fingerprints
if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
$this->config = SiteConfig::build($_fphost);
}
unset($_fphost);
if (!$this->config) {
// no match, so use defaults
$this->config = new SiteConfig();
}
}
//echo count($this->config->body);
// store copy of config in our static cache array in case we need to process another URL
SiteConfig::add_to_cache($host, $this->config);
// do string replacements
foreach ($this->config->replace_string as $_repl) {
$html = str_replace($_repl[0], $_repl[1], $html);
}
unset($_repl);
// use tidy (if it exists)?
// This fixes problems with some sites which would otherwise
// trouble DOMDocument's HTML parsing. (Although sometimes it
// makes matters worse, which is why you can override it in site config files.)
$tidied = false;
if ($this->config->tidy && function_exists('tidy_parse_string') && $smart_tidy) {
$this->debug('Using Tidy');
$tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
if (tidy_clean_repair($tidy)) {
$original_html = $html;
$tidied = true;
$html = $tidy->value;
}
unset($tidy);
}
// load and parse html
$this->readability = new Readability($html, $url);
// we use xpath to find elements in the given HTML document
// see http://en.wikipedia.org/wiki/XPath_1.0
$xpath = new DOMXPath($this->readability->dom);
// try to get title
foreach ($this->config->title as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->debug('Title expression evaluated as string');
$this->title = trim($elems);
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
$this->debug('Title matched');
$this->title = $elems->item(0)->textContent;
// remove title from document
try {
$elems->item(0)->parentNode->removeChild($elems->item(0));
} catch (DOMException $e) {
// do nothing
}
break;
}
}
// try to get author (if it hasn't already been set)
if (empty($this->author)) {
foreach ($this->config->author as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->debug('Author expression evaluated as string');
if (trim($elems) != '') {
$this->author[] = trim($elems);
break;
}
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) {
continue;
}
$this->author[] = trim($elem->textContent);
}
if (!empty($this->author)) {
break;
}
}
}
}
// try to get language
$_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content');
foreach ($_lang_xpath as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
if (trim($elems) != '') {
$this->language = trim($elems);
break;
}
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) {
continue;
}
//.........这里部分代码省略.........
开发者ID:oxmcvusd,项目名称:full-text-rss-1,代码行数:101,代码来源:ContentExtractor.php
示例19: tidyFix
/**
 * Repair $text in place with HTML Tidy.
 *
 * Unless $ignore_config is true, nothing is done (and true is returned) when
 * $config['HTML_Tidy'] is empty or 'off'.
 *
 * @param string $text The html content to be checked. Passed by reference
 * @param bool $ignore_config Run tidy regardless of the configuration
 * @return bool False when tidy_parse_string() is missing or tidy reports
 *              status 2; true otherwise
 */
public static function tidyFix(&$text, $ignore_config = false)
{
    global $config;

    $disabled = empty($config['HTML_Tidy']) || $config['HTML_Tidy'] == 'off';
    if (!$ignore_config && $disabled) {
        return true;
    }

    if (!function_exists('tidy_parse_string')) {
        return false;
    }

    $tidyOptions = array(
        // no wrapping: keep whitespace changes to a minimum
        'wrap' => 0,
        // omit, auto, strict, transitional, user
        'doctype' => 'omit',
        // drop empty paragraphs
        'drop-empty-paras' => true,
        // needed so that <br> becomes <br/> etc.
        'output-xhtml' => true,
        'show-body-only' => true,
        'hide-comments' => false,
    );

    $document = tidy_parse_string($text, $tidyOptions, 'utf8');
    tidy_clean_repair($document);

    // 2 is magic number for fatal error
    // http://www.php.net/manual/en/function.tidy-get-status.php
    $fatal = tidy_get_status($document) === 2;
    if ($fatal) {
        return false;
    }

    $text = tidy_get_output($document);

    return true;
}
开发者ID:Bouhnosaure,项目名称:Typesetter,代码行数:38,代码来源:Editing.php
示例20: tidy_parse_string
<?php
// Minimal example: parse an HTML string, repair it, and print the result.

/* create a tidy document from the HTML string */
$a = tidy_parse_string("<HTML></HTML>");
// repair the parsed document in place
tidy_clean_repair($a);
// fetch the repaired markup as a string
$out = tidy_get_output($a);
// escape the markup and turn newlines into <br /> so it displays as text in a browser
echo nl2br(htmlspecialchars($out));
开发者ID:SandyS1,项目名称:presentations,代码行数:9,代码来源:tidy.php
注：本文中的tidy_clean_repair函数示例整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论