本文整理汇总了 PHP 中 Zend_Search_Lucene_Document_Html 类的典型用法代码示例。如果您正苦于以下问题：PHP Zend_Search_Lucene_Document_Html 类的具体用法？PHP Zend_Search_Lucene_Document_Html 怎么用？PHP Zend_Search_Lucene_Document_Html 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 Zend_Search_Lucene_Document_Html 类的 20 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的 PHP 代码示例。
示例1: _indexate
/**
 * Fetches a page, adds it to the search index and recurses into its links.
 *
 * URLs that do not contain "http://" are prefixed with HTTP_HOST and a single
 * trailing slash is dropped. A URL is processed at most once per run (tracked
 * in $this->_indexedUrl) and only when it contains HTTP_HOST. If the page
 * contains <!--index--> ... <!--/index--> markers, only the text between them
 * is stored in the "content" field; scripts and tags are stripped first.
 *
 * @param string $url Absolute or host-relative URL of the page to index.
 *
 * @return void
 */
protected function _indexate($url)
{
    if (!stristr($url, 'http://')) {
        $url = HTTP_HOST . $url;
    }
    $url = substr($url, -1) == '/' ? substr($url, 0, -1) : $url;

    // Already visited, or not a URL of this host: nothing to do.
    if (in_array($url, $this->_indexedUrl, true) || !stristr($url, HTTP_HOST)) {
        return;
    }

    // Mark as visited before fetching so that link cycles cannot recurse forever.
    array_push($this->_indexedUrl, $url);

    $html = file_get_contents($url);
    if ($html === false) {
        // The page could not be fetched; do not feed `false` to the HTML parser.
        Zend_Registry::get('Logger')->warn('Search index skipped, page could not be fetched: ' . $url);
        return;
    }

    // Silence libxml warnings for sloppy markup, then put the caller's error
    // mode back instead of unconditionally switching it off.
    $previousLibxmlMode = libxml_use_internal_errors(true);
    $doc = Zend_Search_Lucene_Document_Html::loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlMode);

    if (preg_match('/<\\!--index-->(.*)<\\!--\\/index-->/isu', $html, $matches)) {
        $html = $matches[1];
    }
    $html = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $html);
    $html = strip_tags($html);

    $doc->addField(Zend_Search_Lucene_Field::Text('content', $html, 'utf-8'));
    // Overrides the "body" field with an empty, unindexed value.
    $doc->addField(Zend_Search_Lucene_Field::UnIndexed('body', '', 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('url', $url, 'utf-8'));
    $this->_indexHandle->addDocument($doc);
    Zend_Registry::get('Logger')->info('Search index is created: ' . $url, Zend_Log::INFO);

    foreach ($doc->getLinks() as $link) {
        $temp = explode('.', $link);
        $ext = end($temp);
        // Follow links without a dot at all, or with a page-like extension.
        if ($link === $ext || in_array($ext, array('php', 'html', 'txt', 'htm'), true)) {
            $this->_indexate($link);
        }
    }
}
开发者ID:kytvi2p,项目名称:ZettaFramework,代码行数:33,代码来源:CronController.php
示例2: insert
/**
 * Inserts the provided action.
 *
 * NOTE(review): apart from the early return when shouldIndex() is false, this
 * method always throws sfException ("not implemented"). Every statement after
 * the throw is unreachable and only sketches the intended indexing steps.
 *
 * @return mixed null when shouldIndex() is false; the trailing `return $this`
 *               is currently unreachable
 *
 * @throws sfException always, once the shouldIndex() guard has passed
 */
public function insert()
{
// Skip silently when shouldIndex() reports false.
if (!$this->shouldIndex()) {
return;
}
// Hard stop: everything below this line is dead code.
throw new sfException(__CLASS__ . ' not implemented');
// extract() imports the action properties as local variables; $params used
// below is presumably one of them — TODO confirm against getActionProperties().
extract($this->getActionProperties());
$output = $this->executeAction($params);
$content = $output->getContent();
$doc = Zend_Search_Lucene_Document_Html::loadHtml($content);
$doc->addField('sfl_title', $output->getLastTitle(), 2);
$doc->addField('sfl_uri', $this->getUri($params));
$doc->addField('sfl_description', $content);
$doc->addField('sfl_type', 'action');
$categories = $this->getActionCategories();
if (count($categories)) {
foreach ($categories as $category) {
$this->addCategory($category);
}
$doc->addField('sfl_category', implode(', ', $categories));
}
// The serialized category list is stored even when it is empty.
$doc->addField('sfl_categories_cache', serialize($categories));
$guid = $this->getGuid($params);
$this->addDocument($doc, $guid, 'action');
$this->getSearch()->getEventDispatcher()->notify(new sfEvent($this, 'indexer.log', array('Inserted action "%s" of module "%s" to index', $this->getAction(), $this->getModule())));
return $this;
}
开发者ID:rande,项目名称:sfSolrPlugin,代码行数:30,代码来源:sfLuceneActionIndexer.class.php
示例3: highlightMatches
/**
 * Parses the given HTML, lets highlightMatchesDOM() mark the matches in it and
 * returns the resulting markup.
 *
 * @param string $inputHTML HTML source to process.
 *
 * @return string Value of getHTML() on the processed document.
 */
public function highlightMatches($inputHTML)
{
    // Colour counter handed to highlightMatchesDOM() by reference; starts at 0.
    $paletteIndex = 0;
    $htmlDocument = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);

    $this->highlightMatchesDOM($htmlDocument, $paletteIndex);

    return $htmlDocument->getHTML();
}
开发者ID:hackingman,项目名称:TubeX,代码行数:7,代码来源:Query.php
示例4: testHtml
/**
 * Checks highlighting on an inline HTML string and link extraction on the
 * contributing.documentation.html fixture.
 */
public function testHtml()
{
    // Part 1: highlight a word in an in-memory document.
    $inlineDoc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
    $this->assertTrue($inlineDoc instanceof Zend_Search_Lucene_Document_Html);

    $inlineDoc->highlight('document', '#66ffff');
    $highlightedHtml = $inlineDoc->getHTML();
    $referenceHtml = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n<html>\n<head><title>Page title</title></head>\n<body><p><b style=\"color:black;background-color:#66ffff\">Document</b> body.</p></body>\n</html>\n";
    $this->assertEquals($highlightedHtml, $referenceHtml);

    // Part 2: link extraction from a fixture file.
    $fixturePath = dirname(__FILE__) . '/_files/_indexSource/contributing.documentation.html';
    $fileDoc = Zend_Search_Lucene_Document_Html::loadHTMLFile($fixturePath, true);
    $this->assertTrue($fileDoc instanceof Zend_Search_Lucene_Document_Html);

    $headerLinks = array_values($fileDoc->getHeaderLinks());
    $this->assertTrue($headerLinks == array('index.html', 'contributing.html', 'contributing.bugs.html', 'contributing.wishlist.html'));

    $bodyLinks = array_values($fileDoc->getLinks());
    $this->assertTrue($bodyLinks == array('contributing.bugs.html', 'contributing.wishlist.html', 'developers.documentation.html', 'faq.translators-revision-tracking.html', 'index.html', 'contributing.html'));
}
开发者ID:jorgenils,项目名称:zend-framework,代码行数:11,代码来源:DocumentTest.php
示例5: testHtml
/**
 * Checks that highlight() wraps the matched word and that links are extracted
 * from the contributing.documentation.html fixture.
 */
public function testHtml()
{
    // Part 1: the highlighted fragment must appear somewhere in the output.
    $inlineDoc = Zend_Search_Lucene_Document_Html::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
    $this->assertTrue($inlineDoc instanceof Zend_Search_Lucene_Document_Html);

    $inlineDoc->highlight('document', '#66ffff');
    $fragmentOffset = strpos($inlineDoc->getHTML(), "<b style=\"color:black;background-color:#66ffff\">Document</b> body.");
    $this->assertTrue($fragmentOffset !== false);

    // Part 2: link extraction from a fixture file.
    $fixturePath = dirname(__FILE__) . '/_indexSource/_files/contributing.documentation.html';
    $fileDoc = Zend_Search_Lucene_Document_Html::loadHTMLFile($fixturePath, true);
    $this->assertTrue($fileDoc instanceof Zend_Search_Lucene_Document_Html);

    $headerLinks = array_values($fileDoc->getHeaderLinks());
    $this->assertTrue($headerLinks == array('index.html', 'contributing.html', 'contributing.bugs.html', 'contributing.wishlist.html'));

    $bodyLinks = array_values($fileDoc->getLinks());
    $this->assertTrue($bodyLinks == array('contributing.bugs.html', 'contributing.wishlist.html', 'developers.documentation.html', 'faq.translators-revision-tracking.html', 'index.html', 'contributing.html'));
}
开发者ID:jon9872,项目名称:zend-framework,代码行数:11,代码来源:DocumentTest.php
示例6: extract
/**
 * Extracts the body text of an HTML file.
 *
 * Yields an empty string when the file does not exist or when loading it
 * raises an exception.
 *
 * @param String $filename Full filesystem path to the file to process.
 * @return String The document's "body" field, or '' on failure.
 */
public static function extract($filename)
{
    $htmlDocument = null;

    if (file_exists($filename)) {
        try {
            $htmlDocument = Zend_Search_Lucene_Document_Html::loadHTMLFile($filename, true);
        } catch (Exception $loadFailure) {
            // Best effort: an unreadable document is treated as empty.
            $htmlDocument = null;
        }
    }

    return $htmlDocument === null ? '' : $htmlDocument->body;
}
开发者ID:veronico12,项目名称:lucene-silverstripe-plugin,代码行数:18,代码来源:ZendSearchLuceneHtmlExtractor.php
示例7: addurl
/**
 * Demo action: indexes the bundled kohana_home.html example page and then
 * calls index() with a confirmation message.
 */
public function addurl()
{
    // A local file stands in for a real URL in this demo.
    $examplePage = MODPATH . implode(DIRECTORY_SEPARATOR, array('kosearch', 'examples', 'kohana_home.html'));

    // The Search class loads the Zend libraries, so they must be loaded
    // explicitly before Zend_Search_Lucene_Document_Html is first used.
    Search::instance()->load_search_libs();

    $pageDocument = Zend_Search_Lucene_Document_Html::loadHTMLFile($examplePage, true, 'utf-8');
    Search::instance()->addDocument($pageDocument);

    $this->index('Kohana page successfully added ↓ <a href="#form2" title="scroll down">scroll down</a> ↓');
}
开发者ID:ascseb,项目名称:kosearch,代码行数:12,代码来源:search_example.php
示例8: testHtmlNoFollowLinks
/**
 * Verifies that rel="nofollow" links are reported by getLinks() only while
 * the "exclude nofollow links" switch is off.
 *
 * The switch is static state on Zend_Search_Lucene_Document_Html, so its
 * original value is put back afterwards, including when an assertion fails,
 * to keep this test from influencing others.
 */
public function testHtmlNoFollowLinks()
{
    $html = '<HTML>'
        . '<HEAD><TITLE>Page title</TITLE></HEAD>'
        . '<BODY>'
        . 'Document body.'
        . '<a href="link1.html">Link 1</a>.'
        . '<a href="link2.html" rel="nofollow">Link 1</a>.'
        . '</BODY>'
        . '</HTML>';

    $oldNoFollowValue = Zend_Search_Lucene_Document_Html::getExcludeNoFollowLinks();

    // try/catch + rethrow instead of `finally` keeps this runnable on old PHP 5.
    try {
        Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks(false);
        $doc1 = Zend_Search_Lucene_Document_Html::loadHTML($html);
        $this->assertTrue($doc1 instanceof Zend_Search_Lucene_Document_Html);
        $this->assertTrue(array_values($doc1->getLinks()) == array('link1.html', 'link2.html'));

        Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks(true);
        $doc2 = Zend_Search_Lucene_Document_Html::loadHTML($html);
        $this->assertTrue($doc2 instanceof Zend_Search_Lucene_Document_Html);
        $this->assertTrue(array_values($doc2->getLinks()) == array('link1.html'));
    } catch (Exception $e) {
        Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks($oldNoFollowValue);
        throw $e;
    }

    Zend_Search_Lucene_Document_Html::setExcludeNoFollowLinks($oldNoFollowValue);
}
开发者ID:lortnus,项目名称:zf1,代码行数:13,代码来源:DocumentTest.php
示例9: indexFile
/**
 * index a file
 *
 * Builds a Lucene document for the file (parsed as HTML when its mimetype is
 * text/html, an empty document otherwise), adds the pk, path, size and
 * mimetype fields and passes it to Lucene::updateFile().
 *
 * @author Jörn Dreyer <[email protected]>
 *
 * @param string      $path the path of the file
 * @param string|null $user owner of the file; when null the current
 *                          filesystem view and \OCP\User::getUser() are used
 *
 * @return bool true when the document was handed to the index, false when the
 *              path is invalid or empty, no view is available, or the file
 *              info carries no mimetype
 */
public static function indexFile($path = '', $user = null)
{
    if (!Filesystem::isValidPath($path)) {
        // Was a bare `return;` (null); the documented contract is bool.
        return false;
    }
    if ($path === '') {
        //ignore the empty path element
        return false;
    }

    if (is_null($user)) {
        $view = Filesystem::getView();
        $user = \OCP\User::getUser();
    } else {
        $view = new \OC\Files\View('/' . $user . '/files');
    }
    if (!$view) {
        Util::writeLog('search_lucene', 'could not resolve filesystem view', Util::WARN);
        return false;
    }

    $root = $view->getRoot();
    $pk = md5($root . $path);

    // the cache already knows mime and other basic stuff
    $data = $view->getFileInfo($path);
    if (!isset($data['mimetype'])) {
        Util::writeLog('search_lucene', 'need mimetype for content extraction', Util::ERROR);
        return false;
    }
    $mimetype = $data['mimetype'];

    if ('text/html' === $mimetype) {
        $doc = \Zend_Search_Lucene_Document_Html::loadHTML($view->file_get_contents($path));
    } else {
        // No special treatment for other types yet, application/msword included.
        // FIXME Zend_Search_Lucene_Document_Docx uses ZipArchive ... make compatible with OC\Files\Filesystem
        $doc = new \Zend_Search_Lucene_Document();
    }

    // store fscacheid as unique id to lookup by when deleting
    $doc->addField(\Zend_Search_Lucene_Field::Keyword('pk', $pk));
    // Store document URL to identify it in the search results
    $doc->addField(\Zend_Search_Lucene_Field::Text('path', $path));
    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('size', $data['size']));
    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('mimetype', $mimetype));

    self::extractMetadata($doc, $path, $view, $mimetype);
    Lucene::updateFile($doc, $path, $user);

    return true;
}
开发者ID:CDN-Sparks,项目名称:owncloud,代码行数:60,代码来源:indexer.php
示例10: indexWebsite
/**
 * Rebuilds the Lucene index of a website from the content of all its pages.
 *
 * An existing index is opened and all of its non-deleted documents are
 * deleted; otherwise a new index is created. Nothing is touched when the
 * website has no pages.
 *
 * @param string $websiteId
 * @return string Value of getIndexFileForWebsite() for this website.
 * @throws CmsException (code 602) when the website does not exist
 */
public function indexWebsite($websiteId)
{
    $websites = new Website('Website');
    if (!$websites->existsWebsiteAlready($websiteId)) {
        throw new CmsException('602', __METHOD__, __LINE__);
    }

    // The business layer has to be used for rendering.
    $renderBusiness = new BusinessRender('Render');
    $modulService = new Modul('Modul');
    $pages = new Page('Page');

    $pageIds = $pages->getIdsByWebsiteId($websiteId);
    $indexPath = $this->getIndexFileForWebsite($websiteId);

    if (!is_array($pageIds) || count($pageIds) === 0) {
        return $indexPath;
    }

    if (file_exists($indexPath)) {
        // Reuse the existing index, but drop every document it still holds.
        $index = \Zend_Search_Lucene::open($indexPath);
        $documentCount = $index->numDocs();
        $docId = 0;
        while ($docId < $documentCount) {
            if (!$index->isDeleted($docId)) {
                $index->delete($docId);
            }
            ++$docId;
        }
    } else {
        $index = \Zend_Search_Lucene::create($indexPath);
    }

    foreach ($pageIds as $currentPageId) {
        $pageHtml = $this->getPageContent($websiteId, $currentPageId);
        // Second loadHTML() argument: pass true only when content storing is enabled.
        $storeContent = $this->isStoreContentEnabled() ? true : false;
        $document = \Zend_Search_Lucene_Document_Html::loadHTML($pageHtml, $storeContent, 'UTF-8');
        $document->addField(\Zend_Search_Lucene_Field::unIndexed('md5', md5($pageHtml)));
        $document->addField(\Zend_Search_Lucene_Field::unIndexed('pageId', $currentPageId));
        $index->addDocument($document);
    }

    $index->commit();
    $index->optimize();
    unset($index);

    return $indexPath;
}
开发者ID:rukzuk,项目名称:rukzuk,代码行数:45,代码来源:Indexer.php
示例11: highlightMatchesDOM
/**
 * Highlight query terms
 *
 * Turns the query pattern into an anchored regular expression ("?" becomes
 * ".", "*" becomes ".*"), collects every token of the document's "body" field
 * that matches it and highlights those words.
 *
 * @param Zend_Search_Lucene_Document_Html $doc
 * @param integer &$colorIndex
 */
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    /** @todo implementation */
    $quotedPattern = preg_quote($this->_pattern->text, '/');
    $wildcardRegex = str_replace(array('\\?', '\\*'), array('.', '.*'), $quotedPattern);
    $tokenRegex = '/^' . $wildcardRegex . '$/';

    // Append the Unicode modifier when PCRE was built with Unicode support.
    if (@preg_match('/\\pL/u', 'a') == 1) {
        $tokenRegex .= 'u';
    }

    $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $matchedWords = array();
    foreach ($analyzer->tokenize($doc->getFieldUtf8Value('body'), 'UTF-8') as $bodyToken) {
        $termText = $bodyToken->getTermText();
        if (preg_match($tokenRegex, $termText) === 1) {
            $matchedWords[] = $termText;
        }
    }

    $doc->highlight($matchedWords, $this->_getHighlightColor($colorIndex));
}
开发者ID:ismaelmelus,项目名称:home,代码行数:24,代码来源:Range.php
示例12: testLoadHtmlWithAttributesInTagHTML
/**
 * The title must still be extracted when the <HTML> tag carries attributes.
 *
 * @group ZF-10686
 */
public function testLoadHtmlWithAttributesInTagHTML()
{
    $markup = '<HTML lang="en_US"><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>';
    $parsed = Zend_Search_Lucene_Document_Html::loadHTML($markup);

    $this->assertEquals('Page title ', $parsed->title);
}
开发者ID:SustainableCoastlines,项目名称:loveyourwater,代码行数:8,代码来源:DocumentTest.php
示例13: indexFile
/**
* index a file
*
* @author Jörn Dreyer <[email protected]>
*
* @param string $path the path of the file
*
* @return bool
*/
public static function indexFile($path = '', $user = null)
{
if (!Filesystem::isValidPath($path)) {
return;
}
if ($path === '') {
//ignore the empty path element
return false;
}
if (is_null($user)) {
$view = Filesystem::getView();
$user = \OCP\User::getUser();
} else {
$view = new \OC\Files\View('/' . $user . '/files');
}
if (!$view) {
Util::writeLog('search_lucene', 'could not resolve filesystem view', Util::WARN);
return false;
}
if (!$view->file_exists($path)) {
Util::writeLog('search_lucene', 'file vanished, ignoring', Util::DEBUG);
return true;
}
$root = $view->getRoot();
$pk = md5($root . $path);
// the cache already knows mime and other basic stuff
$data = $view->getFileInfo($path);
if (isset($data['mimetype'])) {
$mimeType = $data['mimetype'];
// initialize plain lucene document
$doc = new \Zend_Search_Lucene_Document();
// index content for local files only
$localFile = $view->getLocalFile($path);
if ($localFile) {
//try to use special lucene document types
if ('text/plain' === $mimeType) {
$body = $view->file_get_contents($path);
if ($body != '') {
$doc->addField(\Zend_Search_Lucene_Field::UnStored('body', $body));
}
} else {
if ('text/html' === $mimeType) {
//TODO could be indexed, even if not local
$doc = \Zend_Search_Lucene_Document_Html::loadHTML($view->file_get_contents($path));
} else {
if ('application/pdf' === $mimeType) {
$doc = Pdf::loadPdf($view->file_get_contents($path));
// commented the mimetype checks, as the zend classes only understand docx and not doc files.
// FIXME distinguish doc and docx, xls and xlsx, ppt and pptx, in oc core mimetype helper ...
//} else if ('application/msword' === $mimeType) {
} else {
if (strtolower(substr($data['name'], -5)) === '.docx') {
$doc = \Zend_Search_Lucene_Document_Docx::loadDocxFile($localFile);
//} else if ('application/msexcel' === $mimeType) {
} else {
if (strtolower(substr($data['name'], -5)) === '.xlsx') {
$doc = \Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($localFile);
//} else if ('application/mspowerpoint' === $mimeType) {
} else {
if (strtolower(substr($data['name'], -5)) === '.pptx') {
$doc = \Zend_Search_Lucene_Document_Pptx::loadPptxFile($localFile);
} else {
if (strtolower(substr($data['name'], -4)) === '.odt') {
$doc = Odt::loadOdtFile($localFile);
} else {
if (strtolower(substr($data['name'], -4)) === '.ods') {
$doc = Ods::loadOdsFile($localFile);
}
}
}
}
}
}
}
}
}
// Store filecache id as unique id to lookup by when deleting
$doc->addField(\Zend_Search_Lucene_Field::Keyword('pk', $pk));
// Store filename
$doc->addField(\Zend_Search_Lucene_Field::Text('filename', $data['name'], 'UTF-8'));
// Store document path to identify it in the search results
$doc->addField(\Zend_Search_Lucene_Field::Text('path', $path, 'UTF-8'));
$doc->addField(\Zend_Search_Lucene_Field::unIndexed('size', $data['size']));
$doc->addField(\Zend_Search_Lucene_Field::unIndexed('mimetype', $mimeType));
//self::extractMetadata($doc, $path, $view, $mimeType);
Lucene::updateFile($doc, $path, $user);
return true;
} else {
Util::writeLog('search_lucene', 'need mimetype for content extraction', Util::ERROR);
return false;
}
//.........这里部分代码省略.........
开发者ID:omusico,项目名称:isle-web-framework,代码行数:101,代码来源:indexer.php
示例14: _extractText
private function _extractText($guid, $systemName, $fileName, $mimeType, $lang = 'id')
{
$query = "SELECT * FROM KutuRelatedItem where itemGuid='{$guid}' AND relateAs='RELATED_FILE'";
$results = $this->getDbHandler($lang)->query($query);
$rowset = $results->fetchAll(PDO::FETCH_OBJ);
if (count($rowset)) {
$row = $rowset[0];
$parentCatalogGuid = $row->relatedGuid;
if (!empty($systemName)) {
$fileName = $systemName;
}
$sDir1 = ROOT_DIR . DIRECTORY_SEPARATOR . 'uploads' . DIRECTORY_SEPARATOR . 'files' . DIRECTORY_SEPARATOR . $fileName;
$sDir2 = ROOT_DIR . DIRECTORY_SEPARATOR . 'uploads' . DIRECTORY_SEPARATOR . 'files' . DIRECTORY_SEPARATOR . $parentCatalogGuid . DIRECTORY_SEPARATOR . $fileName;
$sDir = '';
if (file_exists($sDir1)) {
$sDir = $sDir1;
} else {
if (file_exists($sDir2)) {
$sDir = $sDir2;
}
}
if (!empty($sDir)) {
$outpath = $sDir . '.txt';
switch ($mimeType) {
case 'application/pdf':
//$ch = curl_init('http://175.103.48.153:8983/solr/corehol/update/extract?literal.id='.$guid.'&literal.name=content&commit=true');
/*$ch = curl_init('http://175.103.48.153:8983/solr/corehol/update/extract?literal.id='.$guid.'&fmap.content=content&commit=true');
curl_setopt ($ch, CURLOPT_POSTFIELDS, array('myfile'=>'@'.$sDir));
curl_setopt ($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-type:multipart/form-data'));
$result = curl_exec ($ch);*/
/*$mapping_array = [
"literal.id" => "$guid",
"fmap.content" => "content",
"commit" => "true"
];
$ch = curl_init();
$solr_extraction_endpoint = "http://192.168.0.61:8983/solr/corehol/update/extract";
curl_setopt($ch, CURLOPT_POST, TRUE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_URL, ($solr_extraction_endpoint . '?' . http_build_query($mapping_array,'','&')));
$cfile = curl_file_create($sDir);
curl_setopt($ch, CURLOPT_POSTFIELDS, array('myfile' => $cfile));
if(!curl_exec($ch) == TRUE)
{
throw new Exception('Curl Error:' . curl_error($ch));
echo "<br/>Curl Error:<br/>" . curl_error($ch);
}
curl_close($ch);
die;*/
//curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type:multipart/form-data'));
/*$cfile = $this->getCurlValue($sDir,'multipart/form-data',$fileName);
$data = array('file' => $cfile);
$ch = curl_init();
$options = array(CURLOPT_URL => ($solr_extraction_endpoint . '?' . http_build_query($mapping_array,'','&')),
CURLOPT_RETURNTRANSFER => true,
CURLINFO_HEADER_OUT => true, //Request header
CURLOPT_HEADER => true, //Return header
CURLOPT_SSL_VERIFYPEER => false, //Don't veryify server certificate
CURLOPT_POST => true,
CURLOPT_POSTFIELDS => $data
);
curl_setopt_array($ch, $options);
$result = curl_exec($ch);
$header_info = curl_getinfo($ch,CURLINFO_HEADER_OUT);
$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$header = substr($result, 0, $header_size);
$body = substr($result, $header_size);
curl_close($ch);*/
//system('curl "http://192.168.0.61:8983/solr/corehol/update/extract?literal.id="'.$guid.'"&fmap.content=content&commit=true" -F "myfile=@"'.$sDir);
//system('curl "'.($solr_extraction_endpoint . '?' . http_build_query($mapping_array,'','&')).'" -F "myfile=@"'.$sDir);
$pdfExtractor = $this->_pdfExtractor;
system("{$pdfExtractor} " . $sDir . ' ' . $outpath, $ret);
if ($ret == 0) {
$value = file_get_contents($outpath);
unlink($outpath);
echo 'content PDF: ' . $sDir . ' ' . strlen($value) . "\n";
if (strlen($value) > 20) {
return (new Pandamp_Utility_Posts())->sanitize_post_content($value);
} else {
echo "content file kosong\n";
return '';
}
}
if ($ret == 127) {
print "Could not find pdftotext tool.\n";
}
return '';
if ($ret == 1) {
print "Could not find pdf file.\n";
}
return '';
break;
case 'text/html':
case 'text/plain':
$docHtml = Zend_Search_Lucene_Document_Html::loadHTMLFile($sDir);
//.........这里部分代码省略.........
开发者ID:hukumonline,项目名称:admin,代码行数:101,代码来源:SolrController.php
示例15: htmlFragmentHighlightMatches
/**
 * Highlight matches in $inputHtmlFragment and return it (without HTML header and body tag)
 *
 * The fragment is converted to UTF-8 and wrapped in a minimal HTML document
 * before it is parsed; only the body part of the result is returned.
 *
 * @param string $inputHtmlFragment
 * @param string $encoding Input HTML string encoding
 * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
 * @return string
 */
public function htmlFragmentHighlightMatches($inputHtmlFragment, $encoding = 'UTF-8', $highlighter = null)
{
    // Fall back to the default highlighter when none was supplied.
    if (null === $highlighter) {
        $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
    }

    $utf8Fragment = iconv($encoding, 'UTF-8//IGNORE', $inputHtmlFragment);
    $wrappedHtml = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
        . $utf8Fragment
        . '</body></html>';

    $fragmentDoc = Zend_Search_Lucene_Document_Html::loadHTML($wrappedHtml);
    $highlighter->setDocument($fragmentDoc);
    $this->_highlightMatches($highlighter);

    return $fragmentDoc->getHtmlBody();
}
开发者ID:netvlies,项目名称:zf,代码行数:22,代码来源:Query.php
示例16: highlightMatchesDOM
/**
 * Highlight query terms
 *
 * Highlights the text of this query's term in the document, using the colour
 * returned by _getHighlightColor() for the given index.
 *
 * @param Zend_Search_Lucene_Document_Html $doc
 * @param integer &$colorIndex
 */
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    $termText = $this->_term->text;
    $highlightColor = $this->_getHighlightColor($colorIndex);

    $doc->highlight($termText, $highlightColor);
}
开发者ID:jorgenils,项目名称:zend-framework,代码行数:10,代码来源:Term.php
示例17: _getHTMLResponse
/**
 * Returns the body of a response as HTML, passed through
 * Zend_Search_Lucene_Document_Html, when the response status is 200.
 *
 * @access private
 * @param Zend_Response $response
 * @return String|false The document's HTML, or false for any other status.
 *
 */
private function _getHTMLResponse($response)
{
    if (200 !== $response->getStatus()) {
        return false;
    }

    $parsedBody = Zend_Search_Lucene_Document_Html::loadHTML($response->getBody());

    return $parsedBody->getHTML();
}
开发者ID:baphled,项目名称:zend_phpunit_fixtures,代码行数:16,代码来源:DynamicDB.php
示例18: onIndexContent
/**
 * Indexes an article.
 *
 * When the article's fulltext (or, failing that, its introtext) starts with
 * "<?xml", the XML is transformed with the jucene.xsl stylesheet and an
 * onIndexPmml event is triggered for every AssociationRule element of the
 * result. Otherwise the fulltext is indexed as an HTML document.
 *
 * @param object $article Article with id, fulltext and introtext properties.
 * @param bool   $isNew   When false, the existing index entry for the
 *                        article's id is removed first.
 *
 * @return void
 */
function onIndexContent($article, $isNew = false)
{
    //FIXME move the content type tests and following transformations to the helper
    $pk = $article->id;
    if (!$isNew) {
        JuceneHelper::removeFromIndex('pk:' . $pk);
    }
    $index = JuceneHelper::getIndex();

    // Prefer the fulltext when it is XML, otherwise fall back to the introtext.
    $xml_field = substr($article->fulltext, 0, 5) != '<?xml' ? $article->introtext : $article->fulltext;

    if (substr($xml_field, 0, 5) == '<?xml') {
        $dom = new DOMDocument();
        $xslt = new DOMDocument();
        $error = false;

        //load xslt stylesheet
        if (!@$xslt->load(JPATH_SITE . DS . 'administrator' . DS . 'components' . DS . 'com_jucene' . DS . 'xslt/jucene.xsl')) {
            $error = true;
            $this->raiseMessage("XSLTLOADERROR", 'error');
        }
        $proc = new XSLTProcessor();
        if (!$proc->importStylesheet($xslt)) {
            $error = true;
            $this->raiseMessage("XSLTIMPORTERROR", 'error');
        }

        // The former unset($artcile->fulltext) / unset($record->introtext) calls
        // targeted undefined variables and never had any effect; they were
        // dropped rather than "fixed" so the caller's $article stays untouched.

        if ($dom->loadXML($xml_field) && !$error) {
            //simplify the document - prepare it for the indexation process
            $xslOutput = $proc->transformToXml($dom);

            //create new DOM document to preserve output and transform the XML to the indexable one
            $transXml = new DOMDocument();
            // NOTE(review): the real DOMDocument property is spelled
            // preserveWhiteSpace; this assignment only creates a dynamic
            // property and does not change parsing. Left as is because
            // correcting it would alter the nodes handed to onIndexPmml.
            $transXml->preserveWhitespace = false;
            @$transXml->loadXML($xslOutput);

            //unset unnecessary variables
            unset($xslOutput);
            unset($dom);
            unset($xslt);

            //index every assoc rule as document with same credentials
            $rules = $transXml->getElementsByTagName("AssociationRule");
            if ($rules->length == 0) {
                $error = true;
                $this->raiseMessage('XMLDOCUMENTNORULES', 'error');
            }

            $rule_doc_position = 0;
            foreach ($rules as $rule) {
                $additional['rating'] = 0;
                $additional['position'] = $rule_doc_position;
                JPluginHelper::importPlugin('content');
                $dispatcher =& JDispatcher::getInstance();
                $dispatcher->trigger('onIndexPmml', array($rule, $additional));
                $rule_doc_position++;
            }
        }
    } else {
        // The encoding must be the string 'UTF-8'; the former bare `UTF - 8`
        // was an arithmetic expression on an undefined constant.
        $zendDoc = Zend_Search_Lucene_Document_Html::loadHTML($article->fulltext, false, 'UTF-8');
        $index->addDocument($zendDoc);
    }
}
开发者ID:KIZI,项目名称:sewebar-cms,代码行数:67,代码来源:contindexpmml.php
示例19: createIndexedDocument
/**
 * Builds the Lucene document for a node and adds it to the index.
 *
 * Depending on the file extension and the content-parsing options, the
 * document is created by the HTML, Docx, Pptx or Xlsx loader, or as a plain
 * document. Node identity, size, mime and metadata fields are then added.
 * When the node has user-scoped indexable meta keys and a user is logged in,
 * a second, user-scoped document is added to the index as well.
 *
 * @param AJXP_Node $ajxpNode
 * @param Zend_Search_Lucene_Interface $index
 * @throws Exception when the document loader returns nothing
 * @return Zend_Search_Lucene_Document The shared-scope document that was added.
 */
public function createIndexedDocument($ajxpNode, &$index)
{
    if (!empty($this->metaFields)) {
        $ajxpNode->loadNodeInfo(false, false, "all");
    } else {
        $ajxpNode->loadNodeInfo();
    }

    $ext = strtolower(pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION));
    $parseContent = $this->indexContent;
    if ($parseContent && $ajxpNode->bytesize > $this->getFilteredOption("PARSE_CONTENT_MAX_SIZE")) {
        // Too large to parse: index the node without its content.
        $parseContent = false;
    }

    if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_HTML")))) {
        $doc = @Zend_Search_Lucene_Document_Html::loadHTMLFile($ajxpNode->getUrl());
    } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Docx")) {
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $doc = @Zend_Search_Lucene_Document_Docx::loadDocxFile($realFile);
    } elseif ($parseContent && $ext == "pptx" && class_exists("Zend_Search_Lucene_Document_Pptx")) {
        // This branch used to test for "docx" (copy-paste slip), so .pptx files
        // never reached the Pptx loader.
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $doc = @Zend_Search_Lucene_Document_Pptx::loadPptxFile($realFile);
    } elseif ($parseContent && $ext == "xlsx" && class_exists("Zend_Search_Lucene_Document_Xlsx")) {
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $doc = @Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($realFile);
    } else {
        $doc = new Zend_Search_Lucene_Document();
    }
    if ($doc == null) {
        throw new Exception("Could not load document");
    }

    // The encoding belongs to the field factory. It was previously passed as a
    // second argument to addField(), which ignores it, so these fields were
    // created without an encoding, unlike the user-scoped document below.
    $encoding = SystemTextEncoding::getEncoding();

    $doc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Text("basename", basename($ajxpNode->getPath()), $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_node", "yes", $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "shared"));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_modiftime", date("Ymd", $ajxpNode->ajxp_modiftime)));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_bytesize", $ajxpNode->bytesize));

    // Fall back to the file extension when the node carries no mime value.
    $ajxpMime = $ajxpNode->ajxp_mime;
    if (empty($ajxpMime)) {
        $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION)));
    } else {
        $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", $ajxpNode->ajxp_mime));
    }

    // Store a cached copy of the metadata
    $serializedMeta = base64_encode(serialize($ajxpNode->metadata));
    $doc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));

    if (isset($ajxpNode->indexableMetaKeys["shared"])) {
        foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) {
            if ($ajxpNode->{$sharedField}) {
                $doc->addField(Zend_Search_Lucene_Field::Keyword($sharedField, $ajxpNode->{$sharedField}));
            }
        }
    }
    foreach ($this->metaFields as $field) {
        if ($ajxpNode->{$field} != null) {
            $doc->addField(Zend_Search_Lucene_Field::Text("ajxp_meta_{$field}", $ajxpNode->{$field}, $encoding));
        }
    }

    // User-scoped metadata goes into a separate document tagged with the user id.
    if (isset($ajxpNode->indexableMetaKeys["user"]) && count($ajxpNode->indexableMetaKeys["user"]) && AuthService::usersEnabled() && AuthService::getLoggedUser() != null) {
        $privateDoc = new Zend_Search_Lucene_Document();
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "user"));
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_user", AuthService::getLoggedUser()->getId()));
        foreach ($ajxpNode->indexableMetaKeys["user"] as $userField) {
            if ($ajxpNode->{$userField}) {
                $privateDoc->addField(Zend_Search_Lucene_Field::Keyword($userField, $ajxpNode->{$userField}));
            }
        }
        $privateDoc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
        $index->addDocument($privateDoc);
    }

    if ($parseContent) {
        $body = $this->extractIndexableContent($ajxpNode);
        if (!empty($body)) {
            $doc->addField(Zend_Search_Lucene_Field::unStored("body", $body));
        }
    }

    $index->addDocument($doc);

    return $doc;
}
开发者ID:thermalpaste,项目名称:pydio-core,代码行数:86,代码来源:class.AjxpLuceneIndexer.php
示例20: rebuild_search_indexes
function rebuild_search_indexes()
{
global $success_msg;
global $error_msg;
global $warning_msg;
global $all_settings;
$index_folder = get_setting('search_indexes_folder', $all_settings);
try {
$index = new Zend_Search_Lucene($index_folder, true);
setlocale(LC_CTYPE, 'en_US');
foreach (get_all_html_files(dirname(__FILE__)) as $html_file => $html_url) {
if (can_index_html_file($html_file)) {
$file_content = file_get_contents($html_file);
$file_content = '<html>' . strstr($file_content, '<head');
$doc = Zend_Search_Lucene_Document_Html::loadHTML($file_content);
$doc->addField(Zend_Search_Lucene_Field::Text('url', $html_url, 'UTF-8'));
$index->addDocument($doc);
flush();
}
}
$broken_urls = array();
foreach (get_dynamic_urls(get_setting('search_dynamic_pages', $all_settings)) as $url) {
$headers = get_headers($url);
if (strrpos($headers[0], '200')) {
$content = file_get_contents($url);
$content = '<html>' . strstr($content, '<head');
$doc = Zend_Search_Lucene_Document_Html::loadHTML($content);
$doc->addField(Zend_Search_Lucene_Field::Text('url', $url, 'UTF-8'));
$index
|
请发表评论