在线时间:8:00-16:00
迪恩网络APP
随时随地掌握行业动态
扫描二维码
关注迪恩网络微信公众号
1.获取页面闭合带id标签数据
View Code
<?php
header("Content-type: text/html; charset=utf-8");

/**
 * Returns the paired HTML element (opening tag through closing tag) whose
 * opening tag is the nearest <$tag at or before the first occurrence of
 * $tag_id in the page.
 *
 * The markup is scanned textually (no DOM parser). Nested elements of the same
 * tag name are balanced with a depth counter so the correct closing tag is
 * picked.
 *
 * @param string       $tag_id Literal text identifying the element, normally an
 *                             attribute such as id="some-id". Located with
 *                             strpos(), so it is case-sensitive.
 * @param string|false $url    URL to download, or false to use $data instead.
 * @param string       $tag    Tag name to extract (default 'div').
 * @param string|false $data   HTML to search; only used when $url is false.
 *
 * @return string|false The element's markup, or false when the page could not
 *                      be read, $tag_id does not occur, no opening tag precedes
 *                      it, or the element is never closed.
 *
 * @example echo getWebTag('id="some-id"', 'http://mail.163.com/html/mail_intro/', 'ul');
 *          NOTE(review): the identifier in the published example was lost;
 *          'id="some-id"' is a placeholder.
 */
function getWebTag($tag_id, $url = false, $tag = 'div', $data = false)
{
    if ($url !== false) {
        $data = file_get_contents($url);
    }
    if (!is_string($data) || $data === '' || !is_string($tag_id) || $tag_id === '') {
        return false;
    }

    // Convert GB2312/GBK pages to UTF-8, using the first charset name found
    // after the word "charset". A failed conversion keeps the original bytes
    // instead of replacing the whole page with false.
    $charsetPos = stripos($data, 'charset');
    if ($charsetPos !== false) {
        foreach (array('utf-8', 'gb2312', 'gbk') as $charset) {
            if (stripos($data, $charset, $charsetPos) === false) {
                continue;
            }
            if ($charset !== 'utf-8') {
                $converted = iconv($charset, 'utf-8', $data);
                if ($converted !== false) {
                    $data = $converted;
                }
            }
            break;
        }
    }

    // strpos() reports a miss with false, never -1.
    $hit = strpos($data, $tag_id);
    if ($hit === false) {
        return false;
    }

    // Collect every opening and closing $tag in document order. The lookahead
    // stops a short name from matching a longer one (e.g. <p vs <pre).
    $pattern = '/<(\/?)' . preg_quote($tag, '/') . '(?![\w:-])/i';
    if (!preg_match_all($pattern, $data, $tags, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        return false;
    }

    // The element that was hit is the last opening tag starting at or before
    // the position of $tag_id.
    $startIndex = -1;
    foreach ($tags as $i => $match) {
        if ($match[0][1] > $hit) {
            break;
        }
        if ($match[1][0] === '') {
            $startIndex = $i;
        }
    }
    if ($startIndex < 0) {
        return false;
    }

    // Walk forward: each nested opening tag goes one level deeper, each closing
    // tag comes back up; a closing tag seen at depth 0 ends the element.
    $startOffset = $tags[$startIndex][0][1];
    $depth = 0;
    for ($i = $startIndex + 1, $total = count($tags); $i < $total; $i++) {
        if ($tags[$i][1][0] === '') {
            $depth++;
            continue;
        }
        if ($depth === 0) {
            $length = $tags[$i][0][1] - $startOffset;
            return substr($data, $startOffset, $length) . '</' . $tag . '>';
        }
        $depth--;
    }

    return false; // the element is never closed
}

// Example usage. The three calls published with this snippet were garbled
// (their arguments were lost), so a representative call with a placeholder
// identifier is shown instead:
// echo getWebTag('id="some-id"', 'http://mail.163.com/html/mail_intro/', 'div');

//End_php
1.1 由(1)改进为获取页面任意标签,参考《颠覆想象的php解析获取跨域HTML标签》
View Code
<?php
header("Content-type: text/html; charset=utf-8");

/**
 * Returns every HTML element of type $tag whose opening tag contains $tag_id.
 *
 * The markup is scanned textually (no DOM parser). For paired tags, nested
 * elements of the same name are balanced with a depth counter. When the page
 * contains no closing </$tag at all (img, input, ...), the tag is treated as
 * a void element and the result is the opening tag up to its first '>'.
 *
 * @param string       $tag_id Literal text that must appear inside the opening
 *                             tag (matched case-insensitively). An empty string
 *                             matches every $tag element.
 * @param string|false $url    URL to download, or false to use $data instead.
 * @param string       $tag    Tag name to extract (default 'div').
 * @param string|false $data   HTML to search; only used when $url is false.
 * @param bool         $first  When true, stop after the first extracted element.
 *
 * @return array|string List of matched element strings, or a fixed
 *                      "no matches" message string when no opening tag
 *                      contains $tag_id.
 *
 * @example
 * var_dump(getWebTag('','http://mail.163.com/html/mail_intro/','ul'));
 */
function getWebTag($tag_id, $url = false, $tag = 'div', $data = false, $first = false)
{
    // Fetch from the URL by default.
    if ($url !== false) {
        $data = file_get_contents($url);
    }
    if (!is_string($data) || $data === '') {
        return '没有匹配项!';
    }

    // Detect the page encoding and convert GB2312/GBK to UTF-8. A failed
    // conversion keeps the original bytes instead of replacing the page with
    // false.
    $charsetPos = stripos($data, 'charset');
    if ($charsetPos !== false) {
        foreach (array('utf-8', 'gb2312', 'gbk') as $charset) {
            if (stripos($data, 'charset=' . $charset, $charsetPos) === false) {
                continue;
            }
            if ($charset !== 'utf-8') {
                $converted = iconv($charset, 'utf-8', $data);
                if ($converted !== false) {
                    $data = $converted;
                }
            }
            break;
        }
    }

    // Both inputs are literal text, so they are quoted before being placed in
    // a pattern. The lookahead stops a short tag name from matching a longer
    // one (e.g. <a vs <abbr).
    $tagPattern = preg_quote((string) $tag, '/') . '(?![\w:-])';
    $idPattern = preg_quote((string) $tag_id, '/');

    // Opening tags that contain $tag_id; each hit offset is the position of
    // the '<' of that opening tag.
    $found = preg_match_all(
        '/<' . $tagPattern . '[^<]*?' . $idPattern . '/i',
        $data,
        $hits,
        PREG_OFFSET_CAPTURE
    );
    if (!$found) { // no hit (or pattern error): return immediately
        return '没有匹配项!';
    }

    // Every opening and closing $tag in document order.
    preg_match_all(
        '/<(\/?)' . $tagPattern . '/i',
        $data,
        $tags,
        PREG_OFFSET_CAPTURE | PREG_SET_ORDER
    );

    // Map each opening tag's offset to its position in $tags, and note
    // whether the page has any closing tag for this name.
    $openIndexByOffset = array();
    $hasClosingTag = false;
    foreach ($tags as $i => $match) {
        if ($match[1][0] === '') {
            $openIndexByOffset[$match[0][1]] = $i;
        } else {
            $hasClosingTag = true;
        }
    }
    $endTag = $hasClosingTag ? '</' . $tag . '>' : false;

    $hitTagStrings = array();
    $total = count($tags);
    foreach ($hits[0] as $hit) {
        $start = $hit[1];

        if ($endTag === false) {
            // Void element: it ends at the first '>' after the opening '<'.
            $close = strpos($data, '>', $start);
            if ($close !== false) {
                $hitTagStrings[] = substr($data, $start, $close + 1 - $start);
            }
        } elseif (isset($openIndexByOffset[$start])) {
            // Paired element: nested opening tags go one level deeper, closing
            // tags come back up; a closing tag seen at depth 0 ends the
            // element. A hit that is never closed is skipped.
            $depth = 0;
            for ($i = $openIndexByOffset[$start] + 1; $i < $total; $i++) {
                if ($tags[$i][1][0] === '') {
                    $depth++;
                    continue;
                }
                if ($depth === 0) {
                    $length = $tags[$i][0][1] - $start;
                    $hitTagStrings[] = substr($data, $start, $length) . $endTag;
                    break;
                }
                $depth--;
            }
        }

        // Stop once the first match has been collected, if requested.
        if ($first && count($hitTagStrings) == 1) {
            break;
        }
    }

    return $hitTagStrings;
}

// Direct usage example. The call published with this snippet was garbled (its
// arguments were lost); the example from the doc comment is shown instead:
// var_dump(getWebTag('', 'http://mail.163.com/html/mail_intro/', 'ul'));

/* // comment out this line to enable the AJAX example below
// AJAX request example; required client settings: dataType:'json', type:'POST'
// WARNING(review): this passes a client-supplied URL straight to
// file_get_contents() and assigns variables by POST key name ($$key);
// validate/whitelist both before exposing this endpoint.
$tag_id = urldecode($_POST['tag_id']);
$url = urldecode($_POST['url']);
$tag = isset($_POST['tag'])? urldecode($_POST['tag']) : 'div';
$data = urldecode($_POST['data']);
$first = (urldecode($_POST['first']) == 'checked')? true : false;
foreach($_POST as $key => $value){
    if($value == 'EmPtYValue') $$key = false;
}
echo json_encode(getWebTag($tag_id,$url,$tag,$data,$first));
//*/

//End_php
2.虚拟POST数据至远程服务器并获取返回数据
View Code
<?php
header("Content-type: text/html; charset=utf-8");

/**
 * Sends form data to a remote server with an HTTP POST and returns the
 * response body.
 *
 * When $post is an array it is sorted by key, URL-encoded and sent as an
 * application/x-www-form-urlencoded body with a 60-second timeout. When $post
 * is not an array, no stream options are set and the URL is fetched with
 * PHP's default request settings.
 *
 * @param string     $url  Target URL.
 * @param array|null $post Form fields to send, or null for a plain request.
 *
 * @return string|false Response body, or false if the request failed.
 *
 * @example
 * $data = array (
 *     'type' => 'text',
 *     'inputValue' => '哈哈'
 * );
 * $result = Post('http://tool.anzhuoxiazai.com:80//servlet/QRServlet', $data);
 * echo str_replace("src='","src='http://tool.anzhuoxiazai.com/",$result);
 */
function Post($url, $post = null)
{
    $context = array();
    if (is_array($post)) {
        ksort($post);
        $context['http'] = array(
            'timeout' => 60,
            'method'  => 'POST',
            // Declare the body type explicitly; without it PHP raises a
            // "Content-type not specified" notice and guesses.
            'header'  => "Content-Type: application/x-www-form-urlencoded\r\n",
            'content' => http_build_query($post),
        );
    }

    return file_get_contents($url, false, stream_context_create($context));
}

$data = array(
    'type' => 'text',
    'inputValue' => '哈哈',
);
$result = Post('http://tool.anzhuoxiazai.com:80//servlet/QRServlet', $data);
// Rewrite relative image sources in the response so they point at the remote host.
echo str_replace("src='", "src='http://tool.anzhuoxiazai.com/", $result);
//End_php
3.文件夹复制
View Code
/**
 * Copies a file or a directory tree from $source to $dest, creating the
 * destination path when it does not exist.
 *
 * A trailing slash on either argument selects the behaviour:
 * - Src:/home/test/file.txt, Dst:/home/test/b   -> /home/test/b
 *   (the file is copied and named "b")
 * - Src:/home/test/file.txt, Dst:/home/test/b/  -> /home/test/b/file.txt
 *   (directory "b" is created if needed and the file is copied into it)
 * - Src:/home/test,  Dst:/home/       -> /home/test/**
 *   (the directory itself and all of its content are copied into $dest)
 * - Src:/home/test/, Dst:/home/       -> /home/**
 *   (only the directory's content is copied into $dest)
 * - Src:/home/test,  Dst:/home/test2  -> /home/test2/**
 * - Src:/home/test/, Dst:/home/test2  -> /home/test2/**
 *   (the directory and its content are copied under the name "test2")
 *
 * @todo
 * - Roll back a partially completed copy when it fails
 * - Make the automatic destination naming optional
 * - Support a callback function
 * - May prevent some issues on shared environments: http://us3.php.net/umask
 *
 * @param string $source  File or folder to copy.
 * @param string $dest    Destination file or folder.
 * @param array  $options 'folderPermission' and 'filePermission' modes; a
 *                        missing key falls back to 0755.
 *
 * @return boolean True only when the source and everything below it was
 *                 copied; false when the source is missing or any step failed.
 */
function smartCopy($source, $dest, $options = array('folderPermission' => 0755, 'filePermission' => 0755))
{
    // Fill in whichever permission the caller left out.
    $options = array_merge(
        array('folderPermission' => 0755, 'filePermission' => 0755),
        is_array($options) ? $options : array()
    );

    $source = (string) $source;
    $dest = (string) $dest;
    if ($source === '' || $dest === '') {
        return false;
    }

    // substr() is safe on any non-empty string, unlike $str[strlen($str) - 1].
    $destHasSlash = substr($dest, -1) === '/';

    if (is_file($source)) {
        if ($destHasSlash) {
            // The destination is a directory: create it and keep the file name.
            if (!is_dir($dest) && !mkdir($dest, $options['folderPermission'], true) && !is_dir($dest)) {
                return false;
            }
            $target = rtrim($dest, '/') . '/' . basename($source);
        } else {
            $target = $dest;
        }

        if (!copy($source, $target)) {
            return false;
        }
        chmod($target, $options['filePermission']);

        return true;
    }

    if (!is_dir($source)) {
        return false;
    }

    $contentsOnly = substr($source, -1) === '/';
    if ($destHasSlash && !$contentsOnly) {
        // Copy the directory itself into $dest under its own name.
        $dest .= basename($source);
    }

    if (!is_dir($dest) && !mkdir($dest, $options['folderPermission'], true) && !is_dir($dest)) {
        return false;
    }
    // mkdir() modes are reduced by the umask, so set the folder mode
    // explicitly, except when only the content is copied into an existing
    // parent directory.
    if (!($destHasSlash && $contentsOnly)) {
        chmod($dest, $options['folderPermission']);
    }

    $dirHandle = opendir($source);
    if ($dirHandle === false) {
        return false;
    }

    $result = true;
    // Compare against false explicitly: an entry named "0" is falsy and would
    // otherwise end the loop early.
    while (false !== ($entry = readdir($dirHandle))) {
        if ($entry === '.' || $entry === '..') {
            continue;
        }
        $copied = smartCopy(
            rtrim($source, '/') . '/' . $entry,
            rtrim($dest, '/') . '/' . $entry,
            $options
        );
        if (!$copied) {
            // Remember the failure but keep copying the remaining entries.
            $result = false;
        }
    }
    closedir($dirHandle);

    return $result;
}
4.文件遍历(可匹配模式)
View Code
1 /* |
2022-08-30
2022-08-17
2022-11-06
2022-07-18
2022-08-18
请发表评论