-
php正则替换处理HTML页面的方法
这篇文章主要介绍了 PHP 正则替换处理 HTML 页面的方法,涉及 PHP 针对 HTML 页面常见元素的匹配技巧,需要的朋友可以参考下。本文以实例讲述 PHP 正则替换处理 HTML 页面的方法,分享给大家供大家参考,具体如下:
- <?php
- if(!defined('BASEPATH')) exit('No direct script access allowed');
/**
 * Rewrites resource references inside an HTML document so that they point at
 * the application's own file-serving URLs.
 *
 * For every reference found, the referenced path is resolved against the
 * directory of the document, looked up through the loaded model
 * (get_id_by_path_and_project) and, when an id is returned, replaced by
 * "<base_url>cdms/<module>/<action>/<id>?pid=<projectid>". References that
 * cannot be resolved are left exactly as they appear in the input.
 *
 * Reference kinds and the regular expression used for each:
 *   1. img src     : '/<img(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'
 *   2. a href      : '/<a(.+?)href=([\'\" ])?(.+?)([ >]+?)/i'
 *   3. iframe src  : '/<iframe(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'
 *   4. frame src   : '/<frame(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'
 *   5. js          : '/window\.open([( ]+?)([\'" ]+?)(.+?)([ )]+?)/i'
 *   6. css         : '/background(.+?)url([( ])([\'" ]+?)(.+?)([ )]+?)/i'
 */
class Myreplace {

    /**
     * CodeIgniter super object. Kept public because it used to be an
     * implicitly public dynamic property.
     */
    public $CI;

    /** Accepted module identifiers mapped to the model loaded for each. */
    private $moudle_models = array(
        'udata'   => 'mupload_data',
        'tdata'   => 'taskdata',
        'tresult' => 'taskresult',
        'dresult' => 'dmsresult',
    );

    /** Directory of the document being processed; relative references resolve against it. */
    private $relative_dirname;

    /** Project id passed to the model lookup and appended to generated URLs. */
    private $projectid;

    /** Lower-cased module identifier of the current my_replace() call. */
    private $moudle;

    public function __construct() {
        $this->CI =& get_instance();
    }

    /**
     * Rewrite all supported references in $content.
     *
     * @param string $content   HTML content
     * @param string $relative  directory of the document, used to resolve relative references
     * @param int    $projectid project id
     * @param string $moudle    module identifier: udata, tdata, tresult or dresult (case-insensitive)
     * @return string the rewritten HTML
     * @throws InvalidArgumentException when $moudle is not a known module identifier
     */
    public function my_replace($content,$relative,$projectid,$moudle) {
        // Normalise once so validation, model selection and URL building all agree.
        $moudle = strtolower((string) $moudle);
        if (!isset($this->moudle_models[$moudle])) {
            throw new InvalidArgumentException('Unknown module identifier: ' . $moudle);
        }

        $this->relative_dirname = (string) $relative;
        $this->projectid = $projectid;
        $this->moudle = $moudle;
        $this->CI->load->model($this->moudle_models[$moudle], 'model');

        // Callback method => pattern, applied in this order.
        $rules = array(
            'image_replace'  => '/<img(.+?)src=([\'\" ])?(.+?)([ >]+?)/i',
            'html_replace'   => '/<a(.+?)href=([\'\" ])?(.+?)([ >]+?)/i',
            'iframe_replace' => '/<iframe(.+?)src=([\'\" ])?(.+?)([ >]+?)/i',
            'frame_replace'  => '/<frame(.+?)src=([\'\" ])?(.+?)([ >]+?)/i',
            'js_replace'     => '/window\.open([( ]+?)([\'" ]+?)(.+?)([ )]+?)/i',
            'css_replace'    => '/background(.+?)url([( ])([\'" ]+?)(.+?)([ )]+?)/i',
        );

        foreach ($rules as $callback => $pattern) {
            $rewritten = preg_replace_callback($pattern, array($this, $callback), $content);
            // preg_replace_callback() returns null on a PCRE error; keep the
            // content produced so far instead of discarding the document.
            if ($rewritten !== null) {
                $content = $rewritten;
            }
        }

        return $content;
    }

    /** Callback for <img src=...>. */
    private function image_replace($matches) {
        return $this->rewrite_tag($matches, 'img', 'src', true);
    }

    /** Callback for <a href=...>. */
    private function html_replace( $matches ) {
        return $this->rewrite_tag($matches, 'a', 'href', false);
    }

    /** Callback for <iframe src=...>. */
    private function iframe_replace( $matches ) {
        return $this->rewrite_tag($matches, 'iframe', 'src', false);
    }

    /** Callback for <frame src=...>. */
    private function frame_replace( $matches ) {
        return $this->rewrite_tag($matches, 'frame', 'src', false);
    }

    /**
     * Callback for window.open(...) calls.
     *
     * Only the first argument is rewritten; any further arguments captured by
     * the pattern are re-emitted unchanged. Calls whose first argument
     * contains '#' are left untouched.
     */
    private function js_replace( $matches ){
        if (!isset($matches[4]) || $matches[3] === '') {
            return $matches[0];
        }

        $arguments = explode(',', $matches[3]);
        $href = rtrim($arguments[0], '\'"\\');
        if (strpos($href, '#') !== false) {
            return $matches[0];
        }

        $url = $this->resolve_url($href, false);
        if ($url === null) {
            return $matches[0];
        }

        // Re-attach every argument after the first, separators included.
        $rest = count($arguments) > 1 ? ',' . implode(',', array_slice($arguments, 1)) : '';

        return 'window.open' . $matches[1] . $matches[2] . $url . $matches[2] . $rest . $matches[4];
    }

    /** Callback for CSS background ... url(...) declarations. */
    private function css_replace( $matches ) {
        if (!isset($matches[5]) || $matches[4] === '') {
            return $matches[0];
        }

        $url = $this->resolve_url(rtrim($matches[4], '\'"/'), true);
        if ($url === null) {
            return $matches[0];
        }

        return 'background' . $matches[1] . 'url' . $matches[2] . $matches[3] . $url . $matches[3] . $matches[5];
    }

    /**
     * Shared implementation of the four tag callbacks.
     *
     * @param array  $matches   match array: [1] text between tag name and attribute,
     *                          [2] opening quote (may be empty), [3] raw reference, [4] terminator
     * @param string $tag       tag name to emit
     * @param string $attribute attribute name to emit
     * @param bool   $is_image  true selects the picture actions; false selects the
     *                          file actions and drops a '#fragment' before the lookup
     * @return string replacement text; the untouched match when nothing can be resolved
     */
    private function rewrite_tag($matches, $tag, $attribute, $is_image) {
        if (!isset($matches[4]) || $matches[3] === '') {
            return $matches[0];
        }

        // The lazy capture includes the closing quote; strip it (and a trailing slash).
        $reference = rtrim($matches[3], '\'"/');
        if (!$is_image) {
            $hash = strrpos($reference, '#');
            if ($hash !== false) {
                $reference = substr($reference, 0, $hash);
            }
        }

        $url = $this->resolve_url($reference, $is_image);
        if ($url === null) {
            // Returning the original text keeps anchors and trailing slashes intact.
            return $matches[0];
        }

        return '<' . $tag . $matches[1] . $attribute . '=' . $matches[2] . $url . $matches[2] . $matches[4];
    }

    /**
     * Map a cleaned reference to its application URL.
     *
     * @param string $reference reference with quotes (and fragment, if any) removed
     * @param bool   $is_image  selects readpic/picfile instead of readfile/txtfile
     * @return string|null the URL, or null when the reference is empty, external
     *                     or unknown to the model
     */
    private function resolve_url($reference, $is_image) {
        if ($reference === '' || $this->is_external($reference)) {
            return null;
        }

        $id = $this->lookup_id($reference);
        if (empty($id)) {
            return null;
        }

        if ($this->moudle == 'dresult') {
            $action = $is_image ? 'readpic' : 'readfile';
        } else {
            $action = $is_image ? 'picfile' : 'txtfile';
        }

        return $this->CI->config->item("base_url") . 'cdms/' . $this->moudle . '/' . $action . '/' . $id . '?pid=' . $this->projectid;
    }

    /**
     * Tell whether a reference starts with a URI scheme (http:, mailto:, ...)
     * or is protocol-relative (//host/...); such references are never looked up.
     */
    private function is_external($reference) {
        return preg_match('#^(?:[a-z][a-z0-9+.\-]*:|//)#i', $reference) === 1;
    }

    /**
     * Resolve a relative reference against the document directory and ask the
     * model for the id stored under that path.
     *
     * Each '../' occurrence removes one trailing segment from the document
     * directory; leading '.' and '/' characters of the reference are dropped.
     *
     * @param string $reference relative reference
     * @return mixed whatever get_id_by_path_and_project() returns
     */
    private function lookup_id($reference) {
        $dirname = $this->relative_dirname;
        $levels = substr_count($reference, '../');
        for ($i = 0; $i < $levels; $i++) {
            $slash = strrpos($dirname, '/');
            $dirname = ($slash === false) ? '' : substr($dirname, 0, $slash);
        }

        $relativepath = rtrim($dirname, '/') . '/' . ltrim($reference, './');

        return $this->CI->model->get_id_by_path_and_project($relativepath, $this->projectid);
    }
}
- /* End of Myreplace.php */
- /* Location: /application/libraries/Myreplace.php */
出处:http://www.phpfensi.com/php/20210528/16006.html
栏目列表
最新更新
nodejs爬虫
Python正则表达式完全指南
爬取豆瓣Top250图书数据
shp 地图文件批量添加字段
爬虫小试牛刀(爬取学校通知公告)
【python基础】函数-初识函数
【python基础】函数-返回值
HTTP请求:requests模块基础使用必知必会
Python初学者友好丨详解参数传递类型
如何有效管理爬虫流量?
SQL SERVER中递归
2个场景实例讲解GaussDB(DWS)基表统计信息估
常用的 SQL Server 关键字及其含义
动手分析SQL Server中的事务中使用的锁
openGauss内核分析:SQL by pass & 经典执行
一招教你如何高效批量导入与更新数据
天天写SQL,这些神奇的特性你知道吗?
openGauss内核分析:执行计划生成
[IM002]Navicat ODBC驱动器管理器 未发现数据
初入Sql Server 之 存储过程的简单使用
这是目前我见过最好的跨域解决方案!
减少回流与重绘
减少回流与重绘
如何使用KrpanoToolJS在浏览器切图
performance.now() 与 Date.now() 对比
一款纯 JS 实现的轻量化图片编辑器
关于开发 VS Code 插件遇到的 workbench.scm.
前端设计模式——观察者模式
前端设计模式——中介者模式
创建型-原型模式