-
php解析mht文件转换成html的实例
php解析mht文件:使用编辑器打开mht文件可以看到base64编码的内容,所以mht是可以转换成html的。
<?php
/**
 * Parser for files in MHT (MHTML) format.
 * Usage example:
 *
 * function mhtmlParseBody($filename) {
 *     if (file_exists ( $filename )) {
 *         if (is_dir ( $filename )) return false;
 *         $filename = strtolower ( $filename );
 *         if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;
 *         $o_mhtml = new mhtparse ();
 *         $o_mhtml->set_file ( $filename );
 *         $o_mhtml->extract ();
 *         return $o_mhtml->get_part_to_file(0);
 *     }
 *     return null;
 * }
 * function mhtmlParseAll($filename) {
 *     if (file_exists ( $filename )) {
 *         if (is_dir ( $filename )) return false;
 *         $filename = strtolower ( $filename );
 *         if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;
 *         $o_mhtml = new mhtparse ();
 *         $o_mhtml->set_file ( $filename );
 *         $o_mhtml->extract ();
 *         return $o_mhtml->get_all_part_file();
 *     }
 *     return null;
 * }
 */
/**
 * Splits an MHT (MHTML, MIME multipart) file into its parts and decodes them.
 *
 * Typical flow: set_file() -> extract() -> get_part_to_file($i) or
 * get_all_part_file(). After extract(), $filedata holds one raw string per
 * MIME part (part headers included) and $countparts holds their number.
 */
class mhtparse
{
    /** @var string Path of the MHT file to read. */
    public $file = '';

    /** @var string Part delimiter ("--" followed by the declared boundary value), '' if none was found. */
    public $boundary = '';

    /** @var string|array Raw file contents before file_parts(); list of raw parts afterwards. */
    public $filedata = '';

    /** @var int Number of entries in $filedata after file_parts(). */
    public $countparts = 1;

    /** @var string Accumulated diagnostic messages. */
    public $log = '';

    /**
     * Reads the configured file and splits it into parts.
     *
     * @return int 1 on success, 0 when the file could not be read (see get_log()).
     */
    public function extract()
    {
        if (!$this->read_filedata()) {
            return 0;
        }
        $this->file_parts();

        return 1;
    }

    /**
     * Sets the path of the file to parse.
     *
     * @param string $p File path.
     */
    public function set_file($p)
    {
        $this->file = $p;
    }

    /**
     * Returns the diagnostic messages collected so far.
     *
     * @return string
     */
    public function get_log()
    {
        return $this->log;
    }

    /**
     * Finds the multipart boundary and splits $filedata into raw parts.
     *
     * Only the first 8192 bytes are scanned for the boundary declaration.
     * Both quoted (boundary="x") and unquoted (boundary=x) values are
     * accepted, matched case-insensitively. When no boundary is found the
     * whole file becomes the single part.
     */
    public function file_parts()
    {
        // Already split (or nothing was read): there is no raw string to work on.
        if (!is_string($this->filedata)) {
            return;
        }

        // Reset so a boundary from a previously parsed file cannot leak in.
        $this->boundary = '';

        $head_lines = explode("\n", substr($this->filedata, 0, 8192));
        foreach ($head_lines as $line) {
            $line = trim($line);
            if ($line === ''
                || !preg_match('/boundary\s*=\s*(?:"([^"]*)"|([^\s;"]+))/i', $line, $m)
            ) {
                continue;
            }
            $value = $m[1] !== '' ? $m[1] : (isset($m[2]) ? $m[2] : '');
            if ($value === '') {
                continue;
            }
            $this->boundary = '--' . $value;
            // Drop the declaration line so it can never be mistaken for a delimiter.
            $this->filedata = str_replace($line, '', $this->filedata);
            break;
        }

        if ($this->boundary !== '') {
            $parts = explode($this->boundary, $this->filedata);
            // Everything before the first delimiter is the top-level header block.
            unset($parts[0]);
            $this->filedata = array_values($parts);
            $this->countparts = count($this->filedata);
        } else {
            $this->filedata = array($this->filedata);
            $this->countparts = 1;
        }
    }

    /**
     * Returns the raw parts produced by file_parts().
     *
     * @return string|array
     */
    public function get_all_part_file()
    {
        return $this->filedata;
    }

    /**
     * Decodes the body of part number $i.
     *
     * The part's header lines (up to the first blank line) are skipped; the
     * body is decoded according to its Content-Transfer-Encoding header
     * (base64 or quoted-printable, compared case-insensitively) and returned
     * unchanged for any other encoding. Body lines are re-joined with "\n" so
     * hard line breaks survive.
     *
     * @param int $i Zero-based part index.
     * @return string|int Decoded body; 1 when the part is the closing
     *                    delimiter remainder ("--"); '' when the part does
     *                    not exist.
     */
    public function get_part_to_file($i)
    {
        if (!is_array($this->filedata) || !isset($this->filedata[$i])) {
            return '';
        }

        $part_lines = explode("\n", ltrim($this->filedata[$i]));

        // What follows the final "--boundary--" delimiter starts with "--".
        if (trim($part_lines[0]) === '--') {
            return 1;
        }

        $encoding = '';
        $line_data_start = 0;
        foreach ($part_lines as $line_id => $line) {
            $line = trim($line);
            if ($line === '') {
                // First blank line separates the part headers from the body.
                $line_data_start = $line_id;
                break;
            }
            $pos = strpos($line, ':');
            if ($pos === false) {
                continue;
            }
            $key = strtolower(trim(substr($line, 0, $pos)));
            if ($key === 'content-transfer-encoding') {
                $encoding = strtolower(trim(substr($line, $pos + 1)));
            }
        }

        $body = implode("\n", array_slice($part_lines, $line_data_start + 1));

        if ($encoding === 'base64') {
            return base64_decode($body);
        }
        if ($encoding === 'quoted-printable') {
            // Standard-library decoder; no dependency on the IMAP extension.
            return quoted_printable_decode($body);
        }

        return $body;
    }

    /**
     * Loads the contents of $file into $filedata.
     *
     * On failure a message is appended to $log and the parser state is reset
     * to "no parts".
     *
     * @return bool True when the file was read, false otherwise.
     */
    public function read_filedata()
    {
        $data = false;
        if (is_file($this->file) && is_readable($this->file)) {
            $data = file_get_contents($this->file);
        }

        if ($data === false) {
            $this->log .= 'Unable to read file: ' . $this->file . "\n";
            $this->filedata = array();
            $this->countparts = 0;
            $this->boundary = '';

            return false;
        }

        $this->filedata = $data;

        return true;
    }

    /**
     * Extracts the text between a begin mark and its matching end mark,
     * honouring nested begin/end pairs.
     *
     * Example: for "sssss { x { xx } {xx{xx } x} x} sssss" with marks '{' and
     * '}' the result is
     *   'range' => " x { xx } {xx{xx } x} x", 'begin' => 7, 'end' => 31.
     *
     * Based on getRange v1.1 (2004-2006) by Uku-Kaarel Joesaar,
     * http://www.hot.ee/ukjoesaar
     *
     * @param string $subject       Text to search (taken by reference, not modified).
     * @param string $Beginmark_str Opening mark; '{' when empty.
     * @param string $Endmark_str   Closing mark; '}' when empty.
     * @param int    $Start_pos     Offset at which the search starts.
     * @return array|false array('range' => text between the marks,
     *                     'begin' => offset of the first character after the
     *                     opening mark, 'end' => offset just past the closing
     *                     mark), or false when no opening mark or no matching
     *                     closing mark exists.
     */
    public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0)
    {
        if ((string) $Beginmark_str === '') {
            $Beginmark_str = '{';
        }
        if ((string) $Endmark_str === '') {
            $Endmark_str = '}';
        }
        $begin_len = strlen($Beginmark_str);
        $end_len = strlen($Endmark_str);
        $subject_len = strlen($subject);

        // An offset outside the string makes strpos() throw on PHP 8.
        if ($Start_pos > $subject_len || $Start_pos < -$subject_len) {
            return false;
        }

        $first_open = strpos($subject, $Beginmark_str, $Start_pos);
        if ($first_open === false) {
            return false;
        }
        $range_begin = $first_open + $begin_len;

        $open_search_from = $range_begin;  // where to look for the next opening mark
        $close_search_from = $range_begin; // where to look for the next closing mark

        while (true) {
            $close_pos = strpos($subject, $Endmark_str, $close_search_from);
            if ($close_pos === false) {
                // Unbalanced: an opening mark has no closing partner.
                return false;
            }
            $close_search_from = $close_pos + $end_len;

            $next_open = strpos($subject, $Beginmark_str, $open_search_from);
            if ($next_open === false) {
                break;
            }
            $open_search_from = $next_open + $begin_len;
            if ($open_search_from > $close_pos) {
                // The next opening mark lies beyond the current closing mark,
                // so the range found so far is complete.
                break;
            }
            // Otherwise that opening mark is nested; its closing mark must be
            // skipped, so look for one more closing mark.
        }

        return array(
            'range' => substr($subject, $range_begin, $close_pos - $range_begin),
            'begin' => $range_begin,
            'end' => $close_search_from,
        );
    }
}
?>
出处:http://www.phpfensi.com/php/20180808/11064.html
栏目列表
最新更新
nodejs爬虫
Python正则表达式完全指南
爬取豆瓣Top250图书数据
shp 地图文件批量添加字段
爬虫小试牛刀(爬取学校通知公告)
【python基础】函数-初识函数
【python基础】函数-返回值
HTTP请求:requests模块基础使用必知必会
Python初学者友好丨详解参数传递类型
如何有效管理爬虫流量?
SQL SERVER中递归
2个场景实例讲解GaussDB(DWS)基表统计信息估
常用的 SQL Server 关键字及其含义
动手分析SQL Server中的事务中使用的锁
openGauss内核分析:SQL by pass & 经典执行
一招教你如何高效批量导入与更新数据
天天写SQL,这些神奇的特性你知道吗?
openGauss内核分析:执行计划生成
[IM002]Navicat ODBC驱动器管理器 未发现数据
初入Sql Server 之 存储过程的简单使用
这是目前我见过最好的跨域解决方案!
减少回流与重绘
减少回流与重绘
如何使用KrpanoToolJS在浏览器切图
performance.now() 与 Date.now() 对比
一款纯 JS 实现的轻量化图片编辑器
关于开发 VS Code 插件遇到的 workbench.scm.
前端设计模式——观察者模式
前端设计模式——中介者模式
创建型-原型模式