PHP 解析 MHT 文件：用编辑器打开 MHT 文件，可以看到其中的内容是 base64 编码的，所以 MHT 文件是可以转换成 HTML 的。
代码如下 | 复制代码 |
/**
 * Parses files in MHT format.
 * Usage example:
 *
 * function mhtmlParseBody($filename) {
 *     if (file_exists($filename)) {
 *         if (is_dir($filename)) return false;
 *         // Test the extension on a lowercased copy; keep the real path intact.
 *         if (strpos(strtolower($filename), '.mht', 1) === false) return false;
 *         $o_mhtml = new mhtparse();
 *         $o_mhtml->set_file($filename);
 *         $o_mhtml->extract();
 *         return $o_mhtml->get_part_to_file(0);
 *     }
 *     return null;
 * }
 *
 * function mhtmlParseAll($filename) {
 *     if (file_exists($filename)) {
 *         if (is_dir($filename)) return false;
 *         if (strpos(strtolower($filename), '.mht', 1) === false) return false;
 *         $o_mhtml = new mhtparse();
 *         $o_mhtml->set_file($filename);
 *         $o_mhtml->extract();
 *         return $o_mhtml->get_all_part_file();
 *     }
 *     return null;
 * }
 */
/**
 * Minimal reader for MHT (MHTML web archive) files.
 *
 * Typical use: set_file($path), extract(), then get_part_to_file($i) for a
 * decoded part or get_all_part_file() for the raw, still-encoded parts.
 */
class mhtparse
{
    /** Path of the file to read (set with set_file()). */
    public $file = '';

    /** Part delimiter including the leading "--"; '' when no boundary was found. */
    public $boundary = '';

    /** Raw file contents after read_filedata(); list of raw part strings after file_parts(). */
    public $filedata = '';

    /** Number of pieces produced by splitting on the boundary (1 when there is no boundary). */
    public $countparts = 1;

    /** Last error message recorded by read_filedata(); '' when nothing went wrong. */
    public $log = '';

    /**
     * Reads the configured file and splits it into parts.
     *
     * @return int 1 on success, 0 when the file could not be read
     *             (in that case the reason is available through get_log()).
     */
    public function extract()
    {
        $ok = $this->read_filedata();
        // Always split, so $filedata is a list afterwards even on failure.
        $this->file_parts();

        return $ok ? 1 : 0;
    }

    /**
     * Sets the path of the file to parse.
     *
     * @param string $p file path
     */
    public function set_file($p)
    {
        $this->file = $p;
    }

    /**
     * Returns the last recorded error message.
     *
     * @return string
     */
    public function get_log()
    {
        return $this->log;
    }

    /**
     * Finds the multipart boundary in the first 8192 bytes of the data and
     * splits $filedata into a list of raw parts.
     *
     * The piece before the first delimiter (top-level headers / preamble) is
     * dropped. The piece after the closing "--boundary--" delimiter is kept,
     * so it is included in $countparts; get_part_to_file() returns 1 for it.
     * When no boundary is found, $filedata becomes a one-element list holding
     * the whole content.
     */
    public function file_parts()
    {
        if (!is_string($this->filedata)) {
            // Already split (or not loaded as text); nothing to do.
            return;
        }

        $head_lines = explode("\n", substr($this->filedata, 0, 8192));
        foreach ($head_lines as $line) {
            $line = trim($line);
            if ($line === '' || stripos($line, 'boundary') === false) {
                continue;
            }
            // Anchor on "boundary=" so other quoted parameters on the same
            // line (e.g. type="text/html") are not mistaken for the boundary;
            // accept both the quoted and the unquoted form.
            if (preg_match('/boundary\s*=\s*(?:"([^"]+)"|([^\s;"]+))/i', $line, $m)) {
                $value = (isset($m[2]) && $m[2] !== '') ? $m[2] : $m[1];
                $this->boundary = '--' . $value;
                // Remove the declaring header line from the data before splitting.
                $this->filedata = str_replace($line, '', $this->filedata);
                break;
            }
        }

        if ($this->boundary !== '') {
            $pieces = explode($this->boundary, $this->filedata);
            unset($pieces[0]); // everything before the first delimiter
            $this->filedata = array_values($pieces);
            $this->countparts = count($this->filedata);
        } else {
            $this->filedata = array($this->filedata);
        }
    }

    /**
     * Returns the raw (still encoded, headers included) parts.
     *
     * @return array|string list of part strings after extract(); the
     *                      unsplit content if file_parts() has not run
     */
    public function get_all_part_file()
    {
        return $this->filedata;
    }

    /**
     * Returns the decoded body of part $i.
     *
     * The part's header lines are read up to the first blank line; the
     * Content-Transfer-Encoding header (matched case-insensitively) selects
     * base64 or quoted-printable decoding, anything else is returned as is.
     *
     * @param int $i zero-based part index
     * @return string|int decoded body; '' when the part does not exist;
     *                    1 when the piece is the remainder after the closing
     *                    "--boundary--" delimiter
     */
    public function get_part_to_file($i)
    {
        if (!is_array($this->filedata) || !isset($this->filedata[$i])) {
            return '';
        }

        $part_lines = explode("\n", ltrim($this->filedata[$i]));
        $encoding = '';
        $line_data_start = 0;

        foreach ($part_lines as $line_id => $line) {
            $line = trim($line);
            if ($line === '') {
                // A piece that starts with "--" is what follows the closing delimiter.
                if (trim($part_lines[0]) === '--') {
                    return 1;
                }
                $line_data_start = $line_id;
                break;
            }
            $pos = strpos($line, ':');
            if ($pos !== false) {
                $k = strtolower(trim(substr($line, 0, $pos)));
                if ($k === 'content-transfer-encoding') {
                    $encoding = strtolower(trim(substr($line, $pos + 1)));
                }
            }
        }

        // Keep the line breaks: quoted-printable soft breaks ("=" at end of
        // line) and plain-text bodies both depend on them. base64_decode()
        // in its default mode skips the whitespace.
        $body = implode("\n", array_slice($part_lines, $line_data_start + 1));

        if ($encoding === 'base64') {
            return base64_decode($body);
        }
        if ($encoding === 'quoted-printable') {
            return quoted_printable_decode($body);
        }

        return $body;
    }

    /**
     * Loads the whole file named by $file into $filedata and resets the
     * per-file state ($boundary, $countparts, $log).
     *
     * @return bool true on success; false when the path is not a readable
     *              file (then $filedata is '' and $log holds the reason)
     */
    public function read_filedata()
    {
        $this->boundary = '';
        $this->countparts = 1;
        $this->log = '';
        $this->filedata = '';

        if (!is_string($this->file) || $this->file === '' || !is_file($this->file) || !is_readable($this->file)) {
            $this->log = 'Cannot read file: ' . (is_string($this->file) ? $this->file : '');
            return false;
        }

        $data = file_get_contents($this->file);
        if ($data === false) {
            $this->log = 'Cannot read file: ' . $this->file;
            return false;
        }

        $this->filedata = $data;

        return true;
    }

    /**
     * Extracts the text between a begin mark and an end mark.
     *
     * Starting at the first begin mark at or after $Start_pos, the range is
     * extended to the next end mark for as long as another begin mark occurs
     * before the current end mark.
     *
     * Example:
     *   $str = "sssss { x { xx } {xx{xx } x} x} sssss";
     *   $range = $this->getrange($str, '{', '}', 0);
     *   $range['range'] is " x { xx } {xx{xx } x} x"
     *
     * Original routine: v1.1 2004-2006, Uku-Kaarel J5esaar, http://www.hot.ee/ukjoesaar
     *
     * @param string $subject       text to search (taken by reference, not modified)
     * @param string $Beginmark_str begin mark; '{' when empty
     * @param string $Endmark_str   end mark; '}' when empty
     * @param int    $Start_pos     offset at which the search starts
     * @return array|false array('range' => text between the marks,
     *                           'begin' => offset of the first character after the first begin mark,
     *                           'end'   => offset just past the last end mark used),
     *                     or false when a begin or end mark cannot be found
     */
    public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0)
    {
        if (empty($Beginmark_str)) {
            $Beginmark_str = '{';
        }
        $Beginmark_str_len = strlen($Beginmark_str);

        if (empty($Endmark_str)) {
            $Endmark_str = '}';
        }
        $Endmark_str_len = strlen($Endmark_str);

        // An offset outside the subject can never match; strpos() would
        // raise an error for it on PHP 8.
        if (abs((int) $Start_pos) > strlen($subject)) {
            return false;
        }

        // Explicit initial state (the loop reads these before assigning them).
        $Begin_firstOccurence_pos = null;
        $Start_pos_cache = null;
        $End_pos_cache = null;
        $End_sequel_pos = null;
        $range_end_pos = null;
        $range_current_lenght = null;
        $rangeClean = 0;

        do {
            /* no start found yet: search from the requested offset */
            if (!is_int($Begin_firstOccurence_pos)) {
                $Start_pos_cache = $Start_pos;
            }

            /* look for the next begin mark */
            $Start_pos_cache = strpos($subject, $Beginmark_str, $Start_pos_cache);

            /* this is a possible start for the range */
            if (is_int($Start_pos_cache)) {
                /* skip over the mark itself */
                $Start_pos_cache = ($Start_pos_cache + $Beginmark_str_len);

                /* a later begin mark: inside the current range, or beyond it? */
                if (is_int($Begin_firstOccurence_pos)) {
                    if ($Start_pos_cache < $range_end_pos) {
                        $rangeClean = 0;
                    } elseif ($Start_pos_cache > $range_end_pos) {
                        $rangeClean = 1;
                    }
                }

                /* the first begin mark fixes the start of the range */
                if (!is_int($Begin_firstOccurence_pos)) {
                    $Begin_firstOccurence_pos = $Start_pos_cache;
                }
            }

            if (!is_int($Start_pos_cache)) {
                /* no further begin mark: the range is final if one was
                   started, otherwise there is no start at all */
                if (is_int($Begin_firstOccurence_pos) and ($Start_pos_cache < $range_end_pos)) {
                    $rangeClean = 1;
                } else {
                    return false;
                }
            }

            if (is_int($Begin_firstOccurence_pos) and ($rangeClean != 1)) {
                if (!is_int($End_pos_cache)) {
                    $End_sequel_pos = $Begin_firstOccurence_pos;
                }

                $End_pos_cache = strpos($subject, $Endmark_str, $End_sequel_pos);

                if (is_int($End_pos_cache)) {
                    $range_current_lenght = ($End_pos_cache - $Begin_firstOccurence_pos);
                    $End_sequel_pos = ($End_pos_cache + $Endmark_str_len);
                    $range_end_pos = $End_pos_cache;
                } else {
                    /* end mark not found */
                    return false;
                }
            }
        } while ($rangeClean < 1);

        if (!is_int($Begin_firstOccurence_pos) or !is_int($range_current_lenght)) {
            return false;
        }

        return array(
            'range' => substr($subject, $Begin_firstOccurence_pos, $range_current_lenght),
            'begin' => $Begin_firstOccurence_pos,
            'end' => $End_sequel_pos,
        );
    }
}