php解析mht文件转换成html的实例

作者:袖梨 2022-06-24

用 PHP 解析 mht 文件:用文本编辑器打开 mht 文件,可以看到其中的内容是以 base64 等方式编码的 MIME 分段,因此 mht 文件可以被解析并转换成 html。

 

 代码如下:

 

/**

 * 针对Mht格式的文件进行解析

* 使用例子:

*

* function mhtmlParseBody($filename) {

 

    if (file_exists ( $filename )) {

        if (is_dir ( $filename )) return false;

         

        $filename = strtolower ( $filename );

        if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;

             

         

        $o_mhtml = new mhtparse ();

        $o_mhtml->set_file ( $filename );

        $o_mhtml->extract ();

        return $o_mhtml->get_part_to_file(0);

 

    }

    return null;

}

 

function mhtmlParseAll($filename) {

 

    if (file_exists ( $filename )) {

        if (is_dir ( $filename )) return false;

 

        $filename = strtolower ( $filename );

        if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;

             

 

        $o_mhtml = new mhtparse ();

        $o_mhtml->set_file ( $filename );

        $o_mhtml->extract ();

        return $o_mhtml->get_all_part_file();

 

    }

    return null;

}

*/

 

/**
 * Minimal parser for MHT / MHTML web archives (MIME multipart files).
 *
 * Typical flow: set_file() -> extract() -> get_part_to_file($i) or
 * get_all_part_file(). extract() loads the file and splits it on the MIME
 * boundary; get_part_to_file() strips one part's headers and decodes its body
 * according to its Content-Transfer-Encoding header.
 */
class mhtparse {

    /** @var string Path of the MHT file to read (set through set_file()). */
    public $file = '';

    /** @var string Part delimiter: "--" followed by the detected boundary value; empty if none was found. */
    public $boundary = '';

    /** @var string|array Raw file contents after read_filedata(); list of raw parts after file_parts(). */
    public $filedata = '';

    /** @var int Number of fragments produced by file_parts(); the fragment after the closing delimiter is counted too. */
    public $countparts = 1;

    /** @var string Newline-terminated error messages collected while reading the file. */
    public $log = '';

    /**
     * Reads the configured file and splits it into its MIME parts.
     *
     * @return int 1 on success, 0 when the file could not be read (details in get_log()).
     */
    public function extract() {
        $read_ok = $this->read_filedata();
        $this->file_parts();

        // Only an explicit false counts as failure, so a subclass whose
        // read_filedata() returns nothing keeps the historical "always 1".
        return ($read_ok === false) ? 0 : 1;
    }

    /**
     * Sets the path of the MHT file to parse.
     *
     * @param string $p File path.
     */
    public function set_file($p) {
        $this->file = $p;
    }

    /**
     * Returns the accumulated error log.
     *
     * @return string Empty when nothing went wrong.
     */
    public function get_log() {
        return $this->log;
    }

    /**
     * Detects the multipart boundary and splits $filedata into raw parts.
     *
     * Only the first 8192 bytes are scanned for a "boundary=" parameter.
     * Afterwards $filedata is a zero-based list of raw parts (headers + body);
     * everything before the first delimiter is discarded. When no boundary is
     * found the whole content becomes the single part.
     */
    public function file_parts() {
        if (is_array($this->filedata)) {
            // Already split; splitting again would fail on the array.
            return;
        }
        $data = (string) $this->filedata;

        $head_lines = explode("\n", substr($data, 0, 8192));
        foreach ($head_lines as $line) {
            $line = trim($line);
            // Match the boundary parameter itself (quoted or bare,
            // case-insensitive) rather than the first quoted string on the
            // line, so 'type="text/html"; boundary="..."' is handled.
            if (!preg_match('/boundary\s*=\s*(?:"([^"]*)"|([^\s;"]+))/i', $line, $m)) {
                continue;
            }
            $value = ($m[1] !== '') ? $m[1] : (isset($m[2]) ? $m[2] : '');
            if ($value === '') {
                continue;
            }
            $this->boundary = '--' . $value;
            // Drop the declaring header line from the data, as the original
            // implementation did.
            $data = str_replace($line, '', $data);
            break;
        }

        if ($this->boundary != '') {
            $parts = explode($this->boundary, $data);
            // Element 0 is the top-level header block / preamble, not a part.
            $this->filedata = array_slice($parts, 1);
            $this->countparts = count($this->filedata);
        } else {
            $this->filedata = array($data);
            $this->countparts = 1;
        }
    }

    /**
     * Returns the raw parts (headers and still-encoded bodies).
     *
     * @return array|string List of raw parts after file_parts(); the raw
     *                      file contents if the data has not been split yet.
     */
    public function get_all_part_file() {
        return $this->filedata;
    }

    /**
     * Returns the decoded body of part $i.
     *
     * The part's header lines run up to the first blank line. The body is
     * decoded when Content-Transfer-Encoding is base64 or quoted-printable
     * (matched case-insensitively) and returned unchanged otherwise.
     *
     * @param int $i Zero-based part index.
     * @return string|int Decoded body; '' when the part does not exist;
     *                    1 when the fragment is the closing "--" marker
     *                    followed by a blank line.
     */
    public function get_part_to_file($i) {
        if (!is_array($this->filedata) || !isset($this->filedata[$i])) {
            return '';
        }

        $part_lines = explode("\n", ltrim($this->filedata[$i]));
        $header_end = 0;
        $encoding = '';

        foreach ($part_lines as $line_id => $line) {
            $line = trim($line);
            if ($line === '') {
                // "--" right after the last delimiter marks the end of the archive.
                if (trim($part_lines[0]) === '--') {
                    return 1;
                }
                $header_end = $line_id;
                break;
            }
            $colon = strpos($line, ':');
            if ($colon !== false) {
                $name = strtolower(trim(substr($line, 0, $colon)));
                if ($name === 'content-transfer-encoding') {
                    $encoding = strtolower(trim(substr($line, $colon + 1)));
                }
            }
        }

        // Re-join with "\n" so the body keeps its line structure; joining with
        // '' merged text lines and broke quoted-printable soft line breaks.
        $body = implode("\n", array_slice($part_lines, $header_end + 1));

        if ($encoding === 'base64') {
            $body = base64_decode($body);
        } elseif ($encoding === 'quoted-printable') {
            // Core replacement for imap_qprint(), which needs the IMAP extension.
            $body = quoted_printable_decode($body);
        }

        return $body;
    }

    /**
     * Loads the whole file named by $file into $filedata.
     *
     * On failure $filedata is set to '' and a message is appended to the log.
     *
     * @return bool True when the file was read, false otherwise.
     */
    public function read_filedata() {
        $this->filedata = '';

        if (!is_string($this->file) || $this->file === '') {
            $this->log_error('mhtparse: no file name set');
            return false;
        }
        if (!is_file($this->file) || !is_readable($this->file)) {
            $this->log_error('mhtparse: cannot read file "' . $this->file . '"');
            return false;
        }

        // file_get_contents() also copes with empty files, where
        // fread($handle, 0) fails.
        $data = file_get_contents($this->file);
        if ($data === false) {
            $this->log_error('mhtparse: reading file "' . $this->file . '" failed');
            return false;
        }

        $this->filedata = $data;
        return true;
    }

    /**
     * Extracts the text between a begin mark and its matching end mark,
     * taking nested begin/end pairs into account.
     *
     * Example: for "sssss { x { xx } {xx{xx } x} x} sssss" with marks '{' and
     * '}' the result is range " x { xx } {xx{xx } x} x", begin 7, end 31.
     * With identical marks (e.g. '"' and '"') the text between the first two
     * marks is returned.
     *
     * Based on getRange v1.1, 2004-2006, by Uku-Kaarel J5esaar
     * (http://www.hot.ee/ukjoesaar).
     *
     * @param string $subject       Text to search (taken by reference, not modified).
     * @param string $Beginmark_str Opening mark; an empty value falls back to '{'.
     * @param string $Endmark_str   Closing mark; an empty value falls back to '}'.
     * @param int    $Start_pos     Offset at which the search starts.
     * @return array|false array('range' => text between the marks,
     *                     'begin' => offset just after the opening mark,
     *                     'end' => offset just after the closing mark),
     *                     or false when no complete range exists.
     */
    public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0) {
        if (empty($Beginmark_str)) {
            $Beginmark_str = '{';
        }
        if (empty($Endmark_str)) {
            $Endmark_str = '}';
        }
        $begin_len = strlen($Beginmark_str);
        $end_len = strlen($Endmark_str);

        // strpos() rejects offsets outside the string, so report "not found".
        $subject_len = strlen($subject);
        if ($Start_pos > $subject_len || $Start_pos < -$subject_len) {
            return false;
        }

        $first_begin = strpos($subject, $Beginmark_str, $Start_pos);
        if ($first_begin === false) {
            // No opening mark at all.
            return false;
        }

        $range_begin = $first_begin + $begin_len;
        $begin_cursor = $range_begin; // where to look for further opening marks
        $end_cursor = $range_begin;   // where to look for the next closing mark

        while (true) {
            $end_pos = strpos($subject, $Endmark_str, $end_cursor);
            if ($end_pos === false) {
                // An opening mark without its closing mark.
                return false;
            }
            $range_length = $end_pos - $range_begin;
            $end_cursor = $end_pos + $end_len;

            $next_begin = strpos($subject, $Beginmark_str, $begin_cursor);
            if ($next_begin === false) {
                // No more opening marks: the range found so far is final.
                break;
            }
            $begin_cursor = $next_begin + $begin_len;
            if ($begin_cursor > $end_pos) {
                // The next opening mark lies behind the current end: not nested.
                break;
            }
            // Otherwise the opening mark is nested inside the range, so one
            // more closing mark has to be consumed.
        }

        return array(
            'range' => substr($subject, $range_begin, $range_length),
            'begin' => $range_begin,
            'end'   => $end_cursor,
        );
    }

    /**
     * Appends one message to the error log.
     *
     * @param string $message Text to record.
     */
    private function log_error($message) {
        $this->log .= $message . "\n";
    }
}

?>

 

相关文章

精彩推荐