首先，需要下载第三方库 simple_html_dom，具体的下载方式和使用方法可以参考《simple_html_dom的使用》一文。
运行环境需要支持 file_get_contents() 函数（抓取远程图片时需要开启 allow_url_fopen）以及 cURL 扩展，具体代码如下：
include_once('simple_html_dom.php');
// Fetch the listing page and collect the unique, sorted article URLs found in it.
// NOTE(review): $url is not assigned anywhere in the visible code; it must hold
// the listing-page URL before this point.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, false);
// Return the response body as a string instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$output = curl_exec($ch);
curl_close($ch);

$html = new simple_html_dom();
$images = array();
$arr = array();

// curl_exec() returns false on failure; only parse when a body was received.
if ($output !== false) {
    $html->load($output);

    // Article links look like http://tech.sina.com.cn/it/YYYY-M-D/<digits>.shtml.
    // The digit classes must be written as \d (a bare "[d]" only matches the
    // letter "d"), and literal dots are escaped so they do not match any character.
    $pattern = '#^http://tech\.sina\.com\.cn/it/\d{4}-\d{1,2}-\d{1,2}/\d+\.shtml$#i';

    foreach ($html->find('li a') as $element) {
        if (preg_match($pattern, $element->href)) {
            $images[] = $element->href;
        }
    }
}

// Drop duplicate links, then sort (sort() also re-indexes the array from 0).
$images = array_unique($images);
sort($images);
for($i=0;$iload($data);
$arr = array();
foreach($html->find('h1#artibodyTitle') as $element){
$arr['title']= @iconv('gbk','utf-8', $element->innertext);
;
}
$str = '';
foreach($html->find('div#artibody p') as $element){
$str.= $element;
}
$arr['content'] = $str;
foreach($html->find('div.img_wrapper img') as $element){
$arr['alt'] =$element->alt;
$data = file_get_contents($element->src);
$info = getimagesize($element->src);//get image information
switch($info[2]){
case 1:
$str = 'gif';
break;
case 2:
$str = 'jpg';
break;
case 3:
$str = 'png';
break;
default:
continue;
break;
}
$filename = time().rand(1,999999).'.'.$str;
if(!is_dir($dirname)){
mkdir($dirname,0777,true);
}
$fp = fopen($dirname.$filename,'w');
fwrite($fp,$data);
fclose($fp);
$arr['img'] = $dirname.$filename;
}
return $arr;
}
?>
如果大家有更好的建议，欢迎提出来。