A-A+

微信公众号文章采集源码PHP

2014年09月26日 16:22 学习笔记 暂无评论 阅读 2,661 views 次

【注意:此文章为博主原创文章!转载需注意,请带原文链接,至少也要是txt格式!】

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
 
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>测试专用</title>
 
</head>
<body>
<?php
// Request parameters (see the friendshow1() URL they are passed into):
//   id   - value sent as the "page" query parameter
//   wzid - value sent as the "openid" query parameter
// A missing or non-string (array) parameter falls back to an empty string so
// no "undefined index" notice/warning is printed into the page.
$id = (isset($_GET["id"]) && is_string($_GET["id"])) ? $_GET["id"] : '';
$wzid = (isset($_GET["wzid"]) && is_string($_GET["wzid"])) ? $_GET["wzid"] : '';
 
// Raw response body for the requested page; parsed further down the script.
$aaa = friendshow1($wzid, $id);
 
/**
 * Decode "\uXXXX" escapes to UTF-8 and apply a fixed list of word substitutions.
 *
 * The input is tokenised into runs of word characters and "\uXXXX" escapes;
 * anything that matches neither (spaces, punctuation, angle brackets, ...) is
 * dropped from the result. That stripping is kept as-is because the
 * 'blockquote' substitution below operates on the stripped text.
 *
 * Each escape is converted on its own as a single big-endian 16-bit unit, so
 * surrogate pairs are not combined into one character.
 *
 * @param string $name Text possibly containing "\uXXXX" escapes.
 * @return string Decoded and substituted text.
 */
function unicode_decode($name)
{
    // Tokenise into word runs and \uXXXX escapes.
    $pattern = '/([\w]+)|(\\\u([\w]{4}))/i';
    if (preg_match_all($pattern, $name, $matches) !== false) {
        $decoded = '';
        foreach ($matches[0] as $token) {
            $isEscape = strpos($token, '\\u') === 0
                && strspn($token, '0123456789abcdefABCDEF', 2) === 4;
            if ($isEscape) {
                // Explicit big-endian: plain "UCS-2" uses a platform-dependent
                // byte order and yields swapped bytes on some systems.
                $char = iconv('UCS-2BE', 'UTF-8', pack('H4', substr($token, 2)));
                $decoded .= ($char === false) ? $token : $char;
            } else {
                // Plain word run, or a "\u" followed by non-hex: keep verbatim.
                $decoded .= $token;
            }
        }
        $name = $decoded;
    }

    // Substitutions are applied in this order, each on the result of the previous.
    $substitutions = array(
        'blockquote' => '</br>',
        'zealer'     => '聚壶堂',
        '拆'         => '',
        '锤子'       => '',
        '手机'       => '紫砂壶',
        '刘翔'       => '',
        '屏幕'       => '壶嘴',
        '相机'       => '壶身',
        '续航'       => '印章',
        '电池'       => '沸水',
        '王自如'     => '品牌',
    );
    return str_replace(array_keys($substitutions), array_values($substitutions), $name);
}
 
/**
 * Fetch one page from the Sogou WeChat "gzhjs" endpoint.
 *
 * @param string $wzid Value for the "openid" query parameter.
 * @param string $id   Value for the "page" query parameter.
 * @return string Response body with every "\/" replaced by "/", or an empty
 *                string when the request could not be made or failed.
 */
function friendshow1($wzid,$id)
{
    // URL-encode both values so they cannot inject extra query parameters.
    $url = 'http://weixin.sogou.com/gzhjs?cb=sogou.weixin.gzhcb'
        . '&openid=' . rawurlencode((string) $wzid)
        . '&page=' . rawurlencode((string) $id);
    $user_agent = "Baiduspider+(+http://www.baidu.com/search/spider.htm)";

    $ch = curl_init($url);
    if ($ch === false) {
        return '';
    }
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Spoof the client identity: the string is a User-Agent, so it is sent as
    // one (it was previously sent in the Referer header).
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    // Bound the request so a stalled remote host cannot hang the page.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $data = curl_exec($ch);
    curl_close($ch);

    if (!is_string($data)) {
        // Transfer failed; callers treat an empty body as "no results".
        return '';
    }

    // Undo the escaped forward slashes in the payload.
    return str_replace('\\/', '/', $data);
}
 
/**
 * Replace "\uXXXX" escapes in a string with the UTF-8 characters they encode.
 *
 * Consecutive escapes are decoded together as big-endian UTF-16, so surrogate
 * pairs become a single character. All other text is left untouched. A run
 * that cannot be converted is returned unchanged.
 *
 * @param string $str Text possibly containing "\uXXXX" escapes.
 * @return string Text with the escapes decoded.
 */
function unicode($str)
{
    // Exactly four hex digits per escape, so hex-looking text that follows an
    // escape (e.g. "\u4e2dabc") is not swallowed. A callback replaces the /e
    // modifier, which no longer exists in PHP 7+.
    $decoded = preg_replace_callback(
        '#(?:\\\\u[0-9a-f]{4})+#i',
        function ($match) {
            $hex = str_ireplace('\\u', '', $match[0]);
            $utf8 = iconv('UTF-16BE', 'UTF-8', pack('H*', $hex));
            return ($utf8 === false) ? $match[0] : $utf8;
        },
        $str
    );

    // preg_replace_callback() yields null on a PCRE error; fall back to the input.
    return ($decoded === null) ? $str : $decoded;
}
 
// Step 1: cut the callback payload out of the response (everything between
// "weixin.gzhcb" and the `],"totalItems` marker).
// Step 2: pull each item's <title> and <url> CDATA contents out of that payload.
// $rmmsmz[1] holds the titles and $rmmsmz[2] the URLs, index-aligned.
$rmms = array();
$rmmsmz = array(1 => array(), 2 => array());
if (preg_match_all("|weixin.gzhcb(.*)],\"totalItems|isU", (string) $aaa, $rmms) && isset($rmms[1][0])) {
    preg_match_all("|<title><!\[CDATA\[(.*)\]\]></title>.*<url><!\[CDATA\[(.*)\]\]></url>.*</tplid>.*</tplid>|isU", $rmms[1][0], $rmmsmz);
}
foreach ($rmmsmz[1] as $key => $value) {
    // The scraped text is untrusted remote content: escape it for HTML text and
    // attribute context. double_encode is off so entities that are already
    // present in the source are not escaped a second time.
    $title = htmlspecialchars((string) unicode($value), ENT_QUOTES, 'UTF-8', false);
    $link = htmlspecialchars((string) $rmmsmz[2][$key], ENT_QUOTES, 'UTF-8', false);
?>
 
---------1-<?php echo $title ?>-2------3-<?php echo $link ?>-4---------
<br /><a target="_blank" rel="nofollow" href="<?php echo $link ?>"><?php echo $title ?></a><br /><br />
 
<?php
}
?>
 
</body>
</html>

举例:123.php?wzid=oIWsFt3_mbiyHGUnQIA1K8fl_ZGg&id=1

标签:

给我留言