加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
baidupan.js 2.68 KB
一键复制 编辑 原始数据 按行查看 历史
fox 提交于 2017-06-17 16:13 . init
let cheerio = require('cheerio')
let http = require('http')
let fs = require('fs')
let url = require('url')
let userAgent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0'
// Script entry point: run the search for a hard-coded keyword as soon as the
// file is loaded (getBaidu is a hoisted function declaration defined below).
getBaidu('美剧硅谷')
// let href = 'https://www.baidu.com/s?wd=%E7%BE%8E%E5%89%A7%E7%A1%85%E8%B0%B7&rsv_spt=1&rsv_iqid=0xf6367ddb0007432e&issp=1&f=8&rsv_bp=0&rsv_idx=2&ie=utf-8&rqlang=&tn=baiduhome_pg&rsv_enter=1&inputT=4491'
// get302(href)
// geturl(href)
/**
 * Debug helper: split a URL string into its components with `url.parse`
 * and print the resulting object to stdout.
 *
 * @param {string} u - URL to inspect.
 * @returns {undefined} Nothing; the parsed URL is only logged.
 */
function geturl(u) {
  console.log(url.parse(u))
}
/**
 * Request `u` once over HTTP and report where it redirects to.
 *
 * When the server answers 302, the status code and the `Location` header are
 * logged. Because the answer arrives asynchronously, the function itself
 * always returns `undefined`; pass `callback` to receive the outcome.
 *
 * @param {string} u - Absolute URL to probe.
 * @param {function(?Error, string=): void} [callback] - Optional. Called with
 *   `(null, location)` on a 302, `(null, u)` for any other status, or
 *   `(err)` when the request itself fails.
 * @returns {undefined}
 */
function get302(u, callback) {
  const target = url.parse(u)
  const done = typeof callback === 'function' ? callback : () => {}
  const req = http.request({
    method: 'GET',
    // `hostname` must be a bare host name; `url.parse(u).host` also carries
    // ":port", which breaks the lookup for URLs with an explicit port.
    hostname: target.hostname,
    port: target.port ? Number(target.port) : undefined,
    // The full URL is kept as the request target, exactly as before.
    path: u
  }, res => {
    // Only the status line and headers matter here. The body still has to be
    // drained, otherwise the socket is never released.
    res.resume()
    if (res.statusCode === 302) {
      console.log(res.statusCode)
      console.log(res.headers.location)
      done(null, res.headers.location)
    } else {
      done(null, u)
    }
  })
  // Without a listener, a DNS or connection failure is thrown as an uncaught
  // 'error' event and takes the whole process down.
  req.on('error', err => {
    console.error(`get302: request for ${u} failed: ${err.message}`)
    done(err)
  })
  req.setHeader('User-Agent', userAgent)
  req.end()
}
/**
 * Fetch one Baidu result page, print the title of every hit, then follow the
 * page's pagination links until roughly 100 hits have been collected.
 *
 * The raw HTML of each fetched page is also dumped to /tmp/baidu.html
 * (overwritten on every page) for debugging.
 *
 * @param {?string} q - Search keywords. Only used when `path` is not given.
 * @param {string} [path] - Request target to fetch instead of the search URL
 *   built from `q`; used for the follow-up result pages.
 * @param {{results: Array<{text: string, href: string}>, visited: Set<string>}} [crawl]
 *   Bookkeeping shared by all recursive calls of one crawl. Callers normally
 *   omit it; a fresh state is created for each top-level call.
 * @returns {undefined} Results are logged, not returned.
 */
function getBaidu(q, path, crawl = { results: [], visited: new Set() }) {
  const maxResults = 100
  // encodeURIComponent also escapes & = ? #, which encodeURI leaves alone and
  // which would otherwise split the keyword into extra query parameters.
  const wd = encodeURIComponent(q)
  const option = {
    method: 'GET',
    host: 'www.baidu.com',
    path: path ? path : `/s?wd=${wd}&rsv_spt=1&rsv_iqid=0xf6367ddb0007432e&issp=1&f=8&rsv_bp=0&rsv_idx=2&ie=utf-8&rqlang=&tn=baiduhome_pg&rsv_enter=1&inputT=4491`
  }

  // Every result page links to its sibling pages. Without this check each
  // page would re-request pages that were already fetched, without bound.
  if (crawl.visited.has(option.path)) {
    return
  }
  crawl.visited.add(option.path)

  const req = http.request(option, res => {
    if (res.statusCode === 302 || res.statusCode === 301) {
      // The redirect body is of no interest; drain it so the socket is freed.
      res.resume()
      const location = res.headers.location
      if (location) {
        // get302 expects an absolute URL string, not the response object.
        // A relative Location header is resolved against the host first.
        get302(url.resolve(`http://${option.host}/`, location))
      } else {
        console.error(`getBaidu: ${res.statusCode} response without a Location header`)
      }
      return
    }

    let rawData = ''
    // Let the stream decode UTF-8 so a multi-byte character split across two
    // chunks is not corrupted, as it would be by per-chunk toString().
    res.setEncoding('utf8')
    res.on('data', chunk => {
      rawData += chunk
    })
    res.on('end', () => {
      // Best-effort debug dump; a failure is reported but does not stop the crawl.
      fs.writeFile('/tmp/baidu.html', rawData, err => {
        if (err) {
          console.error(`getBaidu: could not write /tmp/baidu.html: ${err.message}`)
        }
      })

      const $ = cheerio.load(rawData)
      const alist = $('.t>a')
      const pagelist = $('#page>a')

      const pages = []
      for (let i = 0; i < pagelist.length; i++) {
        const pageHref = pagelist.eq(i).attr('href')
        // An anchor without href would otherwise become "...comundefined".
        if (pageHref) {
          pages.push('http://www.baidu.com' + pageHref)
        }
      }

      // Number of hits taken from this page (the shared list holds the total).
      let found = 0
      for (let i = 0; i < alist.length && crawl.results.length < maxResults; i++) {
        const link = alist.eq(i)
        const text = link.text()
        crawl.results.push({ 'text': text, 'href': link.attr('href') })
        found++
        console.log(`${text} `)
      }

      for (let i = 0; i < pages.length; i++) {
        // Stop issuing follow-up requests once enough hits were collected.
        if (crawl.results.length >= maxResults) {
          break
        }
        getBaidu(null, pages[i], crawl)
      }
      console.log(found)
    })
  })
  // Without a listener, a DNS or connection failure is thrown as an uncaught
  // 'error' event and crashes the script.
  req.on('error', err => {
    console.error(`getBaidu: request for ${option.path} failed: ${err.message}`)
  })
  req.setHeader('User-Agent', userAgent)
  req.end()
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化