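// NOTE(review): the next line appears to be a hosting-page notice, not part of the script; kept as a comment so the file parses.
// Code pull complete; the page will refresh automatically.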
let fs = require('fs')
let http = require('http')
let https = require('https')
let url = require('url')

let cheerio = require('cheerio')
// User-Agent header value attached to every outgoing request in this file
// (identifies the client as Firefox 53 on Ubuntu).
const userAgent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0';

// Script entry point: start a Baidu search for the keywords below
// ("American TV series Silicon Valley").
getBaidu('美剧硅谷');

// Manual debugging aids, left disabled: probe a single result URL for a
// redirect, or dump its parsed components.
// let href = 'https://www.baidu.com/s?wd=%E7%BE%8E%E5%89%A7%E7%A1%85%E8%B0%B7&rsv_spt=1&rsv_iqid=0xf6367ddb0007432e&issp=1&f=8&rsv_bp=0&rsv_idx=2&ie=utf-8&rqlang=&tn=baiduhome_pg&rsv_enter=1&inputT=4491'
// get302(href)
// geturl(href)
/**
 * Debug helper: splits a URL string into its components with the legacy
 * `url.parse` API and prints the resulting object to stdout.
 *
 * @param {string} u - URL string to inspect.
 * @returns {void}
 */
function geturl(u) {
  console.log(url.parse(u));
}
/**
 * Requests `u` once, without following redirects, and reports where it leads.
 *
 * When the server answers 302 the status code and the `Location` header are
 * logged. The request is asynchronous, so the outcome cannot be returned to
 * the caller directly; pass `callback` to receive it.
 *
 * @param {string} u - Absolute http:// or https:// URL to probe.
 * @param {function(string): void} [callback] - Optional. Called with the
 *   `Location` header when the answer is a 302 that carries one, otherwise
 *   with `u` itself.
 * @returns {void}
 */
function get302(u, callback) {
  const target = url.parse(u);
  // Pick the transport that matches the URL scheme; plain http cannot talk to
  // an https endpoint.
  const transport = target.protocol === 'https:' ? https : http;

  const req = transport.request({
    method: 'GET',
    // `hostname` excludes the port (unlike `host`); the port is passed separately.
    hostname: target.hostname,
    port: target.port,
    // Send only path + query, not the whole absolute URL.
    path: target.path,
  }, res => {
    // The body is not needed; discard it so the socket is released.
    res.resume();

    let resolved = u;
    if (res.statusCode === 302) {
      console.log(res.statusCode);
      console.log(res.headers.location);
      resolved = res.headers.location || u;
    }
    if (typeof callback === 'function') {
      callback(resolved);
    }
  });

  // Without a listener an 'error' event (DNS failure, refused connection, ...)
  // would be thrown and terminate the process.
  req.on('error', err => {
    console.error(`get302: request for ${u} failed: ${err.message}`);
  });
  req.setHeader('User-Agent', userAgent);
  req.end();
}
/**
 * Searches Baidu and prints the title of every hit, following the pagination
 * links of each result page until 100 hits have been collected.
 *
 * Each fetched page is also dumped to /tmp/baidu.html (overwritten per page)
 * and the running total of collected hits is logged after each page.
 * A 301/302 answer is not followed; its target is handed to `get302`.
 *
 * @param {?string} q - Search keywords; only used when `path` is not given.
 * @param {string} [path] - Path (or absolute URL) of a result page to fetch
 *   instead of building a search URL from `q`.
 * @param {{results: Array<{text: string, href: string}>, visited: Set<string>}} [state]
 *   Crawl state shared between the recursive page fetches. External callers
 *   normally omit it.
 * @returns {void}
 */
function getBaidu(q, path, state) {
  const MAX_RESULTS = 100;
  const BASE = 'http://www.baidu.com/';

  // One state object per crawl: without it every recursive call had its own
  // `results`, so the result cap never triggered and pages were re-fetched
  // endlessly.
  const crawl = state || { results: [], visited: new Set() };

  // Only path + query goes on the request line, even if an absolute URL was
  // passed in. encodeURIComponent (unlike encodeURI) also escapes `&`, `=`
  // and `#`, which would otherwise corrupt the query string.
  const requestPath = path
    ? url.parse(path).path
    : `/s?wd=${encodeURIComponent(q)}&rsv_spt=1&rsv_iqid=0xf6367ddb0007432e&issp=1&f=8&rsv_bp=0&rsv_idx=2&ie=utf-8&rqlang=&tn=baiduhome_pg&rsv_enter=1&inputT=4491`;
  crawl.visited.add(requestPath);

  const option = {
    method: 'GET',
    host: 'www.baidu.com',
    path: requestPath,
  };

  const req = http.request(option, res => {
    if (res.statusCode === 301 || res.statusCode === 302) {
      // Body is irrelevant for a redirect; drain it to free the socket.
      res.resume();
      const location = res.headers.location;
      if (location) {
        // get302 expects a URL string (it was previously handed the response
        // object, which made url.parse throw). Location may be relative.
        get302(url.resolve(url.resolve(BASE, requestPath), location));
      }
      return;
    }

    // Decode at stream level so multi-byte UTF-8 characters that straddle a
    // chunk boundary are not corrupted.
    res.setEncoding('utf8');
    let rawData = '';
    res.on('data', chunk => {
      rawData += chunk;
    });

    res.on('end', () => {
      fs.writeFile('/tmp/baidu.html', rawData, err => {
        if (err) {
          console.error(`getBaidu: could not write /tmp/baidu.html: ${err.message}`);
        }
      });

      const $ = cheerio.load(rawData);

      // Collect and print the hits of this page, never exceeding the cap.
      const alist = $('.t>a');
      for (let i = 0; i < alist.length && crawl.results.length < MAX_RESULTS; i++) {
        const anchor = alist.eq(i);
        const text = anchor.text();
        crawl.results.push({ text: text, href: anchor.attr('href') });
        console.log(`${text} `);
      }

      // Follow pagination links that have not been requested yet, as long as
      // more hits are still wanted.
      const pagelist = $('#page>a');
      for (let i = 0; i < pagelist.length; i++) {
        if (crawl.results.length >= MAX_RESULTS) {
          break;
        }
        const href = pagelist.eq(i).attr('href');
        if (!href) {
          continue;
        }
        const pagePath = url.parse(url.resolve(BASE, href)).path;
        if (!crawl.visited.has(pagePath)) {
          getBaidu(null, pagePath, crawl);
        }
      }

      console.log(crawl.results.length);
    });
  });

  // Without a listener a network failure would be thrown as an unhandled
  // 'error' event and terminate the process.
  req.on('error', err => {
    console.error(`getBaidu: request for ${requestPath} failed: ${err.message}`);
  });
  req.setHeader('User-Agent', userAgent);
  req.end();
}
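// NOTE(review): the two notices below appear to be hosting-page text, not part of the script; kept as comments so the file parses.
// The content here may be unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features.
// If you confirm that the content contains no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, and nothing that violates national laws and regulations, you may click Submit to appeal and it will be handled as soon as possible.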