代码拉取完成,页面将自动刷新
import urllib.request
import mechanize
from bs4 import BeautifulSoup
def _field_text(page_soup, itemprop):
    """Return the stripped text of the first tag carrying *itemprop*.

    Falls back to ``'N/A'`` when the page has no such tag, so a single
    missing field does not abort the whole report with an AttributeError.
    """
    tag = page_soup.find(itemprop=itemprop)
    return tag.get_text().strip() if tag is not None else 'N/A'


# Create a browser that skips robots.txt and sends a fixed User-agent header.
browser = mechanize.Browser()
browser.set_handle_robots(False)
browser.addheaders = [
    ('User-agent', 'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98;)'),
]

movie_title = input("Enter movie title: ")
movie_types = (
    'feature', 'tv_movie', 'tv_series', 'tv_episode', 'tv_special',
    'tv_miniseries', 'documentary', 'video_game', 'short', 'video',
    'tv_short',
)

# Open the search page and fill in the title field of its second form (nr=1).
browser.open('http://www.imdb.com/search/title')
browser.select_form(nr=1)
browser['title'] = movie_title

# Tick every title-type checkbox; the control lookup does not change between
# iterations, so it is done once up front.
type_checkboxes = browser.find_control(type='checkbox', nr=0)
for m_type in movie_types:
    type_checkboxes.get(m_type).selected = True

# Submit the search and parse the result page, always releasing the response.
fd = browser.submit()
try:
    soup = BeautifulSoup(fd.read(), 'html5lib')
finally:
    fd.close()

# Only the first result header is used (the listing moved from td to h3 tags).
header = soup.find('h3', {'class': 'lister-item-header'})
link = header.find('a') if header is not None else None

if link is None or not link.get('href'):
    # Nothing usable came back; say so instead of exiting silently.
    print("No results found for: " + movie_title)
else:
    hht = 'http://www.imdb.com' + link.attrs['href']
    print(hht)

    # Fetch the detail page; the context manager closes the connection.
    with urllib.request.urlopen(hht) as page:
        soup2 = BeautifulSoup(page.read(), 'html.parser')

    # Each entry pairs the printed label with the itemprop attribute to read.
    report_fields = (
        ("Title", 'name'),
        ("Duration", 'duration'),
        ("Director", 'director'),
        ("Genre", 'genre'),
        ("IMDB rating", 'ratingValue'),
        ("Summary", 'description'),
    )
    for label, itemprop in report_fields:
        print(label + ": " + _field_text(soup2, itemprop))
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。