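# NOTE: the next three lines are Gitee web-page notices that were captured
# together with the file; they are not part of the program.
# 代码拉取完成,页面将自动刷新
# 同步操作将从 張廣勤 势由心生/vdcode5 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
# 确定后同步将在后台操作,完成时将刷新页面,请耐心等待。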
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import time
import math
from PIL import Image
from PIL import ImageGrab
import requests
from hashlib import md5
from bs4 import BeautifulSoup
# --- Browser setup, login form and captcha capture ---------------------------
opt = Options()
# Turn off Chrome's "controlled by automated software" notice.
opt.add_experimental_option('excludeSwitches', ['enable-automation'])
# Start maximized.
opt.add_argument("--start-maximized")
# Avoid the "private connection" certificate warning page.
opt.add_argument('ignore-certificate-errors')
bro = webdriver.Chrome(r'C:\Users\hsk\Downloads\chromedriver_win32 (1)/chromedriver.exe', options=opt)
bro.get('https://10.6.133.106/ydata/login.do')
bro.implicitly_wait(10)

# Locate the login form fields with find_element(By.…), the same API the rest
# of this script uses (the find_element_by_* helpers are deprecated).
username = bro.find_element(By.NAME, "userId")
password = bro.find_element(By.NAME, "passwd")
vdcode = bro.find_element(By.NAME, "imgtvalidate")
vdcode_img = bro.find_element(By.XPATH, '//*[@id="img"]')

# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file.
username.send_keys('140700zgq')
password.send_keys('lwzb*123!')

# Build the screen rectangle of the captcha image from the element geometry.
location = vdcode_img.location
size = vdcode_img.size
print(location, size)
# The y coordinates are shifted down by 70 px — presumably the height of the
# browser chrome above the page, because ImageGrab captures the screen rather
# than the page. TODO confirm on the target machine.
box = (
    location['x'],
    location['y'] + 70,
    location['x'] + size['width'],
    location['y'] + 70 + size['height'],
)
print(box)

# Full-page screenshot is saved to disk; the captcha itself is taken with a
# screen grab of `box` and saved to `vdcode_path`.
bro.save_screenshot('c:\\users\\hsk\\desktop\\vdcode_3.png')
vdcode_path = 'c:\\users\\hsk\\desktop\\vdcode_4.png'
frame = ImageGrab.grab(box)
frame.save(vdcode_path)
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition endpoints.

    Args:
        username: Account name sent as the ``user`` form field.
        password: Plain-text password; only its MD5 hex digest is stored and
            sent (as the ``pass2`` form field).
        soft_id: Software ID sent as the ``softid`` form field.
        timeout: Seconds to wait for each HTTP request before ``requests``
            raises a timeout error. Without a timeout a stalled server would
            block the caller indefinitely.
    """

    def __init__(self, username, password, soft_id, timeout=60):
        self.username = username
        # Keep only the MD5 hex digest of the UTF-8 encoded password.
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Form fields included in every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        Args:
            im: Image bytes.
            codetype: Question type; see http://www.chaojiying.com/price.html

        Returns:
            The response body parsed as JSON.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly answered image and return the decoded JSON response.

        Args:
            im_id: Image ID of the question being reported.

        Returns:
            The response body parsed as JSON.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
# --- Recognize the captcha and submit the login form -------------------------
# NOTE(review): service credentials are hard-coded in source; consider moving
# them out of the script.
chaojiying = Chaojiying_Client('zgq001', 'zgq071726', '924951')
# Read the saved captcha image; the context manager closes the file handle.
with open(vdcode_path, 'rb') as captcha_file:
    im = captcha_file.read()
# 1004 is the codetype passed to the recognition service; the recognized text
# is read from the 'pic_str' field of the JSON response.
result = chaojiying.PostPic(im, 1004)['pic_str']
print(result)
vdcode.send_keys(result)
# The login control is the first <area> of the page's image map.
login = bro.find_element(By.XPATH, '/html/body/map/area[1]')
login.click()
# --- Navigate to the monthly report ------------------------------------------
# Left menu frame: click menu item "item6", then the first link under "M04".
bro.switch_to.frame('olp_leftFrame')
bro.find_element(By.XPATH, '//*[@id="item6"]').click()
bro.find_element(By.XPATH, '//*[@id="M04"]/tbody/tr[1]/td[2]/a').click()
# Back to the top-level document before entering another frame.
bro.switch_to.default_content()

# Main frame: choose the period type and the period, then open the report.
bro.switch_to.frame('olp_mainFrame')
# Select "monthly report" in the period-type dropdown.
select = Select(bro.find_element(By.ID, "periodType"))
select.select_by_visible_text("月报")
# Select November 2021 in the period dropdown.
select = Select(bro.find_element(By.ID, "period"))
select.select_by_visible_text("2021年11月")
# Click the service-industry monthly report link (row 23, column 4 of #tables).
bro.find_element(By.XPATH, '//*[@id="tables"]/tbody/tr[23]/td[4]/a').click()
# Leave the frame.
bro.switch_to.default_content()
# --- Switch to the query window and pick the query template ------------------
# Visit every open window in turn; stop on the first one whose title contains
# the expected text. The driver stays on whichever window was switched to last.
for handle in bro.window_handles:
    bro.switch_to.window(handle)
    if '基层数据查询' not in bro.title:
        continue
    break
# Show the title of the window the driver ended up on.
print(bro.title)

bro.switch_to.frame('workFrame')
# Choose the query template by its visible label.
template_dropdown = bro.find_element(By.ID, "querytemp")
select = Select(template_dropdown)
select.select_by_visible_text("0规上服务业财务月报")
# --- Header row: capture, write to CSV, then normalize the column names ------
# Header cells of the result table.
bt = bro.find_elements(By.CSS_SELECTOR, 'tr.tab-bt>td')
for btx in bt:
    print(btx.text)

# Write the header cells as one line, each cell followed by a comma.
with open(r'.\data1.csv', 'w', encoding='utf-8') as f:
    f.writelines(cell.text + ',' for cell in bt)
    f.write('\n')

# Read the file back as a list of lines so the first line can be edited.
with open(r'.\data1.csv', 'r', encoding='utf-8') as f:
    lt = f.readlines()
print(lt[0])
print(type(lt[0]))

# Replacements applied to the header line, in this exact order: drop the
# blank cell, replace the '操作' column with two columns, and shorten the
# long column titles.
header_fixes = (
    (', ,', ','),
    ('操作,', '查看,往期,'),
    (r'[201-1] 行业代码(GB/T4754-2017)', '行业代码'),
    (r'1-本月;营业收入;千元', '本期营收'),
    (r'上年同期;营业收入;千元', '上期营收'),
)
for old_text, new_text in header_fixes:
    lt[0] = lt[0].replace(old_text, new_text)
print(lt[0])
print('#' * 30)
print(lt)

# Overwrite the file with the edited lines.
with open(r'.\data1.csv', 'w', encoding='utf-8') as f:
    f.writelines(lt)
# --- Scrape every result page and append the rows to the CSV -----------------
# Everything runs inside try/finally so the browser is closed even when a
# lookup or file write fails part-way through.
try:
    # Data rows of the page currently shown (30 rows per page), for inspection.
    datas = bro.find_elements(By.CSS_SELECTOR, 'tr.tab-bt~tr')
    for data in datas:
        print(data.text)

    # Print the page count shown by the site, then compute it from the record
    # count at 30 rows per page.
    total_page = bro.find_element(By.ID, 'total_page')
    print(total_page.text)
    total_num = int(bro.find_element(By.ID, 'total_num').text)
    total_page = math.ceil(total_num / 30)
    print(total_page)

    # Common XPath prefix of the pager cell holding the page-number input and
    # the link clicked to jump to that page.
    pager_xpath = '/html/body/form/table/tbody/tr[2]/td[2]/table[2]/tbody/tr/td'

    # NOTE(review): cells are joined with ',' and not quoted, so a cell that
    # itself contains a comma will shift columns. Kept as-is to preserve the
    # existing file format.
    with open(r'.\data1.csv', 'a', encoding='utf-8') as f:
        for page in range(1, total_page + 1):
            page_input = bro.find_element(By.XPATH, pager_xpath + '/input')
            # Two backspaces erase the previous page number before typing the
            # new one.
            # NOTE(review): this removes at most two characters — confirm it
            # is enough if the page count can reach three digits.
            page_input.send_keys('\b\b')
            page_input.send_keys(str(page))
            bro.find_element(By.XPATH, pager_xpath + '/a[2]').click()
            # NOTE(review): there is no explicit wait after the click; verify
            # that the rows read below belong to the newly loaded page.
            trs = bro.find_elements(By.CSS_SELECTOR, 'tr.tab-bt~tr')
            for tr in trs:
                print(tr.text)
            # One CSV line per table row, each cell followed by a comma.
            for tr in trs:
                tds = tr.find_elements(By.TAG_NAME, 'td')
                for td in tds:
                    f.write(td.text + ',')
                f.write('\n')

    bro.switch_to.default_content()
finally:
    bro.quit()
# The triple-quoted string below is a bare string literal, so nothing inside
# it is executed. It holds a disabled alternative that sets the page size to
# 170 in a separate window and then writes every cell of table#selTable to
# data.csv in a single pass.
'''
#显示全部170条记录
#记下当前窗口
mainWindow = bro.current_window_handle
#点击每页30条--切换到小窗口--输入170--确定--回到主窗口
bro.find_element(By.XPATH,'/html/body/form/table/tbody/tr[2]/td[2]/table[2]/tbody/tr/td/a[1]').click()
bro.switch_to.default_content()
for handle in bro.window_handles:
# 先切换到该窗口
bro.switch_to.window(handle)
# 得到该窗口的标题栏字符串,判断是不是我们要操作的那个窗口
#print(bro.title)
if ' ' in bro.title:
# 如果是,那么这时候WebDriver对象就是对应的该该窗口,正好,跳出循环,
break
bro.find_element(By.XPATH,'//*[@id="pageSize"]').send_keys('170')
bro.find_element(By.XPATH,'/html/body/div/span[1]/input').click()
bro.switch_to.window(mainWindow)
for handle in bro.window_handles:
# 先切换到该窗口
bro.switch_to.window(handle)
# 得到该窗口的标题栏字符串,判断是不是我们要操作的那个窗口
print(bro.title)
if '基层数据查询' in bro.title:
# 如果是,那么这时候WebDriver对象就是对应的该该窗口,正好,跳出循环,
break
bro.switch_to.frame('workFrame')
#单数element
table=bro.find_element(By.CSS_SELECTOR,'table#selTable')
trs=table.find_elements(By.TAG_NAME,'tr')
for tr in trs:
tds=tr.find_elements(By.TAG_NAME,'td')
for td in tds:
print(td.text)
with open(r'.\data.csv','w',encoding='utf-8') as f:
for tr in trs:
tds=tr.find_elements(By.TAG_NAME,'td')
for td in tds:
f.write(td.text+',')
f.write('\n')
bro.switch_to.default_content()
'''
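# NOTE: the next two lines are a Gitee content-moderation notice that was
# captured together with the file; they are not part of the program.
# 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
# 如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。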