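# NOTE: hosting-page banner captured with this file, not part of the program: "Code pull complete; the page will refresh automatically."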
# coding=utf-8
"""
@header Doc_forma_check.py
@abstract
@MyBlog: http://www.kuture.com.cn
@author Created by Kuture on 2020/10/10
@version 1.0.0 2020/10/10 Creation()
@Copyright © 2020年 Mr.Li All rights reserved
"""
import os
import docx
import shutil
import zipfile
import tempfile
from lxml import etree
from docx import Document
from aktime.TimeCalcu import time_calcu, time_statistic
# docx file preprocessing class
class DocPreprocessing(object):
    """Load a .docx file and move its body between Word and raw XML form.

    The document is opened once in the constructor with ``docx.Document``
    and kept on the instance for the other methods to use.
    """

    @time_calcu
    def __init__(self, file_path):
        """Open the Word document at ``file_path``.

        :param file_path: path of the .docx file, passed to ``Document``.
        """
        self._file_path = file_path
        self._data_document = Document(file_path)

    # file read and display
    @time_calcu
    def doc_read(self):
        """Print every paragraph of the document, prefixed with its index.

        Printing is best effort: a paragraph that cannot be printed is
        reported by index and exception type, and the listing continues.
        """
        for index, para in enumerate(self._data_document.paragraphs):
            try:
                print('{} ----- Text:{}'.format(index, para.text))
            except Exception as error:
                # Keep going on failure, but say which paragraph failed and
                # why instead of emitting an anonymous blank line. Only the
                # exception type name is printed so this line cannot fail
                # for the same reason the paragraph text did.
                print('{} ----- Text unavailable ({})'.format(
                    index, type(error).__name__))

    # word transform to xml
    @time_calcu
    def trans_word_to_xml(self, xml_path='00.xml'):
        """Write the document body as XML text to ``xml_path``.

        :param xml_path: destination file; defaults to ``'00.xml'`` in the
            current working directory, the path this method always used.
        """
        body_xml_str = self._data_document._body._element.xml
        body_xml = etree.fromstring(body_xml_str)
        # Explicit UTF-8 so non-ASCII document text is written the same way
        # on every platform instead of depending on the locale encoding.
        with open(xml_path, 'w', encoding='utf-8') as sf:
            # etree.tounicode() is deprecated; this is its documented
            # replacement and yields the same text.
            sf.write(etree.tostring(body_xml, encoding='unicode'))

    # xml transform to word file
    def trans_xml_to_word(self, xml_path, save_path):
        """Read the XML file at ``xml_path`` and print its content.

        :param xml_path: path of the XML file to read (decoded as UTF-8,
            matching what ``trans_word_to_xml`` writes).
        :param save_path: intended output .docx path. NOTE(review): it is
            not used yet; no Word file is produced by this method.
        """
        with open(xml_path, 'r', encoding='utf-8') as rf:
            xml_content = rf.read()
        print(xml_content)
if __name__ == '__main__':
    # Script entry point: open the sample document, then print the XML file
    # named below through the preprocessing class.
    source_docx = 'Resource/202034201534141597908854460.docx'
    exported_xml, rebuilt_docx = './00.xml', './00.docx'

    preprocessor = DocPreprocessing(source_docx)

    # Optional steps, currently disabled:
    #   preprocessor.doc_read()
    #   preprocessor.trans_word_to_xml()
    preprocessor.trans_xml_to_word(exported_xml, rebuilt_docx)
    # Also disabled: time_statistic()
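# NOTE: hosting-page notice captured with this file, not part of the program: "This section may contain content unsuitable for display, so the page does not show it. You can review and revise it using the relevant editing features."
# "If you confirm that the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, or content that violates national laws and regulations, you can click Submit to appeal and we will handle it as soon as possible."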