加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
Doc_forma_check.py 1.69 KB
一键复制 编辑 原始数据 按行查看 历史
AustinKuture 提交于 2020-10-10 18:24 . 项目初始化
# coding=utf-8
"""
@header Doc_forma_check.py
@abstract
@MyBlog: http://www.kuture.com.cn
@author Created by Kuture on 2020/10/10
@version 1.0.0 2020/10/10 Creation()
@Copyright © 2020年 Mr.Li All rights reserved
"""
import os
import docx
import shutil
import zipfile
import tempfile
from lxml import etree
from docx import Document
from aktime.TimeCalcu import time_calcu, time_statistic
# docx file preprocessing class
class DocPreprocessing(object):
    """Load a .docx file and offer helpers to dump its text and body XML.

    The document is parsed once, in the constructor, via ``Document``;
    ``doc_read`` and ``trans_word_to_xml`` work on that parsed object, while
    ``trans_xml_to_word`` only reads an XML file from disk.
    """

    @time_calcu
    def __init__(self, file_path):
        """Parse the document stored at ``file_path``.

        :param file_path: path of the .docx file, passed straight to
            ``Document``.
        """
        self._file_path = file_path
        self._data_document = Document(file_path)

    # file read and display
    @time_calcu
    def doc_read(self):
        """Print every paragraph as ``<index> ----- Text:<text>``.

        Printing is best effort: if a paragraph cannot be printed, the
        failure is reported on its own line (index plus exception type) and
        the loop moves on to the next paragraph.
        """
        for index, para in enumerate(self._data_document.paragraphs):
            try:
                print('{} ----- Text:{}'.format(index, para.text))
            except Exception as error:
                # Only the exception type is shown: it is always printable,
                # whereas the message may embed the offending characters.
                print('{} ----- Text: <skipped: {}>'.format(
                    index, type(error).__name__))

    # word transform to xml
    @time_calcu
    def trans_word_to_xml(self, xml_path='00.xml'):
        """Write the document body's XML to ``xml_path``.

        :param xml_path: destination file; defaults to ``'00.xml'`` in the
            current working directory. An existing file is overwritten.
        """
        # NOTE(review): relies on python-docx private attributes
        # (_body._element); confirm they exist in the installed version.
        body_xml_str = self._data_document._body._element.xml
        body_xml = etree.fromstring(body_xml_str)
        # Explicit UTF-8 so non-ASCII document text does not depend on the
        # platform's default locale encoding.
        with open(xml_path, 'w', encoding='utf-8') as sf:
            sf.write(etree.tostring(body_xml, encoding='unicode'))

    # xml transform to word file
    def trans_xml_to_word(self, xml_path, save_path):
        """Read the XML file at ``xml_path`` and print its content.

        The conversion back to a Word file is not implemented: ``save_path``
        is accepted but currently unused.

        :param xml_path: XML file to read; decoded as UTF-8, matching what
            ``trans_word_to_xml`` writes.
        :param save_path: intended output .docx path (unused for now).
        """
        with open(xml_path, 'r', encoding='utf-8') as rf:
            xml_content = rf.read()
        print(xml_content)
if __name__ == '__main__':
    # Demo driver: load the sample document, then echo a previously exported
    # XML dump. The other steps are kept below, disabled.
    source_docx = 'Resource/202034201534141597908854460.docx'
    exported_xml, rebuilt_docx = './00.xml', './00.docx'

    processor = DocPreprocessing(source_docx)
    # processor.doc_read()
    # processor.trans_word_to_xml()
    processor.trans_xml_to_word(exported_xml, rebuilt_docx)
    # time_statistic()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化