master

分支 (1)

管理

管理

master

docformatcheck
/
Doc_forma_check.py

# coding=utf-8

"""
    @header Doc_forma_check.py
    @abstract Helpers for loading a .docx file, printing its paragraphs and dumping its body XML.

    @MyBlog: http://www.kuture.com.cn
    @author  Created by Kuture on 2020/10/10
    @version 1.0.0 2020/10/10 Creation()

    @Copyright © 2020年 Mr.Li All rights reserved
"""
import os
import docx
import shutil
import zipfile
import tempfile
from lxml import etree
from docx import Document
from aktime.TimeCalcu import time_calcu, time_statistic


# docx file preprocessing class
class DocPreprocessing(object):
    """Load a .docx file once and offer simple inspection/export helpers.

    The document is opened in the constructor via ``docx.Document`` and kept
    on the instance; the other methods read from that loaded document or
    from files on disk.
    """

    @time_calcu
    def __init__(self, file_path):
        """Open the Word document at *file_path*.

        Args:
            file_path: Path passed unchanged to ``Document``.
        """
        self._file_path = file_path
        self._data_document = Document(file_path)

    # file read and display
    @time_calcu
    def doc_read(self):
        """Print every paragraph of the document, prefixed with its index.

        If stdout cannot encode a paragraph, the line is printed with the
        offending characters backslash-escaped instead of being dropped.
        """
        for index, para in enumerate(self._data_document.paragraphs):
            line = '{} ----- Text:{}'.format(index, para.text)
            try:
                print(line)
            except UnicodeEncodeError:
                # Console encoding cannot represent the text: fall back to an
                # ASCII-safe rendering so the paragraph is still visible.
                print(line.encode('ascii', 'backslashreplace').decode('ascii'))

    # word transform to xml
    @time_calcu
    def trans_word_to_xml(self, xml_path='00.xml'):
        """Write the document body's XML to *xml_path*.

        Args:
            xml_path: Destination file; defaults to ``'00.xml'`` in the
                current working directory (the previously hard-coded value).
        """
        # NOTE(review): relies on python-docx private attributes
        # (_body / _element); confirm they exist in the pinned version.
        body_xml_str = self._data_document._body._element.xml
        body_xml = etree.fromstring(body_xml_str)

        # Explicit UTF-8 so non-ASCII document text does not depend on the
        # platform's default locale encoding.
        with open(xml_path, 'w', encoding='utf-8') as sf:
            sf.write(etree.tostring(body_xml, encoding='unicode'))

    # xml transform to word file
    def trans_xml_to_word(self, xml_path, save_path):
        """Read the XML file at *xml_path* and print its content.

        Args:
            xml_path: Path of the XML file to read (decoded as UTF-8).
            save_path: Currently unused; no .docx file is written yet.
        """
        with open(xml_path, 'r', encoding='utf-8') as rf:
            xml_content = rf.read()
        print(xml_content)


if __name__ == '__main__':

    # Paths used by this manual run: the input document, the XML dump to
    # read back, and the (currently unused) target .docx path.
    source_docx = 'Resource/202034201534141597908854460.docx'
    dumped_xml = './00.xml'
    output_docx = './00.docx'

    preprocessor = DocPreprocessing(source_docx)

    # Optional steps, disabled for this run:
    #   preprocessor.doc_read()
    #   preprocessor.trans_word_to_xml()
    preprocessor.trans_xml_to_word(dumped_xml, output_docx)

    # Optional timing summary, disabled for this run:
    #   time_statistic()