加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
fmt_md.py 7.38 KB
一键复制 编辑 原始数据 按行查看 历史
#!/usr/bin/env python3
# @Date : 2020-05-23
# @Author : Bright Li (brt2@qq.com)
# @Link : https://gitee.com/brt2
# @Version : 0.0.2
import os
import re
import datetime
import glob
from collections import defaultdict
from doc_parser import MarkdownParser, NullMarkdownFile
from util.imgfmt import png2jpg, resize
def png2jpg_for_md(path_png):
    """Convert a PNG used by a Markdown file to JPEG, or tag it as "keep PNG".

    The conversion is only worthwhile when the JPEG is clearly smaller. When
    ``png2jpg`` hands back the very same path (treated here as "no conversion
    happened"), the PNG is renamed with a ``keepng_`` prefix so that later runs
    skip it instead of attempting the conversion again.

    Args:
        path_png: path of the PNG image.

    Returns:
        The path the Markdown document should reference: the unchanged input
        for an already-prefixed file, the path returned by ``png2jpg``, or the
        renamed ``keepng_``-prefixed PNG.
    """
    keep_prefix = "keepng_"
    folder, filename = os.path.split(path_png)

    # Already tagged by an earlier run: nothing to do.
    if filename.startswith(keep_prefix):
        return path_png

    converted = png2jpg(path_png, 85)
    if converted != path_png:
        return converted

    # No new file was produced; tag the PNG so it is not retried next time.
    kept_png = os.path.join(folder, keep_prefix + filename)
    os.rename(path_png, kept_png)
    return kept_png
class MarkdownFormatter(MarkdownParser):
    """Formatter for a Markdown document loaded through ``MarkdownParser``.

    It inserts a ``[TOC]`` marker, rewrites the metadata header, renumbers the
    headings and post-processes the referenced images. All text edits go
    through the parser's ``get_text`` / ``set_text`` / ``insert_text`` /
    ``modify_text`` / ``pop_text`` helpers; ``overwrite`` writes the result
    back to ``self.file_path``.
    """

    def format(self, resize_imgs=False):
        """Run every formatting step on the currently loaded document.

        Order: TOC marker, H1 removal, metadata header, heading serials,
        image download (after an interactive prompt), PNG -> JPEG conversion,
        optional resize, and finally the (currently empty) big-image hook.

        Args:
            resize_imgs: when true, also call ``resize_high_resolution``.
        """
        checks = self.check_list

        if not checks["find_TOC"]:
            # Put the marker at the first H2, or at the very top without one.
            toc_index = checks["index_H2"]
            if toc_index is None:
                toc_index = 0
            self.insert_text(toc_index, "[TOC]\n\n")

        # NOTE(review): this truthiness test also skips an H1 located at
        # index 0; update_meta() then replaces line 0 when no metadata block
        # exists. The two appear to rely on each other -- confirm against
        # MarkdownParser before changing either one.
        if checks["index_H1"]:
            self.pop_text(checks["index_H1"])

        self.update_meta()
        self._update_serial_num()

        # Image handling. Remote images are fetched only if the user agrees
        # (anything except "n"/"N" counts as yes).
        if self.get_images("http") and input("是否尝试下载超链接图片?[Y/n]: ").lower() != "n":
            self.download_img()

        # PNG -> JPEG conversion is always attempted.
        self.convert_png2jpg()

        # Downscale high-resolution images only on request.
        if resize_imgs:
            self.resize_high_resolution()

        # Hook for size-based resize/compression of images (no-op for now).
        self.compress_bigimg()

    def overwrite(self):
        """Write the current text back to ``self.file_path`` (UTF-8) and report it."""
        with open(self.file_path, "w", encoding="utf8") as fp:
            fp.writelines(self.get_text())
        print(f"Markdown文件已保存【{self.file_path}】")

    def _make_meta_line(self):
        """Build the metadata header as one multi-line string.

        Side effect: ``self.metadata["date"]`` is set to today's date.

        Returns:
            The ``<!-- +++ ... +++ -->`` block, terminated by a newline.

        Raises:
            KeyError: if ``title``, ``description`` or ``weight`` is missing
                from ``self.metadata``.
        """
        def list_as_str(data: list):
            # str(data) would emit single quotes, which the header format
            # does not accept; always emit double-quoted items.
            return '["' + '","'.join(data) + '"]' if data else "[]"

        meta = self.metadata
        meta["date"] = str(datetime.date.today())

        # Per the original author's note the values are read back with
        # eval(), hence every string must be wrapped in double quotes.
        header_lines = [
            "<!--",
            "+++",
            f'title = "{meta["title"]}"',
            f'description = "{meta["description"]}"',
            f'date = "{meta["date"]}"',
            f'weight = {meta["weight"]}',
            f'tags = {list_as_str(meta.get("tags"))}',
            f'categories = {list_as_str(meta.get("categories"))}',
            f'keywords = {list_as_str(meta.get("keywords"))}',
            "+++ -->",
            "",  # yields the trailing newline
        ]
        return "\n".join(header_lines)

    def update_meta(self):
        """Refresh the metadata header at the top of the document.

        With an existing block (``self.meta_range[0]`` is not ``None``) the
        old block is cut out, the new one is inserted at index 0 and
        ``self.meta_range`` is reset to ``[None, None]``. Otherwise line 0 is
        overwritten with the new block.
        """
        meta_line = self._make_meta_line()
        meta_start, meta_end = self.meta_range

        if meta_start is None:
            # NOTE(review): this replaces line 0 rather than inserting before
            # it -- presumably line 0 is the H1 title at this point; verify.
            self.modify_text(0, meta_line)
            return

        # Remove the old block (meta_end is inclusive), then add the new one.
        text_lines = self.get_text()
        self.set_text(text_lines[:meta_start] + text_lines[meta_end + 1:])
        self.insert_text(0, meta_line)
        self.meta_range = [None, None]

    def _update_serial_num(self):
        """Renumber H2-H4 headings with up to three-level serials (``1.2.4.``).

        An existing serial at the start of a heading is replaced. Lines inside
        fenced code blocks are left alone, so comment lines such as
        ``## setup`` in a shell or Python snippet are no longer mistaken for
        headings.
        """
        pattern_headline = re.compile(r"(#+) +(\d+\.\S*)? *(.*)")
        # Fence marker: at most three leading spaces, then three or more
        # backticks or tildes.
        pattern_fence = re.compile(r" {0,3}(`{3,}|~{3,})")

        counters = [0, 0, 0]  # running numbers for H2, H3 and H4
        open_fence = None     # marker that opened the current code block

        for index, line in enumerate(self.get_text()):
            fence = pattern_fence.match(line)
            if fence:
                marker = fence.group(1)
                rest = line[fence.end():]
                if open_fence is None:
                    # A backtick fence cannot carry backticks in its info
                    # string; such a line is inline code, not a fence.
                    if not (marker[0] == "`" and "`" in rest):
                        open_fence = marker
                elif (marker[0] == open_fence[0]
                        and len(marker) >= len(open_fence)
                        and not rest.strip()):
                    # The closing fence uses the same character, is at least
                    # as long as the opener and has no trailing text.
                    open_fence = None
                continue
            if open_fence is not None:
                continue

            # Only "## ", "### " and "#### " headings are numbered.
            hashes = len(line) - len(line.lstrip("#"))
            if not 2 <= hashes <= 4 or line[hashes:hashes + 1] != " ":
                continue

            depth = hashes - 2
            counters[depth] += 1
            counters[depth + 1:] = [0] * (2 - depth)  # reset deeper levels

            # The serial stops at the first level that is still zero.
            serial_num = ""
            for number in counters:
                if not number:
                    break
                serial_num += f"{number}."

            # NOTE(review): "(.*)" stops before a trailing newline, so the
            # rebuilt heading carries none -- confirm modify_text() copes.
            prefix, _, text = pattern_headline.match(line).groups()
            self.modify_text(index, f"{prefix} {serial_num} {text}")

    def download_img(self):
        """Download the images referenced via http into a per-document folder.

        The folder is ``self.file_path`` without its extension. Each image
        goes through ``process_images`` with a callback returning the
        downloaded file's path relative to the document's directory.
        """
        from util.imgfmt import download_src

        dict_images = self.get_images("http")

        # Create the image folder next to the document when missing.
        dir_img, _ = os.path.splitext(self.file_path)
        if not os.path.exists(dir_img):
            os.makedirs(dir_img)

        def fetch(url):
            local_path = download_src(url, dir_img)
            return os.path.relpath(local_path, os.path.dirname(self.file_path))

        self.process_images(dict_images, fetch)

    def convert_png2jpg(self):
        """Run every PNG image through ``png2jpg_for_md``.

        The callback returns the resulting path relative to the document's
        directory. This method does not delete the source PNG files itself.
        """
        dict_images = self.get_images("png")

        def convert(path_png):
            new_path = png2jpg_for_md(path_png)
            return os.path.relpath(new_path, os.path.dirname(self.file_path))

        self.process_images(dict_images, convert)

    def compress_jpg(self):
        """Placeholder: JPEG compression is not implemented."""

    def compress_bigimg(self):
        """Placeholder: size-based resize/compression is not implemented."""

    def resize_high_resolution(self, save_as_jpg=True):
        """Pass all PNG and JPG images through ``resize``.

        Args:
            save_as_jpg: forwarded unchanged to ``resize``.
        """
        dict_images_png = self.get_images("png")
        dict_images_jpg = self.get_images("jpg")

        def shrink(url):
            return resize(url, ratio=0.6, min_size=10240,
                          max_shape=[680, 680], save_as_jpg=save_as_jpg)

        # On a key collision the JPG entry wins, as in a plain dict merge.
        self.process_images({**dict_images_png, **dict_images_jpg}, shrink)
#####################################################################
def getopt():
    """Parse the command-line options of this script.

    Returns:
        The ``argparse.Namespace``; ``path`` holds the value of ``-p/--path``
        or ``None`` when the option was not given.
    """
    import argparse

    # The first positional parameter of ArgumentParser is ``prog``; the title
    # must be passed as ``description`` so the usage line keeps the real
    # program name and the help text shows the description.
    parser = argparse.ArgumentParser(description="格式化mkdocs文档")
    parser.add_argument("-p", "--path", action="store", help="解析文件路径,可以是文件或目录")
    return parser.parse_args()
def format_one_doc(fmt, path_file):
    """Load one Markdown file into ``fmt`` and format it without resizing images.

    Nothing is written back to disk here; saving is left to the caller.

    Args:
        fmt: formatter object providing ``load_file`` and ``format``.
        path_file: path of the Markdown file to process.
    """
    fmt.load_file(path_file)
    fmt.format(resize_imgs=False)
def format_dir(fmt, path_dir):
    """Format and save every ``*.md`` file below ``path_dir`` (recursively).

    Each successfully formatted document is written back at once: the shared
    formatter is reloaded for the next file, so postponing the save would lose
    every edit but the last one, even though formatting may already have
    renamed or converted image files on disk.

    Files that raise ``NullMarkdownFile`` are skipped and listed at the end.

    Args:
        fmt: formatter object shared by all files.
        path_dir: root directory to scan.
    """
    err_files = defaultdict(list)

    for path_file in glob.glob(f"{path_dir}/**/*.md", recursive=True):
        try:
            format_one_doc(fmt, path_file)
        except NullMarkdownFile:
            err_files["NullMarkdownFile"].append(path_file)
        else:
            # Persist this document before the formatter loads the next one.
            fmt.overwrite()

    # Report the skipped files, grouped by error type.
    for key, failed_files in err_files.items():
        print(f">> 错误文件类型【{key}】")
        print(f" 文件列表: {failed_files}")
def format_anything(fmt, path):
    """Format ``path`` with ``fmt``, whether it is a directory or a single file.

    Directories are handed to ``format_dir``; anything else is treated as one
    Markdown file and handed to ``format_one_doc``.
    """
    handler = format_dir if os.path.isdir(path) else format_one_doc
    handler(fmt, path)
if __name__ == "__main__":
    args = getopt()
    fmt = MarkdownFormatter()

    if args.path:
        # Non-interactive mode: format the given file/directory and save.
        # Without the save, image files renamed or converted during
        # formatting would no longer match the links in the document on disk.
        format_anything(fmt, args.path)
        fmt.overwrite()
    else:
        # Interactive mode: keep asking for paths until the user enters "q".
        path = input("\n请输入待处理文件path(支持直接拖拽): ")
        while True:
            # Dragging a file into a terminal may wrap the path in quotes.
            path = path.strip().strip('"')
            if os.path.exists(path):
                format_anything(fmt, path)
                fmt.overwrite()
            else:
                print(f"Error: File [{path}] NOT found.")

            path = input("继续输入path,按[Q]退出: ")
            # Tolerate stray whitespace around the quit key.
            if path.strip().lower() == "q":
                break
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化