From adf54398da2878f006492fbce2c7f269fd3683e1 Mon Sep 17 00:00:00 2001 From: fuchao Date: Wed, 22 May 2024 18:22:43 +0800 Subject: [PATCH] =?UTF-8?q?api=20dump=E7=BB=9F=E8=AE=A1=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=E6=94=AF=E6=8C=81md5=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- troubleshooter/docs/api/migrator/api_dump.md | 2 +- .../migrator/api_dump/api_dump_compare.py | 6 +++--- .../migrator/api_dump/apis_match/apis_match.py | 2 +- .../migrator/api_dump/ms_dump/hooks.py | 15 ++++++++++----- .../migrator/api_dump/pt_dump/dump/dump.py | 15 ++++++++++----- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/troubleshooter/docs/api/migrator/api_dump.md b/troubleshooter/docs/api/migrator/api_dump.md index 42bce56..f505e14 100644 --- a/troubleshooter/docs/api/migrator/api_dump.md +++ b/troubleshooter/docs/api/migrator/api_dump.md @@ -75,7 +75,7 @@ output_path # 输出目录 - `api_dump_info.pkl`文件为网络在dump时按照API的执行顺序保存的信息,文件项格式如下: ``` - [数据名称,保留字段,保留字段,数据类型,数据shape,[最大值,最小值,均值]] + [数据名称,保留字段,保留字段,数据类型,数据shape,[最大值,最小值,均值],md5值] ``` 当数据为bool类型或关闭统计信息保存时,最大值/最小值/均值会显示为`NAN`。 diff --git a/troubleshooter/troubleshooter/migrator/api_dump/api_dump_compare.py b/troubleshooter/troubleshooter/migrator/api_dump/api_dump_compare.py index d059b1b..f004270 100644 --- a/troubleshooter/troubleshooter/migrator/api_dump/api_dump_compare.py +++ b/troubleshooter/troubleshooter/migrator/api_dump/api_dump_compare.py @@ -70,7 +70,7 @@ def _get_npy_list(apis, io, file_dict): def _get_npy_shape_map(pkl_path): def _read_line(line): - prefix, dump_step, _, data_type, data_shape, data_summary = line + prefix, dump_step, _, data_type, data_shape, data_summary, md5_nume = line return {prefix: data_shape} ret = {} @@ -481,7 +481,7 @@ def print_mindtorch_summary_result( def compare_mindtorch_summary(origin_pkl_path, target_pkl_path, name_map_list, frame_names, **print_kwargs): def get_api_info(pkl_path): def _read_line(line): - prefix, 
dump_step, _, data_type, data_shape, data_summary = line + prefix, dump_step, _, data_type, data_shape, data_summary, md5_nume = line return {prefix: (data_type, data_shape, data_summary)} ret = {} @@ -530,7 +530,7 @@ def compare_mindtorch_summary(origin_pkl_path, target_pkl_path, name_map_list, f def compare_summary(origin_pkl_path, target_pkl_path, name_map_list, **print_kwargs): def get_api_info(pkl_path): def _read_line(line): - prefix, dump_step, _, data_type, data_shape, data_summary = line + prefix, dump_step, _, data_type, data_shape, data_summary, md5_nume = line return {prefix: (data_shape, data_summary)} ret = {} diff --git a/troubleshooter/troubleshooter/migrator/api_dump/apis_match/apis_match.py b/troubleshooter/troubleshooter/migrator/api_dump/apis_match/apis_match.py index df8997f..acc2570 100644 --- a/troubleshooter/troubleshooter/migrator/api_dump/apis_match/apis_match.py +++ b/troubleshooter/troubleshooter/migrator/api_dump/apis_match/apis_match.py @@ -176,7 +176,7 @@ class APIList: _get_uni_io(self.api_list, self.framework) def _read_line(self, line): - prefix, dump_step, _, data_type, data_shape, data_summary = line + prefix, dump_step, _, data_type, data_shape, data_summary, md5_nume = line api_data = APIDataNode(data_shape, data_type, data_summary) def _read_prefix(prefix): diff --git a/troubleshooter/troubleshooter/migrator/api_dump/ms_dump/hooks.py b/troubleshooter/troubleshooter/migrator/api_dump/ms_dump/hooks.py index a89dc00..6207693 100644 --- a/troubleshooter/troubleshooter/migrator/api_dump/ms_dump/hooks.py +++ b/troubleshooter/troubleshooter/migrator/api_dump/ms_dump/hooks.py @@ -11,6 +11,7 @@ from functools import lru_cache from pathlib import Path from xml.etree.ElementPath import ops +import hashlib import mindspore as ms from mindspore import Tensor from mindspore.common import mutable @@ -154,12 +155,13 @@ class DumpUtil(object): class DataInfo(object): - def __init__(self, data, save_data, summary_data, dtype, shape): + def 
__init__(self, data, save_data, summary_data, dtype, shape, md5_nume): self.data = data self.save_data = save_data self.summary_data = summary_data self.dtype = dtype self.shape = shape + self.md5_nume = md5_nume def get_not_float_tensor_info(data, compute_summary): @@ -182,7 +184,8 @@ def get_not_float_tensor_info(data, compute_summary): tensor_min = math.nan tensor_mean = math.nan summary_data = [tensor_max, tensor_min, tensor_mean] - return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape)) + md5_nume = hashlib.md5(saved_tensor).hexdigest() + return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape), md5_nume) def get_scalar_data_info(data, compute_summary): @@ -190,7 +193,8 @@ def get_scalar_data_info(data, compute_summary): summary_data = [data, data, data] else: summary_data = [math.nan] * 3 - return DataInfo(data, data, summary_data, str(type(data)), []) + md5_nume = hashlib.md5(str(data).encode()).hexdigest() + return DataInfo(data, data, summary_data, str(type(data)), [], md5_nume) def get_float_tensor_info(data, compute_summary): @@ -208,7 +212,8 @@ def get_float_tensor_info(data, compute_summary): tensor_min = math.nan tensor_mean = math.nan summary_data = [tensor_max, tensor_min, tensor_mean] - return DataInfo(data, saved_tensor, summary_data, dtype, tuple(data.shape)) + md5_nume = hashlib.md5(saved_tensor).hexdigest() + return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape), md5_nume) def set_dump_path(fpath=None): @@ -261,7 +266,7 @@ def dump_data(dump_file_name, dump_step, prefix, data_info, dump_type): else: np.save(output_path, data_info.save_data) os.chmod(output_path, 0o400) - json.dump([prefix, dump_step, [], data_info.dtype, data_info.shape, data_info.summary_data], f) + json.dump([prefix, dump_step, [], data_info.dtype, data_info.shape, data_info.summary_data, data_info.md5_nume], f) f.write('\n') diff --git 
a/troubleshooter/troubleshooter/migrator/api_dump/pt_dump/dump/dump.py b/troubleshooter/troubleshooter/migrator/api_dump/pt_dump/dump/dump.py index 08bf1d1..4c19a62 100644 --- a/troubleshooter/troubleshooter/migrator/api_dump/pt_dump/dump/dump.py +++ b/troubleshooter/troubleshooter/migrator/api_dump/pt_dump/dump/dump.py @@ -25,6 +25,7 @@ from collections import defaultdict from functools import lru_cache, partial import re +import hashlib import numpy as np import torch @@ -53,12 +54,13 @@ NNCount = defaultdict(int) class DataInfo(object): - def __init__(self, data, save_data, summary_data, dtype, shape): + def __init__(self, data, save_data, summary_data, dtype, shape, md5_nume): self.data = data self.save_data = save_data self.summary_data = summary_data self.dtype = dtype self.shape = shape + self.md5_nume = md5_nume def get_not_float_tensor_info(data, compute_summary): @@ -81,7 +83,8 @@ def get_not_float_tensor_info(data, compute_summary): tensor_min = math.nan tensor_mean = math.nan summary_data = [tensor_max, tensor_min, tensor_mean] - return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape)) + md5_nume = hashlib.md5(saved_tensor).hexdigest() + return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape), md5_nume) def get_scalar_data_info(data, compute_summary): @@ -89,7 +92,8 @@ def get_scalar_data_info(data, compute_summary): summary_data = [data, data, data] else: summary_data = [math.nan] * 3 - return DataInfo(data, data, summary_data, str(type(data)), []) + md5_nume = hashlib.md5(str(data).encode()).hexdigest() + return DataInfo(data, data, summary_data, str(type(data)), [], md5_nume) def get_float_tensor_info(data, compute_summary): @@ -103,7 +107,8 @@ def get_float_tensor_info(data, compute_summary): tensor_min = math.nan tensor_mean = math.nan summary_data = [tensor_max, tensor_min, tensor_mean] - return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape)) + md5_nume = 
hashlib.md5(saved_tensor).hexdigest() + return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape), md5_nume) def json_dump_condition(prefix): @@ -162,7 +167,7 @@ def dump_data(dump_file_name, dump_step, prefix, data_info, dump_npy): else: np.save(output_path, data_info.save_data) os.chmod(output_path, 0o400) - json.dump([prefix, dump_step, [], data_info.dtype, data_info.shape, data_info.summary_data], f) + json.dump([prefix, dump_step, [], data_info.dtype, data_info.shape, data_info.summary_data, data_info.md5_nume], f) f.write('\n') -- Gitee