diff --git a/troubleshooter/docs/api/migrator/api_dump.md b/troubleshooter/docs/api/migrator/api_dump.md index 42bce56abda9dcfc945e6a38826d918364eec462..f505e14877a0c13ef17d9815c9dfaaf3465aa067 100644 --- a/troubleshooter/docs/api/migrator/api_dump.md +++ b/troubleshooter/docs/api/migrator/api_dump.md @@ -75,7 +75,7 @@ output_path # 输出目录 - `api_dump_info.pkl`文件为网络在dump时按照API的执行顺序保存的信息,文件项格式如下: ``` - [数据名称,保留字段,保留字段,数据类型,数据shape,[最大值,最小值,均值]] + [数据名称,保留字段,保留字段,数据类型,数据shape,[最大值,最小值,均值], md5值] ``` 当数据为bool类型或关闭统计信息保存时,最大值/最小值/均值会显示为`NAN`。 diff --git a/troubleshooter/troubleshooter/migrator/api_dump/api_dump_compare.py b/troubleshooter/troubleshooter/migrator/api_dump/api_dump_compare.py index d059b1b0fc523d5b1bed0f84a6a39d3daff6f8f3..f00427062809a9cf40bab13362db3df8810c8eea 100644 --- a/troubleshooter/troubleshooter/migrator/api_dump/api_dump_compare.py +++ b/troubleshooter/troubleshooter/migrator/api_dump/api_dump_compare.py @@ -70,7 +70,7 @@ def _get_npy_list(apis, io, file_dict): def _get_npy_shape_map(pkl_path): def _read_line(line): - prefix, dump_step, _, data_type, data_shape, data_summary = line + prefix, dump_step, _, data_type, data_shape, data_summary, md5_nume = line return {prefix: data_shape} ret = {} @@ -481,7 +481,7 @@ def print_mindtorch_summary_result( def compare_mindtorch_summary(origin_pkl_path, target_pkl_path, name_map_list, frame_names, **print_kwargs): def get_api_info(pkl_path): def _read_line(line): - prefix, dump_step, _, data_type, data_shape, data_summary = line + prefix, dump_step, _, data_type, data_shape, data_summary, md5_nume = line return {prefix: (data_type, data_shape, data_summary)} ret = {} @@ -530,7 +530,7 @@ def compare_mindtorch_summary(origin_pkl_path, target_pkl_path, name_map_list, f def compare_summary(origin_pkl_path, target_pkl_path, name_map_list, **print_kwargs): def get_api_info(pkl_path): def _read_line(line): - prefix, dump_step, _, data_type, data_shape, data_summary = line + prefix, dump_step, _, data_type, 
data_shape, data_summary, md5_nume = line return {prefix: (data_shape, data_summary)} ret = {} diff --git a/troubleshooter/troubleshooter/migrator/api_dump/apis_match/apis_match.py b/troubleshooter/troubleshooter/migrator/api_dump/apis_match/apis_match.py index df8997f48206739a87f9a27f9e5119a5cf7529b8..acc25705a0d364d451e58d20828aac6c043ca3ec 100644 --- a/troubleshooter/troubleshooter/migrator/api_dump/apis_match/apis_match.py +++ b/troubleshooter/troubleshooter/migrator/api_dump/apis_match/apis_match.py @@ -176,7 +176,7 @@ class APIList: _get_uni_io(self.api_list, self.framework) def _read_line(self, line): - prefix, dump_step, _, data_type, data_shape, data_summary = line + prefix, dump_step, _, data_type, data_shape, data_summary, md5_nume = line api_data = APIDataNode(data_shape, data_type, data_summary) def _read_prefix(prefix): diff --git a/troubleshooter/troubleshooter/migrator/api_dump/ms_dump/hooks.py b/troubleshooter/troubleshooter/migrator/api_dump/ms_dump/hooks.py index a89dc00845eee76e8ab4a3116d710e21399aa657..6207693ddcaa77dbd9603345358a90f9b6561543 100644 --- a/troubleshooter/troubleshooter/migrator/api_dump/ms_dump/hooks.py +++ b/troubleshooter/troubleshooter/migrator/api_dump/ms_dump/hooks.py @@ -11,6 +11,7 @@ from functools import lru_cache from pathlib import Path from xml.etree.ElementPath import ops +import hashlib import mindspore as ms from mindspore import Tensor from mindspore.common import mutable @@ -154,12 +155,13 @@ class DumpUtil(object): class DataInfo(object): - def __init__(self, data, save_data, summary_data, dtype, shape): + def __init__(self, data, save_data, summary_data, dtype, shape, md5_nume): self.data = data self.save_data = save_data self.summary_data = summary_data self.dtype = dtype self.shape = shape + self.md5_nume = md5_nume def get_not_float_tensor_info(data, compute_summary): @@ -182,7 +184,8 @@ def get_not_float_tensor_info(data, compute_summary): tensor_min = math.nan tensor_mean = math.nan summary_data = 
[tensor_max, tensor_min, tensor_mean] - return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape)) + md5_nume = hashlib.md5(saved_tensor).hexdigest() + return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape), md5_nume) def get_scalar_data_info(data, compute_summary): @@ -190,7 +193,8 @@ def get_scalar_data_info(data, compute_summary): summary_data = [data, data, data] else: summary_data = [math.nan] * 3 - return DataInfo(data, data, summary_data, str(type(data)), []) + md5_nume = hashlib.md5(str(data).encode()).hexdigest() + return DataInfo(data, data, summary_data, str(type(data)), [], md5_nume) def get_float_tensor_info(data, compute_summary): @@ -208,7 +212,8 @@ def get_float_tensor_info(data, compute_summary): tensor_min = math.nan tensor_mean = math.nan summary_data = [tensor_max, tensor_min, tensor_mean] - return DataInfo(data, saved_tensor, summary_data, dtype, tuple(data.shape)) + md5_nume = hashlib.md5(saved_tensor).hexdigest() + return DataInfo(data, saved_tensor, summary_data, dtype, tuple(data.shape), md5_nume) def set_dump_path(fpath=None): @@ -261,7 +266,7 @@ def dump_data(dump_file_name, dump_step, prefix, data_info, dump_type): else: np.save(output_path, data_info.save_data) os.chmod(output_path, 0o400) - json.dump([prefix, dump_step, [], data_info.dtype, data_info.shape, data_info.summary_data], f) + json.dump([prefix, dump_step, [], data_info.dtype, data_info.shape, data_info.summary_data, data_info.md5_nume], f) f.write('\n') diff --git a/troubleshooter/troubleshooter/migrator/api_dump/pt_dump/dump/dump.py b/troubleshooter/troubleshooter/migrator/api_dump/pt_dump/dump/dump.py index 08bf1d1bef24d61202db88723ccd5828633e1e62..4c19a627e5b2fb01fa355cab2742ed013175425d 100644 --- a/troubleshooter/troubleshooter/migrator/api_dump/pt_dump/dump/dump.py +++ b/troubleshooter/troubleshooter/migrator/api_dump/pt_dump/dump/dump.py @@ -25,6 +25,7 @@ from collections import defaultdict from 
functools import lru_cache, partial import re +import hashlib import numpy as np import torch @@ -53,12 +54,13 @@ NNCount = defaultdict(int) class DataInfo(object): - def __init__(self, data, save_data, summary_data, dtype, shape): + def __init__(self, data, save_data, summary_data, dtype, shape, md5_nume): self.data = data self.save_data = save_data self.summary_data = summary_data self.dtype = dtype self.shape = shape + self.md5_nume = md5_nume def get_not_float_tensor_info(data, compute_summary): @@ -81,7 +83,8 @@ def get_not_float_tensor_info(data, compute_summary): tensor_min = math.nan tensor_mean = math.nan summary_data = [tensor_max, tensor_min, tensor_mean] - return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape)) + md5_nume = hashlib.md5(saved_tensor).hexdigest() + return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape), md5_nume) def get_scalar_data_info(data, compute_summary): @@ -89,7 +92,8 @@ def get_scalar_data_info(data, compute_summary): summary_data = [data, data, data] else: summary_data = [math.nan] * 3 - return DataInfo(data, data, summary_data, str(type(data)), []) + md5_nume = hashlib.md5(str(data).encode()).hexdigest() + return DataInfo(data, data, summary_data, str(type(data)), [], md5_nume) def get_float_tensor_info(data, compute_summary): @@ -103,7 +107,8 @@ def get_float_tensor_info(data, compute_summary): tensor_min = math.nan tensor_mean = math.nan summary_data = [tensor_max, tensor_min, tensor_mean] - return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape)) + md5_nume = hashlib.md5(saved_tensor).hexdigest() + return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape), md5_nume) def json_dump_condition(prefix): @@ -162,7 +167,7 @@ def dump_data(dump_file_name, dump_step, prefix, data_info, dump_npy): else: np.save(output_path, data_info.save_data) os.chmod(output_path, 0o400) - json.dump([prefix, dump_step, [], 
data_info.dtype, data_info.shape, data_info.summary_data], f) + json.dump([prefix, dump_step, [], data_info.dtype, data_info.shape, data_info.summary_data, data_info.md5_nume], f) f.write('\n')