python小练习:涉及print,json,numpy
2024-10-21 05:08:06
枚举参考文件夹中的文件,并与待比较文件夹中的同名文件比较是否一致。
#! /usr/bin/python3.6
# -*- coding:utf-8 -*-
import os
import sys
import json
import numpy as np
from sqlalchemy import false
def cmp_file(ref_file: str, dst_file: str) -> bool:
    """Compare two files byte for byte and print the verdict.

    Args:
        ref_file: Path of the reference file; it must exist.
        dst_file: Path of the file checked against the reference.

    Returns:
        True when both files hold identical bytes. False when the contents
        differ or when ``dst_file`` does not exist.

    Raises:
        AssertionError: If ``ref_file`` does not exist.
    """
    ref_base_name = os.path.basename(ref_file)
    dst_base_name = os.path.basename(dst_file)
    assert os.path.exists(ref_file), f"ref file not exist: {ref_base_name}"
    if not os.path.exists(dst_file):
        print(f'dst file not exist: {dst_base_name}')
        # Return the built-in False. The name `false` imported from
        # sqlalchemy is an SQL-expression helper, not a Python bool.
        return False
    # Read both files as raw bytes; array_equal is False on any difference
    # in length or content.
    ref_data = np.fromfile(ref_file, dtype=np.ubyte, count=-1)
    dst_data = np.fromfile(dst_file, dtype=np.ubyte, count=-1)
    is_equal = bool(np.array_equal(ref_data, dst_data))
    print(is_equal, ": ", ref_base_name)
    return is_equal
def cmp_dir(ref_dir: str, dst_dir: str) -> None:
    """Compare each entry of ``ref_dir`` with the same-named entry in ``dst_dir``.

    Every pair of paths is handed to ``cmp_file``, which prints the per-file
    result. The individual results are not collected or returned.
    """
    print(f'\n==========>>> Start compare {ref_dir} and {dst_dir}')
    for entry in os.listdir(ref_dir):
        cmp_file(os.path.join(ref_dir, entry), os.path.join(dst_dir, entry))
def main():
    """Run the directory comparison described by a JSON config file.

    The config path is taken from the first command-line argument. The
    config must provide ``dst_dirs`` (an iterable of directories) and
    ``ref_dir``; each entry of ``dst_dirs`` is compared against ``ref_dir``
    via ``cmp_dir``. When no config path is given, a usage line is printed
    and the function returns.
    """
    argv = sys.argv
    if len(argv) < 2:
        print('usage: dump_dir_cmp.py dir_config.json')
        return

    with open(argv[1]) as cfg_fp:
        config = json.load(cfg_fp)

    for candidate_dir in config['dst_dirs']:
        cmp_dir(config['ref_dir'], candidate_dir)
# Run the comparison only when this file is executed as a script.
if __name__ == '__main__':
    main()
配置样例:
{
"ref_dir": "./dump_data/NPU_DUMPF001_P0/tensorflow_squeezenet_task0_loop0",
"dst_dirs": [
"./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task0_loop0",
"./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task1_loop0",
"./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task2_loop0",
"./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task3_loop0",
"./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task4_loop0",
"./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task5_loop0"
]
}
样例2(re匹配):
#! /usr/bin/python3.6
# -*- coding:utf-8 -*-
# cmp_dump_pickle_dir.py
import os
import re
import sys
import numpy as np
from numpy.linalg import norm
import pickle
import shutil
from sklearn.metrics.pairwise import cosine_similarity
def vec_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
    """Return the cosine similarity of two arrays treated as flat vectors.

    Args:
        v1: First array; any shape, it is flattened before use.
        v2: Second array; must hold the same number of elements as ``v1``.

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)`` as a Python float, or 0.0 when
        either vector has zero norm (so the result is never NaN from a
        division by zero).

    Raises:
        ValueError: If the two arrays hold a different number of elements.
    """
    # Work in float64 so float16/float32 inputs do not overflow or lose
    # precision while the products are accumulated.
    a = np.asarray(v1, dtype=np.float64).ravel()
    b = np.asarray(v2, dtype=np.float64).ravel()
    # Take the dot product first so a size mismatch always raises, even
    # when one of the vectors is all zeros.
    dot = np.dot(a, b)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0.0:
        return 0.0
    return float(dot / denom)
def re_find_file(dir: str, op_name: str) -> "str | None":
    """Find the output dump file of ``op_name`` inside ``dir``.

    A file matches when its name is ``<op_name>_out_<anything>.bin`` or
    ``<op_name>_out<digit>.bin``.

    Args:
        dir: Directory whose entries are searched.
        op_name: Operator name; it is matched literally, so characters such
            as ``.`` or ``+`` have no regex meaning.

    Returns:
        The matching file name (a real entry of ``dir``), or None when no
        entry matches. If several entries match, the first one in sorted
        order is returned so the result is deterministic.
    """
    # Alternation group: (out_<non-space chars> | out<digit>). The op name
    # is escaped and the dot before "bin" is literal.
    pattern = re.compile(rf"{re.escape(op_name)}_(out_\S*|out\d)\.bin$")
    for fname in sorted(os.listdir(dir)):
        # match() anchors at the start of the name, so an op name that is
        # only a suffix of another op's name is not picked up. The full
        # entry name is returned so the caller can join it with `dir`.
        if pattern.match(fname) is not None:
            return fname
    return None
def cmp_file(ref_file: str, dst_file: str, dtype: str) -> bool:
    """Compare two binary dump files and print the verdict.

    Files are read as arrays of ``dtype``. For ``'float32'`` and
    ``'float16'`` the arrays are compared by cosine similarity and count as
    matching when the similarity exceeds 0.95; every other dtype requires
    exact element-wise equality.

    Args:
        ref_file: Path of the reference file; it must exist.
        dst_file: Path of the file under test; it must exist.
        dtype: NumPy dtype name used to interpret both files.

    Returns:
        True when the files match under the rule above, otherwise False.
        Float files with a different number of elements are reported as a
        mismatch.

    Raises:
        AssertionError: If either file does not exist.
    """
    ref_base_name = os.path.basename(ref_file)
    dst_base_name = os.path.basename(dst_file)
    assert os.path.exists(ref_file), f"ref file not exist: {ref_base_name}"
    assert os.path.exists(dst_file), f"dst file not exist: {dst_base_name}"
    ref_data = np.fromfile(ref_file, dtype=dtype, count=-1)
    dst_data = np.fromfile(dst_file, dtype=dtype, count=-1)
    if dtype in ('float32', 'float16'):
        # Cosine similarity is undefined for vectors of different length;
        # report a mismatch instead of letting the helper raise.
        if ref_data.size != dst_data.size:
            print(False, f", size mismatch ref={ref_data.size} dst={dst_data.size} : ", ref_base_name)
            return False
        sim = vec_similarity(ref_data, dst_data)
        is_similar = bool(sim > 0.95)
        print(is_similar, f", simularity={sim} : ", ref_base_name)
        return is_similar
    is_equal = bool(np.array_equal(ref_data, dst_data))
    print(is_equal, ": ", ref_base_name)
    return is_equal
def cmp_dir(ref_dir: str, dst_dir: str) -> None:
    """Compare every reference dump in ``ref_dir`` with its match in ``dst_dir``.

    Each reference file name must contain ``_op_out_`` and end in ``bin``.
    The text before ``_op_out_`` is the op name used to look up the
    destination file via ``re_find_file``; the text between the last ``_``
    and the last ``.`` is the dtype passed to ``cmp_file``. Ops without a
    destination file are collected and printed at the end.

    Raises:
        AssertionError: If a file name in ``ref_dir`` does not follow the
            expected pattern.
    """
    print(f'\n==========>>> Start compare {ref_dir} and {dst_dir}')
    suffix_re = re.compile(r"_op_out_[\S]*.bin$")
    name_re = re.compile(r"[\S]*_op_out_[\S]*.bin$")
    missing_ops = []
    for ref_name in os.listdir(ref_dir):
        assert name_re.match(ref_name) is not None, f"bad file name: {ref_name}"
        # The dtype tag sits between the last '_' and the last '.'.
        tag_start = ref_name.rfind('_') + 1
        tag_end = ref_name.rfind('.')
        dtype = ref_name[tag_start:tag_end]
        # Everything before the "_op_out_..." suffix is the op name.
        op_name = ref_name[:suffix_re.search(ref_name).start()]
        dst_name = re_find_file(dst_dir, op_name)
        if dst_name is None:
            missing_ops.append(op_name)
        else:
            cmp_file(os.path.join(ref_dir, ref_name),
                     os.path.join(dst_dir, dst_name),
                     dtype=dtype)
    print(f'\nNot exist ops: {missing_ops}')
def dump_pickle_file(pickle_file: str, out_bin_dir: str, force_dtype_u8: bool) -> None:
    """Write each array stored in a pickled mapping to its own ``.bin`` file.

    The pickle is expected to hold a mapping of string keys to NumPy arrays.
    Each array is flattened and written to
    ``<key with '/' replaced by '_'>_op_out_<dtype>.bin`` inside
    ``out_bin_dir``, and one summary line per entry is printed.

    Args:
        pickle_file: Path of the pickle file to read.
        out_bin_dir: Existing directory that receives the ``.bin`` files.
        force_dtype_u8: When True, float16/float32 arrays get ``uint8`` as
            the dtype tag in their file name. Only the name changes; the
            bytes written are still the array's own data.
    """
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(pickle_file, "rb") as pkl_fp:
        op_ref = pickle.load(pkl_fp)

    float_dtypes = (np.float16, np.float32)
    for key, value in op_ref.items():
        flat = value.flatten()
        if force_dtype_u8 and flat.dtype in float_dtypes:
            dtype = 'uint8'
        else:
            dtype = flat.dtype
        print("pickle key: %30s, size: %7d, dtype: %s" % (key, value.itemsize * value.size, flat.dtype))
        out_name = key.replace("/", "_") + f"_op_out_{dtype}.bin"
        flat.tofile(os.path.join(out_bin_dir, out_name))
def mkdir(dir: str) -> None:
    """Create ``dir`` as a fresh, empty directory.

    If ``dir`` already exists, the whole tree under it is deleted first, so
    anything stored there is lost.

    Args:
        dir: Path of the directory to (re)create. Missing parent
            directories are created as well.
    """
    if os.path.exists(dir):
        shutil.rmtree(dir)
    # makedirs (not mkdir) so a nested output path works even when its
    # parent directories do not exist yet.
    os.makedirs(dir)
def main():
    """Dump a pickle file to ``.bin`` files and compare them with a dump dir.

    Command-line arguments: ``pickle_file pickle_out_dir dst_dump_dir``,
    optionally followed by the literal ``force_dtype_u8``. The output
    directory is recreated empty via ``mkdir`` before the pickle contents
    are written into it and compared against ``dst_dump_dir``.

    Raises:
        AssertionError: If fewer than three arguments are given.
    """
    argv = sys.argv
    assert len(argv) >= 4, 'usage: dump_dir_cmp.py pickle_file pickle_out_dir dst_dump_dir [force_dtype_u8]'
    force_dtype_u8 = len(argv) >= 5 and argv[4] == 'force_dtype_u8'
    pickle_file, pickle_out_dir, dst_dump_dir = argv[1], argv[2], argv[3]
    mkdir(pickle_out_dir)
    dump_pickle_file(pickle_file, pickle_out_dir, force_dtype_u8)
    cmp_dir(pickle_out_dir, dst_dump_dir)
# Run the dump-and-compare flow only when this file is executed as a script.
if __name__ == '__main__':
    main()
最新文章
- 怎么给我的Office文档加密
- php +html5 websocket 聊天室
- 数据传输:JSON,XML
- Java中类方法与实例方法的区别
- 展讯NAND Flash高级教程【转】
- [转载]C#获取本机IPv4地址
- 第一个asp.net实例——生日邀请以及回函
- activty栈管理
- Java多线程10:join()方法
- javascript原型模式概念解读
- jmeter 测试MD5加密登录接口
- Javascript框架 - ExtJs - 类
- 【读书笔记】iOS-使用传感器
- APP-6-百度地图导航
- (笔记)Mysql命令alter add:增加表的字段
- 注意字符串的strlen与sizeof的差别
- PHP使用FPDF pdf添加水印中文乱码问题 pdf合并版本问题
- ubuntu 访问 共享 windows文件夹
- Android传感器开发
- Quartz 的使用
热门文章
- MariaDB数据库 主-从 部署
- ELK套件部署
- [Python]-pdb模块-单步调试
- .NET静态代码织入——肉夹馍(Rougamo) 发布1.2.0
- Redis 的大 Key 对持久化有什么影响?
- JS 模块化 - 03 AMD 规范与 Require JS
- 使用 Windows 包管理器 (winget) 安装 .Net
- 企业运维 | MySQL关系型数据库在Docker与Kubernetes容器环境中快速搭建部署主从实践
- [题解] Atcoder Beginner Contest ABC 265 Ex No-capture Lance Game DP,二维FFT
- 关于VirtualBox在新建虚拟机时-选择操作系统类型后没有64位的版本选项