Home Page
Search
\begin{md} # 动态语言参数和返回值的一种检查方法 在使用动态语言的大型项目中,往往会由于类型问题带来很多潜藏的Bug。比较明显的有如下两个: 1. 缺少对函数参数类型进行严格校验 2. 函数内存在多个返回不同类型数据/不同数量返回值的返回点 为了减少这个问题带来的麻烦,设计并实现了下面一种处理办法。通过在运行时统计函数参数类型,函数参数个数,函数返回值类型,函数返回值个数等信息, 来侦测异常的函数调用和返回。这里以 Python 为例,进行简单实现。 ## 实现和使用 实现代码如下: ```python import os, sys, json class LogCall(object): def __init__(self) -> None: self.m_call = {} self.m_pure_call = {} self.m_return = {} self.cnt_call = 0 self.cnt_pure_call = 0 self.cnt_return = 0 self.write_call_interval = 500 self.write_return_interval = 500 self.call_low_proportion = 0.01 self.return_low_proportion = 0.01 self.call_filter = ["src"] # 白名单,只统计文件路径路径中含有这些关键字的文件 self.return_filter = ["src"] # 白名单,只统计文件路径路径中含有这些关键字的文件 self.call_blacklist = ["_log"] # 黑名单,不统计文件路径中含有这些关键字的文件 self.return_blacklist = ["_log"] # 黑名单,不统计文件路径中含有这些关键字的文件 self.data_dir = "/home/test/" # python 基础数据类型 self.python_types = ["int", "float", "complex", "str", "bool", "list", "tuple", "set", "dict", "bytes", "bytearray", "NoneType"] self.flag_use_user_type = True # 是否使用 'UserType' 代替各种非原生类型 def print_call(self): """统计数据写入文件""" json_str = json.dumps(self.m_call, indent=2) path = os.path.join(self.data_dir, "__call.json") with open(path, "w") as f: f.write(json_str) def print_pure_call(self): """统计数据写入文件""" json_str = json.dumps(self.m_pure_call, indent=2) path = os.path.join(self.data_dir, "__pure_call.json") with open(path, "w") as f: f.write(json_str) def print_return(self): """统计数据写入文件""" json_str = json.dumps(self.m_return, indent=2) path = os.path.join(self.data_dir, "__return.json") with open(path, "w") as f: f.write(json_str) def print_low_proportion_call(self): """打印低比例的函数调用""" m = {} # low proportion call dict for key, item in self.m_call.items(): total = sum([cnt for cnt in item.values()]) for cnt in item.values(): if cnt / total < self.call_low_proportion: m[key] = item break m_str = json.dumps(m, indent=2) path = os.path.join(self.data_dir, "__call_low_proportion.json") with open(path, "w") as f: f.write(m_str) def print_many_types_pure_call(self): """打印低比例的函数调用""" m = {} # low proportion call dict for key, item in self.m_pure_call.items(): if len(item.keys()) >= 2: m[key] = item m_str = json.dumps(m, indent=2) path = os.path.join(self.data_dir, "__pure_call_many_types.json") with open(path, "w") as f: f.write(m_str) def print_low_proportion_return(self): """打印低比例的函数返回""" m = {} # low proportion return dict for key, item in self.m_return.items(): total = sum([cnt for cnt in item.values()]) for cnt in item.values(): if cnt / total < self.call_low_proportion: m[key] = item break m_str = json.dumps(m, indent=2) path = os.path.join(self.data_dir, "__return_low_proportion.json") with open(path, "w") as f: f.write(m_str) def get_type(self, arg): t = str(type(arg)).split("'")[-2] #
--> int if self.flag_use_user_type and t not in self.python_types: # 只记录原生类型,其他类型一律记录为 UserType t = "UserType" return t def add_call(self, key:str, place_args:dict) -> None: args = [self.get_type(a) for a in place_args.values()] args = str(args) if key in self.m_call: v = self.m_call[key] if args in v: v[args] += 1 else: v[args] = 1 else: self.m_call[key] = {} self.m_call[key][args] = 1 self.cnt_call += 1 if (self.cnt_call % self.write_call_interval) == 0: self.print_call() self.print_low_proportion_call() def add_pure_call(self, key:str, args:dict) -> None: args = [self.get_type(a) for a in args.values()] args = str(args) if key in self.m_pure_call: v = self.m_pure_call[key] if args in v: v[args] += 1 else: v[args] = 1 else: self.m_pure_call[key] = {} self.m_pure_call[key][args] = 1 self.cnt_pure_call += 1 if (self.cnt_pure_call % self.write_call_interval) == 0: self.print_pure_call() self.print_many_types_pure_call() def add_return(self, key:str, returns) -> None: if isinstance(returns, list): rsps = ",".join([self.get_type(a) for a in returns]) rsps = f"[{rsps}]" elif isinstance(returns, tuple): rsps = ",".join([self.get_type(a) for a in returns]) rsps = f"({rsps})" elif isinstance(returns, dict): rsps = ",".join([self.get_type(a) for a in returns.values()]) rsps = f"{{{rsps}}}" else: rsps = self.get_type(returns) if key in self.m_return: v = self.m_return[key] if rsps in v: v[rsps] += 1 else: v[rsps] = 1 else: self.m_return[key] = {} self.m_return[key][rsps] = 1 self.cnt_return += 1 if (self.cnt_return % self.write_return_interval) == 0: self.print_return() self.print_low_proportion_return() def func_trace(frame, event, arg): func_line = frame.f_lineno func_name = frame.f_code.co_name file_name = frame.f_code.co_filename logger:LogCall = mylog if event == "call": # 如果函数有默认参数,则删除实参中的默认参数 code = frame.f_code locals = frame.f_locals args = {} for arg_name in code.co_varnames[:code.co_argcount]: args[arg_name] = locals[arg_name] for keyword in logger.call_blacklist: if keyword in file_name: return func_trace for keyword in logger.call_filter: if keyword in file_name: key = f"{file_name}:{func_line}:{func_name}" logger.add_pure_call(key, args) break caller = frame.f_back if not caller: return func_trace caller_func_line = caller.f_lineno caller_func_name = caller.f_code.co_name caller_file_name = caller.f_code.co_filename for keyword in logger.call_blacklist: if keyword in caller_file_name or keyword in file_name: return func_trace for keyword in logger.call_filter: if keyword in caller_file_name and keyword in file_name: # 调用函数和被调用函数都需要为业务代码,不然会统计到太多的标准库调用 key = f"{caller_file_name}:{caller_func_line}:{caller_func_name}-{file_name}:{func_name}" logger.add_call(key, args) return func_trace elif event == "return": for keyword in logger.return_blacklist: if keyword in file_name: return func_trace for keyword in logger.call_filter: if keyword in file_name: rsps = arg key = f"{file_name}:{func_line}:{func_name}" logger.add_return(key, rsps) return func_trace return func_trace mylog = LogCall() ## 用法 # 在主循环启动函数中添加如下代码 sys.settrace(func_trace) ``` 在项目中简单使用了这个方案进行检测,查看统计数据,发现了一处函数参数过多问题(*args 传了过多数据), 一处返回值类型不对问题(应该返回 bool 类型的接口返回了 int 类型), 一处函数参数类型错误问题。 这个方案还是有一定效果的。如果能够长时间运行,通过分析不同参数调用格式,也能凸显异常的调用。 ## 拓展方案 1. 结合编写了类型提示的函数,检查出不符合类型提示的代码。 2. 讲得到的函数参数调用类型数据和返回值类型数据以类型提示的格式写回到源码文件,并使用静态代码检查工具(pyright 等) 来分析添加了类型提示的代码, 侦测和类型提示不符合的代码。 \end{md}
Home Page