diff --git a/app/helper/memory.py b/app/helper/memory.py index 57e7aeec..7cb0d76c 100644 --- a/app/helper/memory.py +++ b/app/helper/memory.py @@ -2,8 +2,10 @@ import gc import sys import threading import time +import os +import tracemalloc from datetime import datetime -from typing import Optional +from typing import Optional, Dict, List, Tuple import psutil from pympler import muppy, summary, asizeof @@ -30,6 +32,10 @@ class MemoryHelper(metaclass=Singleton): self._memory_snapshot_dir = settings.LOG_PATH / "memory_snapshots" # 保留的快照文件数量 self._keep_count = settings.MEMORY_SNAPSHOT_KEEP_COUNT + + # 启用tracemalloc以获得更详细的内存信息 + if not tracemalloc.is_tracing(): + tracemalloc.start(25) # 保留25个帧 @eventmanager.register(EventType.ConfigChanged) def handle_config_changed(self, event: Event): @@ -108,15 +114,21 @@ class MemoryHelper(metaclass=Singleton): logger.info(f"开始创建内存快照: {snapshot_file}") - # 第一步:写入基本信息和对象类型统计 - self._write_basic_info(snapshot_file, memory_usage) + # 第一步:写入基本信息和系统内存统计 + self._write_system_memory_info(snapshot_file, memory_usage) - # 第二步:分析并写入类实例内存使用情况 + # 第二步:写入Python对象类型统计 + self._write_python_objects_info(snapshot_file) + + # 第三步:分析并写入类实例内存使用情况 self._append_class_analysis(snapshot_file) - # 第三步:分析并写入大内存变量详情 + # 第四步:分析并写入大内存变量详情 self._append_variable_analysis(snapshot_file) + # 第五步:分析内存泄漏和增长趋势 + self._append_memory_leak_analysis(snapshot_file) + logger.info(f"内存快照已保存: {snapshot_file}, 当前内存使用: {memory_usage / 1024 / 1024:.2f} MB") # 清理过期的快照文件(保留最近30个) @@ -125,30 +137,450 @@ class MemoryHelper(metaclass=Singleton): except Exception as e: logger.error(f"创建内存快照失败: {e}") - @staticmethod - def _write_basic_info(snapshot_file, memory_usage): + def _write_system_memory_info(self, snapshot_file, memory_usage): """ - 写入基本信息和对象类型统计 + 写入系统内存信息 """ - # 获取当前进程的内存使用情况 - all_objects = muppy.get_objects() - sum1 = summary.summarize(all_objects) - + process = psutil.Process() + memory_info = process.memory_info() + memory_percent = process.memory_percent() + + # 获取系统总内存信息 + system_memory = psutil.virtual_memory() + + # 获取内存映射信息 + memory_maps = process.memory_maps() + with open(snapshot_file, 'w', encoding='utf-8') as f: f.write(f"内存快照时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") - f.write(f"当前进程内存使用: {memory_usage / 1024 / 1024:.2f} MB\n") f.write("=" * 80 + "\n") - f.write("对象类型统计:\n") + f.write("系统内存使用情况:\n") f.write("-" * 80 + "\n") + f.write(f"当前进程内存使用: {memory_usage / 1024 / 1024:.2f} MB\n") + f.write(f"进程内存使用率: {memory_percent:.2f}%\n") + f.write(f"系统总内存: {system_memory.total / 1024 / 1024 / 1024:.2f} GB\n") + f.write(f"系统可用内存: {system_memory.available / 1024 / 1024 / 1024:.2f} GB\n") + f.write(f"系统内存使用率: {system_memory.percent:.2f}%\n") + f.write(f"进程RSS内存: {memory_info.rss / 1024 / 1024:.2f} MB\n") + f.write(f"进程VMS内存: {memory_info.vms / 1024 / 1024:.2f} MB\n") + f.write(f"进程共享内存: {memory_info.shared / 1024 / 1024:.2f} MB\n") + f.write(f"进程文本段: {memory_info.text / 1024 / 1024:.2f} MB\n") + f.write(f"进程数据段: {memory_info.data / 1024 / 1024:.2f} MB\n") + + # 分析内存映射 + f.write("\n内存映射分析:\n") + f.write("-" * 80 + "\n") + memory_regions = self._analyze_memory_maps(memory_maps) + for region_type, size_mb in memory_regions.items(): + f.write(f"{region_type}: {size_mb:.2f} MB\n") + + f.flush() + def _analyze_memory_maps(self, memory_maps) -> Dict[str, float]: + """ + 分析内存映射,按类型分类统计 + """ + regions = {} + for mmap in memory_maps: + size_mb = mmap.size / 1024 / 1024 + perms = mmap.perms + + if 'r' in perms and 'w' in perms: + region_type = "读写内存" + elif 'r' in perms and 'x' in perms: + region_type = "代码段" + elif 'r' in perms: + region_type = "只读内存" + else: + region_type = "其他内存" + + if region_type in regions: + regions[region_type] += size_mb + else: + regions[region_type] = size_mb + + return regions + + def _write_python_objects_info(self, snapshot_file): + """ + 写入Python对象类型统计信息 + """ + # 获取当前tracemalloc统计 + current, peak = tracemalloc.get_traced_memory() + + # 获取所有对象 + all_objects = muppy.get_objects() + sum1 = summary.summarize(all_objects) + + # 计算Python对象总内存 + python_total_mb = 0 + for line in summary.format_(sum1): + if '|' in line and line.strip() and not line.startswith('=') and not line.startswith('-'): + parts = line.split('|') + if len(parts) >= 3: + try: + size_str = parts[2].strip() + if 'MB' in size_str: + size_mb = float(size_str.replace('MB', '').strip()) + python_total_mb += size_mb + except: + pass + + with open(snapshot_file, 'a', encoding='utf-8') as f: + f.write("\n" + "=" * 80 + "\n") + f.write("Python内存使用情况:\n") + f.write("-" * 80 + "\n") + f.write(f"tracemalloc当前内存: {current / 1024 / 1024:.2f} MB\n") + f.write(f"tracemalloc峰值内存: {peak / 1024 / 1024:.2f} MB\n") + f.write(f"Python对象总内存: {python_total_mb:.2f} MB\n") + f.write(f"未统计内存(可能为C扩展): {self._get_unaccounted_memory():.2f} MB\n") + + f.write("\n对象类型统计:\n") + f.write("-" * 80 + "\n") # 写入对象统计信息 for line in summary.format_(sum1): f.write(line + "\n") - - # 立即刷新到磁盘 + f.flush() - logger.debug("基本信息已写入快照文件") + def _get_unaccounted_memory(self) -> float: + """ + 计算未统计的内存(可能是C扩展、系统缓存等) + """ + try: + # 获取进程总内存 + process = psutil.Process() + total_memory = process.memory_info().rss / 1024 / 1024 # MB + + # 获取Python对象总内存 + all_objects = muppy.get_objects() + sum1 = summary.summarize(all_objects) + + python_total_mb = 0 + for line in summary.format_(sum1): + if '|' in line and line.strip() and not line.startswith('=') and not line.startswith('-'): + parts = line.split('|') + if len(parts) >= 3: + try: + size_str = parts[2].strip() + if 'MB' in size_str: + size_mb = float(size_str.replace('MB', '').strip()) + python_total_mb += size_mb + except: + pass + + return max(0, total_memory - python_total_mb) + except: + return 0.0 + + def _append_memory_leak_analysis(self, snapshot_file): + """ + 分析内存泄漏和增长趋势 + """ + with open(snapshot_file, 'a', encoding='utf-8') as f: + f.write("\n" + "=" * 80 + "\n") + f.write("内存泄漏分析:\n") + f.write("-" * 80 + "\n") + + # 获取tracemalloc统计 + current, peak = tracemalloc.get_traced_memory() + f.write(f"当前tracemalloc内存: {current / 1024 / 1024:.2f} MB\n") + f.write(f"tracemalloc峰值内存: {peak / 1024 / 1024:.2f} MB\n") + + # 获取内存分配统计 + try: + stats = tracemalloc.get_traced_memory() + f.write(f"内存分配统计: {stats}\n") + + # 获取前10个内存分配最多的位置 + snapshot = tracemalloc.take_snapshot() + top_stats = snapshot.statistics('lineno') + + f.write("\n内存分配最多的位置 (前10个):\n") + f.write("-" * 80 + "\n") + for i, stat in enumerate(top_stats[:10], 1): + f.write(f"{i:2d}. {stat.count:>8} 个对象, {stat.size / 1024 / 1024:>8.2f} MB\n") + f.write(f" {stat.traceback.format()}\n") + + except Exception as e: + f.write(f"获取内存分配统计失败: {e}\n") + + # 垃圾回收统计 + f.write("\n垃圾回收统计:\n") + f.write("-" * 80 + "\n") + for i in range(3): + count = gc.get_count()[i] + f.write(f"GC代 {i}: {count} 次\n") + + # 获取不可达对象数量 + unreachable = len(gc.garbage) + f.write(f"不可达对象数量: {unreachable}\n") + + f.flush() + + logger.debug("内存泄漏分析已完成并写入") + + def create_detailed_memory_analysis(self): + """ + 创建详细的内存分析报告,专门用于诊断内存问题 + """ + try: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + analysis_file = self._memory_snapshot_dir / f"detailed_memory_analysis_{timestamp}.txt" + + logger.info(f"开始创建详细内存分析: {analysis_file}") + + with open(analysis_file, 'w', encoding='utf-8') as f: + f.write(f"详细内存分析报告 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") + f.write("=" * 100 + "\n\n") + + # 1. 系统级内存分析 + self._write_detailed_system_analysis(f) + + # 2. Python对象深度分析 + self._write_detailed_python_analysis(f) + + # 3. 内存映射详细分析 + self._write_detailed_memory_maps(f) + + # 4. 大对象分析 + self._write_detailed_large_objects(f) + + # 5. 内存泄漏检测 + self._write_memory_leak_detection(f) + + logger.info(f"详细内存分析已保存: {analysis_file}") + return analysis_file + + except Exception as e: + logger.error(f"创建详细内存分析失败: {e}") + return None + + def _write_detailed_system_analysis(self, f): + """ + 写入详细的系统内存分析 + """ + f.write("1. 系统级内存分析\n") + f.write("-" * 50 + "\n") + + process = psutil.Process() + memory_info = process.memory_info() + + f.write(f"进程ID: {process.pid}\n") + f.write(f"进程名称: {process.name()}\n") + f.write(f"进程命令行: {' '.join(process.cmdline())}\n\n") + + f.write("内存使用详情:\n") + f.write(f" RSS (物理内存): {memory_info.rss / 1024 / 1024:.2f} MB\n") + f.write(f" VMS (虚拟内存): {memory_info.vms / 1024 / 1024:.2f} MB\n") + f.write(f" 共享内存: {memory_info.shared / 1024 / 1024:.2f} MB\n") + f.write(f" 文本段: {memory_info.text / 1024 / 1024:.2f} MB\n") + f.write(f" 数据段: {memory_info.data / 1024 / 1024:.2f} MB\n") + f.write(f" 库内存: {memory_info.lib / 1024 / 1024:.2f} MB\n") + f.write(f" 脏页: {memory_info.dirty / 1024 / 1024:.2f} MB\n") + + # 系统内存信息 + system_memory = psutil.virtual_memory() + f.write(f"\n系统内存:\n") + f.write(f" 总内存: {system_memory.total / 1024 / 1024 / 1024:.2f} GB\n") + f.write(f" 可用内存: {system_memory.available / 1024 / 1024 / 1024:.2f} GB\n") + f.write(f" 使用率: {system_memory.percent:.2f}%\n") + f.write(f" 缓存: {system_memory.cached / 1024 / 1024 / 1024:.2f} GB\n") + f.write(f" 缓冲区: {system_memory.buffers / 1024 / 1024 / 1024:.2f} GB\n") + + f.write("\n" + "=" * 100 + "\n\n") + + def _write_detailed_python_analysis(self, f): + """ + 写入详细的Python对象分析 + """ + f.write("2. Python对象深度分析\n") + f.write("-" * 50 + "\n") + + # 强制垃圾回收 + collected = gc.collect() + f.write(f"垃圾回收清理对象数: {collected}\n\n") + + # 获取所有对象 + all_objects = muppy.get_objects() + f.write(f"总对象数: {len(all_objects):,}\n") + + # 按类型统计 + type_stats = {} + for obj in all_objects: + obj_type = type(obj).__name__ + if obj_type not in type_stats: + type_stats[obj_type] = {'count': 0, 'size': 0} + type_stats[obj_type]['count'] += 1 + type_stats[obj_type]['size'] += sys.getsizeof(obj) + + # 按大小排序 + sorted_types = sorted(type_stats.items(), key=lambda x: x[1]['size'], reverse=True) + + f.write("对象类型统计 (按内存大小排序):\n") + f.write(f"{'类型':<20} {'数量':<10} {'总大小(MB)':<12} {'平均大小(B)':<12}\n") + f.write("-" * 60 + "\n") + + total_python_memory = 0 + for obj_type, stats in sorted_types[:20]: # 只显示前20个 + size_mb = stats['size'] / 1024 / 1024 + avg_size = stats['size'] / stats['count'] if stats['count'] > 0 else 0 + total_python_memory += size_mb + f.write(f"{obj_type:<20} {stats['count']:<10,} {size_mb:<12.2f} {avg_size:<12.1f}\n") + + f.write(f"\nPython对象总内存: {total_python_memory:.2f} MB\n") + + # 计算未统计内存 + process = psutil.Process() + total_memory = process.memory_info().rss / 1024 / 1024 + unaccounted = total_memory - total_python_memory + f.write(f"未统计内存: {unaccounted:.2f} MB ({unaccounted/total_memory*100:.1f}%)\n") + + f.write("\n" + "=" * 100 + "\n\n") + + def _write_detailed_memory_maps(self, f): + """ + 写入详细的内存映射分析 + """ + f.write("3. 内存映射详细分析\n") + f.write("-" * 50 + "\n") + + process = psutil.Process() + memory_maps = process.memory_maps() + + # 按权限分类 + perm_stats = {} + file_stats = {} + + for mmap in memory_maps: + size_mb = mmap.size / 1024 / 1024 + perms = mmap.perms + + # 按权限统计 + if perms not in perm_stats: + perm_stats[perms] = {'count': 0, 'size': 0} + perm_stats[perms]['count'] += 1 + perm_stats[perms]['size'] += size_mb + + # 按文件统计 + if mmap.path: + if mmap.path not in file_stats: + file_stats[mmap.path] = {'count': 0, 'size': 0} + file_stats[mmap.path]['count'] += 1 + file_stats[mmap.path]['size'] += size_mb + + f.write("按权限分类的内存映射:\n") + f.write(f"{'权限':<10} {'数量':<8} {'大小(MB)':<12}\n") + f.write("-" * 35 + "\n") + for perms, stats in sorted(perm_stats.items(), key=lambda x: x[1]['size'], reverse=True): + f.write(f"{perms:<10} {stats['count']:<8} {stats['size']:<12.2f}\n") + + f.write(f"\n按文件分类的内存映射 (前10个):\n") + f.write(f"{'文件路径':<50} {'大小(MB)':<12}\n") + f.write("-" * 70 + "\n") + for path, stats in sorted(file_stats.items(), key=lambda x: x[1]['size'], reverse=True)[:10]: + if len(path) > 47: + path = path[:44] + "..." + f.write(f"{path:<50} {stats['size']:<12.2f}\n") + + f.write("\n" + "=" * 100 + "\n\n") + + def _write_detailed_large_objects(self, f): + """ + 写入大对象详细分析 + """ + f.write("4. 大对象详细分析\n") + f.write("-" * 50 + "\n") + + all_objects = muppy.get_objects() + large_objects = [] + + for obj in all_objects: + try: + size = asizeof.asizeof(obj) + if size > 1024 * 1024: # 大于1MB的对象 + large_objects.append((obj, size)) + except: + continue + + # 按大小排序 + large_objects.sort(key=lambda x: x[1], reverse=True) + + f.write(f"大对象 (>1MB) 数量: {len(large_objects)}\n\n") + + for i, (obj, size) in enumerate(large_objects[:20], 1): # 只显示前20个 + size_mb = size / 1024 / 1024 + obj_type = type(obj).__name__ + + f.write(f"{i:2d}. {obj_type} - {size_mb:.2f} MB\n") + + # 尝试获取更多信息 + try: + if isinstance(obj, dict): + f.write(f" 字典项数: {len(obj)}\n") + if obj: + sample_keys = list(obj.keys())[:3] + f.write(f" 示例键: {sample_keys}\n") + elif isinstance(obj, (list, tuple)): + f.write(f" 元素数量: {len(obj)}\n") + elif isinstance(obj, str): + f.write(f" 字符串长度: {len(obj)}\n") + if len(obj) > 100: + f.write(f" 内容预览: {obj[:100]}...\n") + else: + f.write(f" 内容: {obj}\n") + elif hasattr(obj, '__dict__'): + f.write(f" 属性数量: {len(obj.__dict__)}\n") + if hasattr(obj, '__class__'): + f.write(f" 类名: {obj.__class__.__name__}\n") + except: + pass + + f.write("\n") + + f.write("=" * 100 + "\n\n") + + def _write_memory_leak_detection(self, f): + """ + 写入内存泄漏检测 + """ + f.write("5. 内存泄漏检测\n") + f.write("-" * 50 + "\n") + + # tracemalloc分析 + current, peak = tracemalloc.get_traced_memory() + f.write(f"tracemalloc当前内存: {current / 1024 / 1024:.2f} MB\n") + f.write(f"tracemalloc峰值内存: {peak / 1024 / 1024:.2f} MB\n") + + try: + snapshot = tracemalloc.take_snapshot() + top_stats = snapshot.statistics('lineno') + + f.write(f"\n内存分配最多的位置 (前15个):\n") + f.write("-" * 50 + "\n") + for i, stat in enumerate(top_stats[:15], 1): + f.write(f"{i:2d}. {stat.count:>8} 个对象, {stat.size / 1024 / 1024:>8.2f} MB\n") + for line in stat.traceback.format(): + f.write(f" {line}\n") + f.write("\n") + except Exception as e: + f.write(f"获取tracemalloc统计失败: {e}\n") + + # 垃圾回收分析 + f.write("垃圾回收分析:\n") + f.write("-" * 50 + "\n") + gc_counts = gc.get_count() + f.write(f"GC计数: {gc_counts}\n") + + # 检查不可达对象 + unreachable = len(gc.garbage) + f.write(f"不可达对象数量: {unreachable}\n") + if unreachable > 0: + f.write("不可达对象详情:\n") + for i, obj in enumerate(gc.garbage[:5], 1): # 只显示前5个 + f.write(f" {i}. {type(obj).__name__} - {id(obj)}\n") + + f.write("\n" + "=" * 100 + "\n\n") def _append_class_analysis(self, snapshot_file): """ @@ -455,3 +887,110 @@ class MemoryHelper(metaclass=Singleton): except Exception as e: logger.debug(f"获取变量名失败: {e}") return f"{type(obj).__name__}_{id(obj)}" + + def get_memory_summary(self) -> Dict[str, float]: + """ + 获取内存使用摘要 + """ + try: + process = psutil.Process() + memory_info = process.memory_info() + + # 获取Python对象总内存 + all_objects = muppy.get_objects() + sum1 = summary.summarize(all_objects) + + python_total_mb = 0 + for line in summary.format_(sum1): + if '|' in line and line.strip() and not line.startswith('=') and not line.startswith('-'): + parts = line.split('|') + if len(parts) >= 3: + try: + size_str = parts[2].strip() + if 'MB' in size_str: + size_mb = float(size_str.replace('MB', '').strip()) + python_total_mb += size_mb + except: + pass + + total_memory = memory_info.rss / 1024 / 1024 + unaccounted = total_memory - python_total_mb + + return { + 'total_memory_mb': total_memory, + 'python_objects_mb': python_total_mb, + 'unaccounted_mb': unaccounted, + 'unaccounted_percent': (unaccounted / total_memory * 100) if total_memory > 0 else 0 + } + except Exception as e: + logger.error(f"获取内存摘要失败: {e}") + return {} + + def force_garbage_collection(self): + """ + 强制垃圾回收并返回清理的对象数量 + """ + try: + collected = gc.collect() + logger.info(f"强制垃圾回收完成,清理了 {collected} 个对象") + return collected + except Exception as e: + logger.error(f"强制垃圾回收失败: {e}") + return 0 + + def analyze_memory_growth(self, interval_seconds: int = 300) -> Dict[str, float]: + """ + 分析内存增长趋势 + :param interval_seconds: 分析间隔(秒) + :return: 内存增长信息 + """ + try: + # 获取当前内存使用 + current_summary = self.get_memory_summary() + + # 等待指定时间 + time.sleep(interval_seconds) + + # 获取新的内存使用 + new_summary = self.get_memory_summary() + + if current_summary and new_summary: + growth_info = { + 'total_growth_mb': new_summary['total_memory_mb'] - current_summary['total_memory_mb'], + 'python_growth_mb': new_summary['python_objects_mb'] - current_summary['python_objects_mb'], + 'unaccounted_growth_mb': new_summary['unaccounted_mb'] - current_summary['unaccounted_mb'], + 'growth_rate_mb_per_hour': (new_summary['total_memory_mb'] - current_summary['total_memory_mb']) * 3600 / interval_seconds + } + + logger.info(f"内存增长分析: 总增长 {growth_info['total_growth_mb']:.2f} MB, " + f"Python对象增长 {growth_info['python_growth_mb']:.2f} MB, " + f"未统计增长 {growth_info['unaccounted_growth_mb']:.2f} MB") + + return growth_info + + return {} + + except Exception as e: + logger.error(f"分析内存增长失败: {e}") + return {} + + +# 使用示例 +if __name__ == "__main__": + # 创建内存分析器实例 + memory_helper = MemoryHelper() + + # 获取内存摘要 + summary = memory_helper.get_memory_summary() + print("内存使用摘要:") + for key, value in summary.items(): + print(f" {key}: {value:.2f}") + + # 创建详细分析报告 + analysis_file = memory_helper.create_detailed_memory_analysis() + if analysis_file: + print(f"详细分析报告已保存到: {analysis_file}") + + # 强制垃圾回收 + collected = memory_helper.force_garbage_collection() + print(f"垃圾回收清理了 {collected} 个对象")