记得标注每种方法是否为侵入式(即是否需要修改/装饰被测代码本身)
CPU
pyinstrument
可以生成火焰图(flame graph)形式的调用栈耗时报告和各调用的时间,不用自己二次清洗和可视化数据,算是比较简单易用的
内存
针对非复杂对象的数据类型,sys.getsizeof()(只统计对象本身的浅层大小,不递归统计其引用的对象)
和pympler(asizeof会递归统计深层大小)可用
import sys

# Shallow size only: getsizeof reports the list object itself,
# not the million float objects it references.
prob = [float(n) for n in range(1000000)]
print(sys.getsizeof(prob))
# Requires: pip install pympler
from pympler import asizeof

# asizeof follows references, so this is the deep size of the list
# including every float it contains.
values = [float(n) for n in range(1000000)]
print(asizeof.asizeof(values))
memory_profiler可以逐行监控函数的内存开销,但遇到复杂函数会明显耗时增加甚至卡死
监控的函数可以带输入
# Requires: pip install memory_profiler
import math

from memory_profiler import profile


# NOTE(review): the stream passed to @profile is opened here and never
# explicitly closed; acceptable for a throwaway profiling script.
@profile(precision=4, stream=open('memory_profiler.log', 'w+'))
def test(prob):
    """Map a flat probability list onto zero-padded binary-string keys.

    Assumes len(prob) is a power of two (2**width entries); entry k is
    keyed by k rendered as a width-bit binary string, e.g. index 1 of a
    4-element list becomes key '01'.

    Returns the resulting dict.
    """
    # BUG FIX: the original computed int(onp.log2(len(prob3))) — `prob3`
    # is undefined (NameError) and `onp` (numpy) was never imported.
    # Use the actual argument and the stdlib math.log2 instead.
    width = int(math.log2(len(prob)))
    # enumerate replaces the original manual `counter` variable.
    return {bin(index)[2:].zfill(width): p for index, p in enumerate(prob)}


prob1 = [0.2, 0.2, 0, 0.6]
test(prob1)
GPU
使用pytorch
import torch

# Per-device CUDA memory report built from torch.cuda.memory_stats().
if torch.cuda.is_available():
    GIB = 1024 ** 3  # bytes per GiB, hoisted out of the loop
    for device_index in range(torch.cuda.device_count()):
        with torch.cuda.device(device_index):  # make device_index current
            stats = torch.cuda.memory_stats()
            current_allocated = stats['allocated_bytes.all.current']  # bytes
            current_reserved = stats['reserved_bytes.all.current']  # bytes
            peak_allocated = stats['allocated_bytes.all.peak']  # bytes
            print(f"GPU {device_index}:")
            print(f"  Allocated GPU memory: {current_allocated / GIB:.2f} GB")
            print(f"  Reserved GPU memory: {current_reserved / GIB:.2f} GB")
            print(f"  Max allocated GPU memory: {peak_allocated / GIB:.2f} GB")
else:
    print("CUDA is not available.")
使用GPUtil
# Requires: pip install GPUtil
import GPUtil

# One summary per detected GPU: compute load plus memory usage.
for gpu in GPUtil.getGPUs():
    print(f"GPU {gpu.id}:")
    print(f"  GPU load: {gpu.load * 100:.2f}%")
    print(f"  GPU Memory Used: {gpu.memoryUsed} MB / {gpu.memoryTotal} MB")
    print(f"  GPU Memory Utilization: {gpu.memoryUtil * 100:.2f}%")
使用py3nvml(NVIDIA NVML库的Python 3绑定,使用前需pip install py3nvml)
# BUG FIX: the original did `import py3nvml` but then called `pynvml.*`,
# which raises NameError. py3nvml exposes the NVML bindings in its
# `py3nvml` submodule; bind it to the name the rest of the code uses.
from py3nvml import py3nvml as pynvml


def monitor_gpus():
    """Print name, memory totals and memory utilization for every NVIDIA GPU.

    Initializes NVML, reports per-device memory info in GB, and always
    shuts NVML down (finally) even if a query raises.
    """
    pynvml.nvmlInit()  # initialize the NVML library
    try:
        gpu_count = pynvml.nvmlDeviceGetCount()  # number of GPUs
        for i in range(gpu_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)  # handle for GPU i
            # py3nvml returns str, while older pynvml returned bytes;
            # the original unconditional .decode('utf-8') breaks on str.
            name = pynvml.nvmlDeviceGetName(handle)
            if isinstance(name, bytes):
                name = name.decode('utf-8')
            memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            total_memory = memory_info.total / (1024 ** 3)  # total, GB
            free_memory = memory_info.free / (1024 ** 3)  # free, GB
            used_memory = memory_info.used / (1024 ** 3)  # used, GB
            memory_utilization = (used_memory / total_memory) * 100  # percent
            print(f"GPU {i}: {name}")
            print(f"  总显存: {total_memory:.2f} GB")
            print(f"  已用显存: {used_memory:.2f} GB")
            print(f"  空闲显存: {free_memory:.2f} GB")
            print(f"  显存使用率: {memory_utilization:.2f}%\n")
    finally:
        pynvml.nvmlShutdown()  # release NVML resources


if __name__ == "__main__":
    monitor_gpus()