记得标注每种方法是否为侵入式(即是否需要修改/装饰被测代码本身)
CPU
pyinstrument
可以生成火焰图(flame graph)形式的调用栈耗时报告和各调用的时间,不用自己二次清洗和可视化数据,算是比较简单易用的
内存
针对非复杂对象的数据类型,sys.getsizeof()(只统计对象本身的浅层大小,不递归统计其引用的对象)
和pympler(asizeof会递归统计深层大小)可用
import sys

# Shallow size only: getsizeof reports the list object itself,
# not the million float objects it references.
prob = [float(n) for n in range(1000000)]
print(sys.getsizeof(prob))
# Requires: pip install pympler
from pympler import asizeof

# asizeof follows references, so this is the deep size of the list
# including every float it contains.
values = [float(n) for n in range(1000000)]
print(asizeof.asizeof(values))
memory_profiler可以逐行监控函数的内存开销,但遇到复杂函数会明显耗时增加甚至卡死
监控的函数可以带输入
# Requires: pip install memory_profiler
import math

from memory_profiler import profile


# NOTE(review): the stream passed to @profile is opened here and never
# explicitly closed; acceptable for a throwaway profiling script.
@profile(precision=4, stream=open('memory_profiler.log', 'w+'))
def test(prob):
    """Map a flat probability list onto zero-padded binary-string keys.

    Assumes len(prob) is a power of two (2**width entries); entry k is
    keyed by k rendered as a width-bit binary string, e.g. index 1 of a
    4-element list becomes key '01'.

    Returns the resulting dict.
    """
    # BUG FIX: the original computed int(onp.log2(len(prob3))) — `prob3`
    # is undefined (NameError) and `onp` (numpy) was never imported.
    # Use the actual argument and the stdlib math.log2 instead.
    width = int(math.log2(len(prob)))
    # enumerate replaces the original manual `counter` variable.
    return {bin(index)[2:].zfill(width): p for index, p in enumerate(prob)}


prob1 = [0.2, 0.2, 0, 0.6]
test(prob1)
GPU
使用pytorch
import torch

# Per-device CUDA memory report built from torch.cuda.memory_stats().
if torch.cuda.is_available():
    GIB = 1024 ** 3  # bytes per GiB, hoisted out of the loop
    for device_index in range(torch.cuda.device_count()):
        with torch.cuda.device(device_index):  # make device_index current
            stats = torch.cuda.memory_stats()
            current_allocated = stats['allocated_bytes.all.current']  # bytes
            current_reserved = stats['reserved_bytes.all.current']  # bytes
            peak_allocated = stats['allocated_bytes.all.peak']  # bytes
            print(f"GPU {device_index}:")
            print(f"  Allocated GPU memory: {current_allocated / GIB:.2f} GB")
            print(f"  Reserved GPU memory: {current_reserved / GIB:.2f} GB")
            print(f"  Max allocated GPU memory: {peak_allocated / GIB:.2f} GB")
else:
    print("CUDA is not available.")
使用GPUtil
# Requires: pip install GPUtil
import GPUtil

# One summary per detected GPU: compute load plus memory usage.
for gpu in GPUtil.getGPUs():
    print(f"GPU {gpu.id}:")
    print(f"  GPU load: {gpu.load * 100:.2f}%")
    print(f"  GPU Memory Used: {gpu.memoryUsed} MB / {gpu.memoryTotal} MB")
    print(f"  GPU Memory Utilization: {gpu.memoryUtil * 100:.2f}%")
使用py3nvml(NVIDIA NVML库的Python 3绑定,使用前需pip install py3nvml)
# BUG FIX: the original did `import py3nvml` but then called `pynvml.*`,
# which raises NameError. py3nvml exposes the NVML bindings in its
# `py3nvml` submodule; bind it to the name the rest of the code uses.
from py3nvml import py3nvml as pynvml


def monitor_gpus():
    """Print name, memory totals and memory utilization for every NVIDIA GPU.

    Initializes NVML, reports per-device memory info in GB, and always
    shuts NVML down (finally) even if a query raises.
    """
    pynvml.nvmlInit()  # initialize the NVML library
    try:
        gpu_count = pynvml.nvmlDeviceGetCount()  # number of GPUs
        for i in range(gpu_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)  # handle for GPU i
            # py3nvml returns str, while older pynvml returned bytes;
            # the original unconditional .decode('utf-8') breaks on str.
            name = pynvml.nvmlDeviceGetName(handle)
            if isinstance(name, bytes):
                name = name.decode('utf-8')
            memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            total_memory = memory_info.total / (1024 ** 3)  # total, GB
            free_memory = memory_info.free / (1024 ** 3)  # free, GB
            used_memory = memory_info.used / (1024 ** 3)  # used, GB
            memory_utilization = (used_memory / total_memory) * 100  # percent
            print(f"GPU {i}: {name}")
            print(f"  总显存: {total_memory:.2f} GB")
            print(f"  已用显存: {used_memory:.2f} GB")
            print(f"  空闲显存: {free_memory:.2f} GB")
            print(f"  显存使用率: {memory_utilization:.2f}%\n")
    finally:
        pynvml.nvmlShutdown()  # release NVML resources


if __name__ == "__main__":
    monitor_gpus()