PyTorch Tensor 基础 —— 从 NumPy 到 GPU
目标
- 理解 Tensor 的创建、操作、形状变换
- 掌握 CPU ↔ GPU 迁移
- 理解 Tensor 与 NumPy ndarray 的互操作
- 掌握 autograd 自动微分基础
完整代码
import torch
import numpy as np
# ============================================================
# 1. Creating tensors
# ============================================================
# torch.tensor() builds a new tensor from a Python list.
a = torch.tensor([1, 2, 3, 4])
print(f"从列表创建: {a}")

# torch.from_numpy() shares the ndarray's memory, so the write to np_arr
# below is visible through b as well.
np_arr = np.array([5, 6, 7, 8])
b = torch.from_numpy(np_arr)
np_arr[0] = 99
print(f"NumPy → Tensor (共享内存): {b}")  # b[0] reads 99 too

# Factory functions for common fill patterns (sizes given as tuples).
zeros = torch.zeros((3, 4))  # all zeros, 3x4
ones = torch.ones((2, 3))  # all ones, 2x3
rand = torch.rand((3, 3))  # uniform on [0, 1)
randn = torch.randn((3, 3))  # standard normal
arange = torch.arange(start=0, end=10, step=2)  # like Python's range
linspace = torch.linspace(start=0, end=1, steps=5)  # evenly spaced points

# The same values stored under two explicitly requested dtypes.
values = [1, 2, 3]
int_tensor = torch.tensor(values, dtype=torch.int32)
float_tensor = torch.tensor(values, dtype=torch.float32)
print(f"\nint32: {int_tensor.dtype}, float32: {float_tensor.dtype}")
# ============================================================
# 2. Tensor attributes
# ============================================================
# A random batch laid out as (batch, channel, height, width).
x = torch.randn((4, 3, 28, 28))

# Each entry pairs a printed label with the attribute it reports; the first
# label carries the leading blank line that separates this section's output.
attribute_report = (
    ("\n形状", x.shape),
    ("维度数", x.dim()),
    ("元素总数", x.numel()),
    ("数据类型", x.dtype),
    ("所在设备", x.device),
)
for label, value in attribute_report:
    print(f"{label}: {value}")
# ============================================================
# 3. Indexing and slicing
# ============================================================
# The integers 1..12 arranged as a 3x4 grid.
t = torch.arange(1, 13).reshape(3, 4)
print(f"\n原始张量:\n{t}")

# Basic indexing: an integer selects along an axis, a slice keeps a range.
first_row = t[0]
second_col = t[:, 1]
top_right = t[:2, -2:]  # first two rows, last two columns
print(f"第1行: {first_row}")
print(f"第2列: {second_col}")
print(f"前两行后两列:\n{top_right}")

# Boolean mask: keep only the elements greater than 5.
above_five = t[t > 5]
print(f"按条件筛选: {above_five}")

# Fancy indexing: an index tensor selects rows 0 and 2.
indices = torch.tensor([0, 2])
picked_rows = t[indices]
print(f"选取第0和第2行:\n{picked_rows}")
# ============================================================
# 4. Shape operations
# ============================================================
a = torch.arange(8)
print(f"\n原始: {a}")

# reshape / view: both present the same 8 elements as a 2-D grid; view
# additionally requires the underlying memory to be contiguous.
print(f"reshape 2×4:\n{a.reshape(2, 4)}")
# The label names the call actually made here (view), so the output shows
# which of the two APIs produced it.
print(f"view 4×2:\n{a.view(4, 2)}")

# Transpose of a 2-D tensor: (3, 4) -> (4, 3).
m = torch.randn(3, 4)
print(f"转置:\n{m.T} 形状: {m.T.shape}")

# unsqueeze inserts a size-1 axis at the given position.
x = torch.tensor([1, 2, 3])
print(f"unsqueeze(0): {x.unsqueeze(0).shape}")  # (1, 3)
print(f"unsqueeze(1): {x.unsqueeze(1).shape}")  # (3, 1)

# squeeze with no argument drops every size-1 axis.
y = torch.randn(1, 3, 1, 5)
print(f"squeeze: {y.squeeze().shape}")  # (3, 5)

# Joining: cat extends an existing axis, stack creates a new one.
a = torch.randn(2, 3)
b = torch.randn(2, 3)
print(f"dim=0 拼接: {torch.cat([a, b], dim=0).shape}")  # (4, 3)
print(f"dim=1 拼接: {torch.cat([a, b], dim=1).shape}")  # (2, 6)
print(f"stack 新维度: {torch.stack([a, b], dim=0).shape}")  # (2, 2, 3)
# ============================================================
# 5. Math operations
# ============================================================
a = torch.randn(3, 4)
b = torch.randn(3, 4)

# Element-wise arithmetic, spelled with the functional API.
add = torch.add(a, b)  # same as a + b
mul = torch.mul(a, b)  # same as a * b: element-wise, NOT a matrix product
div = torch.div(a, b)  # same as a / b

# Matrix multiplication, three equivalent spellings: (3, 4) @ (4, 5).
c = torch.randn(4, 5)
matmul1 = a @ c  # operator form, Python 3.5+
matmul2 = torch.matmul(a, c)
matmul3 = torch.mm(a, c)  # 2-D operands only

# Reductions over the whole tensor.
total, average, largest = a.sum(), a.mean(), a.max()
print(f"\n求和: {total}, 均值: {average}, 最大值: {largest}")

# Reductions along dim 0, without and with the reduced axis kept.
column_sums = a.sum(dim=0)
column_sums_kept = a.sum(dim=0, keepdim=True)
print(f"按维度求和: {column_sums.shape}")  # (4,)
print(f"按维度求和保持维度: {column_sums_kept.shape}")  # (1, 4)
# ============================================================
# 6. Moving tensors to the GPU
# ============================================================
# Everything in this section needs a CUDA device, so it is all guarded by
# the availability check and skipped on CPU-only machines.
if torch.cuda.is_available():
    device = torch.device("cuda")

    # Use a dedicated 2-D float tensor for the demo instead of reusing `x`:
    # the slice-and-matmul below needs a 2-D floating-point operand, and a
    # 1-D integer tensor would fail on the two-axis index.
    x_cpu = torch.randn(4, 4)
    x_gpu = x_cpu.to(device)
    print(f"\n✅ GPU Tensor: {x_gpu.device}")

    # Arithmetic on the GPU: both operands are on the same device.
    y_gpu = torch.randn(3, 3, device=device)
    result = x_gpu[:3, :3] @ y_gpu  # (3, 3) @ (3, 3) -> (3, 3)

    # Copy the result back to the CPU.
    result_cpu = result.cpu()
    print(f"✅ 回传 CPU: {result_cpu.device}")

    # Create a tensor directly on the GPU.
    gpu_tensor = torch.ones(4, 4, device=device)
# ============================================================
# 7. autograd: automatic differentiation
# ============================================================
# Tensors created with requires_grad=True have their gradients tracked.
x = torch.tensor([2.0, 3.0], requires_grad=True)
w = torch.tensor([0.5, 1.0], requires_grad=True)
b = torch.tensor(0.1, requires_grad=True)

# Forward pass: y = w·x + b, written as an element-wise product that is
# then summed to a scalar.
weighted = w * x
y = weighted.sum() + b
print(f"\ny = {y.item():.4f}")

# Backward pass: fills .grad on x, w and b.
y.backward()
print(f"∂y/∂x = {x.grad}")  # expected to equal w = [0.5, 1.0]
print(f"∂y/∂w = {w.grad}")  # expected to equal x = [2.0, 3.0]
print(f"∂y/∂b = {b.grad}")  # expected to equal 1.0

# Reset the gradients in place (required between training iterations).
for leaf in (x, w, b):
    leaf.grad.zero_()
运行输出示例(节选)
从列表创建: tensor([1, 2, 3, 4])
NumPy → Tensor (共享内存): tensor([99, 6, 7, 8])
形状: torch.Size([4, 3, 28, 28])
维度数: 4
元素总数: 9408
y = 4.1000
∂y/∂x = tensor([0.5000, 1.0000])
∂y/∂w = tensor([2., 3.])
∂y/∂b = tensor(1.)
关键要点
| 概念 | 说明 |
| --- | --- |
| `torch.tensor()` vs `torch.Tensor()` | 前者是工厂函数(复制数据并自动推断 dtype);后者是类构造器(默认 float32,只传入形状时内容未初始化) |
| `from_numpy()` | 与 NumPy 共享内存,修改会互相影响 |
| `.to(device)` | 通用设备迁移(CPU / CUDA / MPS) |
| `requires_grad=True` | 标记需要追踪梯度 |
| `.backward()` | 沿计算图反向传播,计算 requires_grad=True 的叶子张量的梯度,并累加到其 `.grad` |
| `.grad` | 存储(累加)计算出的梯度 |
| `torch.no_grad()` | 上下文管理器,禁用梯度计算(推理时用) |