文档

PyTorch Tensor 基础 —— 从 NumPy 到 GPU

目标

理解 Tensor 的创建、操作、形状变换
掌握 CPU ↔ GPU 迁移
理解 Tensor 与 NumPy ndarray 的互操作
掌握 autograd 自动微分基础

完整代码

import torch
import numpy as np

# ============================================================
# 1. Creating tensors
# ============================================================

# Straight from a Python list.
a = torch.tensor([1, 2, 3, 4])
print(f"从列表创建: {a}")

# From a NumPy array. torch.from_numpy shares the array's buffer, so the
# in-place write to np_arr below is visible through b as well.
np_arr = np.array([5, 6, 7, 8])
b = torch.from_numpy(np_arr)
np_arr[0] = 99
print(f"NumPy → Tensor (共享内存): {b}")  # b[0] reads 99 too

# Factory helpers for common fill patterns (shapes given as tuples).
zeros = torch.zeros((3, 4))                         # 3x4, all zeros
ones = torch.ones((2, 3))                           # 2x3, all ones
rand = torch.rand((3, 3))                           # uniform on [0, 1)
randn = torch.randn((3, 3))                         # standard normal
arange = torch.arange(start=0, end=10, step=2)      # like Python's range
linspace = torch.linspace(start=0, end=1, steps=5)  # 5 evenly spaced points

# Same values, explicit dtypes.
values = [1, 2, 3]
int_tensor = torch.tensor(values, dtype=torch.int32)
float_tensor = torch.tensor(values, dtype=torch.float32)
print(f"\nint32: {int_tensor.dtype}, float32: {float_tensor.dtype}")

# ============================================================
# 2. Tensor attributes
# ============================================================

# A 4-D tensor laid out as (batch, channel, height, width).
x = torch.randn((4, 3, 28, 28))

# Report the basic metadata, one line per attribute.
for line in (
    f"\n形状: {x.shape}",
    f"维度数: {x.dim()}",
    f"元素总数: {x.numel()}",
    f"数据类型: {x.dtype}",
    f"所在设备: {x.device}",
):
    print(line)

# ============================================================
# 3. Indexing and slicing
# ============================================================

# The integers 1..12 arranged as a 3x4 matrix.
t = torch.arange(1, 13).reshape((3, 4))
print(f"\n原始张量:\n{t}")

# Basic indexing, slicing and boolean-mask selection.
print(
    f"第1行: {t[0]}",
    f"第2列: {t[:, 1]}",
    f"前两行后两列:\n{t[:2, -2:]}",
    f"按条件筛选: {t[t > 5]}",
    sep="\n",
)

# Fancy indexing: an index tensor picks whole rows.
indices = torch.tensor([0, 2])
print(f"选取第0和第2行:\n{t[indices]}")

# ============================================================
# 4. Shape operations
# ============================================================

a = torch.arange(8)
print(f"\n原始: {a}")

# reshape vs. view: view requires the tensor's memory to be contiguous,
# reshape works either way. Each line is labelled with the call it shows.
print(f"reshape 2×4:\n{a.reshape(2, 4)}")
print(f"view 4×2:\n{a.view(4, 2)}")

# Transpose of a 2-D tensor.
m = torch.randn(3, 4)
print(f"转置:\n{m.T} 形状: {m.T.shape}")

# Insert a size-1 dimension at the given position.
x = torch.tensor([1, 2, 3])
print(f"unsqueeze(0): {x.unsqueeze(0).shape}")  # (1, 3)
print(f"unsqueeze(1): {x.unsqueeze(1).shape}")  # (3, 1)

# Drop every size-1 dimension.
y = torch.randn(1, 3, 1, 5)
print(f"squeeze: {y.squeeze().shape}")           # (3, 5)

# cat joins along an existing dimension; stack creates a new one.
a = torch.randn(2, 3)
b = torch.randn(2, 3)
print(f"dim=0 拼接: {torch.cat([a, b], dim=0).shape}")  # (4, 3)
print(f"dim=1 拼接: {torch.cat([a, b], dim=1).shape}")  # (2, 6)
print(f"stack 新维度: {torch.stack([a, b], dim=0).shape}")  # (2, 2, 3)

# ============================================================
# 5. Math operations
# ============================================================

# Two random operands of identical shape.
a, b = torch.randn(3, 4), torch.randn(3, 4)

# Element-wise arithmetic.
add = a + b
mul = a * b  # element-by-element product, NOT a matrix product
div = a / b

# Matrix multiplication, spelled three ways: (3, 4) @ (4, 5) -> (3, 5).
c = torch.randn(4, 5)
matmul1 = a @ c               # operator form (Python 3.5+)
matmul2 = torch.matmul(a, c)  # function form
matmul3 = torch.mm(a, c)      # 2-D operands only

# Reductions, over everything and along a single dimension.
print(f"\n求和: {a.sum()}, 均值: {a.mean()}, 最大值: {a.max()}")
print(f"按维度求和: {a.sum(dim=0).shape}")   # (4,)
print(f"按维度求和保持维度: {a.sum(dim=0, keepdim=True).shape}")  # (1, 4)

# ============================================================
# 6. Moving tensors to the GPU
# ============================================================

# Use a dedicated 2-D float tensor for this demo so that the [:3, :3]
# slice below is a valid (3, 3) matmul operand, regardless of what `x`
# held earlier in the script.
x = torch.randn(4, 4)

if torch.cuda.is_available():
    device = torch.device("cuda")

    # Copy the CPU tensor onto the GPU.
    x_gpu = x.to(device)
    print(f"\n✅ GPU Tensor: {x_gpu.device}")

    # Compute on the GPU: (3, 3) @ (3, 3), both operands on `device`.
    y_gpu = torch.randn(3, 3, device=device)
    result = x_gpu[:3, :3] @ y_gpu

    # Bring the result back to host memory.
    result_cpu = result.cpu()
    print(f"✅ 回传 CPU: {result_cpu.device}")

    # Allocate directly on the GPU, skipping the CPU round trip.
    gpu_tensor = torch.ones(4, 4, device=device)

# ============================================================
# 7. autograd: automatic differentiation
# ============================================================

# Leaf tensors whose gradients should be tracked.
x = torch.tensor([2.0, 3.0], requires_grad=True)
w = torch.tensor([0.5, 1.0], requires_grad=True)
b = torch.tensor(0.1, requires_grad=True)

# Forward pass: y = w·x + b (dot product written as multiply-then-sum).
weighted = w * x
y = weighted.sum() + b
print(f"\ny = {y.item():.4f}")

# Backward pass: fills in .grad on x, w and b.
y.backward()
print(f"∂y/∂x = {x.grad}")  # expected to equal w = [0.5, 1.0]
print(f"∂y/∂w = {w.grad}")  # expected to equal x = [2.0, 3.0]
print(f"∂y/∂b = {b.grad}")  # expected to equal 1.0

# Reset the gradients in place (mandatory inside a training loop).
for leaf in (x, w, b):
    leaf.grad.zero_()

运行输出示例（节选，其余输出已省略）

从列表创建: tensor([1, 2, 3, 4])
NumPy → Tensor (共享内存): tensor([99,  6,  7,  8])

形状: torch.Size([4, 3, 28, 28])
维度数: 4
元素总数: 9408

y = 4.1000
∂y/∂x = tensor([0.5000, 1.0000])
∂y/∂w = tensor([2., 3.])
∂y/∂b = tensor(1.)

关键要点

概念	说明
`torch.tensor()` vs `torch.Tensor()`	前者是工厂函数（复制数据并自动推断 dtype），后者是类构造器（始终使用默认 dtype，通常为 float32；只传入形状时返回未初始化的张量）
`from_numpy()`	与 NumPy 共享内存，修改会互相影响
`.to(device)`	通用设备迁移（CPU / CUDA / MPS）
`requires_grad=True`	标记需要追踪梯度
`.backward()`	从输出出发反向传播，计算计算图中所有 `requires_grad=True` 的叶子张量的梯度，并累加到各自的 `.grad`
`.grad`	存储计算出的梯度
`torch.no_grad()`	上下文管理器，禁用梯度计算（推理时用）

信息

路径: /tech-stacks/pytorch/examples/01-tensor-basics.md
更新时间: 2026/5/30