深度学习从入门到实践

说实话，刚开始接触深度学习的时候，我真的是一头雾水。什么神经网络、反向传播、梯度下降，听着就让人头疼。但当我真正动手写代码、跑项目的时候，我发现这玩意儿真的太酷了！今天就和大家分享一下我在深度学习方面的一些学习心得和实践经验。

什么是深度学习？

深度学习是机器学习的一个分支，它使用多层神经网络来学习数据中的复杂模式。简单来说，就是让计算机从大量样本中自动学习“从输入到输出”的映射规律，而不是依靠人工编写的规则。

深度学习的特点

多层结构：使用多层神经网络
自动特征提取：自动学习特征，不需要人工设计
端到端学习：从输入直接学习到输出
大数据驱动：需要大量数据进行训练
计算密集型：需要强大的计算资源

深度学习基础

1. 神经网络基础

# 简单的神经网络实现
import numpy as np

class NeuralNetwork:
    """Two-layer fully connected network trained with full-batch gradient descent.

    Both the hidden and the output layer use a sigmoid activation, and the
    error signal is the derivative of the squared error.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Draw weights from a standard normal distribution and zero the biases."""
        self.weights1 = np.random.randn(input_size, hidden_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.weights2 = np.random.randn(hidden_size, output_size)
        self.bias2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and cache the intermediate values for `backward`.

        Args:
            X: Input array whose last dimension equals `input_size`.

        Returns:
            The output-layer activations (also stored in `self.a2`).
        """
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def sigmoid(self, x):
        """Logistic sigmoid, applied element-wise."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Sigmoid derivative expressed in terms of the sigmoid *output*.

        `x` must already be a sigmoid activation (as `backward` passes it),
        not the pre-activation value.
        """
        return x * (1 - x)

    def backward(self, X, y, output, learning_rate=1.0):
        """Back-propagate the error and update all parameters in place.

        Must be called after `forward(X)`, because it reads the cached
        hidden activations in `self.a1`.

        Args:
            X: The inputs that produced `output`.
            y: Target values with the same shape as `output`.
            output: Network output returned by `forward(X)`.
            learning_rate: Step size applied to the batch-averaged gradient.
                The default of 1.0 reproduces the historical behaviour of
                calling `backward(X, y, output)` directly.
        """
        m = X.shape[0]

        # Error signal at the output layer.
        delta2 = (output - y) * self.sigmoid_derivative(output)

        # Error signal propagated to the hidden layer (uses the weights
        # *before* this step's update).
        delta1 = np.dot(delta2, self.weights2.T) * self.sigmoid_derivative(self.a1)

        # Gradient-descent step on the batch-averaged gradients.
        step = learning_rate / m
        self.weights2 -= step * np.dot(self.a1.T, delta2)
        self.bias2 -= step * np.sum(delta2, axis=0, keepdims=True)
        self.weights1 -= step * np.dot(X.T, delta1)
        self.bias1 -= step * np.sum(delta1, axis=0, keepdims=True)

    def train(self, X, y, epochs, learning_rate):
        """Train for `epochs` full-batch iterations.

        Args:
            X: Training inputs.
            y: Training targets.
            epochs: Number of forward/backward passes over the whole batch.
            learning_rate: Step size forwarded to `backward`.
        """
        for epoch in range(epochs):
            output = self.forward(X)

            # Mean squared error, reported for monitoring only.
            loss = np.mean((output - y) ** 2)

            # The learning rate was previously accepted but never applied.
            self.backward(X, y, output, learning_rate)

            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')

    def predict(self, X):
        """Return the network output for `X` (same as `forward`)."""
        return self.forward(X)

# Usage example: train the network on the XOR truth table and print its predictions.
if __name__ == "__main__":
    # Four XOR input pairs and their expected outputs.
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])

    # Network with 2 inputs, 4 hidden units and 1 output.
    nn = NeuralNetwork(2, 4, 1)

    # Train for 1000 epochs; 0.1 is passed as the learning rate.
    nn.train(X, y, 1000, 0.1)

    # Print the prediction for each input row (reshaped to a 1-row batch).
    print("预测结果:")
    for x in X:
        print(f"输入: {x}, 预测: {nn.predict(x.reshape(1, -1))[0][0]:.4f}")

2. 激活函数

# 激活函数实现
import numpy as np
import matplotlib.pyplot as plt

class ActivationFunctions:
    """Collection of common activation functions and their derivatives.

    All methods are static and operate element-wise on NumPy arrays, except
    `softmax`, which normalises along one axis.
    """

    @staticmethod
    def sigmoid(x):
        """Logistic sigmoid 1 / (1 + exp(-x))."""
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def sigmoid_derivative(x):
        """Sigmoid derivative, given the sigmoid *output*.

        Unlike `tanh_derivative`, `x` here must already be `sigmoid(z)`;
        the result is `x * (1 - x)`.
        """
        return x * (1 - x)

    @staticmethod
    def tanh(x):
        """Hyperbolic tangent."""
        return np.tanh(x)

    @staticmethod
    def tanh_derivative(x):
        """Derivative of tanh evaluated at the pre-activation value `x`."""
        return 1 - np.tanh(x) ** 2

    @staticmethod
    def relu(x):
        """Rectified linear unit max(0, x)."""
        return np.maximum(0, x)

    @staticmethod
    def relu_derivative(x):
        """ReLU derivative: 1 where x > 0, otherwise 0."""
        return np.where(x > 0, 1, 0)

    @staticmethod
    def leaky_relu(x, alpha=0.01):
        """Leaky ReLU: x where x > 0, otherwise alpha * x."""
        return np.where(x > 0, x, alpha * x)

    @staticmethod
    def leaky_relu_derivative(x, alpha=0.01):
        """Leaky ReLU derivative: 1 where x > 0, otherwise alpha."""
        return np.where(x > 0, 1, alpha)

    @staticmethod
    def softmax(x, axis=None):
        """Numerically stable softmax.

        Args:
            x: Array of scores (any array-like accepted by `np.asarray`).
            axis: Axis to normalise over. When omitted, axis 1 is used as
                before; 1-D input, which previously raised an axis error,
                is normalised over its only axis.

        Returns:
            Array of the same shape as `x` whose entries sum to 1 along `axis`.
        """
        x = np.asarray(x)
        if axis is None:
            axis = 0 if x.ndim == 1 else 1
        # Subtracting the maximum keeps exp() from overflowing.
        exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
        return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

    @staticmethod
    def plot_activation_functions():
        """Plot each activation on a 2x3 grid and show the figure."""
        x = np.linspace(-5, 5, 100)

        fig, axes = plt.subplots(2, 3, figsize=(15, 10))
        fig.suptitle('Activation Functions', fontsize=16)

        # Sigmoid
        axes[0, 0].plot(x, ActivationFunctions.sigmoid(x))
        axes[0, 0].set_title('Sigmoid')
        axes[0, 0].grid(True)

        # Tanh
        axes[0, 1].plot(x, ActivationFunctions.tanh(x))
        axes[0, 1].set_title('Tanh')
        axes[0, 1].grid(True)

        # ReLU
        axes[0, 2].plot(x, ActivationFunctions.relu(x))
        axes[0, 2].set_title('ReLU')
        axes[0, 2].grid(True)

        # Leaky ReLU
        axes[1, 0].plot(x, ActivationFunctions.leaky_relu(x))
        axes[1, 0].set_title('Leaky ReLU')
        axes[1, 0].grid(True)

        # Softmax has no 1-D curve, so show the probabilities of one example row.
        x_softmax = np.array([[1, 2, 3], [4, 5, 6]])
        y_softmax = ActivationFunctions.softmax(x_softmax)
        axes[1, 1].bar(range(3), y_softmax[0])
        axes[1, 1].set_title('Softmax (example)')
        axes[1, 1].grid(True)

        # Derivative comparison; sigmoid_derivative takes the sigmoid output.
        axes[1, 2].plot(x, ActivationFunctions.sigmoid_derivative(ActivationFunctions.sigmoid(x)), label='Sigmoid')
        axes[1, 2].plot(x, ActivationFunctions.relu_derivative(x), label='ReLU')
        axes[1, 2].set_title('Derivatives')
        axes[1, 2].legend()
        axes[1, 2].grid(True)

        plt.tight_layout()
        plt.show()

# Usage example: show the activation-function plots when run as a script.
if __name__ == "__main__":
    ActivationFunctions.plot_activation_functions()

3. 损失函数

# 损失函数实现
import numpy as np

class LossFunctions:
    """Collection of common loss functions (static methods on NumPy arrays).

    Inputs are converted with `np.asarray`, so plain Python lists and
    scalars are accepted as well as arrays.
    """

    @staticmethod
    def mean_squared_error(y_true, y_pred):
        """Mean of the squared differences between targets and predictions."""
        y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
        return np.mean((y_true - y_pred) ** 2)

    @staticmethod
    def mean_squared_error_derivative(y_true, y_pred):
        """Gradient of the mean squared error with respect to `y_pred`."""
        y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
        return 2 * (y_pred - y_true) / y_true.size

    @staticmethod
    def binary_cross_entropy(y_true, y_pred):
        """Mean binary cross-entropy; `y_pred` holds probabilities."""
        y_true = np.asarray(y_true)
        # Clip so that log(0) is never evaluated.
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    @staticmethod
    def binary_cross_entropy_derivative(y_true, y_pred):
        """Element-wise gradient of the per-sample binary cross-entropy.

        Note that, unlike `mean_squared_error_derivative`, the result is
        not divided by the number of samples.
        """
        y_true = np.asarray(y_true)
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return (y_pred - y_true) / (y_pred * (1 - y_pred))

    @staticmethod
    def categorical_cross_entropy(y_true, y_pred):
        """Categorical cross-entropy summed over classes, averaged over rows."""
        y_true = np.asarray(y_true)
        # Clip so that log(0) is never evaluated.
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.sum(y_true * np.log(y_pred)) / y_true.shape[0]

    @staticmethod
    def categorical_cross_entropy_derivative(y_true, y_pred):
        """Return `(y_pred - y_true) / batch_size`.

        NOTE(review): this is the gradient with respect to the pre-softmax
        logits when `y_pred` is a softmax output, not the gradient with
        respect to `y_pred` itself -- confirm this is what callers expect.
        """
        y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
        return (y_pred - y_true) / y_true.shape[0]

    @staticmethod
    def hinge_loss(y_true, y_pred):
        """Mean hinge loss max(0, 1 - y_true * y_pred), as used for SVMs."""
        y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
        return np.mean(np.maximum(0, 1 - y_true * y_pred))

    @staticmethod
    def huber_loss(y_true, y_pred, delta=1.0):
        """Element-wise Huber loss.

        Unlike the other losses in this class the result is NOT averaged:
        an array with one loss value per element is returned.
        """
        error = np.asarray(y_true) - np.asarray(y_pred)
        return np.where(np.abs(error) <= delta,
                        0.5 * error ** 2,
                        delta * (np.abs(error) - 0.5 * delta))

    @staticmethod
    def plot_loss_functions():
        """Plot each loss as a function of the prediction for a target of 1."""
        # Imported here so this snippet does not depend on an import made elsewhere.
        import matplotlib.pyplot as plt

        x = np.linspace(-2, 2, 100)
        y_true = np.ones_like(x)

        def per_point(loss_fn, y_pred):
            # The mean-reducing losses return a single scalar, which cannot
            # be plotted against x; evaluate them one sample at a time.
            return np.array([loss_fn(t, p) for t, p in zip(y_true, y_pred)])

        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        fig.suptitle('Loss Functions', fontsize=16)

        # MSE
        mse_loss = per_point(LossFunctions.mean_squared_error, x)
        axes[0, 0].plot(x, mse_loss)
        axes[0, 0].set_title('Mean Squared Error')
        axes[0, 0].grid(True)

        # Binary cross-entropy: squash x through a sigmoid to get probabilities.
        y_pred_bce = 1 / (1 + np.exp(-x))
        bce_loss = per_point(LossFunctions.binary_cross_entropy, y_pred_bce)
        axes[0, 1].plot(x, bce_loss)
        axes[0, 1].set_title('Binary Cross Entropy')
        axes[0, 1].grid(True)

        # Hinge loss
        hinge_loss = per_point(LossFunctions.hinge_loss, x)
        axes[1, 0].plot(x, hinge_loss)
        axes[1, 0].set_title('Hinge Loss')
        axes[1, 0].grid(True)

        # Huber loss is already element-wise.
        huber_loss = LossFunctions.huber_loss(y_true, x)
        axes[1, 1].plot(x, huber_loss)
        axes[1, 1].set_title('Huber Loss')
        axes[1, 1].grid(True)

        plt.tight_layout()
        plt.show()

# Usage example: show the loss-function plots when run as a script.
if __name__ == "__main__":
    LossFunctions.plot_loss_functions()

深度学习框架

1. PyTorch入门

# PyTorch基础示例
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# 1. 创建张量
def create_tensors():
    """Print a short tour of tensor construction and basic arithmetic."""
    print("=== 创建张量 ===")

    # Tensor built from a NumPy array.
    tensor_from_numpy = torch.from_numpy(np.array([[1, 2], [3, 4]]))
    print(f"从numpy创建的张量:\n{tensor_from_numpy}")

    # 2x3 tensor of uniform random values.
    random_tensor = torch.rand(2, 3)
    print(f"随机张量:\n{random_tensor}")

    # 2x2 tensor of zeros.
    zero_tensor = torch.zeros(2, 2)
    print(f"全零张量:\n{zero_tensor}")

    # Element-wise operations on two float32 vectors.
    x, y = (
        torch.tensor(values, dtype=torch.float32)
        for values in ([1, 2, 3], [4, 5, 6])
    )
    print(f"加法: {x + y}")
    print(f"乘法: {x * y}")

    # Column vector (3x1) times row vector (1x3) gives a 3x3 matrix.
    column, row = x.unsqueeze(1), y.unsqueeze(0)
    print(f"矩阵乘法: {column @ row}")

# 2. 神经网络定义
class SimpleNN(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear, returning raw scores."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the hidden (`fc1`) and output (`fc2`) linear layers."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map inputs to scores; no activation is applied after `fc2`."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# 3. 数据处理
def prepare_data():
    """Build a shuffled DataLoader over a synthetic binary-classification set.

    NumPy's global seed is reset to 42 on every call, so each call produces
    the same 1000 samples (10 features each) and the same 0/1 labels.

    Returns:
        DataLoader yielding `(features, labels)` batches of size 32.
    """
    np.random.seed(42)
    features = np.random.randn(1000, 10)
    targets = np.random.randint(0, 2, 1000)

    # float32 features and int64 class indices.
    dataset = TensorDataset(
        torch.tensor(features, dtype=torch.float32),
        torch.tensor(targets, dtype=torch.long),
    )
    return DataLoader(dataset, batch_size=32, shuffle=True)

# 4. 模型训练
def train_model():
    """Train a `SimpleNN` on the synthetic data for ten epochs.

    Prints the loss of every tenth batch and the mean batch loss per epoch.

    Returns:
        The trained model.
    """
    print("\n=== 训练模型 ===")

    dataloader = prepare_data()

    # Model, loss and optimiser.
    model = SimpleNN(input_size=10, hidden_size=64, output_size=2)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    num_epochs = 10
    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch_idx, batch in enumerate(dataloader):
            data, target = batch
            loss = criterion(model(data), target)

            # Clear stale gradients, back-propagate, then take one step.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if batch_idx % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx}/{len(dataloader)}], Loss: {loss.item():.4f}')

        print(f'Epoch [{epoch+1}/{num_epochs}] 平均损失: {running_loss/len(dataloader):.4f}')

    return model

# 5. 模型评估
def evaluate_model(model, dataloader):
    """Compute and print the classification accuracy of `model`.

    The model is switched to evaluation mode and left in that mode.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        dataloader: Iterable of `(data, target)` batches.

    Returns:
        Accuracy as a percentage in [0, 100]; 0.0 when the dataloader yields
        no samples (this previously raised ZeroDivisionError).
    """
    model.eval()
    correct = 0
    total = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for data, target in dataloader:
            predicted = model(data).argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    accuracy = 100 * correct / total if total else 0.0
    print(f'准确率: {accuracy:.2f}%')
    return accuracy

# 6. 模型保存和加载
def save_and_load_model():
    """Train a model, write its state dict to disk and load it into a new model.

    Returns:
        A fresh `SimpleNN` carrying the saved parameters.
    """
    print("\n=== 保存和加载模型 ===")

    checkpoint_path = 'simple_nn.pth'

    # Only the parameters (state dict) are stored, not the module object.
    trained = train_model()
    torch.save(trained.state_dict(), checkpoint_path)
    print("模型已保存到 simple_nn.pth")

    # The architecture must be rebuilt before the parameters can be restored.
    loaded_model = SimpleNN(input_size=10, hidden_size=64, output_size=2)
    state = torch.load(checkpoint_path)
    loaded_model.load_state_dict(state)
    print("模型已加载")

    return loaded_model

# 7. GPU加速
def gpu_demo():
    """Time a 1000x1000 matrix product on the CPU and on the selected device.

    When CUDA is unavailable the "GPU" measurement also runs on the CPU, so
    the reported speed-up is then only a CPU-vs-CPU comparison.
    """
    import time

    print("\n=== GPU加速示例 ===")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")

    x = torch.randn(1000, 1000)
    y = torch.randn(1000, 1000)

    def timed_matmul(a, b):
        # CUDA kernels are launched asynchronously: synchronise before and
        # after, otherwise only the launch overhead would be measured.
        if a.is_cuda:
            torch.cuda.synchronize()
        start = time.perf_counter()
        torch.matmul(a, b)
        if a.is_cuda:
            torch.cuda.synchronize()
        return time.perf_counter() - start

    cpu_time = timed_matmul(x, y)

    x_gpu = x.to(device)
    y_gpu = y.to(device)
    if device.type == 'cuda':
        # Warm-up run so one-time CUDA initialisation is not part of the timing.
        torch.matmul(x_gpu, y_gpu)
    gpu_time = timed_matmul(x_gpu, y_gpu)

    print(f"CPU时间: {cpu_time:.4f}秒")
    print(f"GPU时间: {gpu_time:.4f}秒")
    if gpu_time > 0:
        print(f"加速比: {cpu_time/gpu_time:.2f}x")
    else:
        # Elapsed time was below the timer resolution; avoid dividing by zero.
        print("加速比: 无法计算（耗时过短）")

# 主函数
def main():
    """Run every section of the PyTorch walkthrough in order."""
    print("PyTorch基础示例")

    # Tensor basics.
    create_tensors()

    # Train once, then score the model on a freshly built loader.
    trained_model = train_model()
    evaluate_model(trained_model, prepare_data())

    # Checkpoint round-trip (this trains a second model internally).
    save_and_load_model()

    # Device timing comparison.
    gpu_demo()

# Run the PyTorch walkthrough only when this file is executed as a script.
if __name__ == "__main__":
    main()

2. TensorFlow/Keras入门

# TensorFlow/Keras基础示例
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

# 1. 基础操作
def basic_operations():
    """Print a constant, a variable, their sum and a matrix product."""
    print("=== TensorFlow基础操作 ===")

    def show(title, value):
        # Each result is printed as a heading line followed by the tensor.
        print(title)
        print(value)

    const1 = tf.constant([[1, 2, 3], [4, 5, 6]])
    show("常量:", const1)

    var1 = tf.Variable([[7, 8, 9], [10, 11, 12]])
    show("变量:", var1)

    # Element-wise addition of two 2x3 tensors.
    show("相加结果:", tf.add(const1, var1))

    # (2x3) @ (3x2): the second operand is transposed to make the shapes agree.
    show("矩阵乘法结果:", tf.matmul(const1, tf.transpose(var1)))

# 2. Keras模型构建
def build_model():
    """Build the same 64-32-1 network with the Sequential and the Functional API.

    Returns:
        `(sequential_model, functional_model)`.
    """
    print("\n=== 构建Keras模型 ===")

    # Approach 1: Sequential API, adding layers one at a time.
    model1 = keras.Sequential()
    model1.add(keras.layers.Dense(64, activation='relu', input_shape=(10,)))
    model1.add(keras.layers.Dense(32, activation='relu'))
    model1.add(keras.layers.Dense(1, activation='sigmoid'))

    print("Sequential模型:")
    model1.summary()

    # Approach 2: Functional API, threading a tensor through the layers.
    inputs = keras.Input(shape=(10,))
    hidden = inputs
    for width in (64, 32):
        hidden = keras.layers.Dense(width, activation='relu')(hidden)
    outputs = keras.layers.Dense(1, activation='sigmoid')(hidden)

    model2 = keras.Model(inputs=inputs, outputs=outputs)
    print("\nFunctional模型:")
    model2.summary()

    return model1, model2

# 3. 模型编译和训练
def compile_and_train():
    """Build, compile and fit a small binary classifier on random data.

    Returns:
        `(model, history)`, where `history` is the object returned by `fit`.
    """
    print("\n=== 编译和训练模型 ===")

    # Two hidden layers, each followed by 20% dropout, and a sigmoid output.
    layers = [
        keras.layers.Dense(64, activation='relu', input_shape=(10,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(1, activation='sigmoid'),
    ]
    model = keras.Sequential(layers)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Random features and labels; the seed makes the data repeatable.
    np.random.seed(42)
    X_train, y_train = np.random.randn(1000, 10), np.random.randint(0, 2, 1000)
    X_val, y_val = np.random.randn(200, 10), np.random.randint(0, 2, 200)

    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=32,
        verbose=1,
    )
    history = model.fit(X_train, y_train, **fit_options)

    return model, history

# 4. 模型评估
def evaluate_model():
    """Train a model, evaluate it on random test data and plot the history."""
    print("\n=== 评估模型 ===")

    model, history = compile_and_train()

    # Random test set with the same shape as the training data.
    X_test = np.random.randn(200, 10)
    y_test = np.random.randint(0, 2, 200)

    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"测试损失: {loss:.4f}")
    print(f"测试准确率: {accuracy:.4f}")

    # One panel per metric: (history key, train label, val label, title, y label).
    panels = (
        ('accuracy', '训练准确率', '验证准确率', '模型准确率', 'Accuracy'),
        ('loss', '训练损失', '验证损失', '模型损失', 'Loss'),
    )

    plt.figure(figsize=(12, 4))
    for position, (key, train_label, val_label, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[key], label=train_label)
        plt.plot(history.history['val_' + key], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

# 5. 模型保存和加载
def save_and_load():
    """Train a model, save it to an HDF5 file, reload it and print predictions."""
    print("\n=== 保存和加载模型 ===")

    model_path = 'tensorflow_model.h5'

    # Only the model is needed here; the training history is discarded.
    model, _ = compile_and_train()
    model.save(model_path)
    print("模型已保存到 tensorflow_model.h5")

    loaded_model = keras.models.load_model(model_path)
    print("模型已加载")

    # Check the reloaded model on five random samples.
    X_test = np.random.randn(5, 10)
    print("预测结果:")
    print(loaded_model.predict(X_test))

# 6. 高级功能：自定义层
class CustomDenseLayer(keras.layers.Layer):
    """Custom dense layer computing `activation(inputs @ kernel + bias)`."""

    def __init__(self, units, activation=None, **kwargs):
        """Create the layer.

        Args:
            units: Number of output units.
            activation: Activation identifier resolved by
                `keras.activations.get`.
            **kwargs: Standard `keras.layers.Layer` keyword arguments (for
                example `name` or `input_shape`). These were previously
                rejected with a TypeError, which broke
                `CustomDenseLayer(..., input_shape=(10,))`.
        """
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, input_shape):
        """Create the kernel and bias once the input feature size is known."""
        self.kernel = self.add_weight(
            name='kernel',
            shape=(input_shape[-1], self.units),
            initializer='glorot_uniform',
            trainable=True
        )
        self.bias = self.add_weight(
            name='bias',
            shape=(self.units,),
            initializer='zeros',
            trainable=True
        )
        super().build(input_shape)

    def call(self, inputs):
        """Apply the affine transform followed by the activation."""
        return self.activation(tf.matmul(inputs, self.kernel) + self.bias)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'units': self.units,
            'activation': keras.activations.serialize(self.activation),
        })
        return config

# 7. 使用自定义层
def use_custom_layer():
    """Stack three `CustomDenseLayer`s in a Sequential model and print its summary."""
    print("\n=== 使用自定义层 ===")

    # The input shape is declared with an explicit Input object rather than an
    # `input_shape=` keyword on the first layer, so no layer has to accept
    # extra constructor keywords.
    model = keras.Sequential([
        keras.Input(shape=(10,)),
        CustomDenseLayer(64, activation='relu'),
        CustomDenseLayer(32, activation='relu'),
        CustomDenseLayer(1, activation='sigmoid')
    ])

    model.summary()

# 8. 数据管道
def create_data_pipeline():
    """Build a batched `tf.data` pipeline and split it 80/20 into train/val.

    The split is performed on the already batched dataset, so the printed
    sizes are counts of batches, not of samples.

    Returns:
        `(train_dataset, val_dataset)`.
    """
    print("\n=== 创建数据管道 ===")

    # Random features and 0/1 labels.
    features = np.random.randn(1000, 10)
    labels = np.random.randint(0, 2, 1000)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))

    # Group into batches of 32 and prefetch.
    dataset = dataset.batch(32)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    # The first 80% of the batches form the training set, the rest validation.
    train_size = int(0.8 * len(dataset))
    train_dataset, val_dataset = dataset.take(train_size), dataset.skip(train_size)

    print(f"训练集大小: {len(train_dataset)}")
    print(f"验证集大小: {len(val_dataset)}")

    return train_dataset, val_dataset

# 主函数
def main():
    """Run every section of the TensorFlow/Keras walkthrough in order."""
    print("TensorFlow/Keras基础示例")

    # Tensor basics.
    basic_operations()

    # Model construction; the returned models are not used afterwards.
    build_model()

    # Each of the next three steps trains its own model.
    compile_and_train()
    evaluate_model()
    save_and_load()

    # Custom layer and tf.data demonstrations.
    use_custom_layer()
    create_data_pipeline()

# Run the TensorFlow/Keras walkthrough only when this file is executed as a script.
if __name__ == "__main__":
    main()

深度学习实战项目

1. 图像分类

# 图像分类项目
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

# 1. 数据加载和预处理
def load_data():
    """Download CIFAR-10 if needed and wrap both splits in DataLoaders.

    Returns:
        `(train_loader, test_loader, classes)`, where `classes` is the tuple
        of the ten class names.
    """
    print("=== 加载数据 ===")

    # Convert images to tensors and normalise each channel with mean 0.5, std 0.5.
    channel_stats = (0.5, 0.5, 0.5)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_stats, channel_stats),
    ])

    def cifar_split(is_train):
        # Both splits share the root directory and the transform.
        return torchvision.datasets.CIFAR10(
            root='./data', train=is_train, download=True, transform=transform
        )

    train_dataset = cifar_split(True)
    test_dataset = cifar_split(False)

    # Only the training loader shuffles.
    loader_options = dict(batch_size=64, num_workers=2)
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    return train_loader, test_loader, classes

# 2. 定义CNN模型
class CNN(nn.Module):
    """Three conv stages (Conv -> BatchNorm -> ReLU -> MaxPool) plus a two-layer head.

    `fc1` expects 128 channels of 4x4 feature maps, which corresponds to
    32x32 inputs after the three 2x2 poolings.
    """

    def __init__(self, num_classes=10):
        """Create all layers; `num_classes` sets the size of the final output."""
        super().__init__()

        # Stage 1: 3 -> 32 channels.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)

        # Stage 3: 64 -> 128 channels.
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(2, 2)

        # Classifier head with dropout between the two linear layers.
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.relu4 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return raw class scores for a batch of images."""
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))

        # Flatten everything except the batch dimension.
        flat = x.view(x.size(0), -1)

        hidden = self.dropout(self.relu4(self.fc1(flat)))
        return self.fc2(hidden)

# 3. 训练函数
def train_model(model, train_loader, criterion, optimizer, device):
    """Train `model` for one pass over `train_loader`.

    Args:
        model: Module producing per-class scores.
        train_loader: Iterable of `(images, labels)` batches. It does not
            need to support `len()`; batches are counted while iterating.
        criterion: Loss taking `(outputs, labels)`.
        optimizer: Optimiser bound to the model's parameters.
        device: Device the batches are moved to.

    Returns:
        `(mean batch loss, accuracy in percent)`; `(0.0, 0.0)` when the
        loader yields no batches (this previously raised ZeroDivisionError).
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running statistics.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        num_batches += 1

    if num_batches == 0:
        return 0.0, 0.0

    train_loss = running_loss / num_batches
    train_acc = 100 * correct / total if total else 0.0

    return train_loss, train_acc

# 4. 测试函数
def test_model(model, test_loader, criterion, device):
    """测试模型"""
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            
            # 前向传播
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # 统计
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_loss = running_loss / len(test_loader)
    test_acc = 100 * correct / total
    
    return test_loss, test_acc

# 5. 可视化函数
def plot_training_history(train_losses, test_losses, train_accs, test_accs):
    """Show loss and accuracy curves side by side, one point per epoch."""
    # One entry per panel: (train series, test series, labels, title, y label).
    panels = (
        (train_losses, test_losses, '训练损失', '测试损失', '训练和测试损失', 'Loss'),
        (train_accs, test_accs, '训练准确率', '测试准确率', '训练和测试准确率', 'Accuracy (%)'),
    )

    plt.figure(figsize=(12, 4))
    for position, panel in enumerate(panels, start=1):
        train_series, test_series, train_label, test_label, title, y_label = panel
        plt.subplot(1, 2, position)
        plt.plot(train_series, label=train_label)
        plt.plot(test_series, label=test_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

# 6. 主训练函数
def main_training():
    """Train the CIFAR-10 CNN for ten epochs and plot the recorded history.

    Returns:
        `(model, train_loader, test_loader, classes)`.
    """
    print("=== 开始训练 ===")

    # Use CUDA when it is available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")

    train_loader, test_loader, classes = load_data()

    model = CNN(num_classes=10).to(device)

    # Loss, optimiser, and a schedule that multiplies the LR by 0.1 every 5 epochs.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    num_epochs = 10
    history = {name: [] for name in ('train_loss', 'test_loss', 'train_acc', 'test_acc')}

    for epoch in range(num_epochs):
        print(f'Epoch [{epoch+1}/{num_epochs}]')

        train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = test_model(model, test_loader, criterion, device)

        # The learning-rate schedule advances once per epoch.
        scheduler.step()

        epoch_metrics = (train_loss, test_loss, train_acc, test_acc)
        for name, value in zip(history, epoch_metrics):
            history[name].append(value)

        print(f'训练损失: {train_loss:.4f}, 训练准确率: {train_acc:.2f}%')
        print(f'测试损失: {test_loss:.4f}, 测试准确率: {test_acc:.2f}%')
        print('-' * 50)

    plot_training_history(
        history['train_loss'],
        history['test_loss'],
        history['train_acc'],
        history['test_acc'],
    )

    return model, train_loader, test_loader, classes

# 7. 可视化预测结果
def visualize_predictions(model, test_loader, classes, num_images=10):
    """Show the first images of one test batch with true and predicted labels.

    Args:
        model: Trained classifier; it may live on the CPU or on a GPU.
        test_loader: Loader yielding `(images, labels)` batches.
        classes: Sequence mapping class index to class name.
        num_images: Maximum number of images to show. The grid grows in rows
            of five, so values above 10 are no longer silently truncated.
    """
    model.eval()

    # Take the first batch and keep at most `num_images` samples.
    images, labels = next(iter(test_loader))
    images = images[:num_images]
    labels = labels[:num_images]
    if len(images) == 0:
        return

    # Run the model on the device that holds its parameters. Feeding a CPU
    # batch to a model moved to CUDA would raise a device-mismatch error.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else images.device
    with torch.no_grad():
        outputs = model(images.to(device))
        predicted = outputs.argmax(dim=1).cpu()

    # Five images per row; squeeze=False keeps `axes` two-dimensional.
    cols = 5
    rows = -(-len(images) // cols)
    fig, axes = plt.subplots(rows, cols, figsize=(15, 3 * rows), squeeze=False)
    fig.suptitle('预测结果', fontsize=16)

    for i, ax in enumerate(axes.flat):
        ax.axis('off')
        if i >= len(images):
            continue

        # CHW -> HWC, then undo the (0.5, 0.5) normalisation for display.
        img = images[i].cpu().numpy().transpose(1, 2, 0)
        img = np.clip(img * 0.5 + 0.5, 0.0, 1.0)
        ax.imshow(img)

        # Green title when the prediction is correct, red otherwise.
        true_label = classes[int(labels[i])]
        pred_label = classes[int(predicted[i])]
        color = 'green' if true_label == pred_label else 'red'
        ax.set_title(f'真实: {true_label}\n预测: {pred_label}', color=color, fontsize=8)

    plt.tight_layout()
    plt.show()

# 8. 主函数
def main():
    """Train the CIFAR-10 classifier and display sample predictions."""
    print("CIFAR-10图像分类项目")

    # Only the model, the test loader and the class names are needed afterwards.
    trained_model, _, loader, class_names = main_training()

    visualize_predictions(trained_model, loader, class_names)

# Run the image-classification project only when this file is executed as a script.
if __name__ == "__main__":
    main()

2. 文本分类

# 文本分类项目
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt

# 1. 数据准备
class TextDataset(Dataset):
    """Dataset turning whitespace-tokenised texts into fixed-length id tensors."""

    def __init__(self, texts, labels, vocab, max_length=100):
        """Store the raw texts, their labels, the token->id mapping and the target length."""
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return `(token_ids, label)` as two int64 tensors.

        Tokens missing from the vocabulary map to the `<UNK>` id; the id
        list is right-padded with the `<PAD>` id or cut to `max_length`.
        """
        token_ids = []
        for word in self.texts[idx].split():
            token_ids.append(self.vocab.get(word, self.vocab['<UNK>']))

        missing = self.max_length - len(token_ids)
        if missing > 0:
            token_ids = token_ids + [self.vocab['<PAD>']] * missing
        else:
            token_ids = token_ids[:self.max_length]

        return (
            torch.tensor(token_ids, dtype=torch.long),
            torch.tensor(self.labels[idx], dtype=torch.long),
        )

def create_vocab(texts, min_freq=2):
    """Build a token->id vocabulary from whitespace-tokenised texts.

    Ids 0 and 1 are reserved for `<PAD>` and `<UNK>`; every other token
    seen at least `min_freq` times receives the next free id, in order of
    first appearance.

    Args:
        texts: Iterable of strings.
        min_freq: Minimum number of occurrences for a token to be kept.

    Returns:
        Dict mapping token to integer id.
    """
    word_counts = Counter(word for text in texts for word in text.split())

    vocab = {'<PAD>': 0, '<UNK>': 1}
    for word, count in word_counts.items():
        # Skip the reserved tokens if they occur literally in the data:
        # re-assigning them would overwrite id 0/1 and, because the dict
        # would not grow, give the next word a duplicate id.
        if count >= min_freq and word not in vocab:
            vocab[word] = len(vocab)

    return vocab

# 2. 模型定义
class TextClassifier(nn.Module):
    """Embedding -> single-layer bidirectional LSTM -> linear classifier.

    Input sequences are expected to be right-padded with id 0 (the
    embedding's `padding_idx`); id 0 must not occur inside a sequence.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes, dropout=0.5):
        """Create the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embedding_dim: Size of each token embedding.
            hidden_dim: LSTM hidden size per direction.
            num_classes: Number of output scores.
            dropout: Dropout probability applied to the embeddings and to
                the sentence representation.
        """
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # `dropout` is not passed to the LSTM: with a single layer PyTorch
        # applies no recurrent dropout and only emits a warning.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim,
                            batch_first=True, bidirectional=True)

        # Forward and backward final states are concatenated.
        self.fc = nn.Linear(hidden_dim * 2, num_classes)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class scores of shape `(batch, num_classes)`.

        Args:
            x: Integer tensor of token ids, shape `(batch, seq_len)`.
        """
        embedded = self.dropout(self.embedding(x))

        # Real (non-padding) length of every sequence; at least 1 so that an
        # all-padding row can still be packed. Packing requires CPU lengths.
        lengths = (x != self.embedding.padding_idx).sum(dim=1).clamp(min=1).cpu()
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths, batch_first=True, enforce_sorted=False
        )

        # Previously `lstm_out[:, -1, :]` was used, which for padded input is
        # a PAD position and, for the backward direction, only its first
        # step. The final hidden states summarise the real tokens in both
        # directions instead.
        _, (hidden, _) = self.lstm(packed)
        features = torch.cat((hidden[-2], hidden[-1]), dim=1)

        return self.fc(self.dropout(features))

# 3. 数据加载
def load_data():
    """Build train/test loaders over a tiny hand-written sentiment data set.

    Returns:
        `(train_loader, test_loader, vocab)`.
    """
    print("=== 加载数据 ===")

    # Example reviews; label 1 = positive, 0 = negative.
    texts = [
        "I love this movie, it's amazing!",
        "This film was terrible, I hated it.",
        "Great acting and wonderful storyline.",
        "Not recommended, waste of time.",
        "Best movie I've ever seen!",
        "Absolutely awful experience.",
        "The plot was confusing and boring.",
        "Outstanding performance by the cast.",
        "Would watch it again and again.",
        "Poorly written and directed."
    ]

    labels = [1, 0, 1, 0, 1, 0, 0, 1, 1, 0]

    # First eight samples for training, the last two for testing.
    train_texts = texts[:8]
    train_labels = labels[:8]
    test_texts = texts[8:]
    test_labels = labels[8:]

    # Build the vocabulary from the training texts only, so that no test
    # token leaks into it, and keep every token: with the default
    # min_freq=2 almost every word in this tiny corpus would become <UNK>.
    vocab = create_vocab(train_texts, min_freq=1)

    train_dataset = TextDataset(train_texts, train_labels, vocab)
    test_dataset = TextDataset(test_texts, test_labels, vocab)

    train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)

    return train_loader, test_loader, vocab

# 4. 训练函数
def train_model(model, train_loader, criterion, optimizer, device):
    """Train `model` for one pass over `train_loader`.

    Args:
        model: Module producing per-class scores.
        train_loader: Iterable of `(texts, labels)` batches. It does not
            need to support `len()`; batches are counted while iterating.
        criterion: Loss taking `(outputs, labels)`.
        optimizer: Optimiser bound to the model's parameters.
        device: Device the batches are moved to.

    Returns:
        `(mean batch loss, accuracy in percent)`; `(0.0, 0.0)` when the
        loader yields no batches (this previously raised ZeroDivisionError).
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    for texts, labels in train_loader:
        texts, labels = texts.to(device), labels.to(device)

        # Forward pass.
        outputs = model(texts)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running statistics.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        num_batches += 1

    if num_batches == 0:
        return 0.0, 0.0

    train_loss = running_loss / num_batches
    train_acc = 100 * correct / total if total else 0.0

    return train_loss, train_acc

# 5. 测试函数
def test_model(model, test_loader, criterion, device):
    """测试模型"""
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for texts, labels in test_loader:
            texts, labels = texts.to(device), labels.to(device)
            
            # 前向传播
            outputs = model(texts)
            loss = criterion(outputs, labels)
            
            # 统计
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_loss = running_loss / len(test_loader)
    test_acc = 100 * correct / total
    
    return test_loss, test_acc

# 6. 主训练函数
def main_training():
    """Train the text classifier for a fixed number of epochs and plot the curves.

    Picks CUDA when available, loads the data via ``load_data()``, builds a
    ``TextClassifier`` and trains it with Adam and cross-entropy loss. After
    every epoch the model is evaluated on the test loader and the metrics are
    printed; once training ends, loss and accuracy curves are shown with
    matplotlib.

    Returns:
        Tuple ``(model, train_loader, test_loader, vocab)``.
    """
    print("=== 开始训练 ===")

    # Device selection
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")

    # Data
    train_loader, test_loader, vocab = load_data()

    # Model: (vocab size, embedding dim, hidden dim, number of classes)
    model = TextClassifier(len(vocab), 100, 128, 2).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Per-epoch metric history
    num_epochs = 10
    history = {'train_loss': [], 'test_loss': [], 'train_acc': [], 'test_acc': []}

    for epoch_no in range(1, num_epochs + 1):
        print(f'Epoch [{epoch_no}/{num_epochs}]')

        # One optimisation pass, then one evaluation pass
        train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = test_model(model, test_loader, criterion, device)

        history['train_loss'].append(train_loss)
        history['test_loss'].append(test_loss)
        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc)

        print(f'训练损失: {train_loss:.4f}, 训练准确率: {train_acc:.2f}%')
        print(f'测试损失: {test_loss:.4f}, 测试准确率: {test_acc:.2f}%')
        print('-' * 50)

    # Two side-by-side panels: (title, y-axis label, curves to draw)
    panels = (
        ('损失曲线', 'Loss', (
            ('训练损失', history['train_loss']),
            ('测试损失', history['test_loss']),
        )),
        ('准确率曲线', 'Accuracy (%)', (
            ('训练准确率', history['train_acc']),
            ('测试准确率', history['test_acc']),
        )),
    )

    plt.figure(figsize=(12, 4))
    for position, (title, y_label, curves) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for curve_label, values in curves:
            plt.plot(values, label=curve_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

    return model, train_loader, test_loader, vocab

# 7. 预测函数
def predict_text(model, text, vocab, device, max_len=100):
    """Classify a single text and return the predicted class with probabilities.

    Args:
        model: Classifier called with a ``(1, max_len)`` tensor of token ids;
            it is switched to evaluation mode here.
        text: Raw text, tokenised by splitting on whitespace.
        vocab: Mapping from token to integer id. It must contain ``'<UNK>'``
            (used for unknown tokens) and ``'<PAD>'`` (used for padding).
        device: Device the input tensor is moved to.
        max_len: Fixed sequence length; longer inputs are truncated and
            shorter ones padded. Defaults to 100, the value previously
            hard-coded. NOTE(review): presumably this should match the
            length used when the model was trained -- confirm against the
            dataset class.

    Returns:
        Tuple ``(predicted_class, probabilities)`` where ``predicted_class``
        is a Python int and ``probabilities`` is a NumPy array holding the
        softmax scores for the single input.

    Raises:
        ValueError: If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError(f"max_len must be >= 1, got {max_len}")

    model.eval()

    # Encode tokens, mapping unknown words to <UNK>, then truncate.
    encoded = [vocab.get(word, vocab['<UNK>']) for word in text.split()]
    encoded = encoded[:max_len]

    # Right-pad up to the fixed length.
    if len(encoded) < max_len:
        encoded += [vocab['<PAD>']] * (max_len - len(encoded))

    # Batch of one sequence.
    text_tensor = torch.tensor([encoded], dtype=torch.long).to(device)

    with torch.no_grad():
        outputs = model(text_tensor)
        predicted = outputs.argmax(dim=1)
        probabilities = torch.softmax(outputs, dim=1)

    return predicted.item(), probabilities[0].cpu().numpy()

# 8. 主函数
def main():
    """Train the text classifier, then print sentiment predictions for sample texts.

    The model and vocabulary come from ``main_training()``. Each sample
    sentence is classified with ``predict_text`` and printed together with
    the probability of the predicted class.
    """
    print("文本分类项目")

    # Train the model
    model, train_loader, test_loader, vocab = main_training()

    # Predict on the device the model actually lives on. Hard-coding 'cpu'
    # raises a device-mismatch error when training placed the model on CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else 'cpu'

    # New texts to classify
    test_texts = [
        "This movie is absolutely fantastic!",
        "I really disliked this film.",
        "Great story and acting.",
        "Terrible experience."
    ]

    print("\n=== 预测结果 ===")
    for text in test_texts:
        prediction, probs = predict_text(model, text, vocab, device)
        # Class 1 is reported as positive, any other class as negative.
        sentiment = "积极" if prediction == 1 else "消极"
        confidence = probs[prediction]
        print(f"文本: {text}")
        print(f"情感: {sentiment} (置信度: {confidence:.3f})")
        print("-" * 50)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

深度学习最佳实践

1. 模型优化技巧

# 模型优化技巧
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# 1. 权重初始化
def weight_initialization():
    """Build a three-layer MLP and initialise it with Kaiming-normal weights.

    Every ``nn.Linear`` / ``nn.Conv2d`` module receives
    ``kaiming_normal_(mode='fan_out', nonlinearity='relu')`` weights and a
    zero bias; other modules are left untouched.

    Returns:
        The initialised ``nn.Sequential`` model (784 -> 256 -> 128 -> 10).
    """
    print("=== 权重初始化 ===")

    # Module types that receive the custom initialisation.
    initialised_types = (nn.Linear, nn.Conv2d)

    def he_init(layer):
        if not isinstance(layer, initialised_types):
            return
        nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
        if layer.bias is not None:
            nn.init.constant_(layer.bias, 0)

    # Linear layers separated by ReLU, with no activation after the last one.
    widths = [784, 256, 128, 10]
    stack = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        stack.append(nn.Linear(fan_in, fan_out))
        stack.append(nn.ReLU())
    stack.pop()
    model = nn.Sequential(*stack)

    # Module.apply visits every submodule, including the container itself.
    model.apply(he_init)

    print("权重初始化完成")
    return model

# 2. 学习率调度
def learning_rate_scheduler():
    """Demonstrate ``StepLR`` by printing the learning rate over ten epochs.

    An Adam optimizer starts at ``lr=0.01``; the scheduler multiplies the
    rate by ``gamma=0.1`` every ``step_size=5`` scheduler steps. No real
    training happens: ``optimizer.step()`` is called only so the scheduler
    is stepped after the optimizer.
    """
    print("\n=== 学习率调度 ===")

    net = nn.Linear(10, 1)
    optimizer = optim.Adam(net.parameters(), lr=0.01)

    # Decay schedule
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    for epoch_no in range(1, 11):
        # Stand-in for a real training step
        optimizer.step()

        current_lr = optimizer.param_groups[0]["lr"]
        print(f'Epoch {epoch_no}, 学习率: {current_lr:.6f}')

        # Advance the schedule once per epoch
        scheduler.step()

# 3. 正则化技术
def regularization_techniques():
    """Show three regularisation tools: an L2 penalty, Dropout and BatchNorm.

    The nested ``l2_regularization`` helper is illustrative only and is not
    called here. Two 784 -> 256 -> 128 -> 10 MLPs are built, one with
    ``Dropout(0.5)`` after each hidden ReLU and one with ``BatchNorm1d``
    before each hidden ReLU.

    Returns:
        Tuple ``(model_with_dropout, model_with_bn)``.
    """
    print("\n=== 正则化技术 ===")

    # L2 regularisation
    def l2_regularization(model, lambda_l2=0.001):
        """Return ``lambda_l2`` times the sum of squared parameter values.

        An L2 (weight-decay) penalty uses squared magnitudes; summing the
        unsquared per-tensor norms would give a different regulariser.
        """
        squared_sum = sum(param.pow(2).sum() for param in model.parameters())
        return lambda_l2 * squared_sum

    # Dropout
    model_with_dropout = nn.Sequential(
        nn.Linear(784, 256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, 128),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(128, 10)
    )

    print("Dropout模型已创建")

    # Batch Normalization
    model_with_bn = nn.Sequential(
        nn.Linear(784, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.Linear(256, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 10)
    )

    print("Batch Normalization模型已创建")

    return model_with_dropout, model_with_bn

# 4. 数据增强
def data_augmentation():
    """Demonstrate image and text data augmentation.

    Builds a torchvision transform pipeline (flip, rotation, translation,
    colour jitter, tensor conversion) and then prints one example of
    WordNet-based synonym replacement on a sample sentence.

    ``torchvision`` and ``nltk`` are imported inside the function, so they
    are only required when this demo is actually run.
    """
    print("\n=== 数据增强 ===")

    import torchvision.transforms as transforms

    # Image augmentation pipeline
    image_steps = [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor()
    ]
    transform = transforms.Compose(image_steps)

    print("图像数据增强变换已创建")

    # Text augmentation
    import random
    from nltk.corpus import wordnet

    def get_synonyms(word):
        """Return the distinct WordNet lemma names for ``word``, excluding itself."""
        lemma_names = (
            lemma.name().replace('_', ' ').replace('-', ' ')
            for syn in wordnet.synsets(word)
            for lemma in syn.lemmas()
        )
        return list({name for name in lemma_names if name != word})

    def synonym_replacement(sentence, n=1):
        """Replace up to ``n`` distinct alphanumeric words with a random synonym."""
        tokens = sentence.split()
        result = list(tokens)
        candidates = list({tok for tok in tokens if tok.isalnum()})
        random.shuffle(candidates)

        replaced = 0
        for target in candidates:
            options = get_synonyms(target)
            if options:
                replacement = random.choice(options)
                # Every occurrence of the chosen word is swapped.
                result = [replacement if tok == target else tok for tok in result]
                replaced += 1
            if replaced >= n:
                break

        return ' '.join(result)

    # Example
    original_sentence = "I love this movie"
    augmented_sentence = synonym_replacement(original_sentence)
    print(f"原始文本: {original_sentence}")
    print(f"增强文本: {augmented_sentence}")

# 5. 早停法
def early_stopping():
    """Demonstrate early stopping on a fixed sequence of validation losses.

    A small ``EarlyStopping`` tracker is fed one loss per epoch; the loop
    prints its state and stops once the tracker raises its flag.
    """
    print("\n=== 早停法 ===")

    class EarlyStopping:
        """Flag a stop after ``patience`` consecutive epochs without improvement.

        An epoch improves when its loss is below the best loss seen so far
        by more than ``min_delta``.
        """

        def __init__(self, patience=3, min_delta=0):
            self.patience = patience
            self.min_delta = min_delta
            self.counter = 0
            self.best_loss = None
            self.early_stop = False

        def __call__(self, val_loss):
            # The first observation only sets the baseline.
            if self.best_loss is None:
                self.best_loss = val_loss
                return

            if val_loss < self.best_loss - self.min_delta:
                self.best_loss = val_loss
                self.counter = 0
                return

            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

    # Usage example
    early_stop = EarlyStopping(patience=3)

    # Simulated validation losses
    val_losses = [0.8, 0.7, 0.75, 0.72, 0.8, 0.85, 0.9]

    for epoch_no, loss in enumerate(val_losses, start=1):
        early_stop(loss)
        print(f'Epoch {epoch_no}, 验证损失: {loss:.4f}, 早停: {early_stop.early_stop}')

        if early_stop.early_stop:
            print("早停触发！")
            break

# 6. 梯度裁剪
def gradient_clipping():
    """Demonstrate gradient-norm clipping on a tiny regression model.

    Runs five steps on random data. Each step clips the gradients to a
    total norm of 1.0 before the optimizer update and prints the loss with
    the gradient norm measured by the clipping call, i.e. the norm before
    clipping was applied.
    """
    print("\n=== 梯度裁剪 ===")

    # Model and optimizer
    model = nn.Sequential(
        nn.Linear(10, 5),
        nn.ReLU(),
        nn.Linear(5, 1)
    )
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    # The loss module is stateless, so build it once outside the loop.
    loss_fn = nn.MSELoss()

    # Simulated training
    for epoch in range(5):
        # Random data
        x = torch.randn(32, 10)
        y = torch.randn(32, 1)

        # Forward pass
        output = model(x)
        loss = loss_fn(output, y)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        # clip_grad_norm_ returns the total norm it measured, so a second
        # pass over the parameters just to compute the norm is unnecessary
        # (and would only report the already-clipped value).
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        # Parameter update
        optimizer.step()

        print(f'Epoch {epoch+1}, 损失: {loss.item():.4f}, 梯度范数: {grad_norm:.4f}')

# 7. 模型集成
def model_ensemble():
    """Demonstrate ensembling by averaging three small untrained models.

    Three 10 -> hidden -> 1 MLPs with hidden widths 5, 8 and 6 are built;
    their outputs for one random input are averaged and printed.
    """
    print("\n=== 模型集成 ===")

    # One member per hidden width
    hidden_widths = (5, 8, 6)
    models = [
        nn.Sequential(
            nn.Linear(10, width),
            nn.ReLU(),
            nn.Linear(width, 1)
        )
        for width in hidden_widths
    ]

    def ensemble_predict(models, x):
        """Return the mean of the members' predictions for ``x``."""
        member_outputs = []
        with torch.no_grad():
            for member in models:
                member.eval()
                member_outputs.append(member(x))
            stacked = torch.stack(member_outputs)

        # Average across the ensemble dimension
        return stacked.mean(dim=0)

    # Try it on a single random sample
    x = torch.randn(1, 10)
    prediction = ensemble_predict(models, x)

    print(f"集成预测结果: {prediction.item():.4f}")

# 主函数
def main():
    """Run every best-practice demo in order.

    The demos are executed one after another: weight initialisation,
    learning-rate scheduling, regularisation, data augmentation, early
    stopping, gradient clipping and model ensembling. Their return values
    are not used.
    """
    print("深度学习最佳实践")

    demos = (
        weight_initialization,      # weight initialisation
        learning_rate_scheduler,    # learning-rate scheduling
        regularization_techniques,  # regularisation
        data_augmentation,          # data augmentation
        early_stopping,             # early stopping
        gradient_clipping,          # gradient clipping
        model_ensemble,             # model ensembling
    )
    for demo in demos:
        demo()

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()