4.1 权值初始化
梯度消失与梯度爆炸
import torch
import torch.nn as nn
from common_tools import set_seed
set_seed(1) # 设置随机种子
class MLP(nn.Module):
def __init__(self, neural_num, layers):
super(MLP, self).__init__()
self.linears = nn.ModuleList([nn.Linear(neural_num, neural_num, bias=False) for i in range(layers)])
self.neural_num = neural_num
def forward(self, x):
for (i, linear) in enumerate(self.linears):
x = linear(x)
return x
def initialize(self):
for m in self.modules():
# 判断这一层是否为线性层,如果为线性层则初始化权值
if isinstance(m, nn.Linear):
nn.init.normal_(m.weight.data) # normal: mean=0, std=1
layer_nums = 100
neural_nums = 256
batch_size = 16
net = MLP(neural_nums, layer_nums)
net.initialize()
inputs = torch.randn((batch_size, neural_nums)) # normal: mean=0, std=1
output = net(inputs)
print(output)Xavier 方法与 Kaiming 方法
Xavier 方法
nn.init.calculate_gain()
Kaiming 方法
常用初始化方法

最后更新于
