8.4 手动实现 RNN
这篇文章主要介绍了循环神经网络(Recurrent Neural Network),简称 RNN。
RNN 常用于处理不定长输入,常见于 NLP 以及时间序列任务,这类数据一般具有前后依赖关系。
RNN 网络结构如下:
上图的数据说明如下:
  • $x_{t}$:时刻 t 的输入,$shape=(1,57)$,表示 (batch_size, feature_dim)。57 表示词向量的长度。
  • $s_{t}$:时刻 t 的状态值,$shape=(1,128)$,表示 (batch_size, hidden_dim)。这个状态值有两个作用:经过一个全连接层得到输出;输入到下一个时刻,影响下一个时刻的状态值。也称为 hidden_state,隐藏层状态信息,记录过往时刻的信息。第一个时刻的 $s_{t}$ 会初始化为全 0 的向量。
  • $o_{t}$:时刻 t 的输出,$shape=(1,18)$,表示 (batch_size, classes)
  • $U$:linear 层输入$x_{t}$的权重参数,$shape=(57, 128)$,表示 (feature_dim, hidden_dim)
  • $W$:linear 层状态值$s_{t-1}$的权重参数,$shape=(128,128)$,表示 (hidden_dim, hidden_dim)
  • $V$:linear 层状态值$s_{t}$的权重参数,$shape=(128,18)$,表示 (hidden_dim, classes)
公式如下:
$s_{t}=f\left(x_{t} U+s_{t-1} W\right)$
$o_{t}=\operatorname{softmax}\left(s_{t} V\right)$
下面的例子是使用 RNN 实现人名分类:输入任意长度姓名(字符串),输出姓名来自哪个国家(18 分类任务)。数据来源于:http://download.pytorch.org/tutorial/data.zip
1
# Chou(字符串) -> RNN -> Chinese(分类类别)
2
for string in [C,h,o,u]:
3
首先把每个字母转换成 one-hot -> [0,0,...,1,...,0]
4
y,h=model([0,0,...,1,...,0], h) # h 就是隐藏层的状态信息
Copied!
这里没有使用 DataLoader 和 Dataset,而是手动构造了数据集的结构,训练数据使用 dict 存储,包括 18 个元素,每个元素是一个 list,存储了 18 个类别的名字列表。label 存放在一个 list 中。在迭代训练过程如下:
  • 首先随机选择 label 和名字,名字转换为 one-hot 的张量,形状为$[length,1,57]$,其中length表示名字的长度,label 也转换为张量,形状为 1。
  • 初始化隐藏层状态信息。
  • 循环把名字中的每个字符的 one-hot 向量输入到 RNN 中。
  • 最后得到 18 分类的 output。
  • 这里没有使用优化器,而是手动进行反向传播更新参数值。
代码如下:
1
from io import open
2
import glob
3
import unicodedata
4
import string
5
import math
6
import os
7
import time
8
import torch.nn as nn
9
import torch
10
import random
11
import matplotlib.pyplot as plt
12
import torch.utils.data
13
from common_tools import set_seed
14
import enviroments
15
16
set_seed(1) # 设置随机种子
17
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
18
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
device = torch.device("cpu")
20
21
22
# Read a file and split into lines
23
def readLines(filename):
    """Read a text file and return its lines, ASCII-normalized.

    Each line is passed through unicodeToAscii() to strip accents and drop
    characters outside the supported alphabet.

    Args:
        filename: path to a UTF-8 text file, one name per line.

    Returns:
        List of ASCII-normalized name strings.
    """
    # Use a context manager so the file handle is always closed
    # (the original called open() without ever closing it).
    with open(filename, encoding='utf-8') as f:
        content = f.read()
    return [unicodeToAscii(line) for line in content.strip().split('\n')]
26
27
28
def unicodeToAscii(s):
    """Convert a Unicode string to plain ASCII.

    Decomposes accented characters (NFD), drops the combining marks
    (category 'Mn'), and keeps only characters present in the global
    `all_letters` alphabet.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed
            if unicodedata.category(ch) != 'Mn' and ch in all_letters]
    return ''.join(kept)
33
34
35
# Find letter index from all_letters, e.g. "a" = 0
36
def letterToIndex(letter):
    """Return the index of *letter* in the global `all_letters` alphabet.

    Like str.find, returns -1 when the letter is not in the alphabet.
    """
    index = all_letters.find(letter)
    return index
38
39
40
# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
41
def letterToTensor(letter):
    """One-hot encode a single letter as a <1 x n_letters> float tensor."""
    one_hot = torch.zeros(1, n_letters)
    one_hot[0, letterToIndex(letter)] = 1
    return one_hot
45
46
47
# Turn a line into a <line_length x 1 x n_letters>,
48
# or an array of one-hot letter vectors
49
def lineToTensor(line):
    """One-hot encode a string as a <len(line) x 1 x n_letters> tensor.

    Dimension 1 is a batch dimension of size 1; each time step holds the
    one-hot vector of one character.
    """
    encoded = torch.zeros(len(line), 1, n_letters)
    for position, character in enumerate(line):
        encoded[position, 0, letterToIndex(character)] = 1
    return encoded
54
55
56
def categoryFromOutput(output):
    """Map the network output (log-probabilities) to (category_name, index).

    Takes the arg-max over the class dimension of a (1, n_categories)
    output tensor and looks the index up in the global `all_categories`.
    """
    _, top_index = output.topk(1)
    category_index = top_index[0].item()
    return all_categories[category_index], category_index
60
61
62
def randomChoice(l):
    """Return a uniformly random element of the non-empty sequence *l*."""
    # random.choice draws the same _randbelow(len(l)) as
    # randint(0, len(l) - 1), so the RNG stream is unchanged.
    return random.choice(l)
64
65
66
def randomTrainingExample():
    """Sample one random training pair.

    Returns:
        (category, line, category_tensor, line_tensor): the language name,
        the raw name string, the label as a LongTensor of shape (1,), and
        the one-hot encoded name of shape (len(line), 1, n_letters).
    """
    category = randomChoice(all_categories)           # pick a language
    line = randomChoice(category_lines[category])     # pick a name from it
    label_index = all_categories.index(category)
    category_tensor = torch.tensor([label_index], dtype=torch.long)
    line_tensor = lineToTensor(line)                  # string -> one-hot
    return category, line, category_tensor, line_tensor
72
73
74
def timeSince(since):
    """Return the elapsed wall-clock time since *since* as 'Xm Ys'."""
    elapsed = time.time() - since
    minutes, seconds = divmod(elapsed, 60)
    return '%dm %ds' % (minutes, seconds)
80
81
82
# Just return an output given a line
83
def evaluate(line_tensor):
    """Run a full forward pass over one encoded name and return the final
    output (log-probabilities over categories).

    Uses the global `rnn`; gradient bookkeeping is the caller's concern.
    """
    hidden = rnn.initHidden()

    # Iterating the tensor yields line_tensor[i] for each time step.
    for step in line_tensor:
        output, hidden = rnn(step, hidden)

    return output
90
91
92
def predict(input_line, n_predictions=3):
    """Print the top-`n_predictions` predicted categories for a name."""
    print('\n> %s' % input_line)
    with torch.no_grad():
        output = evaluate(lineToTensor(input_line))

        # Top-N categories along the class dimension, largest first.
        topv, topi = output.topk(n_predictions, 1, True)

        for rank in range(n_predictions):
            value = topv[0][rank].item()
            category_index = topi[0][rank].item()
            print('(%.2f) %s' % (value, all_categories[category_index]))
104
105
106
def get_lr(iter, learning_rate):
    """Return the learning rate for the given iteration.

    NOTE(review): `iter < n_iters` holds for every iteration of the
    training loop (which runs 1..n_iters), so the decayed rate only
    applies at iter == n_iters. This function is also never called in
    this file — confirm whether earlier decay was intended.
    """
    if iter < n_iters:
        return learning_rate
    return learning_rate * 0.1
109
110
class RNN(nn.Module):
    """A minimal Elman RNN cell for character-level classification.

    Implements s_t = tanh(x_t·U + s_{t-1}·W) and
    o_t = log_softmax(s_t·V). The hidden state is threaded through by
    the caller one time step at a time.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size

        # Attribute names u/w/v are kept as-is: they define the
        # state_dict keys used when saving/loading the model.
        self.u = nn.Linear(input_size, hidden_size)    # input  -> hidden
        self.w = nn.Linear(hidden_size, hidden_size)   # hidden -> hidden
        self.v = nn.Linear(hidden_size, output_size)   # hidden -> output

        self.tanh = nn.Tanh()
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, hidden):
        """One time step: return (log_probs, new_hidden)."""
        new_hidden = self.tanh(self.u(inputs) + self.w(hidden))
        output = self.softmax(self.v(new_hidden))
        return output, new_hidden

    def initHidden(self):
        """Fresh all-zero hidden state of shape (1, hidden_size)."""
        return torch.zeros(1, self.hidden_size)
136
137
138
def train(category_tensor, line_tensor):
    """Run one training step on a single (name, label) pair.

    Feeds the name's characters through the global `rnn` one at a time,
    computes the loss (global `criterion`) on the final output,
    backpropagates, and applies a manual SGD update — no optimizer
    object is used.

    Args:
        category_tensor: LongTensor of shape (1,) holding the class index.
        line_tensor: FloatTensor of shape (length, 1, n_letters) holding
            the one-hot encoded characters of the name.

    Returns:
        (output, loss): the final time-step output (log-probabilities over
        the classes) and the scalar loss value.
    """
    hidden = rnn.initHidden()

    rnn.zero_grad()

    line_tensor = line_tensor.to(device)
    hidden = hidden.to(device)
    category_tensor = category_tensor.to(device)

    # Unroll the RNN over the characters; only the last output is scored.
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    loss.backward()

    # Manual SGD step: p <- p - lr * grad. The old two-positional-argument
    # form `add_(-lr, grad)` is deprecated (removed in recent PyTorch);
    # the `alpha` keyword is the supported spelling.
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)

    return output, loss.item()
158
159
160
if __name__ == "__main__":
    # config
    path_txt = os.path.join(enviroments.names, "*.txt")
    all_letters = string.ascii_letters + " .,;'"
    n_letters = len(all_letters)    # 52 letters + 5 punctuation characters
    print_every = 5000
    plot_every = 5000
    learning_rate = 0.005
    n_iters = 200000

    # step 1: data — build category_lines, a dict mapping each language
    # (file basename) to the list of names read from that file.
    category_lines = {}
    all_categories = []
    for filename in glob.glob(path_txt):
        category = os.path.splitext(os.path.basename(filename))[0]
        all_categories.append(category)
        category_lines[category] = readLines(filename)

    n_categories = len(all_categories)

    # step 2: model
    n_hidden = 128
    rnn = RNN(n_letters, n_hidden, n_categories)
    rnn.to(device)

    # step 3: loss
    criterion = nn.NLLLoss()

    # step 4: optimization is done by hand inside train()

    # step 5: training loop
    current_loss = 0
    all_losses = []
    start = time.time()
    for iteration in range(1, n_iters + 1):
        # sample one (language, name) pair and its tensors
        category, line, category_tensor, line_tensor = randomTrainingExample()

        # one training step
        output, loss = train(category_tensor, line_tensor)
        current_loss += loss

        # periodically report progress with the current sample's prediction
        if iteration % print_every == 0:
            guess, guess_i = categoryFromOutput(output)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('Iter: {:<7} time: {:>8s} loss: {:.4f} name: {:>10s} pred: {:>8s} label: {:>8s}'.format(
                iteration, timeSince(start), loss, line, guess, correct))

        # record the average loss over the last plot_every iterations
        if iteration % plot_every == 0:
            all_losses.append(current_loss / plot_every)
            current_loss = 0

    # save the trained weights and plot the loss curve
    path_model = os.path.join(BASE_DIR, "rnn_state_dict.pkl")
    torch.save(rnn.state_dict(), path_model)
    plt.plot(all_losses)
    plt.show()

    predict('Yue Tingsong')
    predict('Yue tingsong')
    predict('yutingsong')

    predict('test your name')
Copied!
参考资料
如果你觉得这篇文章对你有帮助,不妨点个赞,让我有更多动力写出好文章。
我的文章会首发在公众号上,欢迎扫码关注我的公众号张贤同学
最近更新 1yr ago
复制链接