dwxh,dwhh,dwhy = np.zeros_like(wxh), np.zeros_like(Whh),np.zeros_like(Why) dbh, dby = np.zeros_like(bh),np.zeros_like(by) dhnext = np.zeros_like(hs[0]) for t in reversed(range(len(inputs))) : dy = np.copy(ps[t]) for b in range(0,B): dy[targets[t][b],b] -= 1 dwhy += np.dot(dy, hs[t].T) dby += np.expand_dims(np.sum(dy,axis=1),axis=1) dh = np.dot(Why.T, dy)+ dhnext dh = dh * (1-hs[t]*hs[t] ) dbh += np.expand_dims(np.sum(dh,axis=1),axis=1) dWxh += np.dot(dh,xs[t].T) dWhh += np.dot(dh,hs[t - 1].T) dhnext = np.dot(Whh.T,dh) return loss,dWxh,dWhh,dwhy,dbh,dby, hs[len(inputs) - 1] 给这段代码加上注释
时间: 2023-09-24 18:02:30 浏览: 165
```python
# 初始化各个参数的梯度为0
dwxh, dwhh, dwhy = np.zeros_like(wxh), np.zeros_like(Whh), np.zeros_like(Why)
dbh, dby = np.zeros_like(bh), np.zeros_like(by)
dhnext = np.zeros_like(hs[0])
# 反向传播过程
for t in reversed(range(len(inputs))):
# 计算当前时刻的输出值
dy = np.copy(ps[t])
for b in range(0,B):
dy[targets[t][b], b] -= 1
# 计算各个参数的梯度
dwhy += np.dot(dy, hs[t].T)
dby += np.expand_dims(np.sum(dy, axis=1), axis=1)
dh = np.dot(Why.T, dy) + dhnext
dh = dh * (1 - hs[t] * hs[t])
dbh += np.expand_dims(np.sum(dh, axis=1), axis=1)
dWxh += np.dot(dh, xs[t].T)
dWhh += np.dot(dh, hs[t - 1].T)
dhnext = np.dot(Whh.T, dh)
# 返回损失函数值以及各个参数的梯度和最后一个时刻的隐藏状态
return loss, dwxh, dwhh, dwhy, dbh, dby, hs[len(inputs) - 1]
```
阅读全文