numpy lstm
时间: 2025-01-02 11:37:16 浏览: 7
### 使用NumPy实现LSTM神经网络
#### 初始化权重矩阵和偏置向量
def initialize_parameters(n_x: int, n_a: int) -> dict:
    """Initialize LSTM parameters with small random weights and zero biases.

    Arguments:
    n_x -- number of units in the input vector (dimensionality of a single
           time-step data point)
    n_a -- number of LSTM units

    Returns:
    params -- python dictionary containing the initialized weights and biases:
        Wf, bf -- forget gate weight matrix (n_a, n_a + n_x) and bias (n_a, 1)
        Wi, bi -- input/update gate weight matrix and bias, same shapes
        Wo, bo -- output gate weight matrix and bias, same shapes
        Wc, bc -- candidate cell state weight matrix and bias, same shapes
    """
    # Every gate is applied to the stacked vector [a_prev; xt], which is why
    # each weight matrix has n_a + n_x columns.
    weight_shape = (n_a, n_a + n_x)
    bias_shape = (n_a, 1)

    # Weights are scaled by 0.01 to keep the initial pre-activations small;
    # biases start at zero. The draw order (Wf, Wi, Wo, Wc) is kept fixed so
    # that results are reproducible under a given np.random seed.
    Wf = np.random.randn(*weight_shape) * 0.01  # Forget gate weight matrix
    bf = np.zeros(bias_shape)                   # Forget gate bias vector
    Wi = np.random.randn(*weight_shape) * 0.01  # Input update gate weight matrix
    bi = np.zeros(bias_shape)                   # Input update gate bias vector
    Wo = np.random.randn(*weight_shape) * 0.01  # Output gate weight matrix
    bo = np.zeros(bias_shape)                   # Output gate bias vector
    Wc = np.random.randn(*weight_shape) * 0.01  # Candidate cell state weight matrix
    bc = np.zeros(bias_shape)                   # Candidate cell state bias vector

    params = {
        "Wf": Wf, "bf": bf,
        "Wi": Wi, "bi": bi,
        "Wo": Wo, "bo": bo,
        "Wc": Wc, "bc": bc,
    }
    return params
#### 单步前向传播过程
def _sigmoid(z):
    """Element-wise logistic sigmoid 1 / (1 + exp(-z))."""
    # Written via the identity sigmoid(z) = 0.5 * (1 + tanh(z / 2)) so that
    # large-magnitude inputs saturate cleanly instead of overflowing in exp().
    return 0.5 * (1.0 + np.tanh(0.5 * z))


def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """Implement the forward propagation for a single LSTM cell.

    Arguments:
    xt -- your input data at timestep "t", numpy array of shape (n_x, m)
    a_prev -- Hidden state at timestep "t-1", numpy array of shape (n_a, m)
    c_prev -- Memory state at timestep "t-1", numpy array of shape (n_a, m)
    parameters -- python dictionary containing:
        Wf -- Weight matrix of the forget gate, numpy array of shape (n_a, n_a + n_x)
        bf -- Bias of the forget gate, numpy array of shape (n_a, 1)
        Wi -- Weight matrix of the update gate, numpy array of shape (n_a, n_a + n_x)
        bi -- Bias of the update gate, numpy array of shape (n_a, 1)
        Wo -- Weight matrix of the output gate, numpy array of shape (n_a, n_a + n_x)
        bo -- Bias of the output gate, numpy array of shape (n_a, 1)
        Wc -- Weight matrix of the candidate cell state, numpy array of shape (n_a, n_a + n_x)
        bc -- Bias of the candidate cell state, numpy array of shape (n_a, 1)

    Returns:
    a_next -- next hidden state, of shape (n_a, m)
    c_next -- next memory state, of shape (n_a, m)
    yt_pred -- prediction at timestep "t"; currently always None because no
               output layer is implemented in this function
    cache -- tuple of values needed for the backward pass, contains
             (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters)
    """
    # Retrieve parameters from "parameters"
    Wf = parameters["Wf"]
    bf = parameters["bf"]
    Wi = parameters["Wi"]
    bi = parameters["bi"]
    Wo = parameters["Wo"]
    bo = parameters["bo"]
    Wc = parameters["Wc"]
    bc = parameters["bc"]

    # Stack a_prev on top of xt along the feature axis, giving an array of
    # shape (n_a + n_x, m) that matches the column count of every weight matrix.
    concat = np.concatenate([a_prev, xt], axis=0)

    # Gates use sigmoid (values in [0, 1]); the candidate uses tanh.
    ft = _sigmoid(np.dot(Wf, concat) + bf)   # Forget gate activations
    it = _sigmoid(np.dot(Wi, concat) + bi)   # Update gate activations
    cct = np.tanh(np.dot(Wc, concat) + bc)   # Cell candidate value
    c_next = ft * c_prev + it * cct          # New cell state
    ot = _sigmoid(np.dot(Wo, concat) + bo)   # Output gate activations
    a_next = ot * np.tanh(c_next)            # Next hidden state

    # Placeholder: the actual prediction depends on task requirements (e.g. an
    # output layer followed by softmax), which this function does not implement.
    yt_pred = None

    # Store intermediate results for use during backpropagation.
    cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters)

    return a_next, c_next, yt_pred, cache