Provide a complete example of using CMSIS-NN
Below is a complete usage example based on CMSIS-NN: a simple fully connected image classifier evaluated on the MNIST test set. Note that CMSIS-NN only provides inference kernels; the network is trained and quantized offline, and the resulting q7 weights and the quantized test data are loaded into the arrays used in the code.
```c
#include <stdio.h>
#include "arm_nnfunctions.h"
#include "arm_math.h"

#define NUM_TEST_IMAGES 10000
#define IMAGE_SIZE      784   /* 28 x 28 MNIST images, flattened */
#define NUM_CLASSES     10

/* Fixed-point shift parameters produced by the offline quantization step.
 * The values below are placeholders; use the ones generated for your model. */
#define BIAS_SHIFT      0
#define OUT_SHIFT       7

/* Quantized test data and labels, filled by the data-loading code in main(). */
static q7_t test_images[NUM_TEST_IMAGES][IMAGE_SIZE];
static q7_t test_labels[NUM_TEST_IMAGES];

/* Quantized weights and biases of a small 784 -> 10 -> 10 fully connected
 * network. CMSIS-NN only provides inference kernels, so these parameters come
 * from a model trained and quantized offline (for example exported from the
 * training framework as C arrays). The weight matrices are stored row-major,
 * [num_of_rows][dim_vec], as expected by arm_fully_connected_q7. */
static q7_t weights1[NUM_CLASSES * IMAGE_SIZE];
static q7_t bias1[NUM_CLASSES];
static q7_t weights2[NUM_CLASSES * NUM_CLASSES];
static q7_t bias2[NUM_CLASSES];

/* Scratch buffer required by arm_fully_connected_q7 (q15_t, dim_vec elements). */
static q15_t vec_buffer[IMAGE_SIZE];

/* Run the network on one image and return the predicted class index. */
static uint8_t classify(const q7_t *image)
{
    q7_t fc1_out[NUM_CLASSES];
    q7_t fc2_out[NUM_CLASSES];
    q7_t softmax_out[NUM_CLASSES];
    uint8_t predicted = 0;
    q7_t max_val;
    int k;

    /* Layer 1: fully connected + ReLU. arm_fully_connected_q7 returns an
     * arm_status; error handling is omitted here for brevity. */
    arm_fully_connected_q7(image, weights1, IMAGE_SIZE, NUM_CLASSES,
                           BIAS_SHIFT, OUT_SHIFT, bias1, fc1_out, vec_buffer);
    arm_relu_q7(fc1_out, NUM_CLASSES);

    /* Layer 2: fully connected + softmax. */
    arm_fully_connected_q7(fc1_out, weights2, NUM_CLASSES, NUM_CLASSES,
                           BIAS_SHIFT, OUT_SHIFT, bias2, fc2_out, vec_buffer);
    arm_softmax_q7(fc2_out, NUM_CLASSES, softmax_out);

    /* Argmax over the softmax output. */
    max_val = softmax_out[0];
    for (k = 1; k < NUM_CLASSES; k++)
    {
        if (softmax_out[k] > max_val)
        {
            max_val = softmax_out[k];
            predicted = (uint8_t)k;
        }
    }
    return predicted;
}

/* Evaluate the network on the whole test set and print the accuracy. */
static void test(void)
{
    uint32_t correct_count = 0;
    int i;

    for (i = 0; i < NUM_TEST_IMAGES; i++)
    {
        if (classify(test_images[i]) == (uint8_t)test_labels[i])
        {
            correct_count++;
        }
    }
    printf("Accuracy: %.2f%%\n",
           (float)correct_count / NUM_TEST_IMAGES * 100.0f);
}

int main(void)
{
    /* Load the quantized MNIST test set into test_images / test_labels,
     * and the quantized model parameters into weights1, bias1, weights2,
     * bias2 (for example from arrays generated by the offline training
     * and quantization step). */
    // ...

    /* Run inference on the test set. */
    test();
    return 0;
}
```
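The data-loading and weight-export steps above are deliberately left as placeholders. As a rough sketch of what the q7 conversion involves (the helper names `pixel_to_q7` and `float_to_q7`, and the choice of 7 fractional bits, are illustrative assumptions rather than part of CMSIS-NN):

```c
#include <math.h>      /* lroundf */
#include <stdint.h>
#include "arm_math.h"  /* q7_t */

/* Hypothetical helper: map a raw MNIST pixel (0..255) onto the q7 range
 * [-128, 127] by centring it around zero. */
static inline q7_t pixel_to_q7(uint8_t pixel)
{
    return (q7_t)((int16_t)pixel - 128);
}

/* Hypothetical helper: quantize a float weight to q7 with `frac_bits`
 * fractional bits (7 gives the usual Q0.7 format covering [-1, 1)),
 * saturating to the representable range. In practice this is normally done
 * offline in the training environment and emitted as constant C arrays. */
static q7_t float_to_q7(float w, int frac_bits)
{
    int32_t v = (int32_t)lroundf(w * (float)(1 << frac_bits));
    if (v > 127)  { v = 127; }
    if (v < -128) { v = -128; }
    return (q7_t)v;
}
```

The `bias_shift` and `out_shift` arguments of `arm_fully_connected_q7` compensate for the different fixed-point formats of the inputs, weights, and outputs; they are fixed at quantization time, which is why the example treats them as compile-time constants.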
Please note that this code only demonstrates how CMSIS-NN is called and is not a best-practice implementation. In a real application you should choose the network architecture and quantization parameters carefully for better accuracy and performance, and be aware that the q7 kernels used here belong to the legacy CMSIS-NN API, which newer releases deprecate in favour of the s8 functions.