Job_SignsPads/STM32/Code/STM32F405/nnom_src/layers/nnom_concat.c

/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date         Author        Notes
* 2019-07-23   Jianjia Ma    The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_concat.h"
nnom_layer_t *concat_s(const nnom_concat_config_t *config)
{
nnom_layer_t* layer = Concat(config->axis);
if(layer)
layer->config = (void*) config;
return layer;
}
// concat method
// a concat layer requires more than one input; the aux inputs are allocated in model.merge()
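// Usage sketch (illustrative only; the layer names and indices are hypothetical): since concat is a
// merge method, it is normally passed to model.merge() / model.mergex() rather than model.hook(), e.g.
//   layer[5] = model.merge(Concat(-1), layer[3], layer[4]); // channel-wise concat of two branches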
nnom_layer_t *Concat(int8_t axis)
{
nnom_concat_layer_t *layer;
nnom_layer_io_t *in, *out;
size_t mem_size;
// allocate one block of memory for all the sub handles.
mem_size = sizeof(nnom_concat_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to the sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_concat_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
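// resulting layout of the single allocation: [ nnom_concat_layer_t | in io | out io ]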
// set type in layer parent
layer->super.type = NNOM_CONCAT;
layer->super.run = concat_run;
layer->super.build = concat_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
// axis
layer->axis = axis;
return (nnom_layer_t *)layer;
}
nnom_status_t concat_build(nnom_layer_t *layer)
{
nnom_concat_layer_t *cl = (nnom_concat_layer_t *)layer;
nnom_layer_io_t *in;
uint32_t in_num = 0;
int32_t num_dim;
// for each input, take the tensor from the output of the previously hooked layer
in = layer->in;
while (in != NULL)
{
// use the previous layer's output tensor as this input's tensor
in->tensor = in->hook.io->tensor;
in = in->aux;
in_num++;
}
// allocate a new tensor for the output, keeping the same number of dimensions
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// convert the axis to a 0-based index
if (cl->axis < 0)
cl->axis = (layer->in->tensor->num_dim + cl->axis);
else if (cl->axis > 0)
cl->axis = cl->axis - 1; // Keras axes start from 1 (axis 0 is the batch); here axes are 0-based
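// e.g. for an HWC tensor with num_dim = 3, a Keras axis of 3 or -1 both map to index 2 (the channels) here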
// work out the size of each output dimension
num_dim = layer->in->tensor->num_dim;
for (int32_t i = 0; i < num_dim; i++)
{
// handle the concat axis separately
if (i == cl->axis)
{
layer->out->tensor->dim[i] = 0;
// sum this axis over all inputs
in = layer->in;
while (in != NULL)
{
layer->out->tensor->dim[i] += in->tensor->dim[i];
in = in->aux;
}
continue;
}
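// e.g. concatenating (4,4,8) and (4,4,16) along the channel axis gives dim[2] = 8 + 16 = 24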
// all other axes must have the same size across every input
in = layer->in;
while (in != NULL && in->aux != NULL)
{
if (in->tensor->dim[i] != in->aux->tensor->dim[i])
return NN_ARGUMENT_ERROR;
in = in->aux;
}
// other axes: copy the size from the first input
layer->out->tensor->dim[i] = layer->in->tensor->dim[i];
}
return NN_SUCCESS;
}
#ifdef NNOM_USING_CHW
// axis index converter between HWC and CHW
static inline int chw_i(int hwc, int num_dim)
{
num_dim = num_dim -1;
hwc = hwc + 1;
if(hwc>num_dim)
hwc = 0;
return hwc;
}
static inline int hwc_i(int chw, int num_dim)
{
num_dim = num_dim -1;
chw = chw - 1;
if(chw < 0)
chw = num_dim;
return chw;
}
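// e.g. with num_dim = 3, chw_i maps the HWC indices 0,1,2 (H,W,C) to the CHW positions 1,2,0,
// and hwc_i is the inverse, mapping the CHW positions 0,1,2 (C,H,W) back to the HWC indices 2,0,1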
#endif
nnom_status_t concat_run(nnom_layer_t *layer)
{
// by default, a concat layer has multiple (>=2) inputs and 1 output.
nnom_concat_layer_t *cl = (nnom_concat_layer_t *)layer;
nnom_layer_io_t *in;
uint32_t dwidth = layer->in->tensor->bitwidth/8; // data width in bytes
#ifdef NNOM_USING_CHW
// Concatenate for CHW
uint8_t *pin;
uint8_t *pout = layer->out->tensor->p_data;
uint32_t block_size;
uint32_t n_block;
uint8_t num_dim = layer->in->tensor->num_dim;
// calculate the number of blocks to concatenate (product of the dims before the concat axis, in CHW layout order)
n_block = 1;
for(int i= 0; i< chw_i(cl->axis, num_dim); i++)
{
n_block *= layer->in->tensor->dim[hwc_i(i, num_dim)];
}
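// e.g. for a channel-wise concat in CHW layout the channel axis is the outermost dimension,
// so chw_i(cl->axis) == 0, n_block == 1 and each input is copied below with a single memcpy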
// concat all input layers
for(int i=0; i<n_block; i++)
{
in = layer->in;
while (in != NULL)
{
// the block size contributed by this input
block_size = dwidth;
for(int j= num_dim-1; j >= chw_i(cl->axis, num_dim); j--)
block_size *= in->tensor->dim[hwc_i(j, num_dim)];
// concat
pin = (uint8_t *)in->tensor->p_data + i * block_size;
nnom_memcpy(pout, pin, block_size);
pout += block_size;
in = in->aux;
}
}
#else // end of CHW concat
// Concatenate for HWC
uint8_t* pin;
uint8_t* pout = layer->out->tensor->p_data;
uint32_t block_size;
uint32_t n_block;
uint8_t num_dim = layer->in->tensor->num_dim;
// calculate the number of blocks to concatenate (product of the dims before the concat axis)
n_block = 1;
for (int i = 0; i < cl->axis; i++)
n_block *= layer->in->tensor->dim[i];
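// e.g. concatenating (4,4,8) and (4,4,16) int8 tensors along the channel axis: n_block = 4*4 = 16,
// and the loop below interleaves 8-byte and 16-byte chunks from the two inputs into the output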
// concat all input layers
for (int i = 0; i < n_block; i++)
{
in = layer->in;
while (in != NULL)
{
// the block size contributed by this input
block_size = dwidth;
for (int j = cl->axis; j < num_dim; j++)
block_size *= in->tensor->dim[j];
// concat
pin = (uint8_t*)in->tensor->p_data + i * block_size;
nnom_memcpy(pout, pin, block_size);
pout += block_size;
in = in->aux;
}
}
#endif
return NN_SUCCESS;
}