/**
  ******************************************************************************
  * @file    kpm32xx_ddl_dnu.h
  * @author  Kiwi Software Team
  * @brief   Header file of DNU DDL module.
  * @note
  *          V1.0.0, 2025/1/8.
  *
  * Copyright (c) 2024, Kiwi Instruments Co,. Ltd.
  *
  * Redistribution and use in source and binary forms, with or without modification,
  * are permitted provided that the following conditions are met:
  *
  *   1. Redistributions of source code must retain the above copyright notice,
  *      this list of conditions and the following disclaimer.
  *
  *   2. Redistributions in binary form must reproduce the above copyright notice,
  *      this list of conditions and the following disclaimer in the documentation
  *      and/or other materials provided with the distribution.
  *
  *   3. Neither the name of the copyright holder nor the names of its contributors
  *      may be used to endorse or promote products derived from this software without
  *      specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  ******************************************************************************
  */



#ifndef __KPM32xx_DDL_DNU_H
#define __KPM32xx_DDL_DNU_H

#ifdef __cplusplus
  extern "C" {
#endif


#include "kpm32xx_ddl_def.h"


/* Stride step length selection */
typedef enum
{
	STRIDE_STEP_ONE = 0U,   /* Stride step of one */
	STRIDE_STEP_TWO = 1U    /* Stride step of two */
} DNU_Stride_T;

/* Position at which row padding is applied */
typedef enum
{
	PADDING_POS_BEGIN = 0U,   /* Pad from the start */
	PADDING_POS_END   = 1U    /* Pad from the end */
} DNU_PaddingPos_T;

/* Calculation accuracy (element data type) */
typedef enum
{
	ACCURACY_SINGLE = 0U,   /* Single calculation accuracy */
	ACCURACY_INT16  = 1U,   /* Signed short calculation accuracy */
	ACCURACY_INT8   = 2U    /* Signed char calculation accuracy */
} DNU_Accuracy_T;

/* DNU operation command codes */
typedef enum
{
	OPERATION_FC        = 0U,   /* Full connect */
	OPERATION_CONV      = 1U,   /* Convolution */
	OPERATION_CONV_PSUM = 2U,   /* Convolution with PSUM */
	OPERATION_MAXPOOL   = 3U,   /* Max pooling */
	OPERATION_RELU_BN   = 4U,   /* ReLU and batch normalization */
	OPERATION_ADD       = 5U,   /* Matrix add */
	OPERATION_MUL       = 6U    /* Matrix multiply */
} DNU_Operation_T;


/* Dimensions of a feature map (height, width, channel count) */
typedef struct
{
	int32_t     h;                        /* Feature map height */
	int32_t     w;                        /* Feature map width */
	int32_t     c;                        /* Feature map channel count; only 1 is supported for now */
} DNU_Dims_T;


/* Pooling parameters */
typedef struct
{
	int32_t     strideStep;                /* Stride step (presumably a DNU_Stride_T value - TODO confirm) */
	int32_t     paddingNUM;                /* Number of padding elements */
	int32_t     filterSize;                /* Filter size; only 2 is supported for now */
	DNU_PaddingPos_T  pos;                 /* Padding position (begin or end) */
} DNU_PoolParams_T;


/* Convolution parameters */
typedef struct
{
	int32_t     strideStep;                /* Stride step (presumably a DNU_Stride_T value - TODO confirm) */
	int32_t     paddingNUM;                /* Number of padding elements */
	DNU_PaddingPos_T  pos;                 /* Padding position (begin or end) */
} DNU_ConvParams_T;


/*
 * Batch normalize parameters: gamma, beta and eps.
 * Each parameter is a union with one member per element type
 * (int8_t, int16_t, float).
 * NOTE(review): presumably the member to fill is the one matching the
 * accuracy of the DDL_DNU_ReluBN_xxx variant being called - confirm
 * against the implementation.
 */
typedef struct
{
	/* gamma parameter */
	union
	{
		int8_t  gamma_int8;     /* gamma as int8_t */
		int16_t gamma_int16;    /* gamma as int16_t */
		float   gamma_float;    /* gamma as float */
	} gamma;
	/* beta parameter */
	union
	{
		int8_t  beta_int8;      /* beta as int8_t */
		int16_t beta_int16;     /* beta as int16_t */
		float   beta_float;     /* beta as float */
	} beta;
	/* eps parameter */
	union
	{
		int8_t  eps_int8;       /* eps as int8_t */
		int16_t eps_int16;      /* eps as int16_t */
		float   eps_float;      /* eps as float */
	} eps;
}DNU_BatchNormalParams_T;


/*
 * Register-access helper macros for the DNU peripheral.
 * NOTE(review): the "(void)" in each definition declares a macro parameter
 * that happens to be named "void"; no expansion references it, so the macros
 * are normally invoked with empty parentheses, e.g. __DDL_DNU_ENABLE().
 */

/* Enable the DNU peripheral: applies SET_BIT to DNU->CTRL with DNU_CTRL_EN. */
#define __DDL_DNU_ENABLE(void)       SET_BIT(DNU->CTRL, DNU_CTRL_EN)

/* Disable the DNU peripheral: applies CLEAR_BIT to DNU->CTRL with DNU_CTRL_EN. */
#define __DDL_DNU_DISABLE(void)      CLEAR_BIT(DNU->CTRL, DNU_CTRL_EN)

/* Enable the DNU interrupt: applies SET_BIT to DNU->CTRL with DNU_CTRL_FSGINTEN. */
#define __DDL_DNU_ENABLE_IT(void)    SET_BIT(DNU->CTRL, DNU_CTRL_FSGINTEN)

/* Disable the DNU interrupt: applies CLEAR_BIT to DNU->CTRL with DNU_CTRL_FSGINTEN. */
#define __DDL_DNU_DISABLE_IT(void)   CLEAR_BIT(DNU->CTRL, DNU_CTRL_FSGINTEN)

/*
 * Clear the DNU interrupt status: writes DNU_INT_FSGINTST to DNU->INT.
 * NOTE(review): the original comment also states that this disables the
 * interrupt; that depends on the DNU->INT register semantics - confirm
 * against the reference manual.
 */
#define __DDL_DNU_CLEAR_ITSTATUS(void)   WRITE_REG(DNU->INT, DNU_INT_FSGINTST)

/**
  * @brief  Make specified DNU active.
  * @param  DNUx        DNU instance.
  * @retval DDL status (the function returns DDL_Status_T).
  */
DDL_Status_T DDL_DNU_Instance_Active(DNU_Type *DNUx);

/**
  * @brief  Make specified DNU inactive.
  * @param  DNUx        DNU instance.
  * @retval DDL status (the function returns DDL_Status_T).
  */
DDL_Status_T DDL_DNU_Instance_Deactive(DNU_Type *DNUx);

/**
  * @brief  Enable DNU interrupt.
  * @param  DNUx        DNU instance.
  * @param  priority    interrupt priority (presumably the priority assigned to
  *                     the DNU interrupt in the interrupt controller - TODO confirm).
  * @retval None.
  */
void DDL_DNU_IntEnable(DNU_Type *DNUx, uint32_t priority);

/**
  * @brief  Disable the DNU interrupt.
  * @param  DNUx        DNU instance whose interrupt is disabled.
  * @retval None.
  */
void DDL_DNU_IntDisable(DNU_Type *DNUx);

/**
  * @brief  Start the DNU.
  * @param  DNUx        DNU instance to start.
  * @retval None.
  */
void DDL_DNU_Start(DNU_Type *DNUx);

/**
  * @brief  Stop the DNU.
  * @param  DNUx        DNU instance to stop.
  * @retval None.
  */
void DDL_DNU_Stop(DNU_Type *DNUx);

/**
  * @brief  Perform the max pooling operation on INT8 data; currently only 2*2 max pooling is supported.
  * @param  inputDims   dimensions (h, w, c) of the input feature map.
  * @param  src         input feature map data (read-only).
  * @param  outputDims  dimensions (h, w, c) of the output feature map.
  * @param  dst         buffer that receives the output feature map.
  * @param  poolParam   pooling parameters (stride step, padding number, filter size, padding position).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_MaxPool_INT8(DNU_Dims_T * inputDims, const int8_t * src, DNU_Dims_T * outputDims, int8_t * dst,
							DNU_PoolParams_T * poolParam);

/**
  * @brief  Perform the max pooling operation on INT16 data; currently only 2*2 max pooling is supported.
  * @param  inputDims   dimensions (h, w, c) of the input feature map.
  * @param  src         input feature map data (read-only).
  * @param  outputDims  dimensions (h, w, c) of the output feature map.
  * @param  dst         buffer that receives the output feature map.
  * @param  poolParam   pooling parameters (stride step, padding number, filter size, padding position).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_MaxPool_INT16(DNU_Dims_T * inputDims, const int16_t * src, DNU_Dims_T * outputDims, int16_t * dst,
							DNU_PoolParams_T * poolParam);

/**
  * @brief  Perform the max pooling operation on float data; currently only 2*2 max pooling is supported.
  * @param  inputDims   dimensions (h, w, c) of the input feature map.
  * @param  src         input feature map data (read-only).
  * @param  outputDims  dimensions (h, w, c) of the output feature map.
  * @param  dst         buffer that receives the output feature map.
  * @param  poolParam   pooling parameters (stride step, padding number, filter size, padding position).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_MaxPool_Float(DNU_Dims_T * inputDims, const float * src, DNU_Dims_T * outputDims, float * dst,
							DNU_PoolParams_T * poolParam);

/**
  * @brief  Perform the convolution operation on INT8 data; currently only a 1*1 filter is supported.
  * @param  inputDims   dimensions (h, w, c) of the input feature map.
  * @param  src         input feature map data (read-only).
  * @param  filterData  kernel data, passed by value as a single int8_t (1*1 filter).
  * @param  outputDims  dimensions (h, w, c) of the output feature map.
  * @param  dst         buffer that receives the output feature map.
  * @param  convParam   convolution parameters (stride step, padding number, padding position).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_Conv_INT8(DNU_Dims_T * inputDims, const int8_t * src, const int8_t filterData,
	                                   DNU_Dims_T * outputDims, int8_t * dst, DNU_ConvParams_T * convParam);

/**
  * @brief  Perform the convolution operation on INT16 data; currently only a 1*1 filter is supported.
  * @param  inputDims   dimensions (h, w, c) of the input feature map.
  * @param  src         input feature map data (read-only).
  * @param  filterData  kernel data, passed by value as a single int16_t (1*1 filter).
  * @param  outputDims  dimensions (h, w, c) of the output feature map.
  * @param  dst         buffer that receives the output feature map.
  * @param  convParam   convolution parameters (stride step, padding number, padding position).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_Conv_INT16(DNU_Dims_T * inputDims, const int16_t * src, const int16_t filterData,
	                                   DNU_Dims_T * outputDims, int16_t * dst, DNU_ConvParams_T * convParam);

/**
  * @brief  Perform the convolution operation on float data; currently only a 1*1 filter is supported.
  * @param  inputDims   dimensions (h, w, c) of the input feature map.
  * @param  src         input feature map data (read-only).
  * @param  filterData  kernel data, passed by value as a single float (1*1 filter).
  * @param  outputDims  dimensions (h, w, c) of the output feature map.
  * @param  dst         buffer that receives the output feature map.
  * @param  convParam   convolution parameters (stride step, padding number, padding position).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_Conv_Float(DNU_Dims_T * inputDims, const float * src, const float filterData,
	                                   DNU_Dims_T * outputDims, float * dst, DNU_ConvParams_T * convParam);

/**
  * @brief  Perform the convolution psum operation on INT8 data; currently only a 1*1 filter
  *         and a stride step of 1 are supported.
  * @param  inputDims   dimensions (h, w, c) of the input feature map.
  * @param  src         input feature map data (read-only).
  * @param  psumDims    dimensions (h, w, c) of the psum data.
  * @param  psum        buffer holding the psum data (read-only); its size is the same as the input feature map.
  * @param  filterData  kernel data, passed by value as a single int8_t (1*1 filter).
  * @param  outputDims  dimensions (h, w, c) of the output feature map.
  * @param  dst         buffer that receives the output feature map.
  * @param  convParam   convolution parameters (stride step, padding number, padding position).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_ConvPsum_INT8(DNU_Dims_T * inputDims, const int8_t * src, DNU_Dims_T * psumDims, const int8_t * psum, const int8_t filterData,
	                                   DNU_Dims_T * outputDims, int8_t * dst, DNU_ConvParams_T * convParam);

/**
  * @brief  Perform the convolution psum operation on INT16 data; currently only a 1*1 filter
  *         and a stride step of 1 are supported.
  * @param  inputDims   dimensions (h, w, c) of the input feature map.
  * @param  src         input feature map data (read-only).
  * @param  psumDims    dimensions (h, w, c) of the psum data.
  * @param  psum        buffer holding the psum data (read-only); its size is the same as the input feature map.
  * @param  filterData  kernel data, passed by value as a single int16_t (1*1 filter).
  * @param  outputDims  dimensions (h, w, c) of the output feature map.
  * @param  dst         buffer that receives the output feature map.
  * @param  convParam   convolution parameters (stride step, padding number, padding position).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_ConvPsum_INT16(DNU_Dims_T * inputDims, const int16_t * src, DNU_Dims_T * psumDims, const int16_t * psum, const int16_t filterData,
	                                   DNU_Dims_T * outputDims, int16_t * dst, DNU_ConvParams_T * convParam);

/**
  * @brief  Perform the convolution psum operation on float data; currently only a 1*1 filter
  *         and a stride step of 1 are supported.
  * @param  inputDims   dimensions (h, w, c) of the input feature map.
  * @param  src         input feature map data (read-only).
  * @param  psumDims    dimensions (h, w, c) of the psum data.
  * @param  psum        buffer holding the psum data (read-only); its size is the same as the input feature map.
  * @param  filterData  kernel data, passed by value as a single float (1*1 filter).
  * @param  outputDims  dimensions (h, w, c) of the output feature map.
  * @param  dst         buffer that receives the output feature map.
  * @param  convParam   convolution parameters (stride step, padding number, padding position).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_ConvPsum_Float(DNU_Dims_T * inputDims, const float * src, DNU_Dims_T * psumDims, const float * psum, const float filterData,
	                                   DNU_Dims_T * outputDims, float * dst, DNU_ConvParams_T * convParam);


/**
  * @brief  Perform the full connect operation on INT8 data.
  * @param  pInput     pointer to the input vector (read-only).
  * @param  dimInput   the dimension of the input.
  * @param  pOut       buffer that receives the output data.
  * @param  dimOutput  the dimension of the output.
  * @param  pM         pointer to the weight matrix (read-only), dimInput rows by dimOutput columns.
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_FullConnect_INT8(const int8_t * pInput, uint32_t dimInput, int8_t * pOut, uint32_t dimOutput, const int8_t * pM);


/**
  * @brief  Perform the full connect operation on INT16 data.
  * @param  pInput     pointer to the input vector (read-only).
  * @param  dimInput   the dimension of the input.
  * @param  pOut       buffer that receives the output data.
  * @param  dimOutput  the dimension of the output.
  * @param  pM         pointer to the weight matrix (read-only), dimInput rows by dimOutput columns.
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_FullConnect_INT16(const int16_t * pInput, uint32_t dimInput, int16_t * pOut, uint32_t dimOutput, const int16_t * pM);


/**
  * @brief  Perform the full connect operation on float data.
  * @param  pInput     pointer to the input vector (read-only).
  * @param  dimInput   the dimension of the input.
  * @param  pOut       buffer that receives the output data.
  * @param  dimOutput  the dimension of the output.
  * @param  pM         pointer to the weight matrix (read-only), dimInput rows by dimOutput columns.
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_FullConnect_Float(const float * pInput, uint32_t dimInput, float * pOut, uint32_t dimOutput, const float * pM);

/**
  * @brief  Perform the matrix add operation on INT8 data.
  * @param  pM0        pointer to matrix 0 data (read-only).
  * @param  pM1        pointer to matrix 1 data (read-only).
  * @param  matrixDim  dimensions (h, w, c) of the matrix.
  * @param  pOut       buffer that receives the output matrix.
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_Add_INT8(const int8_t * pM0, const int8_t * pM1, DNU_Dims_T * matrixDim, int8_t * pOut);

/**
  * @brief  Perform the matrix add operation on INT16 data.
  * @param  pM0        pointer to matrix 0 data (read-only).
  * @param  pM1        pointer to matrix 1 data (read-only).
  * @param  matrixDim  dimensions (h, w, c) of the matrix.
  * @param  pOut       buffer that receives the output matrix.
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_Add_INT16(const int16_t * pM0, const int16_t * pM1, DNU_Dims_T * matrixDim, int16_t * pOut);

/**
  * @brief  Perform the matrix add operation on float data.
  * @param  pM0        pointer to matrix 0 data (read-only).
  * @param  pM1        pointer to matrix 1 data (read-only).
  * @param  matrixDim  dimensions (h, w, c) of the matrix.
  * @param  pOut       buffer that receives the output matrix.
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_Add_Float(const float * pM0, const float * pM1, DNU_Dims_T * matrixDim, float * pOut);

/**
  * @brief  Perform the ReLU and batch normalize operation on INT8 data.
  * @param  inputDims    dimensions (h, w, c) of the input feature map.
  * @param  data         pointer to the input data (read-only).
  * @param  outputDims   dimensions (h, w, c) of the output feature map.
  * @param  out          buffer that receives the output data.
  * @param  normalParam  batch normalize parameters (DNU_BatchNormalParams_T holds gamma, beta and eps).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_ReluBN_INT8(DNU_Dims_T * inputDims, const int8_t * data, DNU_Dims_T * outputDims,
										int8_t * out, DNU_BatchNormalParams_T * normalParam);

/**
  * @brief  Perform the ReLU and batch normalize operation on INT16 data.
  * @param  inputDims    dimensions (h, w, c) of the input feature map.
  * @param  data         pointer to the input data (read-only).
  * @param  outputDims   dimensions (h, w, c) of the output feature map.
  * @param  out          buffer that receives the output data.
  * @param  normalParam  batch normalize parameters (DNU_BatchNormalParams_T holds gamma, beta and eps).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_ReluBN_INT16(DNU_Dims_T * inputDims, const int16_t * data, DNU_Dims_T * outputDims,
                                           int16_t * out, DNU_BatchNormalParams_T * normalParam);

/**
  * @brief  Perform the ReLU and batch normalize operation on float data.
  * @param  inputDims    dimensions (h, w, c) of the input feature map.
  * @param  data         pointer to the input data (read-only).
  * @param  outputDims   dimensions (h, w, c) of the output feature map.
  * @param  out          buffer that receives the output data.
  * @param  normalParam  batch normalize parameters (DNU_BatchNormalParams_T holds gamma, beta and eps).
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_ReluBN_Float(DNU_Dims_T * inputDims, const float * data, DNU_Dims_T * outputDims,
                                          float * out, DNU_BatchNormalParams_T * normalParam);
/**
  * @brief  Perform the matrix multiply operation on INT8 data.
  * @note   NOTE(review): a single matrixDim describes the operands; whether the
  *         multiply is element-wise is not visible from this header - confirm.
  * @param  pM0        pointer to matrix 0 data (read-only).
  * @param  pM1        pointer to matrix 1 data (read-only).
  * @param  matrixDim  dimensions (h, w, c) of the matrix.
  * @param  pOut       buffer that receives the output matrix.
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_Mul_INT8(const int8_t * pM0, const int8_t * pM1, DNU_Dims_T * matrixDim, int8_t * pOut);

/**
  * @brief  Perform the matrix multiply operation on INT16 data.
  * @note   NOTE(review): a single matrixDim describes the operands; whether the
  *         multiply is element-wise is not visible from this header - confirm.
  * @param  pM0        pointer to matrix 0 data (read-only).
  * @param  pM1        pointer to matrix 1 data (read-only).
  * @param  matrixDim  dimensions (h, w, c) of the matrix.
  * @param  pOut       buffer that receives the output matrix.
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_Mul_INT16(const int16_t * pM0, const int16_t * pM1, DNU_Dims_T * matrixDim, int16_t * pOut);

/**
  * @brief  Perform the matrix multiply operation on float data.
  * @note   NOTE(review): a single matrixDim describes the operands; whether the
  *         multiply is element-wise is not visible from this header - confirm.
  * @param  pM0        pointer to matrix 0 data (read-only).
  * @param  pM1        pointer to matrix 1 data (read-only).
  * @param  matrixDim  dimensions (h, w, c) of the matrix.
  * @param  pOut       buffer that receives the output matrix.
  * @retval DDL status
  */
DDL_Status_T DDL_DNU_Mul_Float(const float * pM0, const float * pM1, DNU_Dims_T * matrixDim, float * pOut);

/**
  * @brief  Free DMA channels (presumably the channels used by the DNU
  *         operations declared in this header - TODO confirm).
  * @param  None.
  * @retval None.
  */
void DDL_DNU_FreeDMAChannels(void);


#ifdef __cplusplus
}
#endif
#endif /* __KPM32xx_DDL_DNU_H */

