nvdla: add NVDLA driver

Additional update from Prashant Gaikwad <pgaikwad@nvidia.com>.
Adapted for Linux 5.13 and the BeagleV Starlight board by <cybergaszcz@gmail.com>.

parent 1aaa011e7e
commit 29e676e7fa

33 changed files with 32588 additions and 0 deletions
drivers/Kconfig
@@ -236,4 +236,6 @@ source "drivers/interconnect/Kconfig"
source "drivers/counter/Kconfig"

source "drivers/most/Kconfig"

source "drivers/nvdla/Kconfig"

endmenu
drivers/Makefile
@@ -189,3 +189,4 @@ obj-$(CONFIG_GNSS) += gnss/
obj-$(CONFIG_INTERCONNECT) += interconnect/
obj-$(CONFIG_COUNTER) += counter/
obj-$(CONFIG_MOST) += most/
obj-$(CONFIG_NVDLA) += nvdla/
drivers/nvdla/Kconfig (new file, 5 lines)
@@ -0,0 +1,5 @@
config NVDLA
	bool "The NVIDIA Deep Learning Accelerator"
	default n
	depends on DRM
	select DRM_GEM_CMA_HELPER
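Since the option is a bool that depends on DRM, the driver can only be built into the kernel image, never as a module. A minimal sketch of enabling it from a board config fragment (illustrative only, not part of this commit):

# Hypothetical defconfig fragment: both symbols must be =y,
# because NVDLA is bool (no =m) and depends on DRM.
CONFIG_DRM=y
CONFIG_NVDLA=y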
drivers/nvdla/Makefile (new file, 19 lines)
@@ -0,0 +1,19 @@
ccflags-$(CONFIG_NVDLA) += -I$(srctree)/$(src)
ccflags-$(CONFIG_NVDLA) += -I$(srctree)/$(src)/include

obj-$(CONFIG_NVDLA) += scheduler.o
obj-$(CONFIG_NVDLA) += engine.o
obj-$(CONFIG_NVDLA) += bdma.o
obj-$(CONFIG_NVDLA) += conv.o
obj-$(CONFIG_NVDLA) += sdp.o
obj-$(CONFIG_NVDLA) += cdp.o
obj-$(CONFIG_NVDLA) += pdp.o
obj-$(CONFIG_NVDLA) += rubik.o
obj-$(CONFIG_NVDLA) += cache.o
obj-$(CONFIG_NVDLA) += common.o
obj-$(CONFIG_NVDLA) += engine_data.o
obj-$(CONFIG_NVDLA) += engine_isr.o
obj-$(CONFIG_NVDLA) += engine_debug.o
obj-$(CONFIG_NVDLA) += nvdla_core_callbacks.o
obj-$(CONFIG_NVDLA) += nvdla_gem.o
drivers/nvdla/bdma.c (new file, 280 lines)
@@ -0,0 +1,280 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <opendla.h>
#include <dla_debug.h>
#include <dla_err.h>
#include <dla_interface.h>

#include "dla_engine_internal.h"
#include "engine_debug.h"

static const uint8_t map_mem[] = {
	FIELD_ENUM(BDMA_CFG_CMD_0, SRC_RAM_TYPE, MC),
	FIELD_ENUM(BDMA_CFG_CMD_0, SRC_RAM_TYPE, CVSRAM),
};

#if STAT_ENABLE
void
dla_bdma_stat_data(struct dla_processor *processor,
			struct dla_processor_group *group)
{
	uint64_t end_time = 0;
	struct dla_bdma_stat_desc *bdma_stat;

	bdma_stat = &processor->stat_data_desc->bdma_stat;

	end_time = dla_get_time_us();

	if (group->id == (uint32_t)0) {
		bdma_stat->read_stall = bdma_reg_read(STATUS_GRP0_READ_STALL);
		bdma_stat->write_stall = bdma_reg_read(STATUS_GRP0_WRITE_STALL);
	} else {
		bdma_stat->read_stall = bdma_reg_read(STATUS_GRP1_READ_STALL);
		bdma_stat->write_stall = bdma_reg_read(STATUS_GRP1_WRITE_STALL);
	}
	bdma_stat->runtime = (uint32_t)(end_time - group->start_time);
}

void
dla_bdma_dump_stat(struct dla_processor *processor)
{
	struct dla_bdma_stat_desc *bdma_stat;

	bdma_stat = &processor->stat_data_desc->bdma_stat;

	dla_debug_bdma_stats(bdma_stat);
}
#endif /* STAT_ENABLE */

void
dla_bdma_set_producer(int32_t group_id, int32_t rdma_group_id)
{
	/**
	 * There is no producer bit for a BDMA operation; the
	 * interrupt pointer decides which outstanding request
	 * to use for this BDMA operation.
	 */
}

int
dla_bdma_enable(struct dla_processor_group *group)
{
	struct dla_engine *engine = dla_get_engine();

	dla_debug("Enter: %s\n", __func__);

	if (group->surface_desc->bdma_surface.num_transfers == (uint16_t)0) {
		group->events |= ((uint8_t)1 << DLA_EVENT_OP_COMPLETED);
		goto exit;
	}

	if (engine->stat_enable == (uint32_t)1) {
		bdma_reg_write(CFG_STATUS, FIELD_ENUM(BDMA_CFG_STATUS_0,
						STALL_COUNT_EN, YES));
		group->start_time = dla_get_time_us();
	}

	/**
	 * Launch the BDMA transfer.
	 */
	if (group->id == 0)
		bdma_reg_write(CFG_LAUNCH0, FIELD_ENUM(BDMA_CFG_LAUNCH0_0,
						GRP0_LAUNCH, YES));
	else
		bdma_reg_write(CFG_LAUNCH1, FIELD_ENUM(BDMA_CFG_LAUNCH1_0,
						GRP1_LAUNCH, YES));

exit:
	dla_debug("Exit: %s\n", __func__);
	return 0;
}

void
dla_bdma_rdma_check(struct dla_processor_group *group)
{
	group->is_rdma_needed = 0;
}

/**
 * Program a BDMA slot for a transfer.
 */
static int32_t
processor_bdma_program_slot(struct dla_bdma_surface_desc *bdma_surface,
				struct dla_bdma_transfer_desc *transfer)
{
	int32_t ret = 0;
	uint64_t source_addr = 0;
	uint64_t destination_addr = 0;
	uint32_t high, low, reg;
	uint8_t bdma_free_slots = 0;
	struct dla_engine *engine = dla_get_engine();

	dla_debug("Enter: %s\n", __func__);

	/* make sure there are enough free slots */
	if (bdma_free_slots <= 0) {
		do {
			reg = bdma_reg_read(STATUS);
			reg = (reg & MASK(BDMA_STATUS_0, FREE_SLOT)) >>
					SHIFT(BDMA_STATUS_0, FREE_SLOT);
		} while (reg == 0);
		bdma_free_slots = (uint8_t)reg;
	}

	dla_get_dma_address(engine->driver_context, engine->task->task_data,
				transfer->source_address,
				(void *)&source_addr,
				DESTINATION_DMA);
	dla_get_dma_address(engine->driver_context, engine->task->task_data,
				transfer->destination_address,
				(void *)&destination_addr,
				DESTINATION_DMA);

	ASSERT_GOTO((transfer->line_repeat <= 8192),
			ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO((transfer->surface_repeat <= 8192),
			ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO((transfer->line_size % 32) == 0,
			ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO(transfer->source_line >= transfer->line_size,
			ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO(transfer->destination_line >= transfer->line_size,
			ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO(transfer->source_surface >=
			(transfer->source_line * transfer->line_repeat),
			ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO(transfer->destination_surface >=
			(transfer->destination_line * transfer->line_repeat),
			ret, ERR(INVALID_INPUT), exit);

	/* config registers */
	high = HIGH32BITS(source_addr);
	low = LOW32BITS(source_addr);
	bdma_reg_write(CFG_SRC_ADDR_LOW, low);
	bdma_reg_write(CFG_SRC_ADDR_HIGH, high);
	high = HIGH32BITS(destination_addr);
	low = LOW32BITS(destination_addr);
	bdma_reg_write(CFG_DST_ADDR_LOW, low);
	bdma_reg_write(CFG_DST_ADDR_HIGH, high);
	bdma_reg_write(CFG_LINE, (transfer->line_size >> 5) - 1);
	reg = (map_mem[bdma_surface->source_type] <<
			SHIFT(BDMA_CFG_CMD_0, SRC_RAM_TYPE)) |
		(map_mem[bdma_surface->destination_type] <<
			SHIFT(BDMA_CFG_CMD_0, DST_RAM_TYPE));
	bdma_reg_write(CFG_CMD, reg);
	bdma_reg_write(CFG_LINE_REPEAT, transfer->line_repeat - 1);
	bdma_reg_write(CFG_SRC_LINE, transfer->source_line);
	bdma_reg_write(CFG_DST_LINE, transfer->destination_line);
	bdma_reg_write(CFG_SURF_REPEAT, transfer->surface_repeat - 1);
	bdma_reg_write(CFG_SRC_SURF, transfer->source_surface);
	bdma_reg_write(CFG_DST_SURF, transfer->destination_surface);
	bdma_reg_write(CFG_OP, FIELD_ENUM(BDMA_CFG_OP_0, EN, ENABLE));

	dla_debug("Exit: %s\n", __func__);

exit:
	RETURN(ret);
}

int
dla_bdma_is_ready(struct dla_processor *processor,
			struct dla_processor_group *group)
{
	struct dla_processor_group *next_group;

	next_group = &processor->groups[!group->id];

	/**
	 * If the other group is already programmed but not yet active,
	 * do not program this operation: BDMA has no real shadow copies
	 * for groups, so doing so would end up programming the same
	 * group. Wait for the other group to be enabled first.
	 */
	if ((processor->group_status & (1 << next_group->id)) &&
						!next_group->active)
		return 0;

	return 1;
}

void
dla_bdma_dump_config(struct dla_processor_group *group)
{
	struct dla_bdma_op_desc *bdma_op;
	struct dla_bdma_surface_desc *bdma_surface;

	bdma_surface = &group->surface_desc->bdma_surface;
	bdma_op = &group->operation_desc->bdma_op;

	dla_debug_bdma_surface_desc(bdma_surface, group->roi_index);
	dla_debug_bdma_op_desc(bdma_op, group->roi_index);
}

int
dla_bdma_program(struct dla_processor_group *group)
{
	int32_t i;
	int32_t ret = 0;
	struct dla_bdma_surface_desc *bdma_surface;
	struct dla_engine *engine = dla_get_engine();

	dla_debug("Enter: %s\n", __func__);

	if (!engine->config_data->bdma_enable) {
		dla_error("BDMA is not supported for this configuration\n");
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	bdma_surface = &group->surface_desc->bdma_surface;

	dla_debug("Num of transfers %u\n", bdma_surface->num_transfers);
	if (bdma_surface->num_transfers == (uint16_t)0)
		goto exit;

	if (bdma_surface->num_transfers > NUM_MAX_BDMA_OPS) {
		dla_error("Invalid number of transfers\n");
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	for (i = 0; i < bdma_surface->num_transfers; i++) {
		ret = processor_bdma_program_slot(bdma_surface,
						&bdma_surface->transfers[i]);
		if (ret)
			goto exit;
	}

	dla_enable_intr(MASK(GLB_S_INTR_MASK_0, BDMA_DONE_MASK1) |
			MASK(GLB_S_INTR_MASK_0, BDMA_DONE_MASK0));

exit:
	dla_debug("Exit: %s\n", __func__);
	RETURN(ret);
}
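The register programming above leans on three helpers from the generated NVDLA register headers: SHIFT() gives a field's start bit, MASK() its bit mask, and FIELD_ENUM() the numeric value of a named field setting. A self-contained sketch of that compose/decode pattern; the macros and bit layout below are simplified stand-ins invented for illustration, not the real opendla.h definitions:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for SHIFT()/MASK()/FIELD_ENUM(): assume one bit each for
 * the SRC and DST RAM-type fields of a CFG_CMD-style register. */
#define SHIFT_SRC_RAM_TYPE	0
#define SHIFT_DST_RAM_TYPE	1
#define MASK_SRC_RAM_TYPE	(0x1u << SHIFT_SRC_RAM_TYPE)
#define ENUM_RAM_TYPE_MC	0u	/* external DRAM via the MC path */
#define ENUM_RAM_TYPE_CVSRAM	1u	/* on-chip CV-SRAM */

int main(void)
{
	/* Compose the register image the way processor_bdma_program_slot()
	 * builds CFG_CMD: each enum value shifted into its field, OR-ed in. */
	uint32_t reg = (ENUM_RAM_TYPE_MC << SHIFT_SRC_RAM_TYPE) |
			(ENUM_RAM_TYPE_CVSRAM << SHIFT_DST_RAM_TYPE);

	/* Decode a field back out, as the FREE_SLOT polling loop does. */
	uint32_t src = (reg & MASK_SRC_RAM_TYPE) >> SHIFT_SRC_RAM_TYPE;

	printf("reg=0x%08x src_ram_type=%u\n", reg, src);	/* 0x2, 0 */
	return 0;
}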
drivers/nvdla/cache.c (new file, 253 lines)
@@ -0,0 +1,253 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <opendla.h>
#include <dla_debug.h>
#include <dla_engine.h>
#include <dla_interface.h>

#include "dla_engine_internal.h"

#define DLA_OP_CACHE_SIZE (DLA_NUM_GROUPS * ((DLA_OP_NUM + 2) * 2))

static struct dla_common_op_desc desc_cache[DLA_OP_NUM][DLA_OP_CACHE_SIZE];
static int32_t desc_refcount[DLA_OP_NUM][DLA_OP_CACHE_SIZE];

void
dla_get_refcount(struct dla_common_op_desc *op_desc)
{
	int32_t i;
	struct dla_common_op_desc *desc = NULL;

	if (op_desc == NULL)
		return;

	if (op_desc->index == -1)
		return;

	desc = &desc_cache[op_desc->op_type][0];

	for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
		if (desc->index == op_desc->index &&
			desc->roi_index == op_desc->roi_index) {
			desc_refcount[op_desc->op_type][i]++;
			return;
		}
	}
}

struct dla_common_op_desc *
dla_get_op_desc(struct dla_task *task, int16_t index,
			uint8_t op_type, uint8_t roi_index)
{
	int32_t i;
	int32_t ret;
	uint64_t op_base;
	uint64_t dep_graph_addr;
	struct dla_common_op_desc *desc = NULL;
	struct dla_engine *engine = dla_get_engine();

	if (index == -1) {
		dla_debug("no desc fetched because index == -1\n");
		goto exit;
	}

	dep_graph_addr = (sizeof(struct dla_common_op_desc) *
				engine->network->num_operations * roi_index);

	desc = &desc_cache[op_type][0];

	for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
		if (desc->index == index && desc->roi_index == roi_index) {
			if (desc->op_type != op_type) {
				dla_error("op_cache[op=%u] contains incorrect "
						"entry of op[%u]\n", op_type,
						desc->op_type);
				continue;
			}
			desc_refcount[op_type][i]++;
			goto exit;
		}
	}

	desc = &desc_cache[op_type][0];

	for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
		if (desc->index == -1) {
			op_base = dep_graph_addr +
					(sizeof(struct dla_common_op_desc) *
					(uint64_t)index);
			ret = dla_data_read(engine->driver_context,
					task->task_data,
					task->dependency_graph_addr,
					(void *)(desc),
					sizeof(struct dla_common_op_desc),
					op_base);
			if (ret) {
				desc = NULL;
				goto exit;
			}

			if (op_type != desc->op_type) {
				/*
				 * The op_type of an entry read from DRAM
				 * must match the requested op_type; a
				 * mismatch means the wrong entry was
				 * fetched, so report it as an error.
				 */
				dla_error("Fetched [op_type=%u] from DRAM doesn't "
						"match with op_type[%u]\n",
						desc->op_type,
						op_type);
				desc->op_type = op_type;
				desc->index = -1;
				desc->roi_index = -1;
				desc = NULL;
				goto exit;
			}

			desc->index = index;
			desc->roi_index = roi_index;

			/**
			 * Refcount must be 0 when the descriptor is read
			 * from DRAM for the first time.
			 */
			assert(desc_refcount[op_type][i] == 0);

			desc_refcount[op_type][i]++;
			goto exit;
		}
	}

exit:
	return desc;
}

static void
dla_free_op_desc(struct dla_common_op_desc *op_desc)
{
	uint64_t op_base;
	uint64_t dep_graph_addr;
	struct dla_task *task;
	struct dla_engine *engine = dla_get_engine();

	/* the NULL check must come before op_desc is dereferenced */
	if (op_desc == NULL)
		goto exit;

	dla_debug("Enter: %s op desc index %u ROI %d\n", __func__,
			op_desc->index, op_desc->roi_index);

	task = engine->task;
	dep_graph_addr = (sizeof(struct dla_common_op_desc) *
				engine->network->num_operations *
				op_desc->roi_index);

	if (op_desc->index == -1)
		goto exit;

	/**
	 * TODO: keeping the depth value hardcoded as 0 for now,
	 * replace it once the corresponding implementation is done.
	 */
	op_base = (dep_graph_addr +
			(sizeof(struct dla_common_op_desc) *
			(uint64_t)op_desc->index));

	/**
	 * Flush the descriptor to DRAM.
	 */
	dla_data_write(engine->driver_context,
			task->task_data,
			(void *)op_desc,
			task->dependency_graph_addr,
			sizeof(struct dla_common_op_desc),
			op_base);

	/**
	 * Release it.
	 */
	op_desc->index = -1;
	op_desc->roi_index = -1;
exit:
	dla_debug("Exit: %s\n", __func__);
}

void
dla_put_op_desc(struct dla_common_op_desc *op_desc)
{
	int32_t i;
	struct dla_common_op_desc *desc;

	if (op_desc == NULL)
		return;

	if (op_desc->index == -1)
		return;

	desc = &desc_cache[op_desc->op_type][0];

	for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
		if (desc->index == op_desc->index &&
			desc->roi_index == op_desc->roi_index) {
			/**
			 * Refcount can't be 0 when we are trying to free it.
			 */
			assert(desc_refcount[op_desc->op_type][i] > 0);

			desc_refcount[op_desc->op_type][i]--;

			/**
			 * Free the descriptor if its refcount drops to 0.
			 */
			if (desc_refcount[op_desc->op_type][i] == 0)
				dla_free_op_desc(op_desc);

			return;
		}
	}
}

void
dla_init_op_cache(struct dla_engine *engine)
{
	int32_t i, j;
	struct dla_common_op_desc *desc = &desc_cache[0][0];

	dla_memset((uint8_t *)&desc_cache[0][0], 0, sizeof(desc_cache));
	dla_memset((uint8_t *)&desc_refcount[0][0], 0, sizeof(desc_refcount));

	for (i = 0; i < DLA_OP_NUM; i++) {
		for (j = 0; j < DLA_OP_CACHE_SIZE; j++) {
			desc->index = -1;
			desc->roi_index = -1;
			desc->op_type = (uint8_t)i;
			desc++;
		}
	}
}
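The cache implements simple reference counting: dla_get_op_desc() finds a cached descriptor (reading it from DRAM on first use) and bumps its refcount, while dla_put_op_desc() drops the refcount and flushes-and-frees the slot at zero. A minimal standalone model of that lifecycle, with the DRAM read/write replaced by slot bookkeeping (everything below is a toy stand-in, not driver code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model of the desc_cache/desc_refcount pairing: a slot is live
 * while its refcount is non-zero and is recycled (index reset to -1)
 * when the last user puts it back, mirroring dla_put_op_desc(). */
#define CACHE_SIZE 4

static int16_t cache_index[CACHE_SIZE];
static int32_t refcount[CACHE_SIZE];

static int get_slot(int16_t index)
{
	for (int i = 0; i < CACHE_SIZE; i++)
		if (cache_index[i] == index) {	/* cache hit */
			refcount[i]++;
			return i;
		}
	for (int i = 0; i < CACHE_SIZE; i++)
		if (cache_index[i] == -1) {	/* miss: claim a free slot */
			assert(refcount[i] == 0);
			cache_index[i] = index;
			refcount[i]++;
			return i;
		}
	return -1;			/* cache full */
}

static void put_slot(int slot)
{
	assert(refcount[slot] > 0);
	if (--refcount[slot] == 0)
		cache_index[slot] = -1;	/* "flush" and release the slot */
}

int main(void)
{
	for (int i = 0; i < CACHE_SIZE; i++)
		cache_index[i] = -1;

	int a = get_slot(7);
	int b = get_slot(7);			/* second get is a hit */
	printf("slot=%d refcount=%d\n", a, refcount[a]);	/* slot=0 refcount=2 */
	put_slot(a);
	put_slot(b);
	printf("recycled=%d\n", cache_index[0] == -1);		/* recycled=1 */
	return 0;
}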
drivers/nvdla/cdp.c (new file, 384 lines)
@@ -0,0 +1,384 @@
/*
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <opendla.h>
#include <dla_debug.h>
#include <dla_err.h>
#include <dla_interface.h>

#include "common.h"
#include "dla_engine_internal.h"
#include "engine_debug.h"

static const uint8_t map_ram[] = {
	FIELD_ENUM(CDP_RDMA_D_SRC_DMA_CFG_0, SRC_RAM_TYPE, MC),
	FIELD_ENUM(CDP_RDMA_D_SRC_DMA_CFG_0, SRC_RAM_TYPE, CV),
};

static const uint8_t map_precision[] = {
	FIELD_ENUM(CDP_RDMA_D_DATA_FORMAT_0, INPUT_DATA, INT8),
	FIELD_ENUM(CDP_RDMA_D_DATA_FORMAT_0, INPUT_DATA, INT16),
	FIELD_ENUM(CDP_RDMA_D_DATA_FORMAT_0, INPUT_DATA, FP16),
};

static const uint8_t map_perf_dma[] = {
	FIELD_ENUM(CDP_D_PERF_ENABLE_0, DMA_EN, DISABLE),
	FIELD_ENUM(CDP_D_PERF_ENABLE_0, DMA_EN, ENABLE),
};

static const uint8_t map_perf_lut[] = {
	FIELD_ENUM(CDP_D_PERF_ENABLE_0, LUT_EN, DISABLE),
	FIELD_ENUM(CDP_D_PERF_ENABLE_0, LUT_EN, ENABLE),
};

#if STAT_ENABLE
void
dla_cdp_stat_data(struct dla_processor *processor,
			struct dla_processor_group *group)
{
	uint64_t end_time = 0;
	struct dla_cdp_stat_desc *cdp_stat;

	cdp_stat = &processor->stat_data_desc->cdp_stat;

	end_time = dla_get_time_us();

	cdp_stat->write_stall = cdp_reg_read(D_PERF_WRITE_STALL);
	cdp_stat->lut_uflow = cdp_reg_read(D_PERF_LUT_UFLOW);
	cdp_stat->lut_oflow = cdp_reg_read(D_PERF_LUT_OFLOW);
	cdp_stat->lut_hybrid = cdp_reg_read(D_PERF_LUT_HYBRID);
	cdp_stat->lut_le_hit = cdp_reg_read(D_PERF_LUT_LE_HIT);
	cdp_stat->lut_lo_hit = cdp_reg_read(D_PERF_LUT_LO_HIT);
	cdp_stat->runtime = (uint32_t)(end_time - group->start_time);
}

void
dla_cdp_dump_stat(struct dla_processor *processor)
{
	struct dla_cdp_stat_desc *cdp_stat;

	cdp_stat = &processor->stat_data_desc->cdp_stat;

	dla_debug_cdp_stats(cdp_stat);
}
#endif /* STAT_ENABLE */

static uint32_t
map_local_size(uint8_t local_size)
{
	return ((local_size - 1) / 2) - 1;
}

void
dla_cdp_set_producer(int32_t group_id, int32_t rdma_group_id)
{
	uint32_t reg;

	/**
	 * Set the producer pointer for all sub-modules.
	 */
	reg = group_id << SHIFT(CDP_S_POINTER_0, PRODUCER);
	cdp_reg_write(S_POINTER, reg);
	reg = group_id << SHIFT(CDP_RDMA_S_POINTER_0, PRODUCER);
	cdp_rdma_reg_write(S_POINTER, reg);
}

int
dla_cdp_enable(struct dla_processor_group *group)
{
	uint32_t reg;
	uint8_t perf_reg;
	struct dla_engine *engine = dla_get_engine();

	dla_debug("Enter: %s\n", __func__);

	if (engine->stat_enable == (uint32_t)1) {
		perf_reg = (map_perf_dma[1] <<
				SHIFT(CDP_D_PERF_ENABLE_0, DMA_EN)) |
			(map_perf_lut[1] <<
				SHIFT(CDP_D_PERF_ENABLE_0, LUT_EN));

		cdp_reg_write(D_PERF_ENABLE, perf_reg);
		group->start_time = dla_get_time_us();
	}

	/**
	 * Enable all sub-modules.
	 */
	reg = FIELD_ENUM(CDP_RDMA_D_OP_ENABLE_0, OP_EN, ENABLE);
	cdp_rdma_reg_write(D_OP_ENABLE, reg);
	reg = FIELD_ENUM(CDP_D_OP_ENABLE_0, OP_EN, ENABLE);
	cdp_reg_write(D_OP_ENABLE, reg);

	dla_debug("Exit: %s\n", __func__);

	RETURN(0);
}

void
dla_cdp_rdma_check(struct dla_processor_group *group)
{
	group->is_rdma_needed = 1;
}

static int32_t
processor_cdp_program(struct dla_processor_group *group)
{
	int32_t ret = 0;
	uint32_t reg, high, low;
	uint64_t input_address = 0;
	uint64_t output_address = 0;
	struct dla_lut_param lut;
	struct dla_engine *engine = dla_get_engine();
	struct dla_cdp_op_desc *cdp_op;
	struct dla_cdp_surface_desc *cdp_surface;

	dla_debug("Enter: %s\n", __func__);

	cdp_op = &group->operation_desc->cdp_op;
	cdp_surface = &group->surface_desc->cdp_surface;

	/* argument check */
	if (cdp_surface->src_data.type == DLA_MEM_HW) {
		dla_error("Invalid source memory type\n");
		ret = ERR(INVALID_INPUT);
		goto exit;
	}
	if (cdp_surface->dst_data.type == DLA_MEM_HW) {
		dla_error("Invalid destination memory type\n");
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	if (cdp_op->in_precision != cdp_op->out_precision) {
		dla_error("CDP does not support precision conversion\n");
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	/* get the addresses from the task descriptor */
	ret = dla_read_input_address(&cdp_surface->src_data,
					&input_address,
					group->op_desc->index,
					group->roi_index,
					1);
	if (ret)
		goto exit;

	dla_get_dma_cube_address(engine->driver_context,
				engine->task->task_data,
				cdp_surface->dst_data.address,
				cdp_surface->dst_data.offset,
				(void *)&output_address,
				DESTINATION_DMA);
	if (cdp_op->lut_index >= 0) {
		group->lut_index = cdp_op->lut_index;
		dla_read_lut(engine, cdp_op->lut_index, (void *)&lut);
		dla_debug_lut_params(&lut);
	}

	/* config CDP RDMA registers */
	reg = ((cdp_surface->src_data.width - 1)
		<< SHIFT(CDP_RDMA_D_DATA_CUBE_WIDTH_0, WIDTH));
	cdp_rdma_reg_write(D_DATA_CUBE_WIDTH, reg);

	reg = ((cdp_surface->src_data.height - 1)
		<< SHIFT(CDP_RDMA_D_DATA_CUBE_HEIGHT_0, HEIGHT));
	cdp_rdma_reg_write(D_DATA_CUBE_HEIGHT, reg);

	reg = ((cdp_surface->src_data.channel - 1)
		<< SHIFT(CDP_RDMA_D_DATA_CUBE_CHANNEL_0, CHANNEL));
	cdp_rdma_reg_write(D_DATA_CUBE_CHANNEL, reg);

	high = HIGH32BITS(input_address);
	low = LOW32BITS(input_address);
	cdp_rdma_reg_write(D_SRC_BASE_ADDR_LOW, low);
	cdp_rdma_reg_write(D_SRC_BASE_ADDR_HIGH, high);

	cdp_rdma_reg_write(D_SRC_LINE_STRIDE,
				cdp_surface->src_data.line_stride);
	cdp_rdma_reg_write(D_SRC_SURFACE_STRIDE,
				cdp_surface->src_data.surf_stride);

	reg = (map_ram[cdp_surface->src_data.type]
		<< SHIFT(CDP_RDMA_D_SRC_DMA_CFG_0, SRC_RAM_TYPE));
	cdp_rdma_reg_write(D_SRC_DMA_CFG, reg);

	reg = (map_precision[cdp_op->in_precision]
		<< SHIFT(CDP_RDMA_D_DATA_FORMAT_0, INPUT_DATA));
	cdp_rdma_reg_write(D_DATA_FORMAT, reg);

	/* config CDP */
	if (cdp_op->lut_index >= 0)
		update_lut(CDP_S_LUT_ACCESS_CFG_0, &lut, cdp_op->in_precision);

	high = HIGH32BITS(output_address);
	low = LOW32BITS(output_address);
	cdp_reg_write(D_DST_BASE_ADDR_LOW, low);
	cdp_reg_write(D_DST_BASE_ADDR_HIGH, high);

	cdp_reg_write(D_DST_LINE_STRIDE, cdp_surface->dst_data.line_stride);
	cdp_reg_write(D_DST_SURFACE_STRIDE, cdp_surface->dst_data.surf_stride);

	reg = (map_ram[cdp_surface->dst_data.type]
		<< SHIFT(CDP_D_DST_DMA_CFG_0, DST_RAM_TYPE));
	cdp_reg_write(D_DST_DMA_CFG, reg);

	reg = (map_precision[cdp_op->in_precision]
		<< SHIFT(CDP_D_DATA_FORMAT_0, INPUT_DATA_TYPE));
	cdp_reg_write(D_DATA_FORMAT, reg);

	reg = (map_local_size(cdp_op->local_size)
		<< SHIFT(CDP_D_LRN_CFG_0, NORMALZ_LEN));
	cdp_reg_write(D_LRN_CFG, reg);

	reg = (cdp_op->in_cvt.offset
		<< SHIFT(CDP_D_DATIN_OFFSET_0, DATIN_OFFSET));
	cdp_reg_write(D_DATIN_OFFSET, reg);

	reg = (cdp_op->in_cvt.scale
		<< SHIFT(CDP_D_DATIN_SCALE_0, DATIN_SCALE));
	cdp_reg_write(D_DATIN_SCALE, reg);

	reg = (cdp_op->in_cvt.truncate
		<< SHIFT(CDP_D_DATIN_SHIFTER_0, DATIN_SHIFTER));
	cdp_reg_write(D_DATIN_SHIFTER, reg);

	reg = (cdp_op->out_cvt.offset
		<< SHIFT(CDP_D_DATOUT_OFFSET_0, DATOUT_OFFSET));
	cdp_reg_write(D_DATOUT_OFFSET, reg);

	reg = (cdp_op->out_cvt.scale
		<< SHIFT(CDP_D_DATOUT_SCALE_0, DATOUT_SCALE));
	cdp_reg_write(D_DATOUT_SCALE, reg);

	reg = (cdp_op->out_cvt.truncate
		<< SHIFT(CDP_D_DATOUT_SHIFTER_0, DATOUT_SHIFTER));
	cdp_reg_write(D_DATOUT_SHIFTER, reg);

	reg = ((cdp_op->bypass_sqsum ?
		FIELD_ENUM(CDP_D_FUNC_BYPASS_0, SQSUM_BYPASS, ENABLE) :
		FIELD_ENUM(CDP_D_FUNC_BYPASS_0, SQSUM_BYPASS, DISABLE)) <<
		SHIFT(CDP_D_FUNC_BYPASS_0, SQSUM_BYPASS)) |
		((cdp_op->bypass_out_mul ?
		FIELD_ENUM(CDP_D_FUNC_BYPASS_0, MUL_BYPASS, ENABLE) :
		FIELD_ENUM(CDP_D_FUNC_BYPASS_0, MUL_BYPASS, DISABLE)) <<
		SHIFT(CDP_D_FUNC_BYPASS_0, MUL_BYPASS));
	cdp_reg_write(D_FUNC_BYPASS, reg);

exit:
	dla_debug("Exit: %s", __func__);
	RETURN(ret);
}

int
dla_cdp_is_ready(struct dla_processor *processor,
			struct dla_processor_group *group)
{
	struct dla_processor_group *next_group;
	struct dla_cdp_op_desc *cdp_op;

	cdp_op = &group->operation_desc->cdp_op;
	next_group = &processor->groups[!group->id];

	/**
	 * A single LUT is shared between the two CDP groups, so usage
	 * must not conflict. LUT write access is also locked while the
	 * CDP sub-engine is active, so writing the LUT is delayed while
	 * the other group is active.
	 */

	/**
	 * If the current group needs no LUT, it can be programmed
	 * without further checks.
	 */
	if (cdp_op->lut_index == -1)
		return 1;

	/**
	 * If both groups use the same LUT, the current group can also be
	 * programmed without further checks: even if the other group is
	 * active and the LUT is locked, it was already programmed by
	 * that group.
	 */
	if (next_group->lut_index == cdp_op->lut_index)
		return 1;

	/**
	 * If the other group's LUT index is not -1, some LUT is already
	 * programmed; do not program the current LUT, which is known by
	 * now to be neither -1 nor the same as the other group's.
	 */
	if (next_group->lut_index != -1)
		return 0;

	/**
	 * If the current group needs a different LUT than the other
	 * group and that group is not active, program it.
	 */
	if (!next_group->active)
		return 1;

	/**
	 * Reaching here means the current group uses a different LUT
	 * than the other group while that group is active; wait for it
	 * to become idle.
	 */

	return 0;
}

void
dla_cdp_dump_config(struct dla_processor_group *group)
{
	struct dla_cdp_op_desc *cdp_op;
	struct dla_cdp_surface_desc *cdp_surface;

	cdp_surface = &group->surface_desc->cdp_surface;
	cdp_op = &group->operation_desc->cdp_op;

	dla_debug_cdp_surface_desc(cdp_surface, group->roi_index);
	dla_debug_cdp_op_desc(cdp_op, group->roi_index);
}

int
dla_cdp_program(struct dla_processor_group *group)
{
	int32_t ret;

	dla_debug("Enter: %s", __func__);
	dla_enable_intr(MASK(GLB_S_INTR_MASK_0, CDP_DONE_MASK1) |
			MASK(GLB_S_INTR_MASK_0, CDP_DONE_MASK0));

	ret = processor_cdp_program(group);
	if (ret)
		goto exit;

exit:
	dla_debug("Exit: %s", __func__);
	RETURN(ret);
}
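map_local_size() converts the LRN window size into the encoding the NORMALZ_LEN field expects, via (local_size - 1) / 2 - 1, which maps consecutive odd window sizes to consecutive register codes (the supported-size list 3/5/7/9 is inferred from that formula, not stated in this commit). A quick standalone check of the arithmetic:

#include <stdint.h>
#include <stdio.h>

static uint32_t map_local_size(uint8_t local_size)
{
	return ((local_size - 1) / 2) - 1;	/* same formula as cdp.c */
}

int main(void)
{
	/* Odd window sizes map to consecutive register codes. */
	for (uint8_t n = 3; n <= 9; n += 2)
		printf("local_size=%u -> NORMALZ_LEN=%u\n",
			(unsigned)n, map_local_size(n));
	/* Prints 3->0, 5->1, 7->2, 9->3. */
	return 0;
}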
drivers/nvdla/common.c (new file, 324 lines)
@@ -0,0 +1,324 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <opendla.h>
#include <dla_debug.h>
#include <dla_err.h>
#include <dla_interface.h>

#include "common.h"
#include "dla_engine_internal.h"

static const uint8_t map_lut_method[] = {
	FIELD_ENUM(CDP_S_LUT_CFG_0, LUT_LE_FUNCTION, EXPONENT),
	FIELD_ENUM(CDP_S_LUT_CFG_0, LUT_LE_FUNCTION, LINEAR)
};
static const uint8_t map_lut_out[] = {
	FIELD_ENUM(CDP_S_LUT_CFG_0, LUT_UFLOW_PRIORITY, LE),
	FIELD_ENUM(CDP_S_LUT_CFG_0, LUT_UFLOW_PRIORITY, LO)
};

static const uint16_t access_data_offset[] = {
	CDP_S_LUT_ACCESS_DATA_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_ACCESS_DATA_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t lut_cfg_offset[] = {
	CDP_S_LUT_CFG_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_CFG_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t lut_info_offset[] = {
	CDP_S_LUT_INFO_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_INFO_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t le_start_offset[] = {
	CDP_S_LUT_LE_START_LOW_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_LE_START_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t le_end_offset[] = {
	CDP_S_LUT_LE_END_LOW_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_LE_END_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t lo_start_offset[] = {
	CDP_S_LUT_LO_START_LOW_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_LO_START_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t lo_end_offset[] = {
	CDP_S_LUT_LO_END_LOW_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_LO_END_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t le_slope_scale_offset[] = {
	CDP_S_LUT_LE_SLOPE_SCALE_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_LE_SLOPE_SCALE_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t le_slope_shift_offset[] = {
	CDP_S_LUT_LE_SLOPE_SHIFT_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_LE_SLOPE_SHIFT_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t lo_slope_scale_offset[] = {
	CDP_S_LUT_LO_SLOPE_SCALE_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_LO_SLOPE_SCALE_0 - SDP_S_LUT_ACCESS_CFG_0,
};
static const uint16_t lo_slope_shift_offset[] = {
	CDP_S_LUT_LO_SLOPE_SHIFT_0 - CDP_S_LUT_ACCESS_CFG_0,
	SDP_S_LUT_LO_SLOPE_SHIFT_0 - SDP_S_LUT_ACCESS_CFG_0,
};

void update_lut(uint32_t reg_base, struct dla_lut_param *lut,
				uint8_t precision)
{
	int32_t i;
	uint32_t reg;
	uint32_t high, low;
	int32_t is_sdp = reg_base == SDP_S_LUT_ACCESS_CFG_0;
	struct dla_engine *engine = dla_get_engine();

	/* program raw table */
	reg = (FIELD_ENUM(CDP_S_LUT_ACCESS_CFG_0, LUT_TABLE_ID, LE)
		<< SHIFT(CDP_S_LUT_ACCESS_CFG_0, LUT_TABLE_ID)) |
		(FIELD_ENUM(CDP_S_LUT_ACCESS_CFG_0, LUT_ACCESS_TYPE, WRITE)
		<< SHIFT(CDP_S_LUT_ACCESS_CFG_0, LUT_ACCESS_TYPE));
	reg_write(reg_base, reg);

	for (i = 0; i < (1 << LUT_LINEAR_EXP_TABLE_ENTRY_LOG2) + 1; i++) {
		dla_reg_write(engine->driver_context,
				reg_base + access_data_offset[is_sdp],
				lut->linear_exp_table[i]);
	}

	/* program density table */
	reg = (FIELD_ENUM(CDP_S_LUT_ACCESS_CFG_0, LUT_TABLE_ID, LO)
		<< SHIFT(CDP_S_LUT_ACCESS_CFG_0, LUT_TABLE_ID)) |
		(FIELD_ENUM(CDP_S_LUT_ACCESS_CFG_0, LUT_ACCESS_TYPE, WRITE)
		<< SHIFT(CDP_S_LUT_ACCESS_CFG_0, LUT_ACCESS_TYPE));
	dla_reg_write(engine->driver_context, reg_base, reg);

	for (i = 0; i < (1 << LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2) + 1; i++) {
		dla_reg_write(engine->driver_context,
				reg_base + access_data_offset[is_sdp],
				lut->linear_only_table[i]);
	}

	/* program other configurations */
	reg = (map_lut_method[lut->method] <<
			SHIFT(CDP_S_LUT_CFG_0, LUT_LE_FUNCTION)) |
		(map_lut_out[lut->hybrid_priority] <<
			SHIFT(CDP_S_LUT_CFG_0, LUT_HYBRID_PRIORITY)) |
		(map_lut_out[lut->underflow_priority] <<
			SHIFT(CDP_S_LUT_CFG_0, LUT_UFLOW_PRIORITY)) |
		(map_lut_out[lut->overflow_priority] <<
			SHIFT(CDP_S_LUT_CFG_0, LUT_OFLOW_PRIORITY));
	dla_reg_write(engine->driver_context,
			reg_base + lut_cfg_offset[is_sdp], reg);

	if (lut->method == FIELD_ENUM(CDP_S_LUT_CFG_0,
					LUT_LE_FUNCTION, EXPONENT)) {
		reg = ((((uint32_t)lut->linear_exp_offset.exp_offset) <<
			SHIFT(CDP_S_LUT_INFO_0, LUT_LE_INDEX_OFFSET)) &
			MASK(CDP_S_LUT_INFO_0, LUT_LE_INDEX_OFFSET)) |
			((((uint32_t)lut->linear_only_offset.frac_bits) <<
			SHIFT(CDP_S_LUT_INFO_0, LUT_LO_INDEX_SELECT)) &
			MASK(CDP_S_LUT_INFO_0, LUT_LO_INDEX_SELECT));
	} else {
		reg = ((((uint32_t)lut->linear_exp_offset.frac_bits) <<
			SHIFT(CDP_S_LUT_INFO_0, LUT_LE_INDEX_SELECT)) &
			MASK(CDP_S_LUT_INFO_0, LUT_LE_INDEX_SELECT)) |
			((((uint32_t)lut->linear_only_offset.frac_bits) <<
			SHIFT(CDP_S_LUT_INFO_0, LUT_LO_INDEX_SELECT)) &
			MASK(CDP_S_LUT_INFO_0, LUT_LO_INDEX_SELECT));
	}
	dla_reg_write(engine->driver_context,
			reg_base + lut_info_offset[is_sdp], reg);
	high = HIGH32BITS(lut->linear_exp_start);
	low = LOW32BITS(lut->linear_exp_start);
	dla_reg_write(engine->driver_context,
			reg_base + le_start_offset[is_sdp], low);
	if (!is_sdp)
		dla_reg_write(engine->driver_context,
				reg_base + le_start_offset[is_sdp] + 4, high);

	high = HIGH32BITS(lut->linear_exp_end);
	low = LOW32BITS(lut->linear_exp_end);
	dla_reg_write(engine->driver_context,
			reg_base + le_end_offset[is_sdp], low);
	if (!is_sdp)
		dla_reg_write(engine->driver_context,
				reg_base + le_end_offset[is_sdp] + 4, high);

	high = HIGH32BITS(lut->linear_only_start);
	low = LOW32BITS(lut->linear_only_start);
	dla_reg_write(engine->driver_context,
			reg_base + lo_start_offset[is_sdp], low);
	if (!is_sdp)
		dla_reg_write(engine->driver_context,
				reg_base + lo_start_offset[is_sdp] + 4, high);

	high = HIGH32BITS(lut->linear_only_end);
	low = LOW32BITS(lut->linear_only_end);
	dla_reg_write(engine->driver_context,
			reg_base + lo_end_offset[is_sdp], low);
	if (!is_sdp)
		dla_reg_write(engine->driver_context,
				reg_base + lo_end_offset[is_sdp] + 4, high);

	if (precision == PRECISION_FP16) {
		reg = (lut->linear_exp_underflow_slope.data_f <<
			SHIFT(CDP_S_LUT_LE_SLOPE_SCALE_0,
				LUT_LE_SLOPE_UFLOW_SCALE)) |
			(lut->linear_exp_overflow_slope.data_f <<
			SHIFT(CDP_S_LUT_LE_SLOPE_SCALE_0,
				LUT_LE_SLOPE_OFLOW_SCALE));
		dla_reg_write(engine->driver_context,
				reg_base + le_slope_scale_offset[is_sdp], reg);

		reg = (lut->linear_only_underflow_slope.data_f <<
			SHIFT(CDP_S_LUT_LO_SLOPE_SCALE_0,
				LUT_LO_SLOPE_UFLOW_SCALE)) |
			(lut->linear_only_overflow_slope.data_f <<
			SHIFT(CDP_S_LUT_LO_SLOPE_SCALE_0,
				LUT_LO_SLOPE_OFLOW_SCALE));
		dla_reg_write(engine->driver_context,
				reg_base + lo_slope_scale_offset[is_sdp], reg);
	} else {
		union dla_slope *oslope;
		union dla_slope *uslope;

		uslope = &lut->linear_exp_underflow_slope;
		oslope = &lut->linear_exp_overflow_slope;
		reg = ((((uint32_t)uslope->data_i.scale)
			<< SHIFT(CDP_S_LUT_LE_SLOPE_SCALE_0,
				LUT_LE_SLOPE_UFLOW_SCALE)) &
			MASK(CDP_S_LUT_LE_SLOPE_SCALE_0,
				LUT_LE_SLOPE_UFLOW_SCALE)) |
			((((uint32_t)oslope->data_i.scale)
			<< SHIFT(CDP_S_LUT_LE_SLOPE_SCALE_0,
				LUT_LE_SLOPE_OFLOW_SCALE)) &
			MASK(CDP_S_LUT_LE_SLOPE_SCALE_0,
				LUT_LE_SLOPE_OFLOW_SCALE));
		dla_reg_write(engine->driver_context,
				reg_base + le_slope_scale_offset[is_sdp], reg);

		reg = ((((uint32_t)uslope->data_i.shifter) <<
			SHIFT(CDP_S_LUT_LE_SLOPE_SHIFT_0,
				LUT_LE_SLOPE_UFLOW_SHIFT)) &
			MASK(CDP_S_LUT_LE_SLOPE_SHIFT_0,
				LUT_LE_SLOPE_UFLOW_SHIFT)) |
			((((uint32_t)oslope->data_i.shifter) <<
			SHIFT(CDP_S_LUT_LE_SLOPE_SHIFT_0,
				LUT_LE_SLOPE_OFLOW_SHIFT)) &
			MASK(CDP_S_LUT_LE_SLOPE_SHIFT_0,
				LUT_LE_SLOPE_OFLOW_SHIFT));
		dla_reg_write(engine->driver_context,
				reg_base + le_slope_shift_offset[is_sdp], reg);

		uslope = &lut->linear_only_underflow_slope;
		oslope = &lut->linear_only_overflow_slope;
		reg = ((((uint32_t)uslope->data_i.scale) <<
			SHIFT(CDP_S_LUT_LO_SLOPE_SCALE_0,
				LUT_LO_SLOPE_UFLOW_SCALE)) &
			MASK(CDP_S_LUT_LO_SLOPE_SCALE_0,
				LUT_LO_SLOPE_UFLOW_SCALE)) |
			((((uint32_t)oslope->data_i.scale) <<
			SHIFT(CDP_S_LUT_LO_SLOPE_SCALE_0,
				LUT_LO_SLOPE_OFLOW_SCALE)) &
			MASK(CDP_S_LUT_LO_SLOPE_SCALE_0,
				LUT_LO_SLOPE_OFLOW_SCALE));
		dla_reg_write(engine->driver_context,
				reg_base + lo_slope_scale_offset[is_sdp], reg);
		reg = ((((uint32_t)uslope->data_i.shifter) <<
			SHIFT(CDP_S_LUT_LO_SLOPE_SHIFT_0,
				LUT_LO_SLOPE_UFLOW_SHIFT)) &
			MASK(CDP_S_LUT_LO_SLOPE_SHIFT_0,
				LUT_LO_SLOPE_UFLOW_SHIFT)) |
			((((uint32_t)oslope->data_i.shifter) <<
			SHIFT(CDP_S_LUT_LO_SLOPE_SHIFT_0,
				LUT_LO_SLOPE_OFLOW_SHIFT)) &
			MASK(CDP_S_LUT_LO_SLOPE_SHIFT_0,
				LUT_LO_SLOPE_OFLOW_SHIFT));
		dla_reg_write(engine->driver_context,
				reg_base + lo_slope_shift_offset[is_sdp], reg);
	}
}

int32_t
validate_data_cube(struct dla_data_cube src_data_cube,
			struct dla_data_cube dst_data_cube,
			uint8_t mem_type)
{
	int32_t ret = 0;

	dla_trace("Enter: %s", __func__);

	if ((src_data_cube.width > DCUBE_MAX_WIDTH) ||
		(src_data_cube.height > DCUBE_MAX_HEIGHT) ||
		(src_data_cube.channel > DCUBE_MAX_CHANNEL)) {
		dla_error("Invalid src input cube[W: %u, H: %u, C: %u]",
				src_data_cube.width, src_data_cube.height,
				src_data_cube.channel);
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	if ((dst_data_cube.width > DCUBE_MAX_WIDTH) ||
		(dst_data_cube.height > DCUBE_MAX_HEIGHT) ||
		(dst_data_cube.channel > DCUBE_MAX_CHANNEL)) {
		dla_error("Invalid dst input cube[W: %u, H: %u, C: %u]",
				dst_data_cube.width, dst_data_cube.height,
				dst_data_cube.channel);
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	if (src_data_cube.type > mem_type) {
		dla_error("Invalid src_data.mem_type: %u\n", src_data_cube.type);
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	if (dst_data_cube.type > mem_type) {
		dla_error("Invalid dst_data.mem_type: %u\n", dst_data_cube.type);
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}

int32_t
validate_precision(uint8_t precision, uint8_t map_precision)
{
	int32_t ret = 0;

	if (precision >= map_precision) {
		dla_error("Invalid precision: %u\n", precision);
		ret = ERR(INVALID_INPUT);
	}

	RETURN(ret);
}
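update_lut() is shared by CDP and SDP: reg_base selects the block, and every per-register offset table is indexed by is_sdp (0 for CDP, 1 for SDP), so one body programs either block's LUT registers without branching at each write. A condensed illustration of that dispatch idea; the addresses below are made up for the sketch, whereas the real offsets come from the opendla register headers:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical absolute addresses for the same logical register in two
 * blocks; in the driver these are CDP_S_LUT_* and SDP_S_LUT_* values. */
#define CDP_ACCESS_CFG	0x1000u
#define CDP_LUT_CFG	0x100cu
#define SDP_ACCESS_CFG	0x2000u
#define SDP_LUT_CFG	0x2008u

/* One table per register, indexed by is_sdp, holding the distance from
 * the block's LUT_ACCESS_CFG anchor -- the same scheme as common.c. */
static const uint16_t lut_cfg_offset[] = {
	CDP_LUT_CFG - CDP_ACCESS_CFG,
	SDP_LUT_CFG - SDP_ACCESS_CFG,
};

static void program_lut_cfg(uint32_t reg_base, uint32_t value)
{
	int is_sdp = (reg_base == SDP_ACCESS_CFG);

	/* stand-in for dla_reg_write(ctx, addr, value) */
	printf("write 0x%04x = 0x%08x\n",
		reg_base + lut_cfg_offset[is_sdp], value);
}

int main(void)
{
	program_lut_cfg(CDP_ACCESS_CFG, 0x5);	/* hits 0x100c */
	program_lut_cfg(SDP_ACCESS_CFG, 0x5);	/* hits 0x2008 */
	return 0;
}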
drivers/nvdla/common.h (new file, 47 lines)
@@ -0,0 +1,47 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __FIRMWARE_COMMON_H_
#define __FIRMWARE_COMMON_H_

#include <dla_interface.h>

#define DCUBE_MAX_WIDTH		8192
#define DCUBE_MAX_HEIGHT	8192
#define DCUBE_MAX_CHANNEL	8192

void update_lut(uint32_t reg_base,
		struct dla_lut_param *lut,
		uint8_t precision);
int32_t validate_data_cube(struct dla_data_cube src_data_cube,
		struct dla_data_cube dst_data_cube,
		uint8_t mem_type);
int32_t validate_precision(uint8_t precision,
		uint8_t map_precision);

#endif /* __FIRMWARE_COMMON_H_ */
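The validators cap each cube dimension at 8192 and reject memory types above the caller-supplied ceiling, with the cubes passed by value just as in common.c. A standalone sketch of the same bounds check; the struct and the max-type value here are local stand-ins, not the dla_interface.h definitions:

#include <stdint.h>
#include <stdio.h>

#define DCUBE_MAX_WIDTH		8192
#define DCUBE_MAX_HEIGHT	8192
#define DCUBE_MAX_CHANNEL	8192

/* Local stand-in for struct dla_data_cube (the real one lives in
 * dla_interface.h and has more members). */
struct cube { uint16_t width, height, channel; uint8_t type; };

static int valid_cube(struct cube c, uint8_t max_mem_type)
{
	if (c.width > DCUBE_MAX_WIDTH || c.height > DCUBE_MAX_HEIGHT ||
		c.channel > DCUBE_MAX_CHANNEL)
		return 0;
	return c.type <= max_mem_type;	/* type must not exceed the ceiling */
}

int main(void)
{
	struct cube ok  = { 224, 224, 64, 0 };
	struct cube bad = { 9000, 224, 64, 0 };	/* width over the cap */

	printf("ok=%d bad=%d\n", valid_cube(ok, 1), valid_cube(bad, 1));
	return 0;
}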
drivers/nvdla/conv.c (new file, 779 lines; listing truncated below)
|
@ -0,0 +1,779 @@
|
|||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <opendla.h>
|
||||
#include <dla_debug.h>
|
||||
#include <dla_err.h>
|
||||
#include <dla_interface.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "dla_engine_internal.h"
|
||||
#include "engine_debug.h"
|
||||
|
||||
static const uint8_t map_precision[] = {
|
||||
FIELD_ENUM(CDMA_D_MISC_CFG_0, IN_PRECISION, INT8),
|
||||
FIELD_ENUM(CDMA_D_MISC_CFG_0, IN_PRECISION, INT16),
|
||||
FIELD_ENUM(CDMA_D_MISC_CFG_0, IN_PRECISION, FP16),
|
||||
};
|
||||
|
||||
static const uint8_t map_conv[] = {
|
||||
FIELD_ENUM(CACC_D_MISC_CFG_0, CONV_MODE, DIRECT),
|
||||
FIELD_ENUM(CACC_D_MISC_CFG_0, CONV_MODE, WINOGRAD),
|
||||
};
|
||||
|
||||
static const uint8_t map_weight_fmt[] = {
|
||||
FIELD_ENUM(CSC_D_WEIGHT_FORMAT_0, WEIGHT_FORMAT, UNCOMPRESSED),
|
||||
FIELD_ENUM(CSC_D_WEIGHT_FORMAT_0, WEIGHT_FORMAT, COMPRESSED),
|
||||
};
|
||||
|
||||
static const uint8_t map_img_fmt[][2] = {
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_R8), 1},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_R10), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_R12), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_R16), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_R16_I), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_R16_F), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A16B16G16R16), 8},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_X16B16G16R16), 8},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A16B16G16R16_F), 8},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A16Y16U16V16), 8},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_V16U16Y16A16), 8},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A16Y16U16V16_F), 8},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A8B8G8R8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A8R8G8B8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_B8G8R8A8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_R8G8B8A8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_X8B8G8R8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_X8R8G8B8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_B8G8R8X8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_R8G8B8X8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A2B10G10R10), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A2R10G10B10), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_B10G10R10A2), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_R10G10B10A2), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A2Y10U10V10), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_V10U10Y10A2), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_A8Y8U8V8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_V8U8Y8A8), 4},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_Y8___U8V8_N444), 1},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_Y8___V8U8_N444), 1},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_Y10___U10V10_N444), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_Y10___V10U10_N444), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_Y12___U12V12_N444), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_Y12___V12U12_N444), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_Y16___U16V16_N444), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
PIXEL_FORMAT, T_Y16___V16U16_N444), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
DATAIN_FORMAT, FEATURE), 2},
|
||||
{FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
|
||||
DATAIN_FORMAT, PIXEL), 1},
|
||||
};
|
||||
|
||||
static const uint8_t map_pixel[] = {
|
||||
FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0, PIXEL_MAPPING, PITCH_LINEAR),
|
||||
};
|
||||
|
||||
static const uint8_t map_ram[] = {
|
||||
FIELD_ENUM(CDMA_D_DAIN_RAM_TYPE_0, DATAIN_RAM_TYPE, MCIF),
|
||||
FIELD_ENUM(CDMA_D_DAIN_RAM_TYPE_0, DATAIN_RAM_TYPE, CVIF),
|
||||
};
|
||||
|
||||
static const uint8_t map_mean[] = {
|
||||
FIELD_ENUM(CDMA_D_MEAN_FORMAT_0, MEAN_FORMAT, DISABLE),
|
||||
FIELD_ENUM(CDMA_D_MEAN_FORMAT_0, MEAN_FORMAT, ENABLE),
|
||||
};

#if STAT_ENABLE
void
dla_conv_stat_data(struct dla_processor *processor,
		   struct dla_processor_group *group)
{
	uint64_t end_time = 0;
	struct dla_conv_stat_desc *conv_stat;

	conv_stat = &processor->stat_data_desc->conv_stat;

	end_time = dla_get_time_us();

	conv_stat->data_read_stall = cdma_reg_read(D_PERF_DAT_READ_STALL);
	conv_stat->weight_read_stall = cdma_reg_read(D_PERF_WT_READ_STALL);
	conv_stat->data_read_latency = cdma_reg_read(D_PERF_DAT_READ_LATENCY);
	conv_stat->weight_read_latency = cdma_reg_read(D_PERF_WT_READ_LATENCY);
	conv_stat->nan_data_num = cdma_reg_read(D_NAN_INPUT_DATA_NUM);
	conv_stat->nan_weight_num = cdma_reg_read(D_NAN_INPUT_WEIGHT_NUM);
	conv_stat->inf_data_num = cdma_reg_read(D_INF_INPUT_DATA_NUM);
	conv_stat->inf_weight_num = cdma_reg_read(D_INF_INPUT_WEIGHT_NUM);
	conv_stat->saturation_count = cacc_reg_read(D_OUT_SATURATION);
	conv_stat->runtime = (uint32_t)(end_time - group->start_time);
}

void
dla_conv_dump_stat(struct dla_processor *processor)
{
	struct dla_conv_stat_desc *conv_stat;

	conv_stat = &processor->stat_data_desc->conv_stat;

	dla_debug_conv_stats(conv_stat);
}
#endif /* STAT_ENABLE */

static uint32_t
get_in_format(uint8_t format)
{
	uint32_t in_format = 0;

	if (format >= FORMAT_T_R8 && format < FORMAT_FEATURE) {
		in_format = FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
				       DATAIN_FORMAT, PIXEL);
	} else if (format == FORMAT_FEATURE) {
		in_format = FIELD_ENUM(CDMA_D_DATAIN_FORMAT_0,
				       DATAIN_FORMAT, FEATURE);
	} else {
		assert(0);
	}

	return in_format;
}

void
dla_conv_set_producer(int32_t group_id, int32_t rdma_group_id)
{
	uint32_t reg;

	/* set producer pointer for all sub-modules */
	reg = group_id << SHIFT(CACC_S_POINTER_0, PRODUCER);
	cacc_reg_write(S_POINTER, reg);
	cmac_a_reg_write(S_POINTER, reg);
	cmac_b_reg_write(S_POINTER, reg);
	csc_reg_write(S_POINTER, reg);
	cdma_reg_write(S_POINTER, reg);
}
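
/*
 * Note: each convolution sub-unit (CDMA/CSC/CMAC_A/CMAC_B/CACC) carries
 * two shadow register groups; the producer pointer written above selects
 * which group the subsequent D_* register writes land in, so software can
 * presumably program one group while the hardware executes from the other.
 */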

int
dla_conv_enable(struct dla_processor_group *group)
{
	uint32_t reg;
	struct dla_engine *engine = dla_get_engine();

	dla_trace("Enter: %s", __func__);

	do {
		reg = cdma_reg_read(S_CBUF_FLUSH_STATUS);
	} while (!(reg & MASK(CDMA_S_CBUF_FLUSH_STATUS_0, FLUSH_DONE)));

	if (engine->stat_enable == (uint32_t)1) {
		cdma_reg_write(D_PERF_ENABLE, 1);
		group->start_time = dla_get_time_us();
	}

	/* enable all sub-modules */
	reg = FIELD_ENUM(CACC_D_OP_ENABLE_0, OP_EN, ENABLE);
	cacc_reg_write(D_OP_ENABLE, reg);
	cmac_a_reg_write(D_OP_ENABLE, reg);
	cmac_b_reg_write(D_OP_ENABLE, reg);
	csc_reg_write(D_OP_ENABLE, reg);
	cdma_reg_write(D_OP_ENABLE, reg);

	dla_trace("Exit: %s", __func__);

	RETURN(0);
}
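
/*
 * Note: the do/while loop above busy-waits for the convolution buffer
 * flush to complete before the sub-units are enabled; there is no
 * timeout, so a wedged CDMA would stall the scheduler here.
 */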

void
dla_conv_rdma_check(struct dla_processor_group *group)
{
	group->is_rdma_needed = 0;
}

static int32_t
processor_conv_program(struct dla_processor_group *group)
{
	int32_t ret = 0;
	uint32_t reg, high, low, shift, mask;
	uint32_t stride_x, stride_y, pad_x, pad_y;
	uint64_t weight_address = 0;
	uint64_t wmb_address = 0;
	uint64_t wgs_address = 0;
	uint64_t input_address = 0;
	uint64_t output_address = 0;
	uint32_t atom_size = 0;
	bool weight_compress_support = false;
	struct dla_engine *engine = dla_get_engine();
	struct dla_conv_op_desc *conv_op;
	struct dla_conv_surface_desc *conv_surface;

	dla_trace("Enter: %s", __func__);

	weight_compress_support = engine->config_data->weight_compress_support;
	atom_size = engine->config_data->atom_size;
	conv_op = &group->operation_desc->conv_op;
	conv_surface = &group->surface_desc->conv_surface;

	if (conv_op->weight_format == WEIGHT_FORMAT_COMPRESSED) {
		ASSERT_GOTO((weight_compress_support), ret, ERR(INVALID_INPUT), exit);
		ASSERT_GOTO((conv_surface->wmb_data.address != -1),
			    ret, ERR(INVALID_INPUT), exit);
		dla_get_dma_cube_address(engine->driver_context,
					 engine->task->task_data,
					 conv_surface->wmb_data.address,
					 conv_surface->wmb_data.offset,
					 (void *)&wmb_address,
					 DESTINATION_DMA);
		CHECK_ALIGN(wmb_address, atom_size);
		CHECK_ALIGN(conv_surface->wmb_data.size, 128);

		ASSERT_GOTO((conv_surface->wgs_data.address != -1),
			    ret, ERR(INVALID_INPUT), exit);
		dla_get_dma_cube_address(engine->driver_context,
					 engine->task->task_data,
					 conv_surface->wgs_data.address,
					 conv_surface->wgs_data.offset,
					 (void *)&wgs_address,
					 DESTINATION_DMA);
		CHECK_ALIGN(wgs_address, atom_size);
		CHECK_ALIGN(conv_surface->wgs_data.size, 4);
	}

	if (conv_surface->weight_data.address != -1) {
		dla_get_dma_cube_address(engine->driver_context,
					 engine->task->task_data,
					 conv_surface->weight_data.address,
					 conv_surface->weight_data.offset,
					 (void *)&weight_address,
					 DESTINATION_DMA);
		CHECK_ALIGN(weight_address, atom_size);
		CHECK_ALIGN(conv_surface->weight_data.size, 128);
	}

	if (conv_surface->dst_data.address != -1) {
		dla_get_dma_cube_address(engine->driver_context,
					 engine->task->task_data,
					 conv_surface->dst_data.address,
					 conv_surface->dst_data.offset,
					 (void *)&output_address,
					 DESTINATION_DMA);
		CHECK_ALIGN(output_address, atom_size);
		CHECK_ALIGN(conv_surface->dst_data.size, atom_size);
		CHECK_ALIGN(conv_surface->dst_data.line_stride, atom_size);
		CHECK_ALIGN(conv_surface->dst_data.surf_stride, atom_size);
	}

	ret = dla_read_input_address(&conv_surface->src_data, &input_address,
				     group->op_desc->index, group->roi_index,
				     map_img_fmt[conv_op->data_format][1]);
	if (ret)
		goto exit;

	CHECK_ALIGN(input_address, atom_size);

	ASSERT_GOTO((conv_op->out_cvt.scale == 1), ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO((conv_op->out_cvt.offset == 0), ret, ERR(INVALID_INPUT), exit);

	/* check if the register group is idle */
	reg = cacc_reg_read(S_STATUS);
	mask = group->id ? MASK(CACC_S_STATUS_0, STATUS_1) :
			   MASK(CACC_S_STATUS_0, STATUS_0);
	shift = group->id ? SHIFT(CACC_S_STATUS_0, STATUS_1) :
			    SHIFT(CACC_S_STATUS_0, STATUS_0);
	reg = (reg & mask) >> shift;
	ASSERT_GOTO((reg == FIELD_ENUM(CACC_S_STATUS_0, STATUS_0, IDLE)),
		    ret, ERR(INVALID_INPUT), exit);

	reg = cmac_a_reg_read(S_STATUS);
	mask = group->id ? MASK(CMAC_A_S_STATUS_0, STATUS_1) :
			   MASK(CMAC_A_S_STATUS_0, STATUS_0);
	shift = group->id ? SHIFT(CMAC_A_S_STATUS_0, STATUS_1) :
			    SHIFT(CMAC_A_S_STATUS_0, STATUS_0);
	reg = (reg & mask) >> shift;
	ASSERT_GOTO((reg == FIELD_ENUM(CMAC_A_S_STATUS_0, STATUS_0, IDLE)),
		    ret, ERR(INVALID_INPUT), exit);

	reg = cmac_b_reg_read(S_STATUS);
	mask = group->id ? MASK(CMAC_B_S_STATUS_0, STATUS_1) :
			   MASK(CMAC_B_S_STATUS_0, STATUS_0);
	shift = group->id ? SHIFT(CMAC_B_S_STATUS_0, STATUS_1) :
			    SHIFT(CMAC_B_S_STATUS_0, STATUS_0);
	reg = (reg & mask) >> shift;
	ASSERT_GOTO((reg == FIELD_ENUM(CMAC_B_S_STATUS_0, STATUS_0, IDLE)),
		    ret, ERR(INVALID_INPUT), exit);

	reg = csc_reg_read(S_STATUS);
	mask = group->id ? MASK(CSC_S_STATUS_0, STATUS_1) :
			   MASK(CSC_S_STATUS_0, STATUS_0);
	shift = group->id ? SHIFT(CSC_S_STATUS_0, STATUS_1) :
			    SHIFT(CSC_S_STATUS_0, STATUS_0);
	reg = (reg & mask) >> shift;
	ASSERT_GOTO((reg == FIELD_ENUM(CSC_S_STATUS_0, STATUS_0, IDLE)),
		    ret, ERR(INVALID_INPUT), exit);

	reg = cdma_reg_read(S_STATUS);
	mask = group->id ? MASK(CDMA_S_STATUS_0, STATUS_1) :
			   MASK(CDMA_S_STATUS_0, STATUS_0);
	shift = group->id ? SHIFT(CDMA_S_STATUS_0, STATUS_1) :
			    SHIFT(CDMA_S_STATUS_0, STATUS_0);
	reg = (reg & mask) >> shift;
	ASSERT_GOTO((reg == FIELD_ENUM(CDMA_S_STATUS_0, STATUS_0, IDLE)),
		    ret, ERR(INVALID_INPUT), exit);
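
	/*
	 * The five checks above share one pattern: read S_STATUS, isolate
	 * the STATUS_0 or STATUS_1 field for this group with MASK()/SHIFT(),
	 * and refuse to program a group the hardware still reports busy.
	 * As an illustration (the field placement here is hypothetical, not
	 * taken from the register manual): if STATUS_1 occupied bits
	 * [17:16], MASK() would evaluate to 0x30000 and SHIFT() to 16, so
	 * (reg & 0x30000) >> 16 yields the status bits compared to IDLE.
	 */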

	/*
	 * configure each sub-module of the convolution core, in reverse
	 * pipeline order (CACC, CMAC, CSC, then CDMA)
	 */

	/* CACC */
	reg = (map_conv[conv_op->conv_mode] << SHIFT(CACC_D_MISC_CFG_0, CONV_MODE)) |
	      (map_precision[conv_op->out_precision] << SHIFT(CACC_D_MISC_CFG_0, PROC_PRECISION));
	cacc_reg_write(D_MISC_CFG, reg);

	reg = ((conv_surface->dst_data.width - 1) << SHIFT(CACC_D_DATAOUT_SIZE_0_0, DATAOUT_WIDTH)) |
	      ((conv_surface->dst_data.height - 1) << SHIFT(CACC_D_DATAOUT_SIZE_0_0, DATAOUT_HEIGHT));
	cacc_reg_write(D_DATAOUT_SIZE_0, reg);

	reg = ((conv_surface->dst_data.channel - 1) << SHIFT(CACC_D_DATAOUT_SIZE_1_0, DATAOUT_CHANNEL));
	cacc_reg_write(D_DATAOUT_SIZE_1, reg);

	low = LOW32BITS(output_address);
	cacc_reg_write(D_DATAOUT_ADDR, low);
	cacc_reg_write(D_BATCH_NUMBER, conv_op->batch - 1);
	cacc_reg_write(D_LINE_STRIDE, conv_surface->dst_data.line_stride);
	cacc_reg_write(D_SURF_STRIDE, conv_surface->dst_data.surf_stride);

	if (conv_surface->dst_data.width == 1 &&
	    conv_surface->dst_data.height == 1) {
		ASSERT_GOTO(((uint32_t)conv_surface->dst_data.line_stride ==
			     (uint32_t)(conv_surface->dst_data.width * atom_size)),
			    ret, ERR(INVALID_INPUT), exit);
		reg = (CACC_D_DATAOUT_MAP_0_LINE_PACKED_TRUE <<
		       SHIFT(CACC_D_DATAOUT_MAP_0, LINE_PACKED));
		reg |= (CACC_D_DATAOUT_MAP_0_SURF_PACKED_TRUE <<
			SHIFT(CACC_D_DATAOUT_MAP_0, SURF_PACKED));
	} else {
		reg = (FIELD_ENUM(CACC_D_DATAOUT_MAP_0, LINE_PACKED, FALSE) <<
		       SHIFT(CACC_D_DATAOUT_MAP_0, LINE_PACKED));
		reg |= (FIELD_ENUM(CACC_D_DATAOUT_MAP_0, SURF_PACKED, FALSE) <<
			SHIFT(CACC_D_DATAOUT_MAP_0, SURF_PACKED));
	}
	cacc_reg_write(D_DATAOUT_MAP, reg);

	cacc_reg_write(D_CLIP_CFG, conv_op->out_cvt.truncate);

	/* CMAC */
	reg = (map_conv[conv_op->conv_mode] << SHIFT(CMAC_A_D_MISC_CFG_0, CONV_MODE)) |
	      (map_precision[conv_op->out_precision] << SHIFT(CMAC_A_D_MISC_CFG_0, PROC_PRECISION));
	cmac_a_reg_write(D_MISC_CFG, reg);
	cmac_b_reg_write(D_MISC_CFG, reg);

	/* CSC */
	reg = (map_conv[conv_op->conv_mode] << SHIFT(CSC_D_MISC_CFG_0, CONV_MODE)) |
	      (map_precision[conv_op->out_precision] << SHIFT(CSC_D_MISC_CFG_0, IN_PRECISION)) |
	      (map_precision[conv_op->out_precision] << SHIFT(CSC_D_MISC_CFG_0, PROC_PRECISION)) |
	      (conv_op->data_reuse << SHIFT(CSC_D_MISC_CFG_0, DATA_REUSE)) |
	      (conv_op->weight_reuse << SHIFT(CSC_D_MISC_CFG_0, WEIGHT_REUSE)) |
	      (conv_op->skip_data_rls << SHIFT(CSC_D_MISC_CFG_0, SKIP_DATA_RLS)) |
	      (conv_op->skip_weight_rls << SHIFT(CSC_D_MISC_CFG_0, SKIP_WEIGHT_RLS));
	csc_reg_write(D_MISC_CFG, reg);

	reg = (get_in_format(conv_op->data_format) <<
	       SHIFT(CSC_D_DATAIN_FORMAT_0, DATAIN_FORMAT));
	csc_reg_write(D_DATAIN_FORMAT, reg);

	reg = ((conv_op->input_width_csc - 1) << SHIFT(CSC_D_DATAIN_SIZE_EXT_0_0, DATAIN_WIDTH_EXT)) |
	      ((conv_op->input_height_csc - 1) << SHIFT(CSC_D_DATAIN_SIZE_EXT_0_0, DATAIN_HEIGHT_EXT));
	csc_reg_write(D_DATAIN_SIZE_EXT_0, reg);

	reg = ((conv_op->input_channel_csc - 1) << SHIFT(CSC_D_DATAIN_SIZE_EXT_1_0, DATAIN_CHANNEL_EXT));
	csc_reg_write(D_DATAIN_SIZE_EXT_1, reg);

	reg = ((conv_op->batch - 1) << SHIFT(CSC_D_BATCH_NUMBER_0, BATCHES));
	csc_reg_write(D_BATCH_NUMBER, reg);
	reg = (conv_op->post_extension << SHIFT(CSC_D_POST_Y_EXTENSION_0, Y_EXTENSION));
	csc_reg_write(D_POST_Y_EXTENSION, reg);

	reg = ((conv_op->entry_per_slice - 1) << SHIFT(CSC_D_ENTRY_PER_SLICE_0, ENTRIES));
	csc_reg_write(D_ENTRY_PER_SLICE, reg);

	reg = (map_weight_fmt[conv_op->weight_format] << SHIFT(CSC_D_WEIGHT_FORMAT_0, WEIGHT_FORMAT));
	csc_reg_write(D_WEIGHT_FORMAT, reg);

	reg = ((conv_op->kernel_width_csc - 1) << SHIFT(CSC_D_WEIGHT_SIZE_EXT_0_0, WEIGHT_WIDTH_EXT)) |
	      ((conv_op->kernel_height_csc - 1) << SHIFT(CSC_D_WEIGHT_SIZE_EXT_0_0, WEIGHT_HEIGHT_EXT));
	csc_reg_write(D_WEIGHT_SIZE_EXT_0, reg);

	reg = ((conv_op->kernel_channel_csc - 1) << SHIFT(CSC_D_WEIGHT_SIZE_EXT_1_0, WEIGHT_CHANNEL_EXT)) |
	      ((conv_surface->dst_data.channel - 1) << SHIFT(CSC_D_WEIGHT_SIZE_EXT_1_0, WEIGHT_KERNEL));
	csc_reg_write(D_WEIGHT_SIZE_EXT_1, reg);

	csc_reg_write(D_WEIGHT_BYTES, conv_surface->weight_data.size);
	csc_reg_write(D_WMB_BYTES, conv_surface->wmb_data.size);

	reg = ((conv_op->input_width_cmac - 1) << SHIFT(CSC_D_DATAOUT_SIZE_0_0, DATAOUT_WIDTH)) |
	      ((conv_op->input_height_cmac - 1) << SHIFT(CSC_D_DATAOUT_SIZE_0_0, DATAOUT_HEIGHT));
	csc_reg_write(D_DATAOUT_SIZE_0, reg);

	reg = ((conv_surface->dst_data.channel - 1) << SHIFT(CSC_D_DATAOUT_SIZE_1_0, DATAOUT_CHANNEL));
	csc_reg_write(D_DATAOUT_SIZE_1, reg);

	reg = ((conv_surface->dst_data.width *
		conv_surface->dst_data.height - 1) << SHIFT(CSC_D_ATOMICS_0, ATOMICS));
	csc_reg_write(D_ATOMICS, reg);
	reg = ((conv_op->release - 1) << SHIFT(CSC_D_RELEASE_0, RLS_SLICES));
	csc_reg_write(D_RELEASE, reg);

	if (conv_op->conv_mode == CONV_MODE_DIRECT) {
		stride_x = conv_op->conv_stride_x - 1;
		stride_y = conv_op->conv_stride_y - 1;
		pad_x = conv_op->pad_x_left;
		pad_y = conv_op->pad_y_top;
	} else {
		stride_x = 0;
		stride_y = 0;
		pad_x = 0;
		pad_y = 0;
	}

	reg = (stride_x << SHIFT(CSC_D_CONV_STRIDE_EXT_0, CONV_X_STRIDE_EXT)) |
	      (stride_y << SHIFT(CSC_D_CONV_STRIDE_EXT_0, CONV_Y_STRIDE_EXT));
	csc_reg_write(D_CONV_STRIDE_EXT, reg);

	reg = ((conv_op->dilation_x - 1) << SHIFT(CSC_D_DILATION_EXT_0, X_DILATION_EXT)) |
	      ((conv_op->dilation_y - 1) << SHIFT(CSC_D_DILATION_EXT_0, Y_DILATION_EXT));
	csc_reg_write(D_DILATION_EXT, reg);

	reg = (pad_x << SHIFT(CSC_D_ZERO_PADDING_0, PAD_LEFT)) |
	      (pad_y << SHIFT(CSC_D_ZERO_PADDING_0, PAD_TOP));
	csc_reg_write(D_ZERO_PADDING, reg);

	reg = (conv_op->pad_val << SHIFT(CSC_D_ZERO_PADDING_VALUE_0, PAD_VALUE)) &
	      MASK(CSC_D_ZERO_PADDING_VALUE_0, PAD_VALUE);
	csc_reg_write(D_ZERO_PADDING_VALUE, reg);

	reg = ((conv_op->data_bank - 1) << SHIFT(CSC_D_BANK_0, DATA_BANK)) |
	      ((conv_op->weight_bank - 1) << SHIFT(CSC_D_BANK_0, WEIGHT_BANK));
	csc_reg_write(D_BANK, reg);
	csc_reg_write(D_PRA_CFG, conv_op->pra_truncate);

	/* CBUF: there is no CBUF register to program */

	/* CDMA */
	reg = (map_conv[conv_op->conv_mode] << SHIFT(CDMA_D_MISC_CFG_0, CONV_MODE)) |
	      (map_precision[conv_op->in_precision] << SHIFT(CDMA_D_MISC_CFG_0, IN_PRECISION)) |
	      (map_precision[conv_op->out_precision] << SHIFT(CDMA_D_MISC_CFG_0, PROC_PRECISION)) |
	      (conv_op->data_reuse << SHIFT(CDMA_D_MISC_CFG_0, DATA_REUSE)) |
	      (conv_op->weight_reuse << SHIFT(CDMA_D_MISC_CFG_0, WEIGHT_REUSE)) |
	      (conv_op->skip_data_rls << SHIFT(CDMA_D_MISC_CFG_0, SKIP_DATA_RLS)) |
	      (conv_op->skip_weight_rls << SHIFT(CDMA_D_MISC_CFG_0, SKIP_WEIGHT_RLS));
	cdma_reg_write(D_MISC_CFG, reg);

	reg = (get_in_format(conv_op->data_format) <<
	       SHIFT(CDMA_D_DATAIN_FORMAT_0, DATAIN_FORMAT)) |
	      (map_img_fmt[conv_op->data_format][0] << SHIFT(CDMA_D_DATAIN_FORMAT_0, PIXEL_FORMAT)) |
	      (map_pixel[conv_op->pixel_mapping] << SHIFT(CDMA_D_DATAIN_FORMAT_0, PIXEL_MAPPING)) |
	      (conv_op->pixel_override << SHIFT(CDMA_D_DATAIN_FORMAT_0, PIXEL_SIGN_OVERRIDE));
	cdma_reg_write(D_DATAIN_FORMAT, reg);

	reg = ((conv_surface->src_data.width - 1) << SHIFT(CDMA_D_DATAIN_SIZE_0_0, DATAIN_WIDTH)) |
	      ((conv_surface->src_data.height - 1) << SHIFT(CDMA_D_DATAIN_SIZE_0_0, DATAIN_HEIGHT));
	cdma_reg_write(D_DATAIN_SIZE_0, reg);

	reg = ((conv_surface->src_data.channel - 1) << SHIFT(CDMA_D_DATAIN_SIZE_1_0, DATAIN_CHANNEL));
	cdma_reg_write(D_DATAIN_SIZE_1, reg);

	reg = ((conv_op->input_width_csc - 1) << SHIFT(CDMA_D_DATAIN_SIZE_EXT_0_0, DATAIN_WIDTH_EXT)) |
	      ((conv_op->input_height_csc - 1) << SHIFT(CDMA_D_DATAIN_SIZE_EXT_0_0, DATAIN_HEIGHT_EXT));
	cdma_reg_write(D_DATAIN_SIZE_EXT_0, reg);

	reg = (map_ram[conv_surface->src_data.type] << SHIFT(CDMA_D_DAIN_RAM_TYPE_0, DATAIN_RAM_TYPE));
	cdma_reg_write(D_DAIN_RAM_TYPE, reg);

	high = HIGH32BITS(input_address);
	low = LOW32BITS(input_address);
	cdma_reg_write(D_DAIN_ADDR_HIGH_0, high);
	cdma_reg_write(D_DAIN_ADDR_LOW_0, low);

	high = HIGH32BITS(input_address + conv_surface->offset_u);
	low = LOW32BITS(input_address + conv_surface->offset_u);
	cdma_reg_write(D_DAIN_ADDR_HIGH_1, high);
	cdma_reg_write(D_DAIN_ADDR_LOW_1, low);

	cdma_reg_write(D_LINE_STRIDE, conv_surface->src_data.line_stride);
	cdma_reg_write(D_SURF_STRIDE, conv_surface->src_data.surf_stride);
	cdma_reg_write(D_LINE_UV_STRIDE, conv_surface->in_line_uv_stride);

	reg = ((conv_surface->src_data.line_stride ==
		((uint32_t)conv_surface->src_data.width * atom_size))
	       << SHIFT(CDMA_D_DAIN_MAP_0, LINE_PACKED));
	reg |= ((conv_surface->src_data.surf_stride ==
		 ((uint32_t)(conv_surface->src_data.width *
			     conv_surface->src_data.height) * atom_size))
		<< SHIFT(CDMA_D_DAIN_MAP_0, SURF_PACKED));
	cdma_reg_write(D_DAIN_MAP, reg);

	reg = ((conv_op->batch - 1) << SHIFT(CDMA_D_BATCH_NUMBER_0, BATCHES));
	cdma_reg_write(D_BATCH_NUMBER, reg);

	cdma_reg_write(D_BATCH_STRIDE, conv_op->batch_stride);

	reg = ((conv_op->entry_per_slice - 1) << SHIFT(CDMA_D_ENTRY_PER_SLICE_0, ENTRIES));
	cdma_reg_write(D_ENTRY_PER_SLICE, reg);

	reg = ((conv_op->fetch_grain - 1) << SHIFT(CDMA_D_FETCH_GRAIN_0, GRAINS));
	cdma_reg_write(D_FETCH_GRAIN, reg);

	reg = (map_weight_fmt[conv_op->weight_format] << SHIFT(CDMA_D_WEIGHT_FORMAT_0, WEIGHT_FORMAT));
	cdma_reg_write(D_WEIGHT_FORMAT, reg);

	reg = ((conv_op->bytes_per_kernel - 1) << SHIFT(CDMA_D_WEIGHT_SIZE_0_0, BYTE_PER_KERNEL));
	cdma_reg_write(D_WEIGHT_SIZE_0, reg);

	reg = ((conv_surface->dst_data.channel - 1) << SHIFT(CDMA_D_WEIGHT_SIZE_1_0, WEIGHT_KERNEL));
	cdma_reg_write(D_WEIGHT_SIZE_1, reg);

	reg = (map_ram[conv_surface->weight_data.type] << SHIFT(CDMA_D_WEIGHT_RAM_TYPE_0, WEIGHT_RAM_TYPE));
	cdma_reg_write(D_WEIGHT_RAM_TYPE, reg);

	high = HIGH32BITS(weight_address);
	low = LOW32BITS(weight_address);
	cdma_reg_write(D_WEIGHT_ADDR_HIGH, high);
	cdma_reg_write(D_WEIGHT_ADDR_LOW, low);
	cdma_reg_write(D_WEIGHT_BYTES, conv_surface->weight_data.size);

	if (conv_op->weight_format == WEIGHT_FORMAT_COMPRESSED) {
		high = HIGH32BITS(wgs_address);
		low = LOW32BITS(wgs_address);
		cdma_reg_write(D_WGS_ADDR_HIGH, high);
		cdma_reg_write(D_WGS_ADDR_LOW, low);

		high = HIGH32BITS(wmb_address);
		low = LOW32BITS(wmb_address);
		cdma_reg_write(D_WMB_ADDR_HIGH, high);
		cdma_reg_write(D_WMB_ADDR_LOW, low);
		cdma_reg_write(D_WMB_BYTES, conv_surface->wmb_data.size);
	}

	reg = (map_mean[conv_op->mean_format] << SHIFT(CDMA_D_MEAN_FORMAT_0, MEAN_FORMAT));
	cdma_reg_write(D_MEAN_FORMAT, reg);

	if (conv_op->mean_format == MEAN_FORMAT_ENABLE) {
		reg = ((conv_op->mean_ry << SHIFT(CDMA_D_MEAN_GLOBAL_0_0, MEAN_RY)) &
		       MASK(CDMA_D_MEAN_GLOBAL_0_0, MEAN_RY)) |
		      ((conv_op->mean_gu << SHIFT(CDMA_D_MEAN_GLOBAL_0_0, MEAN_GU)) &
		       MASK(CDMA_D_MEAN_GLOBAL_0_0, MEAN_GU));
		cdma_reg_write(D_MEAN_GLOBAL_0, reg);

		reg = ((conv_op->mean_bv << SHIFT(CDMA_D_MEAN_GLOBAL_1_0, MEAN_BV)) &
		       MASK(CDMA_D_MEAN_GLOBAL_1_0, MEAN_BV)) |
		      ((conv_op->mean_ax << SHIFT(CDMA_D_MEAN_GLOBAL_1_0, MEAN_AX)) &
		       MASK(CDMA_D_MEAN_GLOBAL_1_0, MEAN_AX));
		cdma_reg_write(D_MEAN_GLOBAL_1, reg);
	}

	if (conv_op->in_cvt.enable) {
		reg = (FIELD_ENUM(CDMA_D_CVT_CFG_0, CVT_EN, ENABLE) <<
		       SHIFT(CDMA_D_CVT_CFG_0, CVT_EN)) |
		      (conv_op->in_cvt.truncate << SHIFT(CDMA_D_CVT_CFG_0, CVT_TRUNCATE));
		cdma_reg_write(D_CVT_CFG, reg);
		cdma_reg_write(D_CVT_OFFSET, conv_op->in_cvt.offset);
		cdma_reg_write(D_CVT_SCALE, conv_op->in_cvt.scale);
	} else {
		reg = (FIELD_ENUM(CDMA_D_CVT_CFG_0, CVT_EN, DISABLE) <<
		       SHIFT(CDMA_D_CVT_CFG_0, CVT_EN));
		cdma_reg_write(D_CVT_CFG, reg);
	}

	reg = ((conv_op->conv_stride_x - 1) << SHIFT(CDMA_D_CONV_STRIDE_0, CONV_X_STRIDE)) |
	      ((conv_op->conv_stride_y - 1) << SHIFT(CDMA_D_CONV_STRIDE_0, CONV_Y_STRIDE));
	cdma_reg_write(D_CONV_STRIDE, reg);

	reg = (conv_op->pad_x_left << SHIFT(CDMA_D_ZERO_PADDING_0, PAD_LEFT)) |
	      (conv_op->pad_x_right << SHIFT(CDMA_D_ZERO_PADDING_0, PAD_RIGHT)) |
	      (conv_op->pad_y_top << SHIFT(CDMA_D_ZERO_PADDING_0, PAD_TOP)) |
	      (conv_op->pad_y_bottom << SHIFT(CDMA_D_ZERO_PADDING_0, PAD_BOTTOM));
	cdma_reg_write(D_ZERO_PADDING, reg);

	reg = (conv_op->pad_val << SHIFT(CDMA_D_ZERO_PADDING_VALUE_0, PAD_VALUE)) &
	      MASK(CDMA_D_ZERO_PADDING_VALUE_0, PAD_VALUE);
	cdma_reg_write(D_ZERO_PADDING_VALUE, reg);
	reg = ((conv_op->weight_bank - 1) << SHIFT(CDMA_D_BANK_0, WEIGHT_BANK)) |
	      ((conv_op->data_bank - 1) << SHIFT(CDMA_D_BANK_0, DATA_BANK));
	cdma_reg_write(D_BANK, reg);

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}
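
/*
 * Note: unlike the CDMA input/weight addresses, only LOW32BITS() of
 * output_address is written above (CACC D_DATAOUT_ADDR has no HIGH
 * counterpart in this programming sequence), so the destination cube is
 * presumably expected to live in the low 4 GiB of the DLA address space.
 */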

int
dla_conv_is_ready(struct dla_processor *processor,
		  struct dla_processor_group *group)
{
	return 1;
}

void
dla_conv_dump_config(struct dla_processor_group *group)
{
	struct dla_conv_op_desc *conv_op;
	struct dla_conv_surface_desc *conv_surface;

	conv_surface = &group->surface_desc->conv_surface;
	conv_op = &group->operation_desc->conv_op;

	dla_debug_conv_surface_desc(conv_surface, group->roi_index);
	dla_debug_conv_op_desc(conv_op, group->roi_index);
}

int
dla_conv_program(struct dla_processor_group *group)
{
	int32_t ret;

	dla_trace("Enter: %s", __func__);

	ret = processor_conv_program(group);
	if (ret)
		goto exit;

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}

361
drivers/nvdla/dla_engine_internal.h
Normal file
@@ -0,0 +1,361 @@
/*
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __FIRMWARE_DLA_ENGINE_INTERNAL_H_
#define __FIRMWARE_DLA_ENGINE_INTERNAL_H_

#include <opendla.h>
#include <dla_engine.h>
#include <dla_interface.h>
#include <dla_debug.h>

#include "nvdla_interface.h"

#define BITS(num, range) ((((0xFFFFFFFF >> (31 - (1 ? range))) & \
			(0xFFFFFFFF << (0 ? range))) & num) >> \
			(0 ? range))
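/*
 * BITS() extracts a bit range written as "high:low". It relies on the
 * classic ternary trick: (1 ? range) evaluates to high and (0 ? range)
 * evaluates to low. For example, BITS(x, 15:8) masks off everything
 * outside bits 15..8 of x and shifts the result right by 8.
 */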
#define HIGH32BITS(val64bit)	((uint32_t)(val64bit >> 32))
#define LOW32BITS(val64bit)	((uint32_t)(val64bit))

#ifdef MIN
#undef MIN
#endif /* MIN */

#ifdef MAX
#undef MAX
#endif /* MAX */

#define MIN(a, b) ((a) > (b) ? (b) : (a))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/*********************************************************/
/******************** Utilities **************************/
/*********************************************************/
#ifdef DEBUG
#define CHECK_ALIGN(val, align)	assert((val & (align - 1)) == 0)
#else
#define CHECK_ALIGN(val, align)
#endif /* DEBUG */
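/*
 * Note: CHECK_ALIGN() compiles away entirely in non-DEBUG builds, so the
 * alignment checks sprinkled through the processor programming paths are
 * only enforced when the firmware is built with DEBUG defined.
 */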

#define MASK(reg, field)	(reg##_##field##_FIELD)
#define FIELD_ENUM(r, f, e)	(r##_##f##_##e)
#define SHIFT(reg, field)	(reg##_##field##_SHIFT)

#define GLB_REG(name)		GLB_##name##_0
#define MCIF_REG(name)		MCIF_##name##_0
#define CVIF_REG(name)		CVIF_##name##_0
#define BDMA_REG(name)		BDMA_##name##_0
#define CDMA_REG(name)		CDMA_##name##_0
#define CSC_REG(name)		CSC_##name##_0
#define CMAC_A_REG(name)	CMAC_A_##name##_0
#define CMAC_B_REG(name)	CMAC_B_##name##_0
#define CACC_REG(name)		CACC_##name##_0
#define SDP_RDMA_REG(name)	SDP_RDMA_##name##_0
#define SDP_REG(name)		SDP_##name##_0
#define PDP_RDMA_REG(name)	PDP_RDMA_##name##_0
#define PDP_REG(name)		PDP_##name##_0
#define CDP_RDMA_REG(name)	CDP_RDMA_##name##_0
#define CDP_REG(name)		CDP_##name##_0
#define RBK_REG(name)		RBK_##name##_0

/* alias for register read for each sub-module */
#define glb_reg_read(reg)	reg_read(GLB_REG(reg))
#define bdma_reg_read(reg)	reg_read(BDMA_REG(reg))
#define cdma_reg_read(reg)	reg_read(CDMA_REG(reg))
#define csc_reg_read(reg)	reg_read(CSC_REG(reg))
#define cmac_a_reg_read(reg)	reg_read(CMAC_A_REG(reg))
#define cmac_b_reg_read(reg)	reg_read(CMAC_B_REG(reg))
#define cacc_reg_read(reg)	reg_read(CACC_REG(reg))
#define sdp_rdma_reg_read(reg)	reg_read(SDP_RDMA_REG(reg))
#define sdp_reg_read(reg)	reg_read(SDP_REG(reg))
#define pdp_rdma_reg_read(reg)	reg_read(PDP_RDMA_REG(reg))
#define pdp_reg_read(reg)	reg_read(PDP_REG(reg))
#define cdp_rdma_reg_read(reg)	reg_read(CDP_RDMA_REG(reg))
#define cdp_reg_read(reg)	reg_read(CDP_REG(reg))
#define rubik_reg_read(reg)	reg_read(RBK_REG(reg))

/* alias for register write for each sub-module */
#define glb_reg_write(reg, val)		reg_write(GLB_REG(reg), val)
#define bdma_reg_write(reg, val)	reg_write(BDMA_REG(reg), val)
#define cdma_reg_write(reg, val)	reg_write(CDMA_REG(reg), val)
#define csc_reg_write(reg, val)		reg_write(CSC_REG(reg), val)
#define cmac_a_reg_write(reg, val)	reg_write(CMAC_A_REG(reg), val)
#define cmac_b_reg_write(reg, val)	reg_write(CMAC_B_REG(reg), val)
#define cacc_reg_write(reg, val)	reg_write(CACC_REG(reg), val)
#define sdp_rdma_reg_write(reg, val)	reg_write(SDP_RDMA_REG(reg), val)
#define sdp_reg_write(reg, val)		reg_write(SDP_REG(reg), val)
#define pdp_rdma_reg_write(reg, val)	reg_write(PDP_RDMA_REG(reg), val)
#define pdp_reg_write(reg, val)		reg_write(PDP_REG(reg), val)
#define cdp_rdma_reg_write(reg, val)	reg_write(CDP_RDMA_REG(reg), val)
#define cdp_reg_write(reg, val)		reg_write(CDP_REG(reg), val)
#define rubik_reg_write(reg, val)	reg_write(RBK_REG(reg), val)

void reg_write(uint32_t addr, uint32_t reg);
uint32_t reg_read(uint32_t addr);
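
/*
 * Example of how the aliases above expand (token pasting only, nothing
 * beyond what the macros themselves define):
 *
 *	cdma_reg_write(D_MISC_CFG, reg)
 *	  => reg_write(CDMA_REG(D_MISC_CFG), reg)
 *	  => reg_write(CDMA_D_MISC_CFG_0, reg)
 *
 * i.e. every sub-module alias prefixes the register name and appends the
 * _0 instance suffix before calling reg_write()/reg_read().
 */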

/**
 * Operation descriptor cache functions
 */
void
dla_put_op_desc(struct dla_common_op_desc *op_desc);
struct dla_common_op_desc
*dla_get_op_desc(struct dla_task *task,
		 int16_t index,
		 uint8_t op_type,
		 uint8_t roi_index);
void
dla_dump_op_desc(struct dla_common_op_desc *desc);
void
dla_get_refcount(struct dla_common_op_desc *op_desc);
void
dla_init_op_cache(struct dla_engine *engine);

/**
 * Operation completion handler
 */
int
dla_op_completion(struct dla_processor *processor,
		  struct dla_processor_group *group);

int32_t
dla_read_lut(struct dla_engine *engine, int16_t index, void *dst);
int
dla_enable_intr(uint32_t mask);
int
dla_disable_intr(uint32_t mask);
int
utils_get_free_group(struct dla_processor *processor,
		     uint8_t *group_id,
		     uint8_t *rdma_id);
int32_t
dla_get_dma_cube_address(void *driver_context,
			 void *task_data,
			 int16_t index,
			 uint32_t offset,
			 void *dst_ptr,
			 uint32_t destination);
int
dla_read_input_address(struct dla_data_cube *data,
		       uint64_t *address,
		       int16_t op_index,
		       uint8_t roi_index,
		       uint8_t bpp);

/**
 * BDMA operations
 */
void
dla_bdma_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_bdma_enable(struct dla_processor_group *group);
int
dla_bdma_program(struct dla_processor_group *group);
int
dla_bdma_is_ready(struct dla_processor *processor,
		  struct dla_processor_group *group);
void
dla_bdma_dump_config(struct dla_processor_group *group);
void
dla_bdma_rdma_check(struct dla_processor_group *group);

#if STAT_ENABLE
void
dla_bdma_stat_data(struct dla_processor *processor,
		   struct dla_processor_group *group);
void
dla_bdma_dump_stat(struct dla_processor *processor);

#else
static inline void
dla_bdma_stat_data(struct dla_processor *processor,
		   struct dla_processor_group *group) {}
static inline void
dla_bdma_dump_stat(struct dla_processor *processor) {}
#endif

/**
 * Convolution operations
 */
void
dla_conv_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_conv_enable(struct dla_processor_group *group);
int
dla_conv_program(struct dla_processor_group *group);
int
dla_conv_is_ready(struct dla_processor *processor,
		  struct dla_processor_group *group);
void
dla_conv_dump_config(struct dla_processor_group *group);
void
dla_conv_rdma_check(struct dla_processor_group *group);

#if STAT_ENABLE
void
dla_conv_stat_data(struct dla_processor *processor,
		   struct dla_processor_group *group);
void
dla_conv_dump_stat(struct dla_processor *processor);

#else
static inline void
dla_conv_stat_data(struct dla_processor *processor,
		   struct dla_processor_group *group) {}
static inline void
dla_conv_dump_stat(struct dla_processor *processor) {}
#endif /* STAT_ENABLE */

/**
 * SDP operations
 */
void
dla_sdp_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_sdp_enable(struct dla_processor_group *group);
int
dla_sdp_program(struct dla_processor_group *group);
int
dla_sdp_is_ready(struct dla_processor *processor,
		 struct dla_processor_group *group);
void
dla_sdp_dump_config(struct dla_processor_group *group);
void
dla_sdp_rdma_check(struct dla_processor_group *group);

#if STAT_ENABLE
void
dla_sdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group);
void
dla_sdp_dump_stat(struct dla_processor *processor);

#else
static inline void
dla_sdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group) {}
static inline void
dla_sdp_dump_stat(struct dla_processor *processor) {}
#endif

/**
 * PDP operations
 */
void
dla_pdp_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_pdp_enable(struct dla_processor_group *group);
int
dla_pdp_program(struct dla_processor_group *group);
int
dla_pdp_is_ready(struct dla_processor *processor,
		 struct dla_processor_group *group);
void
dla_pdp_dump_config(struct dla_processor_group *group);
void
dla_pdp_rdma_check(struct dla_processor_group *group);

#if STAT_ENABLE
void
dla_pdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group);
void
dla_pdp_dump_stat(struct dla_processor *processor);

#else
static inline void
dla_pdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group) {}
static inline void
dla_pdp_dump_stat(struct dla_processor *processor) {}
#endif

/**
 * CDP operations
 */
void
dla_cdp_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_cdp_enable(struct dla_processor_group *group);
int
dla_cdp_program(struct dla_processor_group *group);
int
dla_cdp_is_ready(struct dla_processor *processor,
		 struct dla_processor_group *group);
void
dla_cdp_dump_config(struct dla_processor_group *group);
void
dla_cdp_rdma_check(struct dla_processor_group *group);

#if STAT_ENABLE
void
dla_cdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group);
void
dla_cdp_dump_stat(struct dla_processor *processor);

#else
static inline void
dla_cdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group) {}
static inline void
dla_cdp_dump_stat(struct dla_processor *processor) {}
#endif

/**
 * RUBIK operations
 */
void
dla_rubik_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_rubik_enable(struct dla_processor_group *group);
int
dla_rubik_program(struct dla_processor_group *group);
int
dla_rubik_is_ready(struct dla_processor *processor,
		   struct dla_processor_group *group);
void
dla_rubik_dump_config(struct dla_processor_group *group);
void
dla_rubik_rdma_check(struct dla_processor_group *group);

#if STAT_ENABLE
void
dla_rubik_stat_data(struct dla_processor *processor,
		    struct dla_processor_group *group);
void
dla_rubik_dump_stat(struct dla_processor *processor);

#else
static inline void
dla_rubik_stat_data(struct dla_processor *processor,
		    struct dla_processor_group *group) {}
static inline void
dla_rubik_dump_stat(struct dla_processor *processor) {}
#endif

#endif

262
drivers/nvdla/engine.c
Normal file
@@ -0,0 +1,262 @@
/*
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <opendla.h>
#include <dla_debug.h>
#include <dla_err.h>
#include <dla_interface.h>

#include "dla_engine_internal.h"
#include "common.h"

static const uint32_t map_rdma_ptr_addr[] = {
	0xFFFFFFFF,
	0xFFFFFFFF,
	SDP_REG(RDMA_S_POINTER),
	PDP_REG(RDMA_S_POINTER),
	CDP_REG(RDMA_S_POINTER),
	0xFFFFFFFF,
};

static const uint32_t map_sts_addr[] = {
	BDMA_REG(STATUS),
	CACC_REG(S_STATUS),
	SDP_REG(S_STATUS),
	PDP_REG(S_STATUS),
	CDP_REG(S_STATUS),
	RBK_REG(S_STATUS),
};

static const uint32_t map_ptr_addr[] = {
	BDMA_REG(STATUS),
	CACC_REG(S_POINTER),
	SDP_REG(S_POINTER),
	PDP_REG(S_POINTER),
	CDP_REG(S_POINTER),
	RBK_REG(S_POINTER),
};
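
/*
 * Note: these three tables are indexed by processor op_type; the
 * 0xFFFFFFFF entries in map_rdma_ptr_addr mark engines without a
 * dedicated RDMA pointer register, which utils_get_free_group() below
 * checks for before touching the register.
 */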

int32_t dla_enable_intr(uint32_t mask)
{
	uint32_t reg = glb_reg_read(S_INTR_MASK);

	reg = reg & (~mask);
	glb_reg_write(S_INTR_MASK, reg);

	RETURN(0);
}

int32_t dla_disable_intr(uint32_t mask)
{
	uint32_t reg = glb_reg_read(S_INTR_MASK);

	reg = reg | mask;
	glb_reg_write(S_INTR_MASK, reg);

	RETURN(0);
}
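
/*
 * Note: GLB S_INTR_MASK is a disable mask, so enabling interrupts means
 * clearing bits (reg & ~mask) and disabling means setting them
 * (reg | mask), as implemented above.
 */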

uint8_t bdma_grp_sts[2] = {
	FIELD_ENUM(BDMA_STATUS_0, IDLE, YES),
	FIELD_ENUM(BDMA_STATUS_0, IDLE, YES)
};

struct dla_roi_desc roi_desc;

/**
 * Get DMA data cube address
 */
int32_t
dla_get_dma_cube_address(void *driver_context, void *task_data,
			 int16_t index, uint32_t offset, void *dst_ptr,
			 uint32_t destination)
{
	int32_t ret = 0;
	uint64_t *pdst = (uint64_t *)dst_ptr;

	ret = dla_get_dma_address(driver_context, task_data, index,
				  dst_ptr, destination);
	if (ret)
		goto exit;

	pdst[0] += offset;

exit:
	return ret;
}
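
/*
 * Note: dst_ptr must point at a uint64_t; the helper first resolves the
 * address-list entry to a DMA address via dla_get_dma_address() and then
 * adds the byte offset within that buffer in place.
 */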

/**
 * Read input buffer address
 *
 * For the input layer with a static ROI, this address is read from the
 * address list using the index specified in the data cube. With a
 * dynamic ROI, it has to be computed from the ROI information and the
 * surface address.
 *
 * For all other layers, this address is read from the address list
 * using the index specified in the data cube.
 */
int
dla_read_input_address(struct dla_data_cube *data,
		       uint64_t *address,
		       int16_t op_index,
		       uint8_t roi_index,
		       uint8_t bpp)
{
	uint64_t roi_desc_addr;
	int32_t ret = ERR(INVALID_INPUT);
	struct dla_engine *en = dla_get_engine();

	/*
	 * If the memory type is HW then no address is required
	 */
	if (data->type == DLA_MEM_HW) {
		ret = 0;
		goto exit;
	}

	/*
	 * If the address list index is not -1, the address has to be
	 * read from the address list
	 */
	if (data->address != -1) {
		/*
		 * But if other parameters indicate that this is the input
		 * layer for a dynamic ROI, then it is an error
		 */
		if (en->network->dynamic_roi &&
		    en->network->input_layer == op_index)
			goto exit;

		ret = dla_get_dma_cube_address(en->driver_context,
					       en->task->task_data,
					       data->address,
					       data->offset,
					       (void *)address,
					       DESTINATION_DMA);
		goto exit;
	}

	/*
	 * Check if it is a dynamic ROI and this is the input layer
	 */
	if (en->network->dynamic_roi && en->network->input_layer == op_index) {
		if (!en->task->surface_addr)
			goto exit;

		/* Calculate address of ROI descriptor in array */
		roi_desc_addr = en->task->roi_array_addr;

		/* Read ROI descriptor */
		ret = dla_data_read(en->driver_context,
				    en->task->task_data,
				    roi_desc_addr,
				    (void *)&roi_desc,
				    sizeof(roi_desc),
				    sizeof(struct dla_roi_array_desc) +
				    roi_index * sizeof(struct dla_roi_desc));
		if (ret)
			goto exit;

		/* Calculate ROI address */
		*address = en->task->surface_addr;
		*address += (roi_desc.top * data->line_stride) +
			    (bpp * roi_desc.left);
	}

exit:
	RETURN(ret);
}
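
/*
 * Worked example of the dynamic-ROI address math above (numbers are
 * illustrative only): with surface_addr = 0x80000000, line_stride = 2048,
 * bpp = 2, and an ROI at top = 10, left = 16, the input address becomes
 * 0x80000000 + 10 * 2048 + 2 * 16 = 0x80005020.
 */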

int
utils_get_free_group(struct dla_processor *processor,
		     uint8_t *group_id,
		     uint8_t *rdma_id)
{
	int32_t ret = 0;
	uint32_t pointer;
	uint32_t hw_consumer_ptr;
	uint32_t hw_rdma_ptr;

	hw_rdma_ptr = 0;

	if (processor->op_type == DLA_OP_BDMA) {
		pointer = reg_read(map_ptr_addr[processor->op_type]);
		hw_consumer_ptr = ((pointer & MASK(BDMA_STATUS_0, GRP0_BUSY)) >>
				   SHIFT(BDMA_STATUS_0, GRP0_BUSY)) ==
				   FIELD_ENUM(BDMA_STATUS_0, GRP0_BUSY, YES) ?
				   1 : 0;
	} else {
		pointer = reg_read(map_ptr_addr[processor->op_type]);
		hw_consumer_ptr = (pointer & MASK(CDP_S_POINTER_0, CONSUMER)) >>
				  SHIFT(CDP_S_POINTER_0, CONSUMER);

		/*
		 * Read the current consumer pointer for RDMA only if the
		 * processor has an RDMA module
		 */
		if (map_rdma_ptr_addr[processor->op_type] != 0xFFFFFFFF) {
			pointer =
			    reg_read(map_rdma_ptr_addr[processor->op_type]);
			hw_rdma_ptr = (pointer &
				       MASK(CDP_S_POINTER_0, CONSUMER)) >>
				       SHIFT(CDP_S_POINTER_0, CONSUMER);
		}
	}

	/*
	 * If both groups are already programmed then exit
	 */
	if (processor->group_status == 0x3) {
		ret = ERR(PROCESSOR_BUSY);
		goto exit;
	}

	if (!processor->group_status)
		/*
		 * If both groups are idle then use the consumer pointer
		 */
		*group_id = hw_consumer_ptr;
	else
		/*
		 * Here it is assumed that exactly one group is busy,
		 * so the right shift yields the correct free group id
		 */
		*group_id = !(processor->group_status >> 1);

	/*
	 * If both RDMA groups are idle then read the group id from the pointer
	 */
	if (!processor->rdma_status)
		*rdma_id = hw_rdma_ptr;
	else
		*rdma_id = !(processor->rdma_status >> 1);

exit:
	RETURN(ret);
}
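
/*
 * Note: group_status is a two-bit occupancy mask (bit n set means group n
 * is already programmed), so 0x3 means both groups are busy. When exactly
 * one group is in use, !(group_status >> 1) picks the free one:
 * status 0x1 -> group 1, status 0x2 -> group 0. The same logic applies
 * to rdma_status.
 */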

303
drivers/nvdla/engine_data.c
Normal file
@@ -0,0 +1,303 @@
/*
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <nvdla_interface.h>
#include <dla_interface.h>

#include "dla_engine_internal.h"

static union dla_operation_container operation_desc[DLA_OP_NUM][DLA_NUM_GROUPS];
static union dla_surface_container surface_desc[DLA_OP_NUM][DLA_NUM_GROUPS];

static struct dla_task global_task;

static struct dla_engine engine = {
	.processors[DLA_OP_BDMA] = {
		.name = "BDMA",
		.op_type = DLA_OP_BDMA,
		.program = dla_bdma_program,
		.enable = dla_bdma_enable,
		.set_producer = dla_bdma_set_producer,
		.is_ready = dla_bdma_is_ready,
		.dump_config = dla_bdma_dump_config,
		.rdma_check = dla_bdma_rdma_check,
		.get_stat_data = dla_bdma_stat_data,
		.dump_stat = dla_bdma_dump_stat,
		.consumer_ptr = 0,
		.roi_index = 0,
		.group_status = 0,
		.rdma_status = 0,
		.last_group = 1,
		.groups[0] = {
			.id = 0,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_BDMA][0],
			.surface_desc = &surface_desc[DLA_OP_BDMA][0],
		},
		.groups[1] = {
			.id = 1,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_BDMA][1],
			.surface_desc = &surface_desc[DLA_OP_BDMA][1],
		},
	},
	.processors[DLA_OP_CONV] = {
		.name = "Convolution",
		.op_type = DLA_OP_CONV,
		.program = dla_conv_program,
		.enable = dla_conv_enable,
		.set_producer = dla_conv_set_producer,
		.is_ready = dla_conv_is_ready,
		.dump_config = dla_conv_dump_config,
		.rdma_check = dla_conv_rdma_check,
		.get_stat_data = dla_conv_stat_data,
		.dump_stat = dla_conv_dump_stat,
		.consumer_ptr = 0,
		.roi_index = 0,
		.group_status = 0,
		.rdma_status = 0,
		.last_group = 1,
		.groups[0] = {
			.id = 0,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_CONV][0],
			.surface_desc = &surface_desc[DLA_OP_CONV][0],
		},
		.groups[1] = {
			.id = 1,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_CONV][1],
			.surface_desc = &surface_desc[DLA_OP_CONV][1],
		},
	},
	.processors[DLA_OP_SDP] = {
		.name = "SDP",
		.op_type = DLA_OP_SDP,
		.program = dla_sdp_program,
		.enable = dla_sdp_enable,
		.set_producer = dla_sdp_set_producer,
		.is_ready = dla_sdp_is_ready,
		.dump_config = dla_sdp_dump_config,
		.rdma_check = dla_sdp_rdma_check,
		.get_stat_data = dla_sdp_stat_data,
		.dump_stat = dla_sdp_dump_stat,
		.consumer_ptr = 0,
		.roi_index = 0,
		.group_status = 0,
		.rdma_status = 0,
		.last_group = 1,
		.groups[0] = {
			.id = 0,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_SDP][0],
			.surface_desc = &surface_desc[DLA_OP_SDP][0],
		},
		.groups[1] = {
			.id = 1,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_SDP][1],
			.surface_desc = &surface_desc[DLA_OP_SDP][1],
		},
	},
	.processors[DLA_OP_PDP] = {
		.name = "PDP",
		.op_type = DLA_OP_PDP,
		.program = dla_pdp_program,
		.enable = dla_pdp_enable,
		.set_producer = dla_pdp_set_producer,
		.is_ready = dla_pdp_is_ready,
		.dump_config = dla_pdp_dump_config,
		.rdma_check = dla_pdp_rdma_check,
		.get_stat_data = dla_pdp_stat_data,
		.dump_stat = dla_pdp_dump_stat,
		.consumer_ptr = 0,
		.roi_index = 0,
		.group_status = 0,
		.rdma_status = 0,
		.last_group = 1,
		.groups[0] = {
			.id = 0,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_PDP][0],
			.surface_desc = &surface_desc[DLA_OP_PDP][0],
		},
		.groups[1] = {
			.id = 1,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_PDP][1],
			.surface_desc = &surface_desc[DLA_OP_PDP][1],
		},
	},
	.processors[DLA_OP_CDP] = {
		.name = "CDP",
		.op_type = DLA_OP_CDP,
		.program = dla_cdp_program,
		.enable = dla_cdp_enable,
		.set_producer = dla_cdp_set_producer,
		.is_ready = dla_cdp_is_ready,
		.dump_config = dla_cdp_dump_config,
		.rdma_check = dla_cdp_rdma_check,
		.get_stat_data = dla_cdp_stat_data,
		.dump_stat = dla_cdp_dump_stat,
		.consumer_ptr = 0,
		.roi_index = 0,
		.group_status = 0,
		.rdma_status = 0,
		.last_group = 1,
		.groups[0] = {
			.id = 0,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_CDP][0],
			.surface_desc = &surface_desc[DLA_OP_CDP][0],
		},
		.groups[1] = {
			.id = 1,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_CDP][1],
			.surface_desc = &surface_desc[DLA_OP_CDP][1],
		},
	},

	.processors[DLA_OP_RUBIK] = {
		.name = "RUBIK",
		.op_type = DLA_OP_RUBIK,
		.program = dla_rubik_program,
		.enable = dla_rubik_enable,
		.set_producer = dla_rubik_set_producer,
		.is_ready = dla_rubik_is_ready,
		.dump_config = dla_rubik_dump_config,
		.rdma_check = dla_rubik_rdma_check,
		.get_stat_data = dla_rubik_stat_data,
		.dump_stat = dla_rubik_dump_stat,
		.consumer_ptr = 0,
		.roi_index = 0,
		.group_status = 0,
		.rdma_status = 0,
		.last_group = 1,
		.groups[0] = {
			.id = 0,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_RUBIK][0],
			.surface_desc = &surface_desc[DLA_OP_RUBIK][0],
		},
		.groups[1] = {
			.id = 1,
			.rdma_id = 0,
			.active = 0,
			.events = 0,
			.roi_index = 0,
			.is_rdma_needed = 0,
			.lut_index = -1,
			.operation_desc = &operation_desc[DLA_OP_RUBIK][1],
			.surface_desc = &surface_desc[DLA_OP_RUBIK][1],
		},
	},

};

struct dla_engine *dla_get_engine(void)
{
	return &engine;
}
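
/*
 * Note: the engine, its task, and all operation/surface descriptor
 * storage above are static singletons, so this firmware state appears to
 * support exactly one NVDLA instance per driver.
 */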

int32_t dla_register_driver(void **engine_context, void *driver_context)
{
	*engine_context = &engine;
	engine.task = &global_task;
	engine.driver_context = driver_context;
	engine.task->task_data = NULL;

	dla_init_op_cache(&engine);

	RETURN(0);
}

uint32_t reg_read(uint32_t addr)
{
	return dla_reg_read(engine.driver_context, addr);
}

void reg_write(uint32_t addr, uint32_t reg)
{
	dla_reg_write(engine.driver_context, addr, reg);
}

551
drivers/nvdla/engine_debug.c
Normal file
@@ -0,0 +1,551 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <dla_debug.h>
#include <dla_interface.h>
#include <dla_sched.h>

#include "engine_debug.h"

#if DEBUG_NETWORK_DATA

void
dla_debug_network_desc(struct dla_network_desc *nd)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW dla_network_desc\n");
	dla_debug("---------------------------------------------------------\n");
	dla_debug("op desc index = %d\n", nd->operation_desc_index);
	dla_debug("surface desc index = %d\n", nd->surface_desc_index);
	dla_debug("dep graph index = %d\n", nd->dependency_graph_index);
	dla_debug("lut data index = %d\n", nd->lut_data_index);
	dla_debug("stat_list_index = %d\n", nd->stat_list_index);
	dla_debug("roi array index = %d\n", nd->roi_array_index);
	dla_debug("surface index = %d\n", nd->surface_index);
	dla_debug("num rois = %u\n", nd->num_rois);
	dla_debug("num ops = %u\n", nd->num_operations);
	dla_debug("num luts = %u\n", nd->num_luts);
	dla_debug("num addr = %u\n", nd->num_addresses);
	dla_debug("input layer = %u\n", nd->input_layer);
	dla_debug("dynamic roi = %u\n", nd->dynamic_roi);
}

static void
dla_debug_bdma_transfer(struct dla_bdma_transfer_desc *tr, int32_t id)
{
	dla_debug("transfer[%d] = [ dla_bdma_transfer_desc =>\n", id);
	dla_debug(" source_address = %x\n", tr->source_address);
	dla_debug(" destination_address = %x\n", tr->destination_address);
	dla_debug(" line_size = %x\n", tr->line_size);
	dla_debug(" line_repeat = %x\n", tr->line_repeat);
	dla_debug(" source_line = %x\n", tr->source_line);
	dla_debug(" destination_line = %x\n", tr->destination_line);
	dla_debug(" surface_repeat = %x\n", tr->surface_repeat);
	dla_debug(" source_surface = %x\n", tr->source_surface);
	dla_debug(" destination_surface = %x\n", tr->destination_surface);
}

void
dla_debug_bdma_surface_desc(struct dla_bdma_surface_desc *desc, int32_t roi)
{
	int32_t i;

	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_bdma_surface_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("source_type = %u\n", desc->source_type);
	dla_debug("destination_type = %u\n", desc->destination_type);
	dla_debug("num_transfers = %u\n", desc->num_transfers);
	for (i = 0; i < desc->num_transfers; i++)
		dla_debug_bdma_transfer(&desc->transfers[i], i);
}

void
dla_debug_bdma_op_desc(struct dla_bdma_op_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_bdma_op_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("num_transfers = %u\n", desc->num_transfers);
}

void
dla_debug_address_info(struct dla_task *tk)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW address list\n");
	dla_debug("---------------------------------------------------------\n");
	dla_debug("task base address = %llu\n", tk->base);
	dla_debug("op desc address = %llu\n", tk->operation_desc_addr);
	dla_debug("surface desc address = %llu\n", tk->surface_desc_addr);
	dla_debug("dependency graph address = %llu\n", tk->dependency_graph_addr);
	dla_debug("LUT data address = %llu\n", tk->lut_data_addr);
	dla_debug("stat address = %llu\n", tk->stat_data_addr);
	dla_debug("ROI array address = %llu\n", tk->roi_array_addr);
	dla_debug("surface address = %llu\n", tk->surface_addr);
}

void
dla_debug_op_desc(struct dla_common_op_desc *desc, int32_t roi)
{
	int32_t i;

	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_common_op_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("[%p] Operation index %d ROI %d dep_count %d type %d\n",
			(unsigned int *)desc, desc->index, desc->roi_index,
			desc->dependency_count, desc->op_type);
	dla_debug("consumers = [ dla_consumer =>\n");
	for (i = 0; i < DLA_OP_NUM; i++)
		dla_debug(" [ %d %d ]", desc->consumers[i].index,
				desc->consumers[i].event);
	dla_debug("]");
	dla_debug("fused_parent = [ dla_consumer =>\n");
	dla_debug(" [ %d %d ]", desc->fused_parent.index,
			desc->fused_parent.event);
	dla_debug("]");
}

static void
dla_debug_data_cube(struct dla_data_cube *cube)
{
	dla_debug(" type = %u\n", cube->type);
	dla_debug(" address = %d\n", cube->address);
	dla_debug(" width = %x\n", cube->width);
	dla_debug(" height = %x\n", cube->height);
	dla_debug(" channel = %x\n", cube->channel);
	dla_debug(" size = %u\n", cube->size);
	dla_debug(" line_stride = %u\n", cube->line_stride);
	dla_debug(" surf_stride = %u\n", cube->surf_stride);
	dla_debug(" plane_stride = %u\n", cube->plane_stride);
	dla_debug("]");
}

static void
dla_debug_converter(struct dla_cvt_param *cvt)
{
	dla_debug("[ scale = %d, truncate = %u, enable = %u, offset = %d ]\n",
			cvt->scale, cvt->truncate, cvt->enable, cvt->offset);
}

static void
dla_debug_float_data(struct dla_float_data *float_data)
{
	dla_debug("[ scale = %d, shifter = %d ]\n",
			float_data->scale, float_data->shifter);
}

static void
dla_debug_dla_slope(union dla_slope *slope)
{
	dla_debug(" data_i =\n");
	dla_debug_float_data(&slope->data_i);
	dla_debug(" data_f = %u\n", slope->data_f);
}

static void
dla_debug_lut_offset(union dla_lut_offset *offset)
{
	dla_debug(" exp_offset = %d\n", offset->exp_offset);
	dla_debug(" frac_bits = %d\n", offset->frac_bits);
}

void
dla_debug_lut_params(struct dla_lut_param *lut_param)
{
	int32_t i, j;

	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW dla_lut_param\n");
	dla_debug("---------------------------------------------------------\n");

	dla_debug("linear_exp_table = [\n");
	for (i = 0; i < (1 << LUT_LINEAR_EXP_TABLE_ENTRY_LOG2) + 1; i++)
		dla_debug(" %u", lut_param->linear_exp_table[i]);
	dla_debug("]");

	dla_debug("linear_only_table = [\n");
	for (j = 0; j < (1 << LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2) + 1; j++)
		dla_debug(" %u\n", lut_param->linear_only_table[j]);
	dla_debug("]\n");

	dla_debug("linear_exp_offset =\n");
	dla_debug_lut_offset(&lut_param->linear_exp_offset);
	dla_debug("linear_only_offset =\n");
	dla_debug_lut_offset(&lut_param->linear_only_offset);
	dla_debug("linear_exp_start = %llu\n",
			lut_param->linear_exp_start);
	dla_debug("linear_exp_end = %llu\n",
			lut_param->linear_exp_end);
	dla_debug("linear_only_start = %llu\n",
			lut_param->linear_only_start);
	dla_debug("linear_only_end = %llu\n",
			lut_param->linear_only_end);
	dla_debug("linear_exp_underflow_slope =\n");
	dla_debug_dla_slope(&lut_param->linear_exp_underflow_slope);
	dla_debug("linear_exp_overflow_slope =\n");
	dla_debug_dla_slope(&lut_param->linear_exp_overflow_slope);
	dla_debug("linear_only_underflow_slope =\n");
	dla_debug_dla_slope(&lut_param->linear_only_underflow_slope);
	dla_debug("linear_only_overflow_slope =\n");
	dla_debug_dla_slope(&lut_param->linear_only_overflow_slope);
	dla_debug("hybrid_priority = %u\n",
			lut_param->hybrid_priority);
	dla_debug("underflow_priority = %u\n",
			lut_param->underflow_priority);
	dla_debug("overflow_priority = %u\n",
			lut_param->overflow_priority);
	dla_debug("method = %u\n",
			lut_param->method);
}

void
dla_debug_bdma_stats(struct dla_bdma_stat_desc *stat)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW STATS: dla_bdma_stat_desc\n");
	dla_debug("---------------------------------------------------------\n");
	dla_debug("read_stall = %u\n", stat->read_stall);
	dla_debug("write_stall = %u\n", stat->write_stall);
	dla_debug("runtime = %u\n", stat->runtime);
}

void
dla_debug_conv_surface_desc(struct dla_conv_surface_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_conv_surface_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("weight_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->weight_data);
	dla_debug("wmb_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->wmb_data);
	dla_debug("wgs_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->wgs_data);
	dla_debug("src_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->src_data);
	dla_debug("dst_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->dst_data);
	dla_debug("offset_u = %lld\n", desc->offset_u);
	dla_debug("in_line_uv_stride = %u\n", desc->in_line_uv_stride);
}

void
dla_debug_conv_op_desc(struct dla_conv_op_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_conv_op_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("conv_mode = %u\n", desc->conv_mode);
	dla_debug("data_reuse = %u\n", desc->data_reuse);
	dla_debug("weight_reuse = %u\n", desc->weight_reuse);
	dla_debug("skip_data_rls = %u\n", desc->skip_data_rls);
	dla_debug("skip_weight_rls = %u\n", desc->skip_weight_rls);
	dla_debug("entry_per_slice = %u\n", desc->entry_per_slice);
	dla_debug("data_format = %u\n", desc->data_format);
	dla_debug("pixel_mapping = %u\n", desc->pixel_mapping);
	dla_debug("fetch_grain = %u\n", desc->fetch_grain);
	dla_debug("batch = %u\n", desc->batch);
	dla_debug("weight_format = %u\n", desc->weight_format);
	dla_debug("data_bank = %u\n", desc->data_bank);
	dla_debug("weight_bank = %u\n", desc->weight_bank);
	dla_debug("batch_stride = %u\n", desc->batch_stride);
	dla_debug("post_extension = %u\n", desc->post_extension);
	dla_debug("pixel_override = %u\n", desc->pixel_override);
	dla_debug("release = %u\n", desc->release);
	dla_debug("input_width_csc = %u\n", desc->input_width_csc);
	dla_debug("input_height_csc = %u\n", desc->input_height_csc);
	dla_debug("input_channel_csc = %u\n", desc->input_channel_csc);
	dla_debug("kernel_width_csc = %u\n", desc->kernel_width_csc);
	dla_debug("kernel_height_csc = %u\n", desc->kernel_height_csc);
	dla_debug("kernel_channel_csc = %u\n", desc->kernel_channel_csc);
	dla_debug("input_width_cmac = %u\n", desc->input_width_cmac);
	dla_debug("input_height_cmac = %u\n", desc->input_height_cmac);
	dla_debug("bytes_per_kernel = %u\n", desc->bytes_per_kernel);
	dla_debug("mean_ry = %d\n", desc->mean_ry);
	dla_debug("mean_gu = %d\n", desc->mean_gu);
	dla_debug("mean_bv = %d\n", desc->mean_bv);
	dla_debug("mean_ax = %d\n", desc->mean_ax);
	dla_debug("mean_format = %u\n", desc->mean_format);
	dla_debug("conv_stride_x = %u\n", desc->conv_stride_x);
	dla_debug("conv_stride_y = %u\n", desc->conv_stride_y);
	dla_debug("pad_x_left = %u\n", desc->pad_x_left);
	dla_debug("pad_x_right = %u\n", desc->pad_x_right);
	dla_debug("pad_y_top = %u\n", desc->pad_y_top);
	dla_debug("pad_y_bottom = %u\n", desc->pad_y_bottom);
	dla_debug("dilation_x = %u\n", desc->dilation_x);
	dla_debug("dilation_y = %u\n", desc->dilation_y);
	dla_debug("pra_truncate = %u\n", desc->pra_truncate);
	dla_debug("in_precision = %u\n", desc->in_precision);
	dla_debug("out_precision = %u\n", desc->out_precision);
	dla_debug("pad_val = %d\n", desc->pad_val);
	dla_debug("in_cvt =\n");
	dla_debug_converter(&desc->in_cvt);
	dla_debug("out_cvt =\n");
	dla_debug_converter(&desc->out_cvt);
}

void
dla_debug_conv_stats(struct dla_conv_stat_desc *stat)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW STATS: dla_conv_stat_desc\n");
	dla_debug("---------------------------------------------------------\n");
	dla_debug("data_read_stall = %u\n", stat->data_read_stall);
	dla_debug("weight_read_stall = %u\n", stat->weight_read_stall);
	dla_debug("data_read_latency = %u\n", stat->data_read_latency);
	dla_debug("weight_read_latency = %u\n", stat->weight_read_latency);
	dla_debug("saturation_count = %u\n", stat->saturation_count);
	dla_debug("nan_data_num = %u\n", stat->nan_data_num);
	dla_debug("nan_weight_num = %u\n", stat->nan_weight_num);
	dla_debug("inf_data_num = %u\n", stat->inf_data_num);
	dla_debug("inf_weight_num = %u\n", stat->inf_weight_num);
	dla_debug("runtime = %u\n", stat->runtime);
}

void
dla_debug_pdp_surface_desc(struct dla_pdp_surface_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_pdp_surface_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("src_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->src_data);
	dla_debug("dst_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->dst_data);
}

void
dla_debug_pdp_op_desc(struct dla_pdp_op_desc *desc, int32_t roi)
{
	int32_t i;

	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_pdp_op_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("precision = %u\n", desc->precision);
	dla_debug("padding_value = [\n");
	for (i = 0; i < PDP_PAD_VAL_NUM; i++)
		dla_debug(" %d\n", desc->padding_value[i]);
	dla_debug("]\n");
	dla_debug("split_num = %u\n", desc->split_num);
	dla_debug("partial_in_width_first = %u\n",
			desc->partial_in_width_first);
	dla_debug("partial_in_width_mid = %u\n", desc->partial_in_width_mid);
	dla_debug("partial_in_width_last = %u\n", desc->partial_in_width_last);
	dla_debug("partial_width_first = %u\n", desc->partial_width_first);
	dla_debug("partial_width_mid = %u\n", desc->partial_width_mid);
	dla_debug("partial_width_last = %u\n", desc->partial_width_last);
	dla_debug("pool_mode = %u\n", desc->pool_mode);
	dla_debug("pool_width = %u\n", desc->pool_width);
	dla_debug("pool_height = %u\n", desc->pool_height);
	dla_debug("stride_x = %u\n", desc->stride_x);
	dla_debug("stride_y = %u\n", desc->stride_y);
	dla_debug("pad_left = %u\n", desc->pad_left);
	dla_debug("pad_right = %u\n", desc->pad_right);
	dla_debug("pad_top = %u\n", desc->pad_top);
	dla_debug("pad_bottom = %u\n", desc->pad_bottom);
}

void
dla_debug_pdp_stats(struct dla_pdp_stat_desc *stat)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW STATS: dla_pdp_stat_desc\n");
	dla_debug("---------------------------------------------------------\n");
	dla_debug("inf_input_num = %u\n", stat->inf_input_num);
	dla_debug("nan_input_num = %u\n", stat->nan_input_num);
	dla_debug("nan_output_num = %u\n", stat->nan_output_num);
	dla_debug("write_stall = %u\n", stat->write_stall);
	dla_debug("runtime = %u\n", stat->runtime);
}

void
dla_debug_cdp_surface_desc(struct dla_cdp_surface_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_cdp_surface_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("src_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->src_data);
	dla_debug("dst_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->dst_data);
}

void
dla_debug_cdp_op_desc(struct dla_cdp_op_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_cdp_op_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("in_precision = %u\n", desc->in_precision);
	dla_debug("out_precision = %u\n", desc->out_precision);
	dla_debug("lut_index = %d\n", desc->lut_index);
	dla_debug("in_cvt =\n");
	dla_debug_converter(&desc->in_cvt);
	dla_debug("out_cvt =\n");
	dla_debug_converter(&desc->out_cvt);
	dla_debug("local_size = %u\n", desc->local_size);
	dla_debug("bypass_sqsum = %u\n", desc->bypass_sqsum);
	dla_debug("bypass_out_mul = %u\n", desc->bypass_out_mul);
}

void
dla_debug_cdp_stats(struct dla_cdp_stat_desc *stat)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW STATS: dla_cdp_stat_desc\n");
	dla_debug("---------------------------------------------------------\n");
	dla_debug("nan_input_num = %u\n", stat->nan_input_num);
	dla_debug("inf_input_num = %u\n", stat->inf_input_num);
	dla_debug("nan_output_num = %u\n", stat->nan_output_num);
	dla_debug("write_stall = %u\n", stat->write_stall);
	dla_debug("lut_uflow = %u\n", stat->lut_uflow);
	dla_debug("lut_oflow = %u\n", stat->lut_oflow);
	dla_debug("lut_hybrid = %u\n", stat->lut_hybrid);
	dla_debug("lut_le_hit = %u\n", stat->lut_le_hit);
	dla_debug("lut_lo_hit = %u\n", stat->lut_lo_hit);
	dla_debug("saturation_count = %u\n", stat->saturation_count);
	dla_debug("runtime = %u\n", stat->runtime);
}

void
dla_debug_rubik_surface_desc(struct dla_rubik_surface_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_rubik_surface_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("src_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->src_data);
	dla_debug("dst_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->dst_data);
}

void
dla_debug_rubik_op_desc(struct dla_rubik_op_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_rubik_op_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("mode = %u\n", desc->mode);
	dla_debug("precision = %u\n", desc->precision);
	dla_debug("stride_x = %u\n", desc->stride_x);
	dla_debug("stride_y = %u\n", desc->stride_y);
}

void
dla_debug_rubik_stats(struct dla_rubik_stat_desc *stat)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW STATS: dla_rubik_stat_desc\n");
	dla_debug("---------------------------------------------------------\n");
	dla_debug("read_stall = %u\n", stat->read_stall);
	dla_debug("write_stall = %u\n", stat->write_stall);
	dla_debug("runtime = %u\n", stat->runtime);
}

static void
dla_debug_sdp_op(struct dla_sdp_op *sdp_op)
{
	dla_debug(" enable = %u\n", sdp_op->enable);
	dla_debug(" alu_type = %u\n", sdp_op->alu_type);
	dla_debug(" type = %u\n", sdp_op->type);
	dla_debug(" mode = %u\n", sdp_op->mode);
	dla_debug(" act = %u\n", sdp_op->act);
	dla_debug(" shift_value = %u\n", sdp_op->shift_value);
	dla_debug(" truncate = %u\n", sdp_op->truncate);
	dla_debug(" precision = %u\n", sdp_op->precision);
	dla_debug(" alu_operand = %d\n", sdp_op->alu_operand);
	dla_debug(" mul_operand = %d\n", sdp_op->mul_operand);
	dla_debug("cvt.alu_cvt =\n");
	dla_debug_converter(&sdp_op->cvt.alu_cvt);
	dla_debug("cvt.mul_cvt =\n");
	dla_debug_converter(&sdp_op->cvt.mul_cvt);
	dla_debug("]\n");
}

void
dla_debug_sdp_surface_desc(struct dla_sdp_surface_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_sdp_surface_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("src_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->src_data);
	dla_debug("x1_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->x1_data);
	dla_debug("x2_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->x2_data);
	dla_debug("y_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->y_data);
	dla_debug("dst_data = [ dla_data_cube =>\n");
	dla_debug_data_cube(&desc->dst_data);
}

void
dla_debug_sdp_op_desc(struct dla_sdp_op_desc *desc, int32_t roi)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW ROI[%d]: dla_sdp_op_desc\n", roi);
	dla_debug("---------------------------------------------------------\n");
	dla_debug("src_precision = %u\n", desc->src_precision);
	dla_debug("dst_precision = %u\n", desc->dst_precision);
	dla_debug("lut_index = %d\n", desc->lut_index);
	dla_debug("out_cvt =\n");
	dla_debug_converter(&desc->out_cvt);
	dla_debug("conv_mode = %u\n", desc->conv_mode);
	dla_debug("batch_num = %u\n", desc->batch_num);
	dla_debug("batch_stride = %u\n", desc->batch_stride);
	dla_debug("x1_op = [ dla_sdp_op =>\n");
	dla_debug_sdp_op(&desc->x1_op);
	dla_debug("x2_op = [ dla_sdp_op =>\n");
	dla_debug_sdp_op(&desc->x2_op);
	dla_debug("y_op = [ dla_sdp_op =>\n");
	dla_debug_sdp_op(&desc->y_op);
}

void
dla_debug_sdp_stats(struct dla_sdp_stat_desc *stat)
{
	dla_debug("*********************************************************\n");
	dla_debug("NVDLA FW STATS: dla_sdp_stat_desc\n");
	dla_debug("---------------------------------------------------------\n");
	dla_debug("nan_input_num = %u\n", stat->nan_input_num);
	dla_debug("inf_input_num = %u\n", stat->inf_input_num);
	dla_debug("nan_output_num = %u\n", stat->nan_output_num);
	dla_debug("wdma_write_stall = %u\n", stat->wdma_write_stall);
	dla_debug("lut_underflow = %u\n", stat->lut_underflow);
	dla_debug("lut_overflow = %u\n", stat->lut_overflow);
	dla_debug("lut_hybrid = %u\n", stat->lut_hybrid);
	dla_debug("lut_le_hit = %u\n", stat->lut_le_hit);
	dla_debug("lut_lo_hit = %u\n", stat->lut_lo_hit);
	dla_debug("saturation_count = %u\n", stat->saturation_count);
	dla_debug("runtime = %u\n", stat->runtime);
}
#endif /* DEBUG_NETWORK_DATA */
129	drivers/nvdla/engine_debug.h	Normal file
@@ -0,0 +1,129 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __FIRMWARE_ENGINE_DEBUG_H_
#define __FIRMWARE_ENGINE_DEBUG_H_

#include <dla_debug.h>
#include <dla_interface.h>

#if DEBUG_NETWORK_DATA
void
dla_debug_op_desc(struct dla_common_op_desc *desc, int32_t roi);
void
dla_debug_network_desc(struct dla_network_desc *network_desc);
void
dla_debug_address_info(struct dla_task *task);
void
dla_debug_bdma_surface_desc(struct dla_bdma_surface_desc *desc, int32_t roi);
void
dla_debug_bdma_op_desc(struct dla_bdma_op_desc *desc, int32_t roi);
void
dla_debug_bdma_stats(struct dla_bdma_stat_desc *stat);
void
dla_debug_conv_surface_desc(struct dla_conv_surface_desc *desc, int32_t roi);
void
dla_debug_conv_op_desc(struct dla_conv_op_desc *desc, int32_t roi);
void
dla_debug_conv_stats(struct dla_conv_stat_desc *stat);
void
dla_debug_sdp_op_desc(struct dla_sdp_op_desc *desc, int32_t roi);
void
dla_debug_sdp_surface_desc(struct dla_sdp_surface_desc *desc, int32_t roi);
void
dla_debug_sdp_stats(struct dla_sdp_stat_desc *stat);
void
dla_debug_pdp_surface_desc(struct dla_pdp_surface_desc *desc, int32_t roi);
void
dla_debug_pdp_op_desc(struct dla_pdp_op_desc *desc, int32_t roi);
void
dla_debug_pdp_stats(struct dla_pdp_stat_desc *stat);
void
dla_debug_cdp_surface_desc(struct dla_cdp_surface_desc *desc, int32_t roi);
void
dla_debug_cdp_op_desc(struct dla_cdp_op_desc *desc, int32_t roi);
void
dla_debug_cdp_stats(struct dla_cdp_stat_desc *stat);
void
dla_debug_rubik_op_desc(struct dla_rubik_op_desc *desc, int32_t roi);
void
dla_debug_rubik_surface_desc(struct dla_rubik_surface_desc *desc, int32_t roi);
void
dla_debug_rubik_stats(struct dla_rubik_stat_desc *stat);
void
dla_debug_lut_params(struct dla_lut_param *lut_param);

#else

static inline void
dla_debug_op_desc(struct dla_common_op_desc *desc, int32_t roi) {}
static inline void
dla_debug_network_desc(struct dla_network_desc *network_desc) {}
static inline void
dla_debug_address_info(struct dla_task *task) {}
static inline void
dla_debug_bdma_surface_desc(struct dla_bdma_surface_desc *desc, int32_t roi) {}
static inline void
dla_debug_bdma_op_desc(struct dla_bdma_op_desc *desc, int32_t roi) {}
static inline void
dla_debug_bdma_stats(struct dla_bdma_stat_desc *stat) {}
static inline void
dla_debug_conv_surface_desc(struct dla_conv_surface_desc *desc, int32_t roi) {}
static inline void
dla_debug_conv_op_desc(struct dla_conv_op_desc *desc, int32_t roi) {}
static inline void
dla_debug_conv_stats(struct dla_conv_stat_desc *stat) {}
static inline void
dla_debug_sdp_op_desc(struct dla_sdp_op_desc *desc, int32_t roi) {}
static inline void
dla_debug_sdp_surface_desc(struct dla_sdp_surface_desc *desc, int32_t roi) {}
static inline void
dla_debug_sdp_stats(struct dla_sdp_stat_desc *stat) {}
static inline void
dla_debug_pdp_surface_desc(struct dla_pdp_surface_desc *desc, int32_t roi) {}
static inline void
dla_debug_pdp_op_desc(struct dla_pdp_op_desc *desc, int32_t roi) {}
static inline void
dla_debug_pdp_stats(struct dla_pdp_stat_desc *stat) {}
static inline void
dla_debug_cdp_surface_desc(struct dla_cdp_surface_desc *desc, int32_t roi) {}
static inline void
dla_debug_cdp_op_desc(struct dla_cdp_op_desc *desc, int32_t roi) {}
static inline void
dla_debug_cdp_stats(struct dla_cdp_stat_desc *stat) {}
static inline void
dla_debug_rubik_op_desc(struct dla_rubik_op_desc *desc, int32_t roi) {}
static inline void
dla_debug_rubik_surface_desc(struct dla_rubik_surface_desc *desc, int32_t roi) {}
static inline void
dla_debug_rubik_stats(struct dla_rubik_stat_desc *stat) {}
static inline void
dla_debug_lut_params(struct dla_lut_param *lut_param) {}

#endif /* DEBUG_NETWORK_DATA */
#endif /* __FIRMWARE_ENGINE_DEBUG_H_ */
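Because the #else branch supplies empty static inline stubs, call sites need no conditional compilation of their own. A minimal sketch (assuming a populated struct dla_engine from dla_engine.h; the function name is invented):

static void dump_task_sketch(struct dla_engine *engine)
{
	/* With DEBUG_NETWORK_DATA == 0, both calls resolve to the empty
	 * inline stubs above and vanish at compile time. Unlike a
	 * #define-to-nothing, the stubs keep full argument type checking
	 * in both configurations. */
	dla_debug_network_desc(engine->network);
	dla_debug_address_info(engine->task);
}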
136	drivers/nvdla/engine_isr.c	Normal file
@@ -0,0 +1,136 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <opendla.h>
#include <dla_debug.h>
#include <dla_engine.h>
#include <dla_interface.h>

#include "dla_engine_internal.h"

int32_t dla_isr_handler(void *engine_data)
{
	uint32_t mask;
	uint32_t reg;
	struct dla_processor *processor = NULL;
	struct dla_processor_group *group;
	struct dla_engine *engine = (struct dla_engine *)engine_data;

	mask = glb_reg_read(S_INTR_MASK);
	reg = glb_reg_read(S_INTR_STATUS);

	dla_trace("Enter: dla_isr_handler, reg:%x, mask:%x\n", reg, mask);
	if (reg & MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS0)) {
		processor = &engine->processors[DLA_OP_CONV];
		group = &processor->groups[0];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS1)) {
		processor = &engine->processors[DLA_OP_CONV];
		group = &processor->groups[1];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, SDP_DONE_STATUS0)) {
		processor = &engine->processors[DLA_OP_SDP];
		group = &processor->groups[0];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, SDP_DONE_STATUS1)) {
		processor = &engine->processors[DLA_OP_SDP];
		group = &processor->groups[1];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, CDP_DONE_STATUS0)) {
		processor = &engine->processors[DLA_OP_CDP];
		group = &processor->groups[0];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, CDP_DONE_STATUS1)) {
		processor = &engine->processors[DLA_OP_CDP];
		group = &processor->groups[1];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, RUBIK_DONE_STATUS0)) {
		processor = &engine->processors[DLA_OP_RUBIK];
		group = &processor->groups[0];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, RUBIK_DONE_STATUS1)) {
		processor = &engine->processors[DLA_OP_RUBIK];
		group = &processor->groups[1];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, PDP_DONE_STATUS0)) {
		processor = &engine->processors[DLA_OP_PDP];
		group = &processor->groups[0];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, PDP_DONE_STATUS1)) {
		processor = &engine->processors[DLA_OP_PDP];
		group = &processor->groups[1];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, BDMA_DONE_STATUS0)) {
		processor = &engine->processors[DLA_OP_BDMA];
		group = &processor->groups[0];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, BDMA_DONE_STATUS1)) {
		processor = &engine->processors[DLA_OP_BDMA];
		group = &processor->groups[1];
		group->events |= (1 << DLA_EVENT_OP_COMPLETED);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_DAT_DONE_STATUS0)) {
		processor = &engine->processors[DLA_OP_CONV];
		group = &processor->groups[0];
		group->events |= (1 << DLA_EVENT_CDMA_DT_DONE);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_DAT_DONE_STATUS1)) {
		processor = &engine->processors[DLA_OP_CONV];
		group = &processor->groups[1];
		group->events |= (1 << DLA_EVENT_CDMA_DT_DONE);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_WT_DONE_STATUS0)) {
		processor = &engine->processors[DLA_OP_CONV];
		group = &processor->groups[0];
		group->events |= (1 << DLA_EVENT_CDMA_WT_DONE);
	}
	if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_WT_DONE_STATUS1)) {
		processor = &engine->processors[DLA_OP_CONV];
		group = &processor->groups[1];
		group->events |= (1 << DLA_EVENT_CDMA_WT_DONE);
	}

	glb_reg_write(S_INTR_STATUS, reg);

	mask = glb_reg_read(S_INTR_MASK);
	reg = glb_reg_read(S_INTR_STATUS);

	dla_trace("Exit: dla_isr_handler, reg:%x, mask:%x\n", reg, mask);
	RETURN(0);
}
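The handler tests every DONE bit, posts the matching event to the owning processor's register group, then writes the status value back to S_INTR_STATUS to acknowledge the serviced bits (write-1-to-clear semantics are assumed here). The same dispatch can be read as a table walk; a hedged sketch, where `isr_map` and `dispatch_sketch` are invented names and only the first two rows are spelled out:

static const struct {
	uint32_t bit;	/* status bit to test */
	uint8_t op;	/* processor owning the event */
	uint8_t group;	/* register group 0 or 1 */
	uint8_t event;	/* DLA_EVENT_* to post */
} isr_map[] = {
	{ MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS0),
			DLA_OP_CONV, 0, DLA_EVENT_OP_COMPLETED },
	{ MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS1),
			DLA_OP_CONV, 1, DLA_EVENT_OP_COMPLETED },
	/* ...one row per status bit handled above... */
};

static void dispatch_sketch(struct dla_engine *engine, uint32_t reg)
{
	uint32_t i;

	for (i = 0; i < sizeof(isr_map) / sizeof(isr_map[0]); i++)
		if (reg & isr_map[i].bit)
			engine->processors[isr_map[i].op]
				.groups[isr_map[i].group].events |=
					(1 << isr_map[i].event);
}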
94	drivers/nvdla/include/dla_debug.h	Normal file
@@ -0,0 +1,94 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __FIRMWARE_DLA_DEBUG_H_
#define __FIRMWARE_DLA_DEBUG_H_

#define STRINGIFY(s) #s
#define DEFER_STRINGIFY(s) STRINGIFY(s)
#define FILELINE DEFER_STRINGIFY(__LINE__)
#define FILENAME DEFER_STRINGIFY(__FILE__)

#define LOG_EVENT_BDMA_SHIFT	0U
#define LOG_EVENT_CONV_SHIFT	4U
#define LOG_EVENT_SDP_SHIFT	8U
#define LOG_EVENT_PDP_SHIFT	12U
#define LOG_EVENT_CDP_SHIFT	16U
#define LOG_EVENT_RBK_SHIFT	20U
#define LOG_EVENT_GROUP_SHIFT	24U
#define LOG_EVENT_ROI_SHIFT	28U

#define LOG_TASK_START			1
#define LOG_TASK_END			2
#define LOG_READ_OP_CONFIG_START	3
#define LOG_READ_OP_CONFIG_END		4
#define LOG_READ_SURF_CONFIG_START	5
#define LOG_READ_SURF_CONFIG_END	6
#define LOG_PROGRAM_START		7
#define LOG_PROGRAM_END			8
#define LOG_OPERATION_START		9
#define LOG_OPERATION_END		10

#define LOG_EVENT(roi, group, processor, event)

/**
 * Used to enable/disable reading stat registers
 */
#define STAT_ENABLE 1

/**
 * Used to print debug network data
 */
#define DEBUG_NETWORK_DATA 0

#define pr_dump_stack(format, ...)
#define dla_trace(format, ...)

#define assert(condition)

#define RETURN(err) { return (err); }

#define DEBUG_ASSERT

#ifdef DEBUG_ASSERT
#define ASSERT_GOTO(_condition, _ret, _err_value, _goto)	\
do {								\
	if (!(_condition)) {					\
		dla_error("Assertion Fail(" FILENAME FILELINE "):" \
				STRINGIFY(_condition));		\
		_ret = _err_value;				\
		goto _goto;					\
	} else {						\
		_ret = 0;					\
	}							\
} while (0)
#else
#define ASSERT_GOTO(_condition, _ret, _err_value, _goto) assert(_condition)
#endif /* DEBUG_ASSERT */

#endif
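ASSERT_GOTO bundles the error assignment and the error-path jump into one statement. A hypothetical call site (the function is invented for illustration, and dla_err.h is assumed to be included for ERR()):

static int32_t check_desc_sketch(struct dla_common_op_desc *desc)
{
	int32_t ret;

	ASSERT_GOTO(desc != NULL, ret, ERR(INVALID_INPUT), fail);

	/* ret is 0 here because the macro's else-branch set it. */
	return ret;
fail:
	return ret;	/* -DLA_ERR_INVALID_INPUT */
}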
94	drivers/nvdla/include/dla_engine.h	Normal file
@@ -0,0 +1,94 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __DLA_ENGINE_H_
#define __DLA_ENGINE_H_

#include <dla_interface.h>
#include <dla_sched.h>

struct dla_processor_group {
	uint8_t id;
	uint8_t rdma_id;
	uint8_t active;
	uint8_t events;
	uint8_t roi_index;
	uint8_t is_rdma_needed;
	uint8_t pending;
	int32_t lut_index;
	uint8_t programming;
	uint64_t start_time;

	struct dla_common_op_desc *op_desc;
	struct dla_common_op_desc *consumers[DLA_OP_NUM];
	struct dla_common_op_desc *fused_parent;
	union dla_operation_container *operation_desc;
	union dla_surface_container *surface_desc;
};

struct dla_processor {
	const char *name;
	uint8_t op_type;
	uint8_t consumer_ptr;
	uint8_t roi_index;
	uint8_t group_status;
	uint8_t rdma_status;
	uint8_t last_group;

	struct dla_common_op_desc *tail_op;
	struct dla_processor_group groups[DLA_NUM_GROUPS];
	union dla_stat_container *stat_data_desc;

	int32_t (*is_ready)(struct dla_processor *processor,
			struct dla_processor_group *group);
	int32_t (*enable)(struct dla_processor_group *group);
	int32_t (*program)(struct dla_processor_group *group);
	void (*set_producer)(int32_t group_id, int32_t rdma_id);
	void (*dump_config)(struct dla_processor_group *group);
	void (*rdma_check)(struct dla_processor_group *group);
	void (*get_stat_data)(struct dla_processor *processor,
			struct dla_processor_group *group);
	void (*dump_stat)(struct dla_processor *processor);
};

struct dla_engine {
	struct dla_task *task;
	struct dla_config *config_data;
	struct dla_network_desc *network;
	struct dla_processor processors[DLA_OP_NUM];

	uint16_t num_proc_hwl;
	int32_t status;
	uint32_t stat_enable;

	void *driver_context;
};

struct dla_engine *dla_get_engine(void);

#endif
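Each engine sub-unit (BDMA, CONV, SDP, PDP, CDP, Rubik) fills one struct dla_processor with its callbacks, so the scheduler can drive any of them through the same interface. A sketch of that generic path; `kickoff_sketch` is an invented name, and the real sequencing (including is_ready() checks and RDMA handling) lives in scheduler.c:

static int32_t kickoff_sketch(struct dla_processor *p,
			      struct dla_processor_group *g)
{
	int32_t err;

	/* Processor-specific behaviour is reached only through the
	 * function pointers, never through type checks on p->op_type. */
	err = p->program(g);
	if (err)
		return err;

	return p->enable(g);
}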
50	drivers/nvdla/include/dla_err.h	Normal file
@@ -0,0 +1,50 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __FIRMWARE_DLA_ERR_H_
#define __FIRMWARE_DLA_ERR_H_

#define ERR(code) -DLA_ERR_##code

#define DLA_ERR_NONE			0
#define DLA_ERR_INVALID_METHOD		1
#define DLA_ERR_INVALID_TASK		2
#define DLA_ERR_INVALID_INPUT		3
#define DLA_ERR_INVALID_FALC_DMA	4
#define DLA_ERR_INVALID_QUEUE		5
#define DLA_ERR_INVALID_PREACTION	6
#define DLA_ERR_INVALID_POSTACTION	7
#define DLA_ERR_NO_MEM			8
#define DLA_ERR_INVALID_DESC_VER	9
#define DLA_ERR_INVALID_ENGINE_ID	10
#define DLA_ERR_INVALID_REGION		11
#define DLA_ERR_PROCESSOR_BUSY		12
#define DLA_ERR_RETRY			13
#define DLA_ERR_TASK_STATUS_MISMATCH	14

#endif
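ERR() pastes the code name and negates the matching constant, so every failure path can return a plain negative int32_t. A hypothetical call site:

static int32_t validate_sketch(const void *ptr)
{
	if (ptr == NULL)
		return ERR(INVALID_INPUT); /* -DLA_ERR_INVALID_INPUT, i.e. -3 */

	return DLA_ERR_NONE;
}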
886	drivers/nvdla/include/dla_interface.h	Normal file
@@ -0,0 +1,886 @@
/*
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __FIRMWARE_DLA_INTERFACE_H_
#define __FIRMWARE_DLA_INTERFACE_H_

#include <nvdla_interface.h>

/**
 * @ingroup Processors
 * @name DLA Processors
 * Processor modules in the DLA engine. Each processor has its
 * own operation, a.k.a. HW layer. A network is formed as a
 * graph of these operations.
 * @{
 */
#define DLA_OP_BDMA	0
#define DLA_OP_CONV	1
#define DLA_OP_SDP	2
#define DLA_OP_PDP	3
#define DLA_OP_CDP	4
#define DLA_OP_RUBIK	5
/** @} */

/**
 * @ingroup Processors
 * @name Maximum number of processors
 * @brief DLA has 6 processors
 * @{
 */
#define DLA_OP_NUM	6
/** @} */

/**
 * @ingroup Processors
 * @name Number of groups
 * @brief Each processor has 2 groups of registers
 * @{
 */
#define DLA_NUM_GROUPS	2
/** @} */

/**
 * Network descriptor
 *
 * Contains all information to execute a network
 *
 * @op_head: Index of first operation of each type in operations list
 * @num_rois: Number of ROIs
 * @num_operations: Number of operations in one list
 * @num_luts: Number of LUTs
 */
struct dla_network_desc {
	int16_t operation_desc_index;
	int16_t surface_desc_index;

	int16_t dependency_graph_index;
	int16_t lut_data_index;

	int16_t roi_array_index;
	int16_t surface_index;

	int16_t stat_list_index;
	int16_t reserved1;

	int16_t op_head[DLA_OP_NUM];

	uint16_t num_rois;
	uint16_t num_operations;

	uint16_t num_luts;
	uint16_t num_addresses;

	int16_t input_layer;
	uint8_t dynamic_roi;
	uint8_t reserved0;
} __packed __aligned(4);

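/*
 * Illustration (field names from this header): the *_index fields above
 * select entries in the task's address list rather than holding
 * pointers, and op_head[] gives the position of the first operation of
 * each type in the operation list, e.g.:
 *
 *	int16_t first_conv = nd->op_head[DLA_OP_CONV];
 *
 * A negative index is assumed here to mean "no operation of this
 * type"; the sentinel value is not defined in this header.
 */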
/**
 * @name Memory types
 * @brief DLA engine can read/write to/from 3 memory types
 * @{
 */
#define DLA_MEM_MC	0	/* External DRAM */
#define DLA_MEM_CV	1	/* CV-SRAM */
#define DLA_MEM_HW	2	/* DLA sub-module */
/** @} */

/**
 * @ingroup Events
 * @name Operation events
 * @brief Different events triggered by an operation
 * @{
 */
#define DLA_EVENT_OP_COMPLETED	1
#define DLA_EVENT_OP_PROGRAMMED	2
#define DLA_EVENT_OP_ENABLED	3
#define DLA_EVENT_CDMA_WT_DONE	4
#define DLA_EVENT_CDMA_DT_DONE	5
/** @} */

struct dla_consumer {
	int16_t index; /* the index of dla_common_op_desc in dep_graph_addr */
	uint8_t event;
	uint8_t res;
} __packed __aligned(4);

struct dla_common_op_desc {
	int16_t index; /* set by ucode */
	int8_t roi_index;
	uint8_t op_type;

	uint8_t dependency_count;
	uint8_t reserved0[3];

	struct dla_consumer consumers[DLA_OP_NUM];
	struct dla_consumer fused_parent;
} __packed __aligned(4);

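/*
 * Illustration of the dependency scheme: when an operation completes,
 * each entry in its consumers[] names a downstream operation and the
 * event to post to it; the scheduler then drops that operation's
 * dependency_count, and at zero the operation becomes runnable.
 * A sketch of the core step (the real bookkeeping lives in
 * scheduler.c, and `ops` and `done` are invented names):
 *
 *	struct dla_consumer *c = &done->consumers[i];
 *
 *	if (c->index >= 0 && c->event == DLA_EVENT_OP_COMPLETED)
 *		ops[c->index].dependency_count--;
 */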
struct dla_roi_array_desc {
	uint32_t array_length;

	uint32_t array_reserved;
} __packed __aligned(4);

struct dla_roi_desc {
	uint32_t left;

	uint32_t top;

	uint32_t right;

	uint32_t bottom;
} __packed __aligned(4);

/**
 * @ingroup BDMA
 * @name Maximum BDMA transfers
 * @brief BDMA supports multiple transfers per operation. This is the
 * maximum number of transfers possible in one operation.
 * @{
 */
#define NUM_MAX_BDMA_OPS	20
/** @} */

struct dla_bdma_transfer_desc {
	int16_t source_address;
	int16_t destination_address;

	uint32_t line_size;

	uint32_t line_repeat;

	uint32_t source_line;

	uint32_t destination_line;

	uint32_t surface_repeat;

	uint32_t source_surface;

	uint32_t destination_surface;
} __packed __aligned(4);

struct dla_bdma_surface_desc {
	uint8_t source_type;
	uint8_t destination_type;
	uint16_t num_transfers;

	struct dla_bdma_transfer_desc transfers[NUM_MAX_BDMA_OPS];
} __packed __aligned(4);

struct dla_bdma_op_desc {
	uint16_t num_transfers;
	uint16_t reserved0;
} __packed __aligned(4);

struct dla_bdma_stat_desc {
	uint32_t read_stall;
	uint32_t write_stall;
	uint32_t runtime;
} __packed __aligned(4);

/**
 * @ingroup Convolution
 * @name Convolution mode
 * @brief Convolution modes supported by DLA
 * @{
 */
#define CONV_MODE_DIRECT	0
#define CONV_MODE_WINOGRAD	1
/** @} */

/**
 * @ingroup Processors
 * @name Precision BPE mapping
 * @brief Precision formats and bytes-per-element (BPE) mapping
 * @{
 */
#define BPE_PRECISION_INT8	1
#define BPE_PRECISION_INT16	2
#define BPE_PRECISION_FP16	2
/** @} */

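/*
 * Illustration: BPE here is bytes per element, so a packed line of
 * `width` elements occupies
 *
 *	line_bytes = width * BPE_PRECISION_INT16;
 *
 * (i.e. width * 2) for INT16/FP16 data, before any hardware alignment
 * padding is applied.
 */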
/**
 * @ingroup Processors
 * @name Precision types
 * @brief Precision formats supported by DLA engine
 * @{
 */
#define PRECISION_INT8	0
#define PRECISION_INT16	1
#define PRECISION_FP16	2
/** @} */

/**
 * @ingroup Processors
 * @name Data formats
 * @brief Data formats supported by DLA engine
 * @{
 */
#define FORMAT_T_R8			0
#define FORMAT_T_R10			1
#define FORMAT_T_R12			2
#define FORMAT_T_R16			3
#define FORMAT_T_R16_I			4
#define FORMAT_T_R16_F			5
#define FORMAT_T_A16B16G16R16		6
#define FORMAT_T_X16B16G16R16		7
#define FORMAT_T_A16B16G16R16_F		8
#define FORMAT_T_A16Y16U16V16		9
#define FORMAT_T_V16U16Y16A16		10
#define FORMAT_T_A16Y16U16V16_F		11
#define FORMAT_T_A8B8G8R8		12
#define FORMAT_T_A8R8G8B8		13
#define FORMAT_T_B8G8R8A8		14
#define FORMAT_T_R8G8B8A8		15
#define FORMAT_T_X8B8G8R8		16
#define FORMAT_T_X8R8G8B8		17
#define FORMAT_T_B8G8R8X8		18
#define FORMAT_T_R8G8B8X8		19
#define FORMAT_T_A2B10G10R10		20
#define FORMAT_T_A2R10G10B10		21
#define FORMAT_T_B10G10R10A2		22
#define FORMAT_T_R10G10B10A2		23
#define FORMAT_T_A2Y10U10V10		24
#define FORMAT_T_V10U10Y10A2		25
#define FORMAT_T_A8Y8U8V8		26
#define FORMAT_T_V8U8Y8A8		27
#define FORMAT_T_Y8___U8V8_N444		28
#define FORMAT_T_Y8___V8U8_N444		29
#define FORMAT_T_Y10___U10V10_N444	30
#define FORMAT_T_Y10___V10U10_N444	31
#define FORMAT_T_Y12___U12V12_N444	32
#define FORMAT_T_Y12___V12U12_N444	33
#define FORMAT_T_Y16___U16V16_N444	34
#define FORMAT_T_Y16___V16U16_N444	35
#define FORMAT_FEATURE			36
/** @} */

/**
 * @ingroup Convolution
 * @name Pixel mapping
 * @brief Pixel mapping formats supported for image input in Convolution
 * @{
 */
#define MAP_PITCH_LINEAR	0
/** @} */

/**
 * @ingroup Convolution
 * @name Weight formats
 * @brief Weight data formats supported in Convolution
 * @{
 */
#define WEIGHT_FORMAT_UNCOMPRESSED	0
#define WEIGHT_FORMAT_COMPRESSED	1
/** @} */

/**
 * @ingroup Convolution
 * @name Mean data format
 * @brief Mean data formats supported in Convolution
 * @{
 */
#define MEAN_FORMAT_DISABLE	0
#define MEAN_FORMAT_ENABLE	1
/** @} */

struct dla_cvt_param {
	int16_t scale;
	uint8_t truncate;
	uint8_t enable;

	int32_t offset;
} __packed __aligned(4);

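/*
 * Illustration: per the NVDLA hardware documentation the converter is
 * understood to apply (this formula is an assumption and is not stated
 * in this header):
 *
 *	out = saturate(((in - offset) * scale) >> truncate)
 *
 * with the whole stage bypassed when enable == 0.
 */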
struct dla_data_cube {
	uint16_t type; /* dla_mem_type */
	int16_t address; /* offset to the actual IOVA in task.address_list */

	uint32_t offset; /* offset within address */
	uint32_t size;

	/* cube dimensions */
	uint16_t width;
	uint16_t height;

	uint16_t channel;
	uint16_t reserved0;

	/* stride information */
	uint32_t line_stride;
	uint32_t surf_stride;

	/* For Rubik only */
	uint32_t plane_stride;
} __packed __aligned(4);

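/*
 * Illustration: resolving a cube to a device address. `address` picks
 * an entry in the task's address list (see the field comment above)
 * and `offset` is added on top:
 *
 *	iova = task->address_list[cube->address] + cube->offset;
 *
 * address_list is assumed to live on the task structure, per that
 * field comment; only type/address/offset are defined here.
 */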
#define PIXEL_OVERRIDE_UINT 0
|
||||
#define PIXEL_OVERRIDE_INT 1
|
||||
|
||||
struct dla_conv_surface_desc {
|
||||
/* Data cube */
|
||||
struct dla_data_cube weight_data;
|
||||
struct dla_data_cube wmb_data;
|
||||
struct dla_data_cube wgs_data;
|
||||
struct dla_data_cube src_data;
|
||||
struct dla_data_cube dst_data;
|
||||
|
||||
/**
|
||||
* u_addr = input_data.source_addr + offset_u
|
||||
* this field should be set when YUV is not interleave format
|
||||
*
|
||||
*/
|
||||
int64_t offset_u;
|
||||
|
||||
/* line stride for 2nd plane, must be 32bytes aligned */
|
||||
uint32_t in_line_uv_stride;
|
||||
} __packed __aligned(4);
|
||||
|
||||
struct dla_conv_op_desc {
|
||||
/* Performance parameters */
|
||||
|
||||
/* dla_conv_mode */
|
||||
uint8_t conv_mode;
|
||||
uint8_t data_reuse;
|
||||
uint8_t weight_reuse;
|
||||
uint8_t skip_data_rls;
|
||||
|
||||
uint8_t skip_weight_rls;
|
||||
uint8_t reserved0;
|
||||
uint16_t entry_per_slice;
|
||||
|
||||
/* dla_data_format */
|
||||
uint8_t data_format;
|
||||
/* dla_pixel_mapping */
|
||||
uint8_t pixel_mapping;
|
||||
/* number of free slices before fetch */
|
||||
uint16_t fetch_grain;
|
||||
|
||||
uint8_t reserved_b[8];
|
||||
|
||||
/* batch_num */
|
||||
uint8_t batch;
|
||||
/* dla_weight_format */
|
||||
uint8_t weight_format;
|
||||
uint8_t data_bank;
|
||||
uint8_t weight_bank;
|
||||
|
||||
/* the offset in bytes of each data cube in a batch */
|
||||
uint32_t batch_stride;
|
||||
|
||||
uint8_t post_extension;
|
||||
uint8_t pixel_override;
|
||||
/* number of slices need to be released */
|
||||
uint16_t release;
|
||||
|
||||
/* The input cube dimension for CSC */
|
||||
uint16_t input_width_csc;
|
||||
uint16_t input_height_csc;
|
||||
|
||||
uint16_t input_channel_csc;
|
||||
uint16_t kernel_width_csc;
|
||||
|
||||
uint16_t kernel_height_csc;
|
||||
uint16_t kernel_channel_csc;
|
||||
|
||||
/* The input cube dimension for CMAC */
|
||||
uint16_t input_width_cmac;
|
||||
uint16_t input_height_cmac;
|
||||
|
||||
/* actual size in bytes */
|
||||
uint32_t bytes_per_kernel;
|
||||
|
||||
/* Algorithm parameters */
|
||||
|
||||
int16_t mean_ry; /* mean value for red in RGB or Y in YUV */
|
||||
int16_t mean_gu; /* mean value for green in RGB or U in YUV */
|
||||
|
||||
int16_t mean_bv; /* mean value for blue in RGB or V in YUV */
|
||||
int16_t mean_ax;
|
||||
|
||||
uint8_t mean_format; /* dla_mean_format */
|
||||
uint8_t conv_stride_x;
|
||||
uint8_t conv_stride_y;
|
||||
uint8_t pad_x_left;
|
||||
|
||||
uint8_t pad_x_right;
|
||||
uint8_t pad_y_top;
|
||||
uint8_t pad_y_bottom;
|
||||
uint8_t dilation_x;
|
||||
|
||||
uint8_t dilation_y;
|
||||
uint8_t reserved2[2];
|
||||
|
||||
/* Precision parameters */
|
||||
uint8_t pra_truncate;
|
||||
|
||||
uint8_t in_precision;
|
||||
/* The output precision from CONV, it's the MAC processing precison */
|
||||
uint8_t out_precision;
|
||||
int16_t pad_val;
|
||||
|
||||
/* input converter parameters */
|
||||
struct dla_cvt_param in_cvt;
|
||||
/* output converter parameters, support truncate only */
|
||||
struct dla_cvt_param out_cvt;
|
||||
|
||||
} __packed __aligned(4);

struct dla_conv_stat_desc {
	uint32_t data_read_stall;
	uint32_t weight_read_stall;
	uint32_t data_read_latency;
	uint32_t weight_read_latency;
	uint32_t saturation_count;
	uint32_t nan_data_num;
	uint32_t nan_weight_num;
	uint32_t inf_data_num;
	uint32_t inf_weight_num;
	uint32_t runtime;
} __packed __aligned(4);

/**
 * @ingroup SDP
 * @name Activation functions
 * @brief Activation functions supported in SDP
 * @{
 */
#define ACTIVATION_NONE 0
#define ACTIVATION_RELU 1
#define ACTIVATION_LUT 2
#define ACTIVATION_PRELU 3
/** @} */

/**
 * @ingroup LUT
 * @name LUT size
 * @brief LUT sizes for the linear and exponential LUTs
 * @{
 */
#define LUT_LINEAR_EXP_TABLE_ENTRY_LOG2 6
#define LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2 8
/** @} */

/**
 * @ingroup LUT
 * @name LUT types
 * @brief DLA supports two types of LUT, linear and exponential
 * @{
 */
#define LUT_LINEAR_EXP_TABLE 0
#define LUT_LINEAR_ONLY_TABLE 1
/** @} */

/**
 * @ingroup LUT
 * @name LUT methods
 * @brief Lookup methods supported by the LUTs, exponential and linear
 * @{
 */
#define LUT_METHOD_EXPONENTIAL 0
#define LUT_METHOD_LINEAR 1
/** @} */

/**
 * @ingroup LUT
 * @name LUT priorities
 * @brief Which table's output to prefer when both are candidates
 * @{
 */
#define LUT_PRI_LINEAR_EXP 0
#define LUT_PRI_LINEAR_ONLY 1
/** @} */

union dla_lut_offset {
	/**
	 * Number to be subtracted in the log domain before looking up
	 * the exponential table. It has the same definition as in
	 * hardware, so input scaling must also be taken into account
	 * when setting this field.
	 */
	int8_t exp_offset;
	/**
	 * Number of bits to right-shift before looking up the
	 * linear table
	 */
	int8_t frac_bits;
	uint16_t reserved0;
};

/**
 * This struct is used to represent floating point values with integers.
 * Suppose we have a floating point number fp_x; it will be represented
 * as:
 *
 * fp_x = scale_int_x >> shifter_x
 *
 * This is very useful for the INT pipeline.
 */
struct dla_float_data {
	int16_t scale;
	int8_t shifter;
	uint8_t reserved0;
} __packed __aligned(4);

/**
 * For the INT pipeline, we use the struct above to represent a floating
 * point number; for the FP16 pipeline, we store the FP16-encoded value
 * in a uint16_t container.
 */
union dla_slope {
	struct dla_float_data data_i;

	uint16_t data_f;
};

struct dla_lut_param {
	/**
	 * The value of ((1<<LUT_LINEAR_EXP_TABLE_ENTRY_LOG2)+1) is 65,
	 * ((1<<LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2)+1) is 257, and int16_t
	 * is 2 bytes, so the two declarations below have a combined size
	 * of 644 bytes.
	 *
	 * NOTE: the combined size of the two declarations below must
	 * always be a multiple of 4.
	 */
	int16_t linear_exp_table[(1<<LUT_LINEAR_EXP_TABLE_ENTRY_LOG2)+1];
	int16_t linear_only_table[(1<<LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2)+1];

	union dla_lut_offset linear_exp_offset;
	union dla_lut_offset linear_only_offset;

	/**
	 * The start and end points of the raw table,
	 * valid when raw_method=LINEAR only
	 */
	uint64_t linear_exp_start;
	uint64_t linear_exp_end;
	uint64_t linear_only_start;
	uint64_t linear_only_end;

	union dla_slope linear_exp_underflow_slope;
	union dla_slope linear_exp_overflow_slope;
	union dla_slope linear_only_underflow_slope;
	union dla_slope linear_only_overflow_slope;

	/**
	 * dla_lut_priority: when both LUTs are hit (or one overflows and
	 * the other underflows), which one is selected as the output
	 */
	uint8_t hybrid_priority;
	uint8_t underflow_priority;
	uint8_t overflow_priority;
	uint8_t method; /* dla_lut_method */
} __packed __aligned(4);
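
As an illustration of how these fields fit together, here is a minimal sketch that programs the 257-entry linear-only table with an identity ramp over a fixed-point input range; the sampling and clamping policy is an assumption for the sketch, not the firmware's canonical conversion:

static void lut_fill_linear_ramp(struct dla_lut_param *lut,
				 int64_t start, int64_t end)
{
	const int n = (1 << LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2) + 1; /* 257 */
	int i;

	for (i = 0; i < n; i++) {
		int64_t x = start + (end - start) * i / (n - 1);

		/* clamp each sample into the int16_t table entry */
		if (x > 32767)
			x = 32767;
		if (x < -32768)
			x = -32768;
		lut->linear_only_table[i] = (int16_t)x;
	}
	lut->linear_only_start = start;
	lut->linear_only_end = end;
	lut->method = LUT_METHOD_LINEAR;
}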

struct dla_sdp_surface_desc {
	/* Data cube */
	/* source input cube, used when SDP works in offline mode */
	struct dla_data_cube src_data;

	/* X1 input cube */
	struct dla_data_cube x1_data;

	/* X2 input cube */
	struct dla_data_cube x2_data;

	/* Y input cube */
	struct dla_data_cube y_data;

	/* Output cube */
	struct dla_data_cube dst_data;
} __packed __aligned(4);

#define SDP_OP_NONE 0
#define SDP_OP_MUL 1
#define SDP_OP_ADD 2
#define SDP_OP_BOTH 3

#define SDP_ALU_OP_MAX 0
#define SDP_ALU_OP_MIN 1
#define SDP_ALU_OP_SUM 2
#define SDP_ALU_OP_EQL 3

#define SDP_OP_PER_LAYER 0
#define SDP_OP_PER_KERNEL 1
#define SDP_OP_PER_POINT 2

struct dla_sdp_cvt {
	struct dla_cvt_param alu_cvt;
	struct dla_cvt_param mul_cvt;
} __packed __aligned(4);

struct dla_sdp_op {
	uint8_t enable;
	uint8_t alu_type; /* dla_sdp_alu_op_type */
	uint8_t type; /* dla_sdp_op_type */
	uint8_t mode; /* dla_sdp_op_mode */

	uint8_t act; /* dla_act_type */
	uint8_t shift_value; /* left shift */
	uint8_t truncate;
	uint8_t precision;

	int32_t alu_operand;
	int32_t mul_operand;

	struct dla_sdp_cvt cvt;
} __packed __aligned(4);

struct dla_sdp_op_desc {
	/* Precision parameters */
	/* dla_precision */
	uint8_t src_precision;
	uint8_t dst_precision;
	int16_t lut_index;

	struct dla_cvt_param out_cvt;

	/* Performance parameters */
	/* dla_conv_mode */
	uint8_t conv_mode;
	uint8_t batch_num;
	uint16_t reserved0;

	uint32_t batch_stride; /* used when batch_num > 1 */

	/* Algorithm parameters */
	struct dla_sdp_op x1_op;
	struct dla_sdp_op x2_op;
	struct dla_sdp_op y_op;
} __packed __aligned(4);

struct dla_sdp_stat_desc {
	uint32_t nan_input_num;
	uint32_t inf_input_num;
	uint32_t nan_output_num;
	uint32_t wdma_write_stall;
	uint32_t lut_underflow;
	uint32_t lut_overflow;
	uint32_t lut_hybrid;
	uint32_t lut_le_hit;
	uint32_t lut_lo_hit;
	uint32_t saturation_count;
	uint32_t runtime;
} __packed __aligned(4);
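
A hedged example of how one of the per-stage descriptors above might be filled in: enabling the X1 stage as a bare ReLU. Whether ReLU may be combined with SDP_OP_NONE exactly like this is an assumption drawn from the field comments, not verified against hardware:

static void sdp_op_relu(struct dla_sdp_op *op)
{
	memset(op, 0, sizeof(*op));	/* assumes linux/string.h is in scope */
	op->enable = 1;
	op->type = SDP_OP_NONE;		/* no ALU/MUL arithmetic */
	op->mode = SDP_OP_PER_LAYER;
	op->act = ACTIVATION_RELU;	/* dla_act_type */
}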

#define POOL_MODE_AVG 0
#define POOL_MODE_MAX 1
#define POOL_MODE_MIN 2

#define POOL_SIZE_1 0
#define POOL_SIZE_2 1
#define POOL_SIZE_3 2
#define POOL_SIZE_4 3
#define POOL_SIZE_5 4
#define POOL_SIZE_6 5
#define POOL_SIZE_7 6
#define POOL_SIZE_8 7

#define PDP_PAD_VAL_NUM 7

struct dla_pdp_surface_desc {
	/* Data cube */
	struct dla_data_cube src_data;

	struct dla_data_cube dst_data;
} __packed __aligned(4);

struct dla_pdp_op_desc {
	/* Performance parameters */
	uint16_t partial_in_width_first;
	uint16_t partial_in_width_mid;

	uint16_t partial_in_width_last;
	uint16_t partial_width_first;

	uint16_t partial_width_mid;
	uint16_t partial_width_last;

	uint8_t split_num;

	/* Algorithm parameters */
	uint8_t pool_mode; /* dla_pool_mode */
	uint8_t pool_width; /* dla_pool_width */
	uint8_t pool_height; /* dla_pool_height */

	uint8_t stride_x;
	uint8_t stride_y;

	/**
	 * The left/right padding sizes;
	 * pad_right may be less than pad_left
	 */
	uint8_t pad_left;
	uint8_t pad_right;

	/* The top/bottom padding sizes */
	uint8_t pad_top;
	uint8_t pad_bottom;

	/* Precision parameters */
	uint8_t precision; /* dla_precision */
	uint8_t reserved0;
	/**
	 * If the input has a non-zero "offset", this value must be set.
	 * There are 7 different padding values; the relationship between
	 * them is:
	 * padding_value[0] = -offset*scaling;
	 * padding_value[1] = 2*padding_value[0]
	 * padding_value[2] = 3*padding_value[0]
	 * ...
	 * The purpose is to spare the ucode from implementing an FP16
	 * multiplier (for FP16 mode).
	 */
	int32_t padding_value[PDP_PAD_VAL_NUM];
} __packed __aligned(4);

struct dla_pdp_stat_desc {
	uint32_t inf_input_num;
	uint32_t nan_input_num;
	uint32_t nan_output_num;
	uint32_t write_stall;
	uint32_t runtime;
} __packed __aligned(4);

struct dla_cdp_surface_desc {
	/* Data cube */
	struct dla_data_cube src_data;

	struct dla_data_cube dst_data;
} __packed __aligned(4);

struct dla_cdp_op_desc {
	/* Precision parameters */

	/* dla_precision */
	uint8_t in_precision;
	uint8_t out_precision;
	int16_t lut_index;

	struct dla_cvt_param in_cvt;
	struct dla_cvt_param out_cvt;

	/* Performance parameters */

	/* Algorithm parameters */
	uint8_t local_size;
	uint8_t bypass_sqsum;
	uint8_t bypass_out_mul;
	uint8_t reserved0;
} __packed __aligned(4);

struct dla_cdp_stat_desc {
	uint32_t nan_input_num;
	uint32_t inf_input_num;
	uint32_t nan_output_num;
	uint32_t write_stall;
	uint32_t lut_uflow;
	uint32_t lut_oflow;
	uint32_t lut_hybrid;
	uint32_t lut_le_hit;
	uint32_t lut_lo_hit;
	uint32_t saturation_count;
	uint32_t runtime;
} __packed __aligned(4);

struct dla_rubik_surface_desc {
	/* Data cube */
	struct dla_data_cube src_data;

	struct dla_data_cube dst_data;
} __packed __aligned(4);

/* rubik mode */
#define RUBIK_MODE_CONTRACT 0
#define RUBIK_MODE_SPLIT 1
#define RUBIK_MODE_MERGE 2

struct dla_rubik_op_desc {
	/* Precision parameters */
	uint8_t mode;
	uint8_t precision;
	uint8_t stride_x;
	uint8_t stride_y;
} __packed __aligned(4);

struct dla_rubik_stat_desc {
	uint32_t read_stall;
	uint32_t write_stall;
	uint32_t runtime;
} __packed __aligned(4);

union dla_surface_container {
	struct dla_bdma_surface_desc bdma_surface;
	struct dla_conv_surface_desc conv_surface;
	struct dla_sdp_surface_desc sdp_surface;
	struct dla_pdp_surface_desc pdp_surface;
	struct dla_cdp_surface_desc cdp_surface;
	struct dla_rubik_surface_desc rubik_surface;
};

union dla_operation_container {
	struct dla_bdma_op_desc bdma_op;
	struct dla_conv_op_desc conv_op;
	struct dla_sdp_op_desc sdp_op;
	struct dla_pdp_op_desc pdp_op;
	struct dla_cdp_op_desc cdp_op;
	struct dla_rubik_op_desc rubik_op;
};

union dla_stat_container {
	struct dla_bdma_stat_desc bdma_stat;
	struct dla_conv_stat_desc conv_stat;
	struct dla_sdp_stat_desc sdp_stat;
	struct dla_pdp_stat_desc pdp_stat;
	struct dla_cdp_stat_desc cdp_stat;
	struct dla_rubik_stat_desc rubik_stat;
};

/**
 * status notifier structure
 *
 * @timestamp: 64-bit timestamp representing the time at which
 * the notifier was written
 * @status_engine: status word captured from the HW engine
 * @subframe: NA
 * @status_task: status word as configured from an action list
 */
struct dla_task_status {
	uint64_t timestamp;

	uint32_t status_engine;

	uint16_t subframe;
	uint16_t status_task;
} __packed __aligned(4);

#endif

74
drivers/nvdla/include/dla_sched.h
Normal file

@@ -0,0 +1,74 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __DLA_SCHED_H_
#define __DLA_SCHED_H_

struct dla_task {
	/* platform specific data to communicate with portability layer */
	void *task_data;
	/* task state */
	uint32_t state;
	/* Task base address */
	uint64_t base;
	/* start address of a list of dla_operation_container */
	uint64_t operation_desc_addr;
	/* start address of a list of dla_surface_container */
	uint64_t surface_desc_addr;
	/* start address of a list of dla_common_op_desc */
	uint64_t dependency_graph_addr;
	/* start address of a list of dla_lut_param */
	uint64_t lut_data_addr;
	/*
	 * start address of a list of dla_roi_desc,
	 * the first one is dla_roi_array_desc;
	 * valid when network.dynamic_roi is true
	 */
	uint64_t roi_array_addr;
	/* start address of a list of dla_surface_container */
	uint64_t surface_addr;
	/* start address of a list of dla_stat_container */
	uint64_t stat_data_addr;
} __packed __aligned(256);

/**
 * @brief Configuration parameters supported by the engine
 *
 * atom_size Memory smallest access size
 * bdma_enable Defines whether bdma is supported
 * rubik_enable Defines whether rubik is supported
 * weight_compress_support Defines whether weight data compression is supported
 */
struct dla_config {
	uint32_t atom_size;
	bool bdma_enable;
	bool rubik_enable;
	bool weight_compress_support;
};

#endif
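The *_addr fields above are base addresses of flat descriptor arrays, so the i-th descriptor of each kind lives at a fixed offset from its base. A small sketch of that arithmetic (the helper name is hypothetical):

/* Hypothetical helper: IOVA of the i-th operation descriptor of a task. */
static inline uint64_t dla_op_desc_addr(const struct dla_task *task,
					uint32_t i)
{
	return task->operation_desc_addr +
	       (uint64_t)i * sizeof(union dla_operation_container);
}
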
327
drivers/nvdla/include/nvdla_interface.h
Normal file

@@ -0,0 +1,327 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __NVDLA_INTERFACE_H_
#define __NVDLA_INTERFACE_H_

#include <linux/types.h>

/**
 * @brief Register driver to firmware
 *
 * Implementation in firmware, called by portability layer
 *
 * This function must be called once during boot to initialize the DLA
 * engine scheduler and register the driver with firmware before
 * submitting any task. Pass a pointer to the driver context in
 * @param driver_context; it is passed back whenever firmware calls a
 * portability layer function. The function also updates the pointer to
 * the engine context, which must be passed in every call into firmware
 * after this point.
 *
 * @param engine_context Pointer to engine specific data
 * @param driver_context Pointer to driver specific data
 *
 * @return 0 on success and negative on error
 */
int32_t dla_register_driver(void **engine_context, void *driver_context);

/**
 * @brief Interrupt handler
 *
 * Implementation in firmware, called by portability layer
 *
 * This function is called when a DLA interrupt is received. The
 * portability layer should register its own handler using the mechanism
 * supported by that platform and call this function from that handler.
 * Calls to this function must be protected by a lock to prevent handling
 * an interrupt while firmware is programming layers in process context.
 *
 * @param engine_context Engine specific data received in dla_register_driver
 *
 * @return 0 on success and negative on error
 */
int32_t dla_isr_handler(void *engine_context);

/**
 * @brief Process events recorded in the interrupt handler
 *
 * Implementation in firmware, called by portability layer
 *
 * The interrupt handler only records events; it does not process them.
 * The portability layer must call this function in thread/process
 * context after the interrupt handler is done.
 *
 * @param engine_context Engine specific data received in dla_register_driver
 * @param task_complete Pointer to a task-complete flag; firmware
 *                      writes 1 to it once all layers are processed.
 *
 * @return 0 on success and negative on error
 *
 */
int32_t dla_process_events(void *engine_context, uint32_t *task_complete);
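
Schematically, the contract above splits work between a hard IRQ that only records events under the lock and a waiting thread that then processes them. The sketch below mirrors the Linux implementation that appears later in this patch (nvdla_core_callbacks.c) and uses types from nvdla_linux.h; it is illustrative, not an additional handler:

static irqreturn_t example_isr(int irq, void *data)
{
	struct nvdla_device *nd = data;
	unsigned long flags;

	spin_lock_irqsave(&nd->nvdla_lock, flags);
	dla_isr_handler(nd->engine_context);	/* record events only */
	complete(&nd->event_notifier);		/* wake the submit thread,
						   which calls dla_process_events */
	spin_unlock_irqrestore(&nd->nvdla_lock, flags);
	return IRQ_HANDLED;
}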

/**
 * @brief Clear task from firmware
 *
 * Implementation in firmware, called by portability layer
 *
 * This function resets the engine scheduler state, including the op
 * descriptor cache, error values, sub-engine status and events, and
 * clears the previous task state from firmware. The portability layer
 * can call it after task completion. Calling it is not mandatory, but
 * doing so ensures a clean state before the next task executes.
 *
 * @param engine_context Engine specific data received in dla_register_driver
 *
 */
void dla_clear_task(void *engine_context);

/**
 * @brief Execute task
 *
 * Implementation in firmware, called by portability layer
 *
 * This function initializes the sub-engines and starts task execution.
 * Further programming and layer scheduling is triggered by events
 * received from hardware.
 *
 * @param engine_context Engine specific data received in dla_register_driver
 * @param task_data Task specific data to be passed when reading task info
 * @param config_data Configuration data to be passed
 *
 * @return 0 on success and negative on error
 *
 */
int32_t dla_execute_task(void *engine_context, void *task_data, void *config_data);

/**
 * @brief Register read
 *
 * Implementation in portability layer, called by firmware
 *
 * Read a DLA HW register. The portability layer is responsible for using
 * the correct base address and for any IO mapping required.
 *
 * @param driver_context Driver specific data received in dla_register_driver
 * @param addr Register offset
 *
 * @return Register value
 *
 */
uint32_t dla_reg_read(void *driver_context, uint32_t addr);

/**
 * @brief Register write
 *
 * Implementation in portability layer, called by firmware
 *
 * Write a DLA HW register. The portability layer is responsible for
 * using the correct base address and for any IO mapping required.
 *
 * @param driver_context Driver specific data received in dla_register_driver
 * @param addr Register offset
 * @param reg Value to write
 *
 */
void dla_reg_write(void *driver_context, uint32_t addr, uint32_t reg);

/**
 * @brief Read data from DMA mapped memory into a local buffer
 *
 * Implementation in portability layer, called by firmware
 *
 * This function reads data from buffers passed by UMD into local memory.
 * Addresses of buffers passed by UMD are shared in the address list, and
 * the network descriptor contains the index in the address list for
 * those buffers. Firmware reads this data from the buffer shared by UMD
 * into a local buffer to consume the information.
 *
 * @param driver_context Driver specific data received in dla_register_driver
 * @param task_data Task specific data received in dla_execute_task
 * @param src Index in address list
 * @param dst Pointer to local memory
 * @param size Size of data to copy
 * @param offset Offset from start of UMD buffer
 *
 * @return 0 on success and negative on error
 *
 */
int32_t dla_data_read(void *driver_context, void *task_data,
		      uint64_t src, void *dst,
		      uint32_t size, uint64_t offset);

/**
 * @brief Write data to DMA mapped memory from a local buffer
 *
 * Implementation in portability layer, called by firmware
 *
 * This function writes data from a local buffer to a buffer passed by
 * UMD. Addresses of buffers passed by UMD are shared in the address
 * list, and the network descriptor contains the index in the address
 * list for those buffers. Firmware writes this data to the buffer
 * shared by UMD from a local buffer to update the information.
 *
 * @param driver_context Driver specific data received in dla_register_driver
 * @param task_data Task specific data received in dla_execute_task
 * @param src Pointer to local memory
 * @param dst Index in address list
 * @param size Size of data to copy
 * @param offset Offset from start of UMD buffer
 *
 * @return 0 on success and negative on error
 *
 */
int32_t dla_data_write(void *driver_context, void *task_data,
		       void *src, uint64_t dst,
		       uint32_t size, uint64_t offset);

/* Destination for DMA buffer */
#define DESTINATION_PROCESSOR 0
#define DESTINATION_DMA 1

/**
 * @brief Read DMA address
 *
 * Implementation in portability layer, called by firmware
 *
 * Some buffers shared by UMD are accessed by the processor responsible
 * for programming the DLA HW: a companion micro-controller in a headed
 * config, or the main CPU in a headless config. Other buffers are
 * accessed by the DLA DMA engines inside the sub-engines. This function
 * must return an address accessible by the destination user, depending
 * on the config.
 *
 * @param driver_context Driver specific data received in dla_register_driver
 * @param task_data Task specific data received in dla_execute_task
 * @param index Index in address list
 * @param dst_ptr Pointer to update with the address
 * @param destination Destination user for the DMA address
 *
 * @return 0 on success and negative on error
 *
 */
int32_t dla_get_dma_address(void *driver_context, void *task_data,
			    int16_t index, void *dst_ptr,
			    uint32_t destination);

/**
 * @brief Read time value in micro-seconds
 *
 * Implementation in portability layer, called by firmware
 *
 * Read system time in micro-seconds
 *
 * @return Time value in micro-seconds
 *
 */
int64_t dla_get_time_us(void);

/**
 * @brief Print debug message
 *
 * Implementation in portability layer, called by firmware
 *
 * Print debug message to console
 *
 * @param str Format string and variable arguments
 *
 */
void dla_debug(const char *str, ...);

/**
 * @brief Print information message
 *
 * Implementation in portability layer, called by firmware
 *
 * Print information message to console
 *
 * @param str Format string and variable arguments
 *
 */
void dla_info(const char *str, ...);

/**
 * @brief Print warning message
 *
 * Implementation in portability layer, called by firmware
 *
 * Print warning message to console
 *
 * @param str Format string and variable arguments
 *
 */
void dla_warn(const char *str, ...);

/**
 * @brief Print error message
 *
 * Implementation in portability layer, called by firmware
 *
 * Print error message to console
 *
 * @param str Format string and variable arguments
 *
 */
void dla_error(const char *str, ...);

/**
 * @brief Fill memory region
 *
 * Implementation in portability layer, called by firmware
 *
 * Fills the first len bytes of the memory area pointed to by src
 * with the constant byte ch.
 *
 * @param src Memory area address
 * @param ch Byte to fill
 * @param len Length of memory area to fill
 *
 * @return Memory area address
 *
 */
void *dla_memset(void *src, int ch, uint64_t len);

/**
 * @brief Copy memory
 *
 * Implementation in portability layer, called by firmware
 *
 * Copies len bytes from memory area src to memory area dest.
 *
 * @param dest Destination memory area address
 * @param src Source memory area address
 * @param len Length of memory area to copy
 *
 * @return Destination memory area address
 *
 */
void *dla_memcpy(void *dest, const void *src, uint64_t len);

#endif
138
drivers/nvdla/include/nvdla_ioctl.h
Normal file

@@ -0,0 +1,138 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __LINUX_NVDLA_IOCTL_H
#define __LINUX_NVDLA_IOCTL_H

#include <linux/ioctl.h>
#include <linux/types.h>

#if !defined(__KERNEL__)
#define __user
#endif

/**
 * struct nvdla_mem_handle structure for memory handles
 *
 * @handle handle to DMA buffer allocated in userspace
 * @reserved Reserved for padding
 * @offset offset in bytes from start address of buffer
 *
 */
struct nvdla_mem_handle {
	__u32 handle;
	__u32 reserved;
	__u64 offset;
};

/**
 * struct nvdla_ioctl_submit_task structure for single task information
 *
 * @num_addresses total number of entries in address_list
 * @timeout task timeout; NVDLA_NO_TIMEOUT means wait indefinitely
 * @address_list pointer to array of struct nvdla_mem_handle
 *
 */
struct nvdla_ioctl_submit_task {
#define NVDLA_MAX_BUFFERS_PER_TASK (6144)
	__u32 num_addresses;
#define NVDLA_NO_TIMEOUT (0xffffffff)
	__u32 timeout;
	__u64 address_list;
};

/**
 * struct nvdla_submit_args structure for task submit
 *
 * @tasks pointer to array of struct nvdla_ioctl_submit_task
 * @num_tasks number of entries in tasks
 * @flags flags for task submit, no flags defined yet
 * @version version of task structure
 *
 */
struct nvdla_submit_args {
	__u64 tasks;
	__u16 num_tasks;
#define NVDLA_MAX_TASKS_PER_SUBMIT 24
#define NVDLA_SUBMIT_FLAGS_ATOMIC (1 << 0)
	__u16 flags;
	__u32 version;
};

/**
 * struct nvdla_gem_create_args for allocating DMA buffer through GEM
 *
 * @handle handle updated by kernel after allocation
 * @flags implementation specific flags
 * @size size of buffer to allocate
 */
struct nvdla_gem_create_args {
	__u32 handle;
	__u32 flags;
	__u64 size;
};

/**
 * struct nvdla_gem_map_offset_args for mapping DMA buffer
 *
 * @handle handle of the buffer
 * @reserved reserved for padding
 * @offset offset updated by kernel after mapping
 */
struct nvdla_gem_map_offset_args {
	__u32 handle;
	__u32 reserved;
	__u64 offset;
};

/**
 * struct nvdla_gem_destroy_args for destroying DMA buffer
 *
 * @handle handle of the buffer
 */
struct nvdla_gem_destroy_args {
	__u32 handle;
};

#define DRM_NVDLA_SUBMIT 0x00
#define DRM_NVDLA_GEM_CREATE 0x01
#define DRM_NVDLA_GEM_MMAP 0x02
#define DRM_NVDLA_GEM_DESTROY 0x03

#define DRM_IOCTL_NVDLA_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NVDLA_SUBMIT, struct nvdla_submit_args)
#define DRM_IOCTL_NVDLA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_NVDLA_GEM_CREATE, struct nvdla_gem_create_args)
#define DRM_IOCTL_NVDLA_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_NVDLA_GEM_MMAP, struct nvdla_gem_map_offset_args)
#define DRM_IOCTL_NVDLA_GEM_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_NVDLA_GEM_DESTROY, struct nvdla_gem_destroy_args)

#endif
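From userspace, these ioctls are issued against the driver's DRM node. A hedged sketch of allocating and mapping a buffer with them; the device node path, DRM header location, and helper name are assumptions for illustration:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/drm.h>		/* DRM_COMMAND_BASE, DRM_IOWR */
#include "nvdla_ioctl.h"

/* fd is an open DRM node, e.g. open("/dev/dri/renderD128", O_RDWR) */
int nvdla_alloc_and_map(int fd, __u64 size, void **va, __u32 *handle)
{
	struct nvdla_gem_create_args create = { .size = size };
	struct nvdla_gem_map_offset_args map = { 0 };

	if (ioctl(fd, DRM_IOCTL_NVDLA_GEM_CREATE, &create))
		return -1;
	map.handle = create.handle;
	if (ioctl(fd, DRM_IOCTL_NVDLA_GEM_MMAP, &map))
		return -1;
	/* the kernel-provided fake offset selects this buffer in mmap() */
	*va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, map.offset);
	if (*va == MAP_FAILED)
		return -1;
	*handle = create.handle;
	return 0;
}
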
153
drivers/nvdla/include/nvdla_linux.h
Normal file

@@ -0,0 +1,153 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __LINUX_NVDLA_LINUX_H_
#define __LINUX_NVDLA_LINUX_H_

#include <linux/completion.h>
#include <linux/device.h>
#include <linux/kref.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>

/**
 * @brief Task information submitted from user space
 *
 * ref Reference count for task
 * num_addresses Number of addresses in address list
 * nvdla_dev Pointer to NVDLA device
 * address_list Address list
 * file DRM file instance
 */
struct nvdla_task {
	struct kref ref;
	uint32_t num_addresses;
	struct nvdla_device *nvdla_dev;
	struct nvdla_mem_handle *address_list;
	struct drm_file *file;
};

/**
 * @brief Configuration parameters supported by the engine
 *
 * atom_size Memory smallest access size
 * bdma_enable Defines whether bdma is supported
 * rubik_enable Defines whether rubik is supported
 * weight_compress_support Defines whether weight data compression is supported
 */
struct nvdla_config {
	uint32_t atom_size;
	bool bdma_enable;
	bool rubik_enable;
	bool weight_compress_support;
};

/**
 * @brief NVDLA device
 *
 * irq Interrupt number associated with this device
 * ref Reference count for device
 * base IO mapped base address for device
 * nvdla_lock Spinlock used for synchronization
 * drm DRM device instance
 * task Pointer to task in execution
 * config_data Pointer to the configuration data
 * pdev Pointer to NVDLA platform device
 * event_notifier Completion object used to wait for events from HW
 * engine_context Private data passed from engine in dla_engine_init
 */
struct nvdla_device {
	int32_t irq;
	struct kref ref;
	void __iomem *base;
	spinlock_t nvdla_lock;
	struct drm_device *drm;
	struct nvdla_task *task;
	struct nvdla_config *config_data;
	struct platform_device *pdev;
	struct completion event_notifier;

	void *engine_context;
};

/**
 * @brief Submit task
 *
 * This function submits a task to the NVDLA engine.
 *
 * @param nvdla_dev Pointer to NVDLA device
 * @param task Pointer to task
 * @return 0 on success and negative on error
 *
 */
int32_t nvdla_task_submit(struct nvdla_device *nvdla_dev, struct nvdla_task *task);

/**
 * @brief Get DMA address
 *
 * This function gets the DMA address for a given fd
 *
 * @param dev DRM device instance
 * @param file DRM file instance
 * @param fd File descriptor for DMA buffer
 * @param addr Pointer to update with the DMA address
 * @return 0 on success and negative on error
 *
 */
int32_t nvdla_gem_dma_addr(struct drm_device *dev, struct drm_file *file,
			   uint32_t fd, dma_addr_t *addr);

/**
 * @brief DRM probe
 *
 * Probe function for DRM device
 *
 * @param nvdla_dev NVDLA device pointer
 * @return 0 on success and negative on error
 *
 */
int32_t nvdla_drm_probe(struct nvdla_device *nvdla_dev);

/**
 * @brief DRM remove
 *
 * Remove function for DRM device
 *
 * @param nvdla_dev NVDLA device pointer
 *
 */
void nvdla_drm_remove(struct nvdla_device *nvdla_dev);

#endif
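A hypothetical caller sketch of how the pieces above fit together on the submit path (the real submit code lives in nvdla_gem.c, which this view truncates; the helper name and the stack-allocated task are illustrative only):

static int example_submit(struct nvdla_device *nvdla_dev,
			  struct drm_file *file,
			  struct nvdla_mem_handle *list, uint32_t n)
{
	struct nvdla_task task = {
		.num_addresses = n,
		.nvdla_dev = nvdla_dev,
		.address_list = list,
		.file = file,
	};

	kref_init(&task.ref);	/* take the initial reference */
	/* blocks until firmware reports task completion */
	return nvdla_task_submit(nvdla_dev, &task);
}
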
40
drivers/nvdla/include/opendla.h
Normal file

@@ -0,0 +1,40 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __OPENDLA_H_
#define __OPENDLA_H_

#define DLA_2_CONFIG

#ifdef DLA_2_CONFIG
#include <opendla_small.h>
#else
#include <opendla_initial.h>
#endif

#endif
16743
drivers/nvdla/include/opendla_initial.h
Normal file
File diff suppressed because it is too large

6433
drivers/nvdla/include/opendla_small.h
Normal file
File diff suppressed because it is too large

448
drivers/nvdla/nvdla_core_callbacks.c
Normal file

@@ -0,0 +1,448 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdarg.h>

#include <linux/dma-buf.h>
#include <linux/dma-mapping.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/uaccess.h>

#include <nvdla_interface.h>
#include <nvdla_linux.h>
#include <nvdla_ioctl.h>
#include <opendla.h>

static struct nvdla_config nvdla_config_os_initial = {
	.atom_size = 32,
	.bdma_enable = true,
	.rubik_enable = true,
	.weight_compress_support = true,
};

static struct nvdla_config nvdla_config_small = {
	.atom_size = 8,
	.bdma_enable = false,
	.rubik_enable = false,
	.weight_compress_support = false,
};

static struct nvdla_config nvdla_config_large = {
	.atom_size = 32,
	.bdma_enable = false,
	.rubik_enable = false,
	.weight_compress_support = false,
};

void dla_debug(const char *str, ...)
{
	va_list args;

	va_start(args, str);
	vprintk(pr_fmt(str), args);
	va_end(args);
}

void dla_info(const char *str, ...)
{
	va_list args;

	va_start(args, str);
	vprintk(str, args);
	va_end(args);
}

void dla_warn(const char *str, ...)
{
	va_list args;

	va_start(args, str);
	vprintk(str, args);
	va_end(args);
}

void dla_error(const char *str, ...)
{
	va_list args;

	va_start(args, str);
	vprintk(str, args);
	va_end(args);
}

void *dla_memset(void *src, int ch, uint64_t len)
{
	memset(src, ch, len);
	return src;
}

void *dla_memcpy(void *dest, const void *src, uint64_t len)
{
	return memcpy(dest, src, len);
}

int64_t dla_get_time_us(void)
{
	return ktime_get_ns() / NSEC_PER_USEC;
}

void dla_reg_write(void *driver_context, uint32_t addr, uint32_t reg)
{
	struct nvdla_device *nvdla_dev =
			(struct nvdla_device *)driver_context;

	if (!nvdla_dev)
		return;

	writel(reg, nvdla_dev->base + addr);
}

uint32_t dla_reg_read(void *driver_context, uint32_t addr)
{
	struct nvdla_device *nvdla_dev =
			(struct nvdla_device *)driver_context;

	if (!nvdla_dev)
		return 0;

	return readl(nvdla_dev->base + addr);
}

static irqreturn_t nvdla_engine_isr(int32_t irq, void *data)
{
	unsigned long flags;
	struct nvdla_device *nvdla_dev = (struct nvdla_device *)data;

	if (!nvdla_dev)
		return IRQ_NONE;

	spin_lock_irqsave(&nvdla_dev->nvdla_lock, flags);
	dla_isr_handler(nvdla_dev->engine_context);
	complete(&nvdla_dev->event_notifier);
	spin_unlock_irqrestore(&nvdla_dev->nvdla_lock, flags);

	return IRQ_HANDLED;
}

static int32_t dla_read_dma_address(void *driver_context, void *task_data,
				    int16_t index, void *dst)
{
	int32_t ret = 0;
	struct nvdla_mem_handle *handles;
	dma_addr_t *phys_addr = (dma_addr_t *)(dst);
	struct nvdla_device *nvdla_dev =
			(struct nvdla_device *)driver_context;
	struct nvdla_task *task = (struct nvdla_task *)task_data;

	/* index == num_addresses is already out of bounds */
	if (index == -1 || index >= task->num_addresses)
		return -EINVAL;

	handles = (struct nvdla_mem_handle *)task->address_list;
	ret = nvdla_gem_dma_addr(nvdla_dev->drm, task->file,
				 handles[index].handle,
				 phys_addr);

	/* Add offset to IOVA address */
	*phys_addr = *phys_addr + handles[index].offset;

	return ret;
}

static int32_t dla_read_cpu_address(void *driver_context, void *task_data,
				    int16_t index, void *dst)
{
	uint64_t *temp = (uint64_t *)dst;
	struct nvdla_task *task = (struct nvdla_task *)task_data;

	/* index == num_addresses is already out of bounds */
	if (index == -1 || index >= task->num_addresses)
		return -EINVAL;

	*temp = (uint64_t)index;
	return 0;
}

int32_t dla_get_dma_address(void *driver_context, void *task_data,
			    int16_t index, void *dst_ptr,
			    uint32_t destination)
{
	int32_t ret = 0;

	if (destination == DESTINATION_PROCESSOR) {
		ret = dla_read_cpu_address(driver_context, task_data,
					   index, dst_ptr);
	} else if (destination == DESTINATION_DMA) {
		ret = dla_read_dma_address(driver_context, task_data,
					   index, dst_ptr);
	} else {
		ret = -EINVAL;
	}

	return ret;
}

int32_t dla_data_write(void *driver_context, void *task_data,
		       void *src, uint64_t dst,
		       uint32_t size, uint64_t offset)
{
	int32_t ret;
	void *ptr = NULL;
	struct dma_buf *buf;
	struct dma_buf_map map;
	struct nvdla_mem_handle *handles;
	struct nvdla_task *task = (struct nvdla_task *)task_data;
	uint64_t dma_addr = 0;

	dla_get_dma_address(driver_context, task_data, dst,
			    (void *)&dma_addr, DESTINATION_DMA);
	handles = task->address_list;
	buf = dma_buf_get(handles[dst].handle);
	if (IS_ERR(buf)) {
		pr_err("%s: Failed get dma_buf for handle=%d\n", __func__,
		       handles[dst].handle);
		return -EFAULT;
	}

	ret = dma_buf_begin_cpu_access(buf, DMA_BIDIRECTIONAL);
	if (ret)
		goto put_dma_buf;

	ret = dma_buf_vmap(buf, &map);
	ptr = ret ? NULL : map.vaddr;
	if (!ptr) {
		pr_err("%s: Failed to vmap dma_buf for handle=%d\n", __func__,
		       handles[dst].handle);
		ret = -ENOMEM;
		goto end_cpu_access;
	}

	memcpy((void *)((uint8_t *)ptr + offset), src, size);
	dma_buf_vunmap(buf, &map);

end_cpu_access:
	dma_buf_end_cpu_access(buf, DMA_BIDIRECTIONAL);

put_dma_buf:
	dma_buf_put(buf);

	return ret;
}

int32_t dla_data_read(void *driver_context, void *task_data,
		      uint64_t src, void *dst,
		      uint32_t size, uint64_t offset)
{
	int32_t ret;
	void *ptr = NULL;
	struct dma_buf *buf;
	struct dma_buf_map map;
	struct nvdla_mem_handle *handles;
	struct nvdla_task *task = (struct nvdla_task *)task_data;
	uint64_t dma_addr = 0;

	dla_get_dma_address(driver_context, task_data, src,
			    (void *)&dma_addr, DESTINATION_DMA);
	handles = task->address_list;

	buf = dma_buf_get(handles[src].handle);
	if (IS_ERR(buf)) {
		pr_err("%s: Failed get dma_buf for handle=%d\n", __func__,
		       handles[src].handle);
		return -EFAULT;
	}

	ret = dma_buf_begin_cpu_access(buf, DMA_BIDIRECTIONAL);
	if (ret)
		goto put_dma_buf;

	ret = dma_buf_vmap(buf, &map);
	ptr = ret ? NULL : map.vaddr;
	if (!ptr) {
		pr_err("%s: Failed to vmap dma_buf for handle=%d\n", __func__,
		       handles[src].handle);
		ret = -ENOMEM;
		goto end_cpu_access;
	}

	memcpy(dst, (void *)(((uint8_t *)ptr) + offset), size);
	dma_buf_vunmap(buf, &map);

end_cpu_access:
	dma_buf_end_cpu_access(buf, DMA_BIDIRECTIONAL);

put_dma_buf:
	dma_buf_put(buf);

	return ret;
}

int32_t nvdla_task_submit(struct nvdla_device *nvdla_dev, struct nvdla_task *task)
{
	int32_t err = 0;
	uint32_t task_complete = 0;

	nvdla_dev->task = task;

	err = dla_execute_task(nvdla_dev->engine_context, (void *)task,
			       nvdla_dev->config_data);
	if (err) {
		pr_err("Task execution failed\n");
		return err;
	}

	pr_debug("Wait for task complete\n");

	while (1) {
		unsigned long flags;

		wait_for_completion(&nvdla_dev->event_notifier);

		spin_lock_irqsave(&nvdla_dev->nvdla_lock, flags);

		err = dla_process_events(nvdla_dev->engine_context,
					 &task_complete);

		spin_unlock_irqrestore(&nvdla_dev->nvdla_lock, flags);

		if (err || task_complete)
			break;
	}

	pr_debug("Task complete\n");
	dla_clear_task(nvdla_dev->engine_context);

	return err;
}

/* driver probe and init */
static const struct of_device_id nvdla_of_match[] = {
	{
		.compatible = "nvidia,nvdla_os_initial",
		.data = &nvdla_config_os_initial,
	},
	{
		.compatible = "nvidia,nv_small",
		.data = &nvdla_config_small,
	},
	{
		.compatible = "nvidia,nv_large",
		.data = &nvdla_config_large,
	},
	{ },
};

static int32_t nvdla_probe(struct platform_device *pdev)
{
	int32_t err = 0;
	struct resource *res;
	struct nvdla_device *nvdla_dev;
	struct device *dev = &pdev->dev;
	const struct of_device_id *match;

	if (!pdev->dev.of_node)
		return -EINVAL;

	match = of_match_device(nvdla_of_match, &pdev->dev);
	if (!match) {
		pr_err("Missing DT entry!\n");
		return -EINVAL;
	}

	pr_err("Probe NVDLA config %s\n", match->compatible);

	nvdla_dev = devm_kzalloc(dev, sizeof(*nvdla_dev), GFP_KERNEL);
	if (!nvdla_dev)
		return -ENOMEM;

	platform_set_drvdata(pdev, nvdla_dev);
	nvdla_dev->pdev = pdev;
	nvdla_dev->config_data = (struct nvdla_config *)match->data;

	init_completion(&nvdla_dev->event_notifier);

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	nvdla_dev->base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(nvdla_dev->base))
		return PTR_ERR(nvdla_dev->base);

	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
	if (!res) {
		dev_err(&pdev->dev, "no irq resource\n");
		return -EINVAL;
	}
	nvdla_dev->irq = res->start;

	err = devm_request_irq(&pdev->dev, nvdla_dev->irq,
			       nvdla_engine_isr, 0,
			       dev_name(&pdev->dev), nvdla_dev);
	if (err)
		return err;

	dla_register_driver(&nvdla_dev->engine_context, (void *)nvdla_dev);
	dla_clear_task(nvdla_dev->engine_context);

	err = nvdla_drm_probe(nvdla_dev);
	if (err)
		dev_err(&pdev->dev, "failed to register drm device\n");

	return err;
}

static int32_t __exit nvdla_remove(struct platform_device *pdev)
{
	struct nvdla_device *nvdla_dev = dev_get_drvdata(&pdev->dev);

	nvdla_drm_remove(nvdla_dev);

	return 0;
}

static struct platform_driver nvdla_driver = {
	.probe = nvdla_probe,
	.remove = __exit_p(nvdla_remove),
	.driver = {
		.owner = THIS_MODULE,
		.name = "NVDLA",
		.of_match_table = of_match_ptr(nvdla_of_match),
	},
};
module_platform_driver(nvdla_driver);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("NVIDIA");
MODULE_DESCRIPTION("Nvidia Deep Learning Accelerator driver");

475
drivers/nvdla/nvdla_gem.c
Normal file

@@ -0,0 +1,475 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_cma_helper.h>

#include <linux/dma-buf.h>
#include <linux/dma-mapping.h>
#include <linux/dma-map-ops.h>
#include <linux/of.h>
#include <linux/of_address.h>

#include <nvdla_linux.h>
#include <nvdla_ioctl.h>
#include <opendla.h>

#define to_nvdla_obj(x) container_of(x, struct nvdla_gem_object, object)

struct nvdla_gem_object {
	struct drm_gem_object object;

	void *kvaddr;
	dma_addr_t dma_addr;
	unsigned long dma_attrs;
};

static int32_t nvdla_fill_task_desc(struct nvdla_ioctl_submit_task *local_task,
				    struct nvdla_task *task)
{
	struct nvdla_mem_handle *handles;

	/* update task desc fields */
	task->num_addresses = local_task->num_addresses;

	/* kcalloc guards the count * size multiplication against overflow */
	handles = kcalloc(local_task->num_addresses,
			  sizeof(struct nvdla_mem_handle), GFP_KERNEL);
	if (handles == NULL)
		return -ENOMEM;

	/* get user addresses list */
	if (copy_from_user(handles,
			   (void __user *)local_task->address_list,
			   (task->num_addresses *
				sizeof(struct nvdla_mem_handle)))) {
		pr_err("failed to copy address list from user ptr\n");
		kfree(handles);
		return -EFAULT;
	}

	task->address_list = handles;
	return 0;
}

static int32_t nvdla_submit(struct drm_device *drm, void *arg,
			    struct drm_file *file)
{
	int32_t err = 0;
	struct nvdla_task *task;
	struct nvdla_ioctl_submit_task local_task;
	struct nvdla_ioctl_submit_task __user *user_task;
	struct nvdla_device *nvdla_dev = dev_get_drvdata(drm->dev);
	struct nvdla_submit_args *args =
			(struct nvdla_submit_args *)arg;

	user_task = (struct nvdla_ioctl_submit_task __user *)
			(uintptr_t)args->tasks;
	if (!user_task)
		return -EINVAL;

	/* IOCTL copy descriptors */
	if (copy_from_user(&local_task, (void __user *)user_task,
			   (sizeof(*user_task))))
		return -EFAULT;

	task = kzalloc(sizeof(*task), GFP_KERNEL);
	if (task == NULL)
		return -ENOMEM;

	nvdla_dev->task = task;
	kref_init(&task->ref);
	task->nvdla_dev = nvdla_dev;
	task->file = file;

	/* update task desc fields */
	err = nvdla_fill_task_desc(&local_task, task);
	if (err)
		goto free_task_desc;

	err = nvdla_task_submit(nvdla_dev, task);

	kfree(task->address_list);

free_task_desc:
	kfree(task);
	return err;
}

static int32_t nvdla_gem_alloc(struct nvdla_gem_object *nobj)
{
	struct drm_gem_object *dobj = &nobj->object;
	struct drm_device *drm = dobj->dev;

	nobj->dma_attrs = DMA_ATTR_WRITE_COMBINE;

	nobj->kvaddr = dma_alloc_attrs(drm->dev, dobj->size, &nobj->dma_addr,
				       GFP_KERNEL, nobj->dma_attrs);
	if (!nobj->kvaddr)
		return -ENOMEM;

	return 0;
}

static void nvdla_gem_free(struct nvdla_gem_object *nobj)
{
	struct drm_gem_object *dobj = &nobj->object;
	struct drm_device *drm = dobj->dev;

	dma_free_attrs(drm->dev, dobj->size, nobj->kvaddr, nobj->dma_addr,
		       nobj->dma_attrs);
}

static void nvdla_gem_free_object(struct drm_gem_object *dobj)
{
	struct nvdla_gem_object *nobj;

	drm_gem_free_mmap_offset(dobj);

	nobj = to_nvdla_obj(dobj);
	nvdla_gem_free(nobj);

	kfree(nobj);
}

static struct sg_table
*nvdla_drm_gem_prime_get_sg_table(struct drm_gem_object *dobj)
{
	int32_t ret;
	struct sg_table *sgt;
	struct drm_device *drm = dobj->dev;
	struct nvdla_gem_object *nobj = to_nvdla_obj(dobj);

	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
	if (!sgt)
		return ERR_PTR(-ENOMEM);

	ret = dma_get_sgtable_attrs(drm->dev, sgt, nobj->kvaddr,
				    nobj->dma_addr, dobj->size,
				    nobj->dma_attrs);
	if (ret) {
		DRM_ERROR("failed to allocate sgt, %d\n", ret);
		kfree(sgt);
		return ERR_PTR(ret);
	}

	return sgt;
}

static int nvdla_drm_gem_prime_vmap(struct drm_gem_object *obj,
				    struct dma_buf_map *map)
{
	struct nvdla_gem_object *nobj = to_nvdla_obj(obj);

	if (nobj->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING)
		return -ENOMEM;

	dma_buf_map_set_vaddr(map, nobj->kvaddr);
	return 0;
}

static void nvdla_drm_gem_prime_vunmap(struct drm_gem_object *obj,
				       struct dma_buf_map *map)
{
	/* Nothing to do */
}

static const struct drm_gem_object_funcs nvdla_gem_funcs = {
	.free = nvdla_gem_free_object,
	.export = drm_gem_prime_export,
	.vmap = nvdla_drm_gem_prime_vmap,
	.vunmap = nvdla_drm_gem_prime_vunmap,
	.get_sg_table = nvdla_drm_gem_prime_get_sg_table,
	.vm_ops = &drm_gem_cma_vm_ops,
};

static struct nvdla_gem_object *
nvdla_gem_create_object(struct drm_device *drm, uint32_t size)
{
	int32_t ret;
	struct drm_gem_object *dobj;
	struct nvdla_gem_object *nobj;

	size = round_up(size, PAGE_SIZE);

	nobj = kzalloc(sizeof(*nobj), GFP_KERNEL);
	if (!nobj)
		return ERR_PTR(-ENOMEM);

	dobj = &nobj->object;
	dobj->funcs = &nvdla_gem_funcs;

	drm_gem_private_object_init(drm, dobj, size);

	ret = nvdla_gem_alloc(nobj);
	if (ret)
		goto free_nvdla_obj;

	return nobj;

free_nvdla_obj:
	kfree(nobj);
	return ERR_PTR(ret);
}

static struct nvdla_gem_object *
nvdla_gem_create_with_handle(struct drm_file *file_priv,
			     struct drm_device *drm, uint32_t size,
			     uint32_t *handle)
{
	int32_t ret;
	struct drm_gem_object *dobj;
	struct nvdla_gem_object *nobj;

	nobj = nvdla_gem_create_object(drm, size);
	if (IS_ERR(nobj))
		return ERR_CAST(nobj);

	dobj = &nobj->object;

	ret = drm_gem_handle_create(file_priv, dobj, handle);
	if (ret)
		goto free_drm_object;

	drm_gem_object_put(dobj);

	return nobj;

free_drm_object:
	nvdla_gem_free_object(dobj);

	return ERR_PTR(ret);
}

static int32_t nvdla_gem_create(struct drm_device *drm, void *data,
				struct drm_file *file)
{
	struct nvdla_gem_object *nobj;
	struct nvdla_gem_create_args *args = data;

	nobj = nvdla_gem_create_with_handle(file, drm, args->size,
					    &args->handle);
	if (IS_ERR(nobj))
		return PTR_ERR(nobj);

	return 0;
}

static int32_t nvdla_drm_gem_object_mmap(struct drm_gem_object *dobj,
					 struct vm_area_struct *vma)
{
	int32_t ret;
	struct nvdla_gem_object *nobj = to_nvdla_obj(dobj);
	struct drm_device *drm = dobj->dev;

	/* the fake mmap offset was only needed to look up the object;
	 * map the buffer from its start */
	vma->vm_flags &= ~VM_PFNMAP;
	vma->vm_pgoff = 0;

	ret = dma_mmap_attrs(drm->dev, vma, nobj->kvaddr, nobj->dma_addr,
			     dobj->size, nobj->dma_attrs);
	if (ret)
		drm_gem_vm_close(vma);

	return ret;
}

static int32_t nvdla_drm_gem_mmap_buf(struct drm_gem_object *obj,
				      struct vm_area_struct *vma)
{
	int32_t ret;

	ret = drm_gem_mmap_obj(obj, obj->size, vma);
	if (ret)
		return ret;

	return nvdla_drm_gem_object_mmap(obj, vma);
}

static int32_t nvdla_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
{
	int32_t ret;
	struct drm_gem_object *obj;

	ret = drm_gem_mmap(filp, vma);
	if (ret)
		return ret;

	obj = vma->vm_private_data;

	return nvdla_drm_gem_object_mmap(obj, vma);
}

/* Resolve a dma-buf fd to the DMA address of the backing GEM buffer. */
int32_t nvdla_gem_dma_addr(struct drm_device *dev, struct drm_file *file,
			   uint32_t fd, dma_addr_t *addr)
{
	int32_t ret;
	uint32_t handle;
	struct nvdla_gem_object *nobj;
	struct drm_gem_object *dobj;

	ret = drm_gem_prime_fd_to_handle(dev, file, fd, &handle);
	if (ret)
		return ret;

	dobj = drm_gem_object_lookup(file, handle);
	if (!dobj)
		return -EINVAL;

	nobj = to_nvdla_obj(dobj);
	*addr = nobj->dma_addr;

	drm_gem_object_put(dobj);

	return 0;
}
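
This helper bridges the dma-buf fds carried in a task's address list to bus addresses the engine can use. Presumably the engine-facing callbacks in nvdla_core_callbacks.c wrap it per entry; a hypothetical sketch (the wrapper name and call chain are assumptions; only nvdla_gem_dma_addr() and the struct fields are from this commit):

	/* Illustration only: resolve address-list entry 'index' of a task to
	 * the DMA address of its backing buffer, treating the stored handle
	 * as a dma-buf fd (as dla_data_read/write above do). */
	static int32_t example_get_dma_address(struct nvdla_task *task,
					       int32_t index, dma_addr_t *addr)
	{
		struct nvdla_mem_handle *entry = &task->address_list[index];

		return nvdla_gem_dma_addr(task->nvdla_dev->drm, task->file,
					  entry->handle, addr);
	}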

static int32_t nvdla_gem_map_offset(struct drm_device *drm, void *data,
				    struct drm_file *file)
{
	int32_t ret;
	struct drm_gem_object *dobj;
	struct nvdla_gem_map_offset_args *args = data;

	dobj = drm_gem_object_lookup(file, args->handle);
	if (!dobj)
		return -EINVAL;

	ret = drm_gem_create_mmap_offset(dobj);
	if (ret)
		goto out;

	args->offset = drm_vma_node_offset_addr(&dobj->vma_node);

out:
	drm_gem_object_put(dobj);

	/* propagate the mmap-offset error instead of returning 0 blindly */
	return ret;
}

static int32_t nvdla_gem_destroy(struct drm_device *drm, void *data,
				 struct drm_file *file)
{
	struct nvdla_gem_destroy_args *args = data;

	return drm_gem_handle_delete(file, args->handle);
}

static const struct file_operations nvdla_drm_fops = {
	.owner = THIS_MODULE,
	.open = drm_open,
	.release = drm_release,
	.unlocked_ioctl = drm_ioctl,
	.mmap = nvdla_drm_gem_mmap,
	.poll = drm_poll,
	.read = drm_read,
#ifdef CONFIG_COMPAT
	.compat_ioctl = drm_compat_ioctl,
#endif
	.llseek = noop_llseek,
};

static const struct drm_ioctl_desc nvdla_drm_ioctls[] = {
	DRM_IOCTL_DEF_DRV(NVDLA_SUBMIT, nvdla_submit, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(NVDLA_GEM_CREATE, nvdla_gem_create, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(NVDLA_GEM_MMAP, nvdla_gem_map_offset, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(NVDLA_GEM_DESTROY, nvdla_gem_destroy, DRM_RENDER_ALLOW),
};
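
For orientation, a hypothetical user-space caller of the GEM-create ioctl could look like the sketch below. The request macro and the argument layout are assumptions inferred from the handlers above; the authoritative definitions live in the uapi header nvdla_ioctl.h, which is part of this commit but not of this hunk:

	#include <stdint.h>
	#include <string.h>
	#include <fcntl.h>
	#include <sys/ioctl.h>

	/* Assumed uapi layout, mirroring args->size / args->handle above. */
	struct nvdla_gem_create_args {
		uint64_t size;
		uint32_t handle;
	};

	/* DRM_IOCTL_NVDLA_GEM_CREATE is assumed to come from nvdla_ioctl.h. */
	int example_alloc_buffer(uint64_t size, uint32_t *handle)
	{
		struct nvdla_gem_create_args args;
		int fd, err;

		fd = open("/dev/dri/renderD128", O_RDWR);	/* render node */
		if (fd < 0)
			return -1;

		memset(&args, 0, sizeof(args));
		args.size = size;

		err = ioctl(fd, DRM_IOCTL_NVDLA_GEM_CREATE, &args);
		if (err == 0)
			*handle = args.handle;

		/* note: the handle stays valid only while fd is open */
		return err;
	}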

static struct drm_driver nvdla_drm_driver = {
	.driver_features = DRIVER_GEM | DRIVER_RENDER,

	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
	.gem_prime_import = drm_gem_prime_import,
	.gem_prime_mmap = nvdla_drm_gem_mmap_buf,

	.ioctls = nvdla_drm_ioctls,
	.num_ioctls = ARRAY_SIZE(nvdla_drm_ioctls),
	.fops = &nvdla_drm_fops,

	.name = "nvdla",
	.desc = "NVDLA driver",
	.date = "20171017",
	.major = 0,
	.minor = 0,
	.patchlevel = 0,
};

int32_t nvdla_drm_probe(struct nvdla_device *nvdla_dev)
{
	int32_t err;
	struct drm_device *drm;
	struct drm_driver *driver = &nvdla_drm_driver;
	struct resource res_cma;
	struct device_node *node;

	drm = drm_dev_alloc(driver, &nvdla_dev->pdev->dev);
	if (IS_ERR(drm))
		return PTR_ERR(drm);

	nvdla_dev->drm = drm;

	err = drm_dev_register(drm, 0);
	if (err < 0)
		goto unref;

	/**
	 * TODO Register separate driver for memory and use DT node to
	 * read memory range
	 */
	node = of_parse_phandle(drm->dev->of_node, "memory-region", 0);
	if (node) {
		dev_info(drm->dev, "Using memory-region from DT\n");
		of_address_to_resource(node, 0, &res_cma);
		err = dma_declare_coherent_memory(drm->dev, res_cma.start,
						  res_cma.start,
						  resource_size(&res_cma));
	} else {
		/* no memory-region given: fall back to a fixed 1 GiB
		 * carveout at 0xC0000000 */
		dev_info(drm->dev, "NVDLA using the default mem.\n");
		err = dma_declare_coherent_memory(drm->dev, 0xC0000000,
						  0xC0000000, 0x40000000);
	}
	if (err < 0)
		goto unref;

	return 0;

unref:
	drm_dev_put(drm);
	return err;
}

void nvdla_drm_remove(struct nvdla_device *nvdla_dev)
{
	drm_dev_unregister(nvdla_dev->drm);
	drm_dev_put(nvdla_dev->drm);
}

528
drivers/nvdla/pdp.c
Normal file

@@ -0,0 +1,528 @@
/*
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <opendla.h>
#include <dla_debug.h>
#include <dla_err.h>
#include <dla_interface.h>

#include "common.h"
#include "dla_engine_internal.h"
#include "engine_debug.h"

#define MAX_SPLIT_NUM 64
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a[0])))
#endif

static const uint8_t map_ram[] = {
	FIELD_ENUM(PDP_RDMA_D_SRC_RAM_CFG_0, SRC_RAM_TYPE, MC),
	FIELD_ENUM(PDP_RDMA_D_SRC_RAM_CFG_0, SRC_RAM_TYPE, CV),
};

static const uint8_t map_pool[] = {
	FIELD_ENUM(PDP_D_OPERATION_MODE_CFG_0,
			POOLING_METHOD, POOLING_METHOD_AVERAGE),
	FIELD_ENUM(PDP_D_OPERATION_MODE_CFG_0,
			POOLING_METHOD, POOLING_METHOD_MAX),
	FIELD_ENUM(PDP_D_OPERATION_MODE_CFG_0,
			POOLING_METHOD, POOLING_METHOD_MIN),
};

static const uint8_t map_precision[] = {
	FIELD_ENUM(PDP_D_DATA_FORMAT_0, INPUT_DATA, INT8),
	FIELD_ENUM(PDP_D_DATA_FORMAT_0, INPUT_DATA, INT16),
	FIELD_ENUM(PDP_D_DATA_FORMAT_0, INPUT_DATA, FP16),
};

static const uint8_t map_pool_kernel[] = {
	FIELD_ENUM(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_WIDTH, KERNEL_WIDTH_1),
	FIELD_ENUM(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_WIDTH, KERNEL_WIDTH_2),
	FIELD_ENUM(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_WIDTH, KERNEL_WIDTH_3),
	FIELD_ENUM(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_WIDTH, KERNEL_WIDTH_4),
	FIELD_ENUM(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_WIDTH, KERNEL_WIDTH_5),
	FIELD_ENUM(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_WIDTH, KERNEL_WIDTH_6),
	FIELD_ENUM(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_WIDTH, KERNEL_WIDTH_7),
	FIELD_ENUM(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_WIDTH, KERNEL_WIDTH_8),
};

/* The reciprocal of kernel width: 1/1, 1/2, 1/3, ... */
static const uint32_t recip_kernel_size[2][8] = {
	/*
	 * INT8/16
	 * 1      1/2     1/3     1/4     1/5     1/6     1/7     1/8
	 */
	{0x10000, 0x8000, 0x5555, 0x4000, 0x3333, 0x2aaa, 0x2492, 0x2000},
	/* row used when precision == PRECISION_FP16, see the
	 * D_RECIP_KERNEL_WIDTH/HEIGHT writes below */
	{0x7c00, 0x7800, 0x7555, 0x7400, 0x7266, 0x7155, 0x7092, 0x7000},
};
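
For the integer row these are 1/k scaled by 2^16, so the hardware can replace the average-pooling divide with a multiply and a shift. A quick self-contained sanity check of the 1/3 entry (a sketch, not driver code):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t sum = 300;	/* e.g. sum over a 3-element window */
		uint32_t recip = 0x5555;	/* ~(1/3) << 16, from the table */

		/* multiply-shift approximation of sum / 3; prints 99, just
		 * under the exact 100 because 0x5555/2^16 < 1/3 */
		printf("%u\n", (uint32_t)(((uint64_t)sum * recip) >> 16));
		return 0;
	}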

#if STAT_ENABLE
void
dla_pdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group)
{
	uint64_t end_time = 0;
	struct dla_pdp_stat_desc *pdp_stat;

	pdp_stat = &processor->stat_data_desc->pdp_stat;

	end_time = dla_get_time_us();

	pdp_stat->write_stall = pdp_reg_read(D_PERF_WRITE_STALL);
	pdp_stat->runtime = (uint32_t)(end_time - group->start_time);
}

void
dla_pdp_dump_stat(struct dla_processor *processor)
{
	struct dla_pdp_stat_desc *pdp_stat;

	pdp_stat = &processor->stat_data_desc->pdp_stat;

	dla_debug_pdp_stats(pdp_stat);
}
#endif /* STAT_ENABLE */

static uint32_t
get_fly_mode(uint8_t type)
{
	uint32_t val;

	val = type == DLA_MEM_HW ?
		FIELD_ENUM(PDP_D_OPERATION_MODE_CFG_0,
			   FLYING_MODE, ON_FLYING) :
		FIELD_ENUM(PDP_D_OPERATION_MODE_CFG_0,
			   FLYING_MODE, OFF_FLYING);

	return val;
}

void
dla_pdp_set_producer(int32_t group_id, int32_t rdma_group_id)
{
	uint32_t reg;

	dla_trace("Enter: %s", __func__);

	dla_debug("group id %d rdma id %d\n", group_id, rdma_group_id);

	reg = group_id << SHIFT(PDP_S_POINTER_0, PRODUCER);
	pdp_reg_write(S_POINTER, reg);

	reg = rdma_group_id << SHIFT(PDP_RDMA_S_POINTER_0, PRODUCER);
	pdp_rdma_reg_write(S_POINTER, reg);

	dla_trace("Exit: %s", __func__);
}
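
Background for the S_POINTER writes: NVDLA sub-units are double-buffered, with two shadow register groups per unit, so the scheduler can program the next operation into one group while the other executes; the PRODUCER field selects which group subsequent CFG writes target. A trivial sketch of the resulting ping-pong (illustration only, not driver code):

	/* With two hardware register groups, the scheduler simply
	 * alternates the producer between group 0 and group 1. */
	static int32_t next_group_id(int32_t group_id)
	{
		return group_id ^ 1;
	}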

int
dla_pdp_enable(struct dla_processor_group *group)
{
	int32_t ret = 0;
	uint32_t reg;
	struct dla_engine *engine = dla_get_engine();

	dla_trace("Enter: %s", __func__);

	if (!group) {
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	if (engine->stat_enable == (uint32_t)1) {
		reg = FIELD_ENUM(PDP_D_PERF_ENABLE_0, DMA_EN, ENABLE);
		pdp_reg_write(D_PERF_ENABLE, reg);
		group->start_time = dla_get_time_us();
	}

	dla_debug("rdma needed %u\n", group->is_rdma_needed);

	/**
	 * enable all sub-modules
	 */
	if (group->is_rdma_needed) {
		reg = FIELD_ENUM(PDP_RDMA_D_OP_ENABLE_0, OP_EN, ENABLE);
		pdp_rdma_reg_write(D_OP_ENABLE, reg);
	}
	reg = FIELD_ENUM(PDP_D_OP_ENABLE_0, OP_EN, ENABLE);
	pdp_reg_write(D_OP_ENABLE, reg);

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}

void
dla_pdp_rdma_check(struct dla_processor_group *group)
{
	struct dla_pdp_surface_desc *pdp_surface;

	pdp_surface = &group->surface_desc->pdp_surface;

	group->is_rdma_needed = 0;

	if (pdp_surface->src_data.type != DLA_MEM_HW)
		group->is_rdma_needed = 1;
}

static int
validate_strides(uint8_t stride_x, uint8_t stride_y)
{
	int32_t ret = 0;

	if (stride_x < 1 || stride_y < 1 || stride_x > 8 || stride_y > 8) {
		dla_error("Invalid Stride (x[%d], y[%d])\n", stride_x, stride_y);
		ret = ERR(INVALID_INPUT);
	}

	RETURN(ret);
}

static int
validate_pdp_configs(struct dla_processor_group *group)
{
	int32_t ret = 0;
	struct dla_pdp_op_desc *pdp_op;
	struct dla_pdp_surface_desc *pdp_surface;

	dla_trace("Enter: %s", __func__);

	pdp_op = &group->operation_desc->pdp_op;
	pdp_surface = &group->surface_desc->pdp_surface;

	if (pdp_surface->dst_data.type == DLA_MEM_HW) {
		dla_error("Destination buffer for PDP has to be either MC or CV\n");
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	ret = validate_data_cube(pdp_surface->src_data, pdp_surface->dst_data,
				 DLA_MEM_HW);
	if (ret)
		goto exit;

	ret = validate_precision(pdp_op->precision, ARRAY_SIZE(map_precision));
	if (ret)
		goto exit;

	ret = validate_strides(pdp_op->stride_x, pdp_op->stride_y);
	if (ret)
		goto exit;

	if (pdp_op->split_num > MAX_SPLIT_NUM) {
		dla_error("Invalid split_num: %u\n", pdp_op->split_num);
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	if (pdp_op->pool_width >= ARRAY_SIZE(map_pool_kernel)) {
		dla_error("Invalid pool_width: %u\n", pdp_op->pool_width);
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	if (pdp_op->pool_height >= ARRAY_SIZE(map_pool_kernel)) {
		dla_error("Invalid pool_height: %u\n", pdp_op->pool_height);
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	if (pdp_op->pool_mode >= ARRAY_SIZE(map_pool)) {
		dla_error("Invalid pool_mode: %u\n", pdp_op->pool_mode);
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}

static int
processor_pdp_program(struct dla_processor_group *group)
{
	int32_t ret = 0;
	uint32_t reg, high, low;
	uint64_t input_address = 0;
	uint64_t output_address = 0;
	struct dla_engine *engine = dla_get_engine();
	struct dla_pdp_op_desc *pdp_op;
	struct dla_pdp_surface_desc *pdp_surface;

	dla_trace("Enter: %s", __func__);

	pdp_op = &group->operation_desc->pdp_op;
	pdp_surface = &group->surface_desc->pdp_surface;

	ret = validate_pdp_configs(group);
	if (ret)
		goto exit;

	ret = dla_read_input_address(&pdp_surface->src_data,
				     &input_address,
				     group->op_desc->index,
				     group->roi_index,
				     1);
	if (ret)
		goto exit;

	if (pdp_surface->dst_data.address != -1)
		dla_get_dma_cube_address(engine->driver_context,
					 engine->task->task_data,
					 pdp_surface->dst_data.address,
					 pdp_surface->dst_data.offset,
					 (void *)&output_address,
					 DESTINATION_DMA);

	if (pdp_surface->src_data.type != DLA_MEM_HW) {
		/* PDP RDMA */
		pdp_rdma_reg_write(D_DATA_CUBE_IN_WIDTH,
				   pdp_surface->src_data.width - 1);
		pdp_rdma_reg_write(D_DATA_CUBE_IN_HEIGHT,
				   pdp_surface->src_data.height - 1);
		pdp_rdma_reg_write(D_DATA_CUBE_IN_CHANNEL,
				   pdp_surface->src_data.channel - 1);

		high = HIGH32BITS(input_address);
		low = LOW32BITS(input_address);
		pdp_rdma_reg_write(D_SRC_BASE_ADDR_HIGH, high);
		pdp_rdma_reg_write(D_SRC_BASE_ADDR_LOW, low);
		pdp_rdma_reg_write(D_SRC_LINE_STRIDE,
				   pdp_surface->src_data.line_stride);
		pdp_rdma_reg_write(D_SRC_SURFACE_STRIDE,
				   pdp_surface->src_data.surf_stride);

		reg = (map_precision[pdp_op->precision]
			<< SHIFT(PDP_RDMA_D_DATA_FORMAT_0, INPUT_DATA));
		pdp_rdma_reg_write(D_DATA_FORMAT, reg);

		reg = map_ram[pdp_surface->src_data.type]
			<< SHIFT(PDP_RDMA_D_SRC_RAM_CFG_0, SRC_RAM_TYPE);
		pdp_rdma_reg_write(D_SRC_RAM_CFG, reg);

		reg = ((pdp_op->split_num - 1)
			<< SHIFT(PDP_RDMA_D_OPERATION_MODE_CFG_0, SPLIT_NUM));
		pdp_rdma_reg_write(D_OPERATION_MODE_CFG, reg);

		reg = (map_pool_kernel[pdp_op->pool_width]
			<< SHIFT(PDP_RDMA_D_POOLING_KERNEL_CFG_0,
				 KERNEL_WIDTH)) |
		      ((pdp_op->stride_x - 1)
			<< SHIFT(PDP_RDMA_D_POOLING_KERNEL_CFG_0,
				 KERNEL_STRIDE_WIDTH));
		pdp_rdma_reg_write(D_POOLING_KERNEL_CFG, reg);

		reg = (pdp_op->pad_left
			<< SHIFT(PDP_RDMA_D_POOLING_PADDING_CFG_0, PAD_WIDTH));
		pdp_rdma_reg_write(D_POOLING_PADDING_CFG, reg);

		reg = ((pdp_op->partial_in_width_first == 0 ? 0 :
				pdp_op->partial_in_width_first - 1)
			<< SHIFT(PDP_RDMA_D_PARTIAL_WIDTH_IN_0,
				 PARTIAL_WIDTH_IN_FIRST)) |
		      ((pdp_op->partial_in_width_mid == 0 ? 0 :
				pdp_op->partial_in_width_mid - 1)
			<< SHIFT(PDP_RDMA_D_PARTIAL_WIDTH_IN_0,
				 PARTIAL_WIDTH_IN_MID)) |
		      ((pdp_op->partial_in_width_last == 0 ? 0 :
				pdp_op->partial_in_width_last - 1)
			<< SHIFT(PDP_RDMA_D_PARTIAL_WIDTH_IN_0,
				 PARTIAL_WIDTH_IN_LAST));
		pdp_rdma_reg_write(D_PARTIAL_WIDTH_IN, reg);
	} else {
		ASSERT_GOTO(pdp_op->split_num == 1, ret,
			    ERR(INVALID_INPUT), exit);
	}

	reg = ((pdp_surface->src_data.width - 1)
		<< SHIFT(PDP_D_DATA_CUBE_IN_WIDTH_0, CUBE_IN_WIDTH));
	pdp_reg_write(D_DATA_CUBE_IN_WIDTH, reg);

	reg = ((pdp_surface->src_data.height - 1)
		<< SHIFT(PDP_D_DATA_CUBE_IN_HEIGHT_0, CUBE_IN_HEIGHT));
	pdp_reg_write(D_DATA_CUBE_IN_HEIGHT, reg);

	reg = ((pdp_surface->src_data.channel - 1)
		<< SHIFT(PDP_D_DATA_CUBE_IN_CHANNEL_0, CUBE_IN_CHANNEL));
	pdp_reg_write(D_DATA_CUBE_IN_CHANNEL, reg);

	reg = ((pdp_surface->dst_data.width - 1)
		<< SHIFT(PDP_D_DATA_CUBE_OUT_WIDTH_0, CUBE_OUT_WIDTH));
	pdp_reg_write(D_DATA_CUBE_OUT_WIDTH, reg);

	reg = ((pdp_surface->dst_data.height - 1)
		<< SHIFT(PDP_D_DATA_CUBE_OUT_HEIGHT_0, CUBE_OUT_HEIGHT));
	pdp_reg_write(D_DATA_CUBE_OUT_HEIGHT, reg);

	reg = ((pdp_surface->dst_data.channel - 1)
		<< SHIFT(PDP_D_DATA_CUBE_OUT_CHANNEL_0, CUBE_OUT_CHANNEL));
	pdp_reg_write(D_DATA_CUBE_OUT_CHANNEL, reg);

	reg = (map_pool[pdp_op->pool_mode]
		<< SHIFT(PDP_D_OPERATION_MODE_CFG_0, POOLING_METHOD)) |
	      (get_fly_mode(pdp_surface->src_data.type)
		<< SHIFT(PDP_D_OPERATION_MODE_CFG_0, FLYING_MODE)) |
	      ((pdp_op->split_num - 1)
		<< SHIFT(PDP_D_OPERATION_MODE_CFG_0, SPLIT_NUM));
	pdp_reg_write(D_OPERATION_MODE_CFG, reg);

	reg = ((pdp_op->partial_in_width_first == 0 ? 0 :
			pdp_op->partial_in_width_first - 1)
		<< SHIFT(PDP_D_PARTIAL_WIDTH_IN_0, PARTIAL_WIDTH_IN_FIRST)) |
	      ((pdp_op->partial_in_width_mid == 0 ? 0 :
			pdp_op->partial_in_width_mid - 1)
		<< SHIFT(PDP_D_PARTIAL_WIDTH_IN_0, PARTIAL_WIDTH_IN_MID)) |
	      ((pdp_op->partial_in_width_last == 0 ? 0 :
			pdp_op->partial_in_width_last - 1)
		<< SHIFT(PDP_D_PARTIAL_WIDTH_IN_0, PARTIAL_WIDTH_IN_LAST));
	pdp_reg_write(D_PARTIAL_WIDTH_IN, reg);

	reg = ((pdp_op->partial_width_first == 0 ? 0 :
			pdp_op->partial_width_first - 1)
		<< SHIFT(PDP_D_PARTIAL_WIDTH_OUT_0, PARTIAL_WIDTH_OUT_FIRST)) |
	      ((pdp_op->partial_width_mid == 0 ? 0 :
			pdp_op->partial_width_mid - 1)
		<< SHIFT(PDP_D_PARTIAL_WIDTH_OUT_0, PARTIAL_WIDTH_OUT_MID)) |
	      ((pdp_op->partial_width_last == 0 ? 0 :
			pdp_op->partial_width_last - 1)
		<< SHIFT(PDP_D_PARTIAL_WIDTH_OUT_0, PARTIAL_WIDTH_OUT_LAST));
	pdp_reg_write(D_PARTIAL_WIDTH_OUT, reg);

	reg = (map_pool_kernel[pdp_op->pool_width]
		<< SHIFT(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_WIDTH)) |
	      (map_pool_kernel[pdp_op->pool_height]
		<< SHIFT(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_HEIGHT)) |
	      ((pdp_op->stride_x - 1)
		<< SHIFT(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_STRIDE_WIDTH)) |
	      ((pdp_op->stride_y - 1)
		<< SHIFT(PDP_D_POOLING_KERNEL_CFG_0, KERNEL_STRIDE_HEIGHT));
	pdp_reg_write(D_POOLING_KERNEL_CFG, reg);

	pdp_reg_write(D_RECIP_KERNEL_WIDTH,
		      recip_kernel_size[pdp_op->precision ==
				PRECISION_FP16][pdp_op->pool_width]);
	pdp_reg_write(D_RECIP_KERNEL_HEIGHT,
		      recip_kernel_size[pdp_op->precision ==
				PRECISION_FP16][pdp_op->pool_height]);

	reg = (pdp_op->pad_left
		<< SHIFT(PDP_D_POOLING_PADDING_CFG_0, PAD_LEFT)) |
	      (pdp_op->pad_right
		<< SHIFT(PDP_D_POOLING_PADDING_CFG_0, PAD_RIGHT)) |
	      (pdp_op->pad_top
		<< SHIFT(PDP_D_POOLING_PADDING_CFG_0, PAD_TOP)) |
	      (pdp_op->pad_bottom
		<< SHIFT(PDP_D_POOLING_PADDING_CFG_0, PAD_BOTTOM));
	if (pdp_op->precision == PRECISION_FP16) {
		int32_t i;

		for (i = 0; i < 7; i++)
			ASSERT_GOTO(pdp_op->padding_value[i] == 0, ret,
				    ERR(INVALID_INPUT), exit);
	}

	pdp_reg_write(D_POOLING_PADDING_CFG, reg);
	pdp_reg_write(D_POOLING_PADDING_VALUE_1_CFG, pdp_op->padding_value[0]);
	pdp_reg_write(D_POOLING_PADDING_VALUE_2_CFG, pdp_op->padding_value[1]);
	pdp_reg_write(D_POOLING_PADDING_VALUE_3_CFG, pdp_op->padding_value[2]);
	pdp_reg_write(D_POOLING_PADDING_VALUE_4_CFG, pdp_op->padding_value[3]);
	pdp_reg_write(D_POOLING_PADDING_VALUE_5_CFG, pdp_op->padding_value[4]);
	pdp_reg_write(D_POOLING_PADDING_VALUE_6_CFG, pdp_op->padding_value[5]);
	pdp_reg_write(D_POOLING_PADDING_VALUE_7_CFG, pdp_op->padding_value[6]);

	if (pdp_surface->src_data.type != DLA_MEM_HW) {
		pdp_reg_write(D_SRC_LINE_STRIDE,
			      pdp_surface->src_data.line_stride);
		pdp_reg_write(D_SRC_SURFACE_STRIDE,
			      pdp_surface->src_data.surf_stride);
	}

	high = HIGH32BITS(output_address);
	low = LOW32BITS(output_address);
	pdp_reg_write(D_DST_BASE_ADDR_LOW, low);
	pdp_reg_write(D_DST_BASE_ADDR_HIGH, high);

	pdp_reg_write(D_DST_LINE_STRIDE, pdp_surface->dst_data.line_stride);
	pdp_reg_write(D_DST_SURFACE_STRIDE, pdp_surface->dst_data.surf_stride);

	reg = (map_ram[pdp_surface->dst_data.type]
		<< SHIFT(PDP_D_DST_RAM_CFG_0, DST_RAM_TYPE));
	pdp_reg_write(D_DST_RAM_CFG, reg);

	reg = (map_precision[pdp_op->precision]
		<< SHIFT(PDP_D_DATA_FORMAT_0, INPUT_DATA));
	pdp_reg_write(D_DATA_FORMAT, reg);

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}

int
dla_pdp_is_ready(struct dla_processor *processor,
		 struct dla_processor_group *group)
{
	return 1;
}

void
dla_pdp_dump_config(struct dla_processor_group *group)
{
	struct dla_pdp_op_desc *pdp_op;
	struct dla_pdp_surface_desc *pdp_surface;

	pdp_surface = &group->surface_desc->pdp_surface;
	pdp_op = &group->operation_desc->pdp_op;

	dla_debug_pdp_surface_desc(pdp_surface, group->roi_index);
	dla_debug_pdp_op_desc(pdp_op, group->roi_index);
}

int
dla_pdp_program(struct dla_processor_group *group)
{
	int32_t ret;

	dla_trace("Enter: %s", __func__);

	if (!group) {
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	dla_enable_intr(MASK(GLB_S_INTR_MASK_0, PDP_DONE_MASK1) |
			MASK(GLB_S_INTR_MASK_0, PDP_DONE_MASK0));

	ret = processor_pdp_program(group);
	if (ret)
		goto exit;

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}

292
drivers/nvdla/rubik.c
Normal file

@@ -0,0 +1,292 @@
/*
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <opendla.h>
#include <dla_debug.h>
#include <dla_err.h>
#include <dla_interface.h>

#include "common.h"
#include "dla_engine_internal.h"
#include "engine_debug.h"

static uint8_t map_rubik_mode[] = {
	FIELD_ENUM(RBK_D_MISC_CFG_0, RUBIK_MODE, CONTRACT),
	FIELD_ENUM(RBK_D_MISC_CFG_0, RUBIK_MODE, SPLIT),
	FIELD_ENUM(RBK_D_MISC_CFG_0, RUBIK_MODE, MERGE),
};

static uint8_t map_ram_type[] = {
	FIELD_ENUM(RBK_D_DAIN_RAM_TYPE_0, DATAIN_RAM_TYPE, MCIF),
	FIELD_ENUM(RBK_D_DAIN_RAM_TYPE_0, DATAIN_RAM_TYPE, CVIF),
};

static uint8_t map_precision[] = {
	FIELD_ENUM(RBK_D_MISC_CFG_0, IN_PRECISION, INT8),
	FIELD_ENUM(RBK_D_MISC_CFG_0, IN_PRECISION, INT16),
	FIELD_ENUM(RBK_D_MISC_CFG_0, IN_PRECISION, FP16),
};

static uint8_t map_bpe[] = {
	BPE_PRECISION_INT8,
	BPE_PRECISION_INT16,
	BPE_PRECISION_FP16,
};

#if STAT_ENABLE
void
dla_rubik_stat_data(struct dla_processor *processor,
		    struct dla_processor_group *group)
{
	uint64_t end_time = 0;
	struct dla_rubik_stat_desc *rubik_stat;

	rubik_stat = &processor->stat_data_desc->rubik_stat;

	end_time = dla_get_time_us();

	rubik_stat->read_stall = rubik_reg_read(D_PERF_READ_STALL);
	rubik_stat->write_stall = rubik_reg_read(D_PERF_WRITE_STALL);
	rubik_stat->runtime = (uint32_t)(end_time - group->start_time);
}

void
dla_rubik_dump_stat(struct dla_processor *processor)
{
	struct dla_rubik_stat_desc *rubik_stat;

	rubik_stat = &processor->stat_data_desc->rubik_stat;

	dla_debug_rubik_stats(rubik_stat);
}
#endif /* STAT_ENABLE */

void
dla_rubik_set_producer(int32_t group_id, int32_t __unused)
{
	uint32_t reg;

	/**
	 * set producer pointer for all sub-modules
	 */
	reg = group_id << SHIFT(RBK_S_POINTER_0, PRODUCER);
	rubik_reg_write(S_POINTER, reg);
}

int
dla_rubik_enable(struct dla_processor_group *group)
{
	uint32_t reg;
	struct dla_engine *engine = dla_get_engine();

	dla_trace("Enter: %s", __func__);

	if (engine->stat_enable == (uint32_t)1) {
		rubik_reg_write(D_PERF_ENABLE, 1);
		group->start_time = dla_get_time_us();
	}

	/**
	 * enable all sub-modules
	 */
	reg = FIELD_ENUM(RBK_D_OP_ENABLE_0, OP_EN, ENABLE);
	rubik_reg_write(D_OP_ENABLE, reg);

	dla_trace("Exit: %s", __func__);

	RETURN(0);
}

void
dla_rubik_rdma_check(struct dla_processor_group *group)
{
	group->is_rdma_needed = 0;
}

static int32_t
processor_rubik_program(struct dla_processor_group *group)
{
	int32_t ret = 0;
	uint32_t reg, high, low;
	uint64_t input_address = 0;
	uint64_t output_address = 0;
	struct dla_engine *engine = dla_get_engine();
	struct dla_rubik_op_desc *rubik_op;
	struct dla_rubik_surface_desc *rubik_surface;

	dla_trace("Enter: %s", __func__);

	rubik_op = &group->operation_desc->rubik_op;
	rubik_surface = &group->surface_desc->rubik_surface;

	/* Argument check */
	ASSERT_GOTO((rubik_surface->src_data.type != DLA_MEM_HW),
		    ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO((rubik_surface->dst_data.type != DLA_MEM_HW),
		    ret, ERR(INVALID_INPUT), exit);

	/* get the addresses from task descriptor */
	ret = dla_read_input_address(&rubik_surface->src_data,
				     &input_address,
				     group->op_desc->index,
				     group->roi_index,
				     1);
	if (ret)
		goto exit;

	dla_get_dma_cube_address(engine->driver_context,
				 engine->task->task_data,
				 rubik_surface->dst_data.address,
				 rubik_surface->dst_data.offset,
				 (void *)&output_address,
				 DESTINATION_DMA);

	/* config rubik */
	reg = (((uint32_t)map_rubik_mode[rubik_op->mode]) <<
			SHIFT(RBK_D_MISC_CFG_0, RUBIK_MODE)) |
	      (((uint32_t)map_precision[rubik_op->precision]) <<
			SHIFT(RBK_D_MISC_CFG_0, IN_PRECISION));
	rubik_reg_write(D_MISC_CFG, reg);
	reg = (((uint32_t)map_ram_type[rubik_surface->src_data.type]) <<
			SHIFT(RBK_D_DAIN_RAM_TYPE_0, DATAIN_RAM_TYPE));
	rubik_reg_write(D_DAIN_RAM_TYPE, reg);
	reg = ((rubik_surface->src_data.width - 1) <<
			SHIFT(RBK_D_DATAIN_SIZE_0_0, DATAIN_WIDTH)) |
	      ((rubik_surface->src_data.height - 1) <<
			SHIFT(RBK_D_DATAIN_SIZE_0_0, DATAIN_HEIGHT));
	rubik_reg_write(D_DATAIN_SIZE_0, reg);
	reg = ((rubik_surface->src_data.channel - 1) <<
			SHIFT(RBK_D_DATAIN_SIZE_1_0, DATAIN_CHANNEL));
	rubik_reg_write(D_DATAIN_SIZE_1, reg);

	high = HIGH32BITS(input_address);
	low = LOW32BITS(input_address);
	rubik_reg_write(D_DAIN_ADDR_LOW, low);
	rubik_reg_write(D_DAIN_ADDR_HIGH, high);
	if (rubik_op->mode == RUBIK_MODE_MERGE) {
		ASSERT_GOTO((rubik_surface->src_data.plane_stride != 0),
			    ret, ERR(INVALID_INPUT), exit);
		ASSERT_GOTO(((rubik_surface->src_data.plane_stride & 0x1F) == 0),
			    ret, ERR(INVALID_INPUT), exit);
		rubik_reg_write(D_DAIN_PLANAR_STRIDE,
				rubik_surface->src_data.plane_stride);
	} else {
		rubik_reg_write(D_DAIN_SURF_STRIDE,
				rubik_surface->src_data.surf_stride);
	}
	rubik_reg_write(D_DAIN_LINE_STRIDE,
			rubik_surface->src_data.line_stride);

	reg = (((uint32_t)map_ram_type[rubik_surface->dst_data.type]) <<
			SHIFT(RBK_D_DAOUT_RAM_TYPE_0, DATAOUT_RAM_TYPE));
	rubik_reg_write(D_DAOUT_RAM_TYPE, reg);
	reg = ((rubik_surface->dst_data.channel - 1) <<
			SHIFT(RBK_D_DATAOUT_SIZE_1_0, DATAOUT_CHANNEL));
	rubik_reg_write(D_DATAOUT_SIZE_1, reg);

	high = HIGH32BITS(output_address);
	low = LOW32BITS(output_address);
	rubik_reg_write(D_DAOUT_ADDR_LOW, low);
	rubik_reg_write(D_DAOUT_ADDR_HIGH, high);

	rubik_reg_write(D_DAOUT_LINE_STRIDE,
			rubik_surface->dst_data.line_stride);
	if (rubik_op->mode != RUBIK_MODE_SPLIT) {
		rubik_reg_write(D_DAOUT_SURF_STRIDE,
				rubik_surface->dst_data.surf_stride);
		if (rubik_op->mode == RUBIK_MODE_CONTRACT) {
			reg = ((rubik_surface->dst_data.channel *
				map_bpe[rubik_op->precision] + 31) >> 5) *
				rubik_surface->src_data.surf_stride;
			rubik_reg_write(D_CONTRACT_STRIDE_0, reg);

			reg = rubik_op->stride_y *
				rubik_surface->dst_data.line_stride;
			rubik_reg_write(D_CONTRACT_STRIDE_1, reg);

			reg = (((uint32_t)(rubik_op->stride_x - 1)) <<
				SHIFT(RBK_D_DECONV_STRIDE_0, DECONV_X_STRIDE)) |
			      (((uint32_t)(rubik_op->stride_y - 1)) <<
				SHIFT(RBK_D_DECONV_STRIDE_0, DECONV_Y_STRIDE));
			rubik_reg_write(D_DECONV_STRIDE, reg);
		}
	} else {
		rubik_reg_write(D_DAOUT_PLANAR_STRIDE,
				rubik_surface->dst_data.plane_stride);
	}

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}
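
One detail in the CONTRACT branch deserves a worked instance: the D_CONTRACT_STRIDE_0 expression rounds the output-channel payload up to whole 32-byte atoms, (c * bpe + 31) >> 5, before scaling by the input surface stride. A self-contained check with illustrative numbers (the cube values are hypothetical; map_bpe for INT8 is taken as 1 byte per element):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t dst_channel = 96;	 /* output channels (made up) */
		uint32_t bpe = 1;		 /* bytes/element for INT8 */
		uint32_t src_surf_stride = 2048; /* surface stride (made up) */

		/* (96 * 1 + 31) >> 5 == 3 atoms of 32 bytes each */
		printf("%u\n",
		       ((dst_channel * bpe + 31) >> 5) * src_surf_stride);
		return 0;	/* prints 6144 */
	}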

int
dla_rubik_is_ready(struct dla_processor *processor,
		   struct dla_processor_group *group)
{
	return 1;
}

void
dla_rubik_dump_config(struct dla_processor_group *group)
{
	struct dla_rubik_op_desc *rubik_op;
	struct dla_rubik_surface_desc *rubik_surface;

	rubik_surface = &group->surface_desc->rubik_surface;
	rubik_op = &group->operation_desc->rubik_op;

	dla_debug_rubik_surface_desc(rubik_surface, group->roi_index);
	dla_debug_rubik_op_desc(rubik_op, group->roi_index);
}

int
dla_rubik_program(struct dla_processor_group *group)
{
	int32_t ret = 0;
	struct dla_engine *engine = dla_get_engine();

	dla_trace("Enter: %s", __func__);

	if (!engine->config_data->rubik_enable) {
		dla_error("RUBIK is not supported for this configuration\n");
		ret = ERR(INVALID_INPUT);
		goto exit;
	}

	dla_enable_intr(MASK(GLB_S_INTR_MASK_0, RUBIK_DONE_MASK1) |
			MASK(GLB_S_INTR_MASK_0, RUBIK_DONE_MASK0));

	ret = processor_rubik_program(group);
	if (ret)
		goto exit;

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}

1160
drivers/nvdla/scheduler.c
Normal file
File diff suppressed because it is too large

817
drivers/nvdla/sdp.c
Normal file

@@ -0,0 +1,817 @@
/*
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <opendla.h>
#include <dla_debug.h>
#include <dla_interface.h>

#include "common.h"
#include "dla_engine_internal.h"
#include "engine_debug.h"

static const uint8_t map_ena[] = {
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DISABLE, YES),
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DISABLE, NO),
};

static const uint8_t map_prelu[] = {
	FIELD_ENUM(SDP_D_DP_BS_CFG_0, BS_MUL_PRELU, NO),
	FIELD_ENUM(SDP_D_DP_BS_CFG_0, BS_MUL_PRELU, YES),
};

static const uint8_t map_bypass[] = {
	FIELD_ENUM(SDP_D_DP_BS_CFG_0, BS_BYPASS, YES),
	FIELD_ENUM(SDP_D_DP_BS_CFG_0, BS_BYPASS, NO),
};

static const uint8_t map_alu_op[] = {
	FIELD_ENUM(SDP_D_DP_EW_CFG_0, EW_ALU_ALGO, MAX),
	FIELD_ENUM(SDP_D_DP_EW_CFG_0, EW_ALU_ALGO, MIN),
	FIELD_ENUM(SDP_D_DP_EW_CFG_0, EW_ALU_ALGO, SUM),
	FIELD_ENUM(SDP_D_DP_EW_CFG_0, EW_ALU_ALGO, EQL),
};

static const uint8_t map_alu_src[] = {
	FIELD_ENUM(SDP_D_DP_BS_ALU_CFG_0, BS_ALU_SRC, MEM),
	FIELD_ENUM(SDP_D_DP_BS_ALU_CFG_0, BS_ALU_SRC, REG),
};

static const uint8_t map_fly[] = {
	FIELD_ENUM(SDP_D_FEATURE_MODE_CFG_0, FLYING_MODE, OFF),
	FIELD_ENUM(SDP_D_FEATURE_MODE_CFG_0, FLYING_MODE, ON),
};

static const uint8_t map_dst[] = {
	FIELD_ENUM(SDP_D_FEATURE_MODE_CFG_0, OUTPUT_DST, MEM),
	FIELD_ENUM(SDP_D_FEATURE_MODE_CFG_0, OUTPUT_DST, PDP),
};

static const uint8_t map_wg[] = {
	FIELD_ENUM(SDP_D_FEATURE_MODE_CFG_0, WINOGRAD, OFF),
	FIELD_ENUM(SDP_D_FEATURE_MODE_CFG_0, WINOGRAD, ON),
};

static const uint8_t map_precision[] = {
	FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, INT8),
	FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, INT16),
	FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, FP16),
};

/* processing precision indexed by [dst_precision][src_precision] */
static const uint32_t map_proc_precision[3][3] = {
	{
		FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, INT8),
		FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, INT8),
		FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, FP16),
	},
	{
		FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, INT8),
		FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, INT16),
		FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, FP16),
	},
	{
		FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, INT8),
		FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, INT16),
		FIELD_ENUM(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION, FP16),
	},
};

static const uint8_t map_op_type[] = {
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_USE, MUL),
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_USE, MUL),
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_USE, ALU),
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_USE, BOTH),
};

static const uint8_t map_element_size[] = {
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_SIZE, ONE_BYTE),
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_SIZE, TWO_BYTE),
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_SIZE, TWO_BYTE),
};

static const uint8_t map_op_mode[] = {
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_MODE, PER_ELEMENT),
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_MODE, PER_KERNEL),
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_MODE, PER_ELEMENT),
};

static const uint8_t map_ram_type[] = {
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_RAM_TYPE, MC),
	FIELD_ENUM(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_RAM_TYPE, CV),
};

static const uint8_t map_perf_dma[] = {
	FIELD_ENUM(SDP_D_PERF_ENABLE_0, PERF_DMA_EN, NO),
	FIELD_ENUM(SDP_D_PERF_ENABLE_0, PERF_DMA_EN, YES),
};

static const uint8_t map_perf_lut[] = {
	FIELD_ENUM(SDP_D_PERF_ENABLE_0, PERF_LUT_EN, NO),
	FIELD_ENUM(SDP_D_PERF_ENABLE_0, PERF_LUT_EN, YES),
};

static const uint8_t map_perf_sat[] = {
	FIELD_ENUM(SDP_D_PERF_ENABLE_0, PERF_SAT_EN, NO),
	FIELD_ENUM(SDP_D_PERF_ENABLE_0, PERF_SAT_EN, YES),
};

static const uint8_t map_perf_nan_inf[] = {
	FIELD_ENUM(SDP_D_PERF_ENABLE_0, PERF_NAN_INF_COUNT_EN, NO),
	FIELD_ENUM(SDP_D_PERF_ENABLE_0, PERF_NAN_INF_COUNT_EN, YES),
};

#if STAT_ENABLE
void
dla_sdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group)
{
	uint64_t end_time = 0;
	struct dla_sdp_stat_desc *sdp_stat;

	sdp_stat = &processor->stat_data_desc->sdp_stat;

	end_time = dla_get_time_us();

	sdp_stat->nan_input_num = sdp_reg_read(D_STATUS_NAN_INPUT_NUM);
	sdp_stat->inf_input_num = sdp_reg_read(D_STATUS_INF_INPUT_NUM);
	sdp_stat->nan_output_num = sdp_reg_read(D_STATUS_NAN_OUTPUT_NUM);
	sdp_stat->wdma_write_stall = sdp_reg_read(D_PERF_WDMA_WRITE_STALL);
	sdp_stat->runtime = (uint32_t)(end_time - group->start_time);
}

void
dla_sdp_dump_stat(struct dla_processor *processor)
{
	struct dla_sdp_stat_desc *sdp_stat;

	sdp_stat = &processor->stat_data_desc->sdp_stat;

	dla_debug_sdp_stats(sdp_stat);
}
#endif /* STAT_ENABLE */

void
dla_sdp_set_producer(int32_t group_id, int32_t rdma_group_id)
{
	uint32_t reg;

	/**
	 * set producer pointer for all sub-modules
	 */
	reg = group_id << SHIFT(SDP_S_POINTER_0, PRODUCER);
	sdp_reg_write(S_POINTER, reg);
	reg = rdma_group_id << SHIFT(SDP_RDMA_S_POINTER_0, PRODUCER);
	sdp_rdma_reg_write(S_POINTER, reg);
}

int
dla_sdp_enable(struct dla_processor_group *group)
{
	uint32_t reg;
	uint8_t perf_reg;
	struct dla_engine *engine = dla_get_engine();

	dla_trace("Enter: %s", __func__);

	if (engine->stat_enable == (uint32_t)1) {
		perf_reg = (map_perf_dma[1] <<
				SHIFT(SDP_D_PERF_ENABLE_0, PERF_DMA_EN)) |
			   (map_perf_lut[1] <<
				SHIFT(SDP_D_PERF_ENABLE_0, PERF_LUT_EN)) |
			   (map_perf_sat[1] <<
				SHIFT(SDP_D_PERF_ENABLE_0, PERF_SAT_EN)) |
			   (map_perf_nan_inf[1] <<
				SHIFT(SDP_D_PERF_ENABLE_0, PERF_NAN_INF_COUNT_EN));

		sdp_reg_write(D_PERF_ENABLE, perf_reg);
		group->start_time = dla_get_time_us();
	}

	/**
	 * enable all sub-modules
	 */
	if (group->is_rdma_needed) {
		reg = FIELD_ENUM(SDP_RDMA_D_OP_ENABLE_0, OP_EN, ENABLE);
		sdp_rdma_reg_write(D_OP_ENABLE, reg);
	}
	reg = FIELD_ENUM(SDP_D_OP_ENABLE_0, OP_EN, ENABLE);
	sdp_reg_write(D_OP_ENABLE, reg);

	dla_trace("Exit: %s", __func__);

	RETURN(0);
}

void
dla_sdp_rdma_check(struct dla_processor_group *group)
{
	uint8_t x1_rdma_ena;
	uint8_t x2_rdma_ena;
	uint8_t y_rdma_ena;
	uint8_t fly;
	struct dla_sdp_op_desc *sdp_op;
	struct dla_sdp_surface_desc *sdp_surface;

	sdp_op = &group->operation_desc->sdp_op;
	sdp_surface = &group->surface_desc->sdp_surface;

	x1_rdma_ena = sdp_op->x1_op.enable;
	x2_rdma_ena = sdp_op->x2_op.enable;
	y_rdma_ena = sdp_op->y_op.enable;

	x1_rdma_ena &= (sdp_op->x1_op.mode != SDP_OP_PER_LAYER);
	x2_rdma_ena &= (sdp_op->x2_op.mode != SDP_OP_PER_LAYER);
	y_rdma_ena &= (sdp_op->y_op.mode != SDP_OP_PER_LAYER);

	fly = sdp_surface->src_data.type == DLA_MEM_HW;

	group->is_rdma_needed = (!fly) || (x1_rdma_ena ||
					   x2_rdma_ena || y_rdma_ena);
}
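
In words: SDP needs its RDMA path whenever the main data cube is not streamed on-the-fly from the upstream unit, or any enabled x1/x2/y operand must be fetched from memory (per-kernel or per-element) rather than supplied per-layer through registers. Spelled out as a standalone predicate (illustration only, not driver code):

	#include <stdbool.h>

	/* Equivalent boolean form of the assignment above. */
	static bool sdp_needs_rdma(bool fly, bool x1_mem, bool x2_mem,
				   bool y_mem)
	{
		return !fly || x1_mem || x2_mem || y_mem;
	}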
|
||||
|
static int32_t
processor_sdp_program(struct dla_processor_group *group)
{
	int32_t ret = 0;
	uint64_t src_addr = -1, x1_addr = -1, x2_addr = -1;
	uint64_t y_addr = -1, dst_addr = -1;
	uint32_t reg, high, low;
	uint8_t fly;
	uint32_t atom_size;
	struct dla_sdp_op *x1_op;
	struct dla_sdp_op *x2_op;
	struct dla_sdp_op *y_op;
	uint8_t x1_rdma_ena;
	uint8_t x2_rdma_ena;
	uint8_t y_rdma_ena;
	uint8_t out_dma_ena;
	struct dla_lut_param lut;
	struct dla_engine *engine = dla_get_engine();
	struct dla_sdp_op_desc *sdp_op;
	struct dla_sdp_surface_desc *sdp_surface;

	dla_trace("Enter: %s", __func__);
	atom_size = engine->config_data->atom_size;

	sdp_op = &group->operation_desc->sdp_op;
	sdp_surface = &group->surface_desc->sdp_surface;

	fly = sdp_surface->src_data.type == DLA_MEM_HW;
	out_dma_ena = sdp_surface->dst_data.type != DLA_MEM_HW;
	x1_op = &sdp_op->x1_op;
	x2_op = &sdp_op->x2_op;
	y_op = &sdp_op->y_op;
	x1_rdma_ena = x1_op->enable && x1_op->type != SDP_OP_NONE;
	x2_rdma_ena = x2_op->enable && x2_op->type != SDP_OP_NONE;
	y_rdma_ena = y_op->enable && y_op->type != SDP_OP_NONE;

	/* load address */
	if (!fly) {
		ret = dla_read_input_address(&sdp_surface->src_data,
						&src_addr,
						group->op_desc->index,
						group->roi_index,
						1);
		if (ret)
			goto exit;
		CHECK_ALIGN(src_addr, atom_size);
	}

	if (out_dma_ena) {
		dla_get_dma_cube_address(engine->driver_context,
					engine->task->task_data,
					sdp_surface->dst_data.address,
					sdp_surface->dst_data.offset,
					(void *)&dst_addr,
					DESTINATION_DMA);
		CHECK_ALIGN(dst_addr, atom_size);
	}

	if (sdp_op->lut_index >= 0) {
		group->lut_index = sdp_op->lut_index;
		dla_read_lut(engine, sdp_op->lut_index, (void *)&lut);
		dla_debug_lut_params(&lut);
	}

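	/*
	 * Operands supplied per-layer are written below as immediate
	 * register values (e.g. D_DP_BS_ALU_SRC_VALUE), so they need no
	 * DMA fetch; drop them from the RDMA-enable flags here.
	 */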
	x1_rdma_ena &= (x1_op->mode != SDP_OP_PER_LAYER);
	x2_rdma_ena &= (x2_op->mode != SDP_OP_PER_LAYER);
	y_rdma_ena &= (y_op->mode != SDP_OP_PER_LAYER);

	if (x1_rdma_ena) {
		dla_get_dma_cube_address(engine->driver_context,
					engine->task->task_data,
					sdp_surface->x1_data.address,
					sdp_surface->x1_data.offset,
					(void *)&x1_addr,
					DESTINATION_DMA);
		CHECK_ALIGN(x1_addr, atom_size);
	}
	if (x2_rdma_ena) {
		dla_get_dma_cube_address(engine->driver_context,
					engine->task->task_data,
					sdp_surface->x2_data.address,
					sdp_surface->x2_data.offset,
					(void *)&x2_addr,
					DESTINATION_DMA);
		CHECK_ALIGN(x2_addr, atom_size);
	}
	if (y_rdma_ena) {
		dla_get_dma_cube_address(engine->driver_context,
					engine->task->task_data,
					sdp_surface->y_data.address,
					sdp_surface->y_data.offset,
					(void *)&y_addr,
					DESTINATION_DMA);
		CHECK_ALIGN(y_addr, atom_size);
	}

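	/*
	 * Bring the read-DMA to a known state before programming it:
	 * force flying mode off and set the DISABLE bit of all three
	 * operand read channels; the real configuration follows below.
	 */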
	reg = (map_fly[0] << SHIFT(SDP_RDMA_D_FEATURE_MODE_CFG_0, FLYING_MODE));
	sdp_rdma_reg_write(D_FEATURE_MODE_CFG, reg);

	reg = (map_ena[1] << SHIFT(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DISABLE));
	sdp_rdma_reg_write(D_BRDMA_CFG, reg);
	reg = (map_ena[1] << SHIFT(SDP_RDMA_D_NRDMA_CFG_0, NRDMA_DISABLE));
	sdp_rdma_reg_write(D_NRDMA_CFG, reg);
	reg = (map_ena[1] << SHIFT(SDP_RDMA_D_ERDMA_CFG_0, ERDMA_DISABLE));
	sdp_rdma_reg_write(D_ERDMA_CFG, reg);

	reg = (map_fly[fly] <<
			SHIFT(SDP_RDMA_D_FEATURE_MODE_CFG_0, FLYING_MODE)) |
		(map_wg[sdp_op->conv_mode == CONV_MODE_WINOGRAD] <<
			SHIFT(SDP_RDMA_D_FEATURE_MODE_CFG_0, WINOGRAD)) |
		(map_precision[sdp_op->src_precision] <<
			SHIFT(SDP_RDMA_D_FEATURE_MODE_CFG_0, IN_PRECISION)) |
		(map_precision[sdp_op->dst_precision] <<
			SHIFT(SDP_RDMA_D_FEATURE_MODE_CFG_0, OUT_PRECISION)) |
		(map_proc_precision[sdp_op->dst_precision][sdp_op->src_precision] <<
			SHIFT(SDP_RDMA_D_FEATURE_MODE_CFG_0, PROC_PRECISION)) |
		((sdp_op->batch_num - 1) <<
			SHIFT(SDP_RDMA_D_FEATURE_MODE_CFG_0, BATCH_NUMBER));
	sdp_rdma_reg_write(D_FEATURE_MODE_CFG, reg);

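	/*
	 * With RDMA in use, program the source cube dimensions and the
	 * per-operand read channels: BRDMA feeds x1, NRDMA feeds x2 and
	 * ERDMA feeds y.
	 */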
	if (group->is_rdma_needed) {

		sdp_rdma_reg_write(D_DATA_CUBE_WIDTH,
					sdp_surface->src_data.width - 1);
		sdp_rdma_reg_write(D_DATA_CUBE_HEIGHT,
					sdp_surface->src_data.height - 1);
		sdp_rdma_reg_write(D_DATA_CUBE_CHANNEL,
					sdp_surface->src_data.channel - 1);

		/* config SDP source info */
		if (!fly) {
			/**
			 * if not on-the-fly, we have to config
			 * the source cube info
			 */
			high = HIGH32BITS(src_addr);
			low = LOW32BITS(src_addr);
			sdp_rdma_reg_write(D_SRC_BASE_ADDR_LOW, low);
			sdp_rdma_reg_write(D_SRC_BASE_ADDR_HIGH, high);
			sdp_rdma_reg_write(D_SRC_LINE_STRIDE,
					sdp_surface->src_data.line_stride);
			sdp_rdma_reg_write(D_SRC_SURFACE_STRIDE,
					sdp_surface->src_data.surf_stride);
			sdp_rdma_reg_write(D_SRC_DMA_CFG,
					map_ram_type[sdp_surface->src_data.type]);
		}

		/* config x1 source info */
		reg = (map_ena[x1_rdma_ena] <<
				SHIFT(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DISABLE)) |
			(map_op_type[x1_op->type] <<
				SHIFT(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_USE)) |
			(map_element_size[x1_op->precision] <<
				SHIFT(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_SIZE)) |
			(map_op_mode[x1_op->mode] <<
				SHIFT(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_DATA_MODE)) |
			(map_ram_type[sdp_surface->x1_data.type] <<
				SHIFT(SDP_RDMA_D_BRDMA_CFG_0, BRDMA_RAM_TYPE));
		sdp_rdma_reg_write(D_BRDMA_CFG, reg);

		if (x1_rdma_ena) {
			high = HIGH32BITS(x1_addr);
			low = LOW32BITS(x1_addr);
			sdp_rdma_reg_write(D_BS_BASE_ADDR_LOW, low);
			sdp_rdma_reg_write(D_BS_BASE_ADDR_HIGH, high);
			sdp_rdma_reg_write(D_BS_LINE_STRIDE,
					sdp_surface->x1_data.line_stride);
			sdp_rdma_reg_write(D_BS_SURFACE_STRIDE,
					sdp_surface->x1_data.surf_stride);
		}

		/* config x2 source info */
		reg = (map_ena[x2_rdma_ena] <<
				SHIFT(SDP_RDMA_D_NRDMA_CFG_0, NRDMA_DISABLE)) |
			(map_op_type[x2_op->type] <<
				SHIFT(SDP_RDMA_D_NRDMA_CFG_0, NRDMA_DATA_USE)) |
			(map_element_size[x2_op->precision] <<
				SHIFT(SDP_RDMA_D_NRDMA_CFG_0, NRDMA_DATA_SIZE)) |
			(map_op_mode[x2_op->mode] <<
				SHIFT(SDP_RDMA_D_NRDMA_CFG_0, NRDMA_DATA_MODE)) |
			(map_ram_type[sdp_surface->x2_data.type] <<
				SHIFT(SDP_RDMA_D_NRDMA_CFG_0, NRDMA_RAM_TYPE));

		sdp_rdma_reg_write(D_NRDMA_CFG, reg);

		if (x2_rdma_ena) {
			high = HIGH32BITS(x2_addr);
			low = LOW32BITS(x2_addr);
			sdp_rdma_reg_write(D_BN_BASE_ADDR_LOW, low);
			sdp_rdma_reg_write(D_BN_BASE_ADDR_HIGH, high);
			sdp_rdma_reg_write(D_BN_LINE_STRIDE,
					sdp_surface->x2_data.line_stride);
			sdp_rdma_reg_write(D_BN_SURFACE_STRIDE,
					sdp_surface->x2_data.surf_stride);
		}

		/* config y source info */
		reg = (map_ena[y_rdma_ena] <<
				SHIFT(SDP_RDMA_D_ERDMA_CFG_0, ERDMA_DISABLE)) |
			(map_op_type[y_op->type] <<
				SHIFT(SDP_RDMA_D_ERDMA_CFG_0, ERDMA_DATA_USE)) |
			(map_element_size[y_op->precision] <<
				SHIFT(SDP_RDMA_D_ERDMA_CFG_0, ERDMA_DATA_SIZE)) |
			(map_op_mode[y_op->mode] <<
				SHIFT(SDP_RDMA_D_ERDMA_CFG_0, ERDMA_DATA_MODE)) |
			(map_ram_type[sdp_surface->y_data.type] <<
				SHIFT(SDP_RDMA_D_ERDMA_CFG_0, ERDMA_RAM_TYPE));

		sdp_rdma_reg_write(D_ERDMA_CFG, reg);
		if (y_rdma_ena) {
			high = HIGH32BITS(y_addr);
			low = LOW32BITS(y_addr);
			sdp_rdma_reg_write(D_EW_BASE_ADDR_LOW, low);
			sdp_rdma_reg_write(D_EW_BASE_ADDR_HIGH, high);
			sdp_rdma_reg_write(D_EW_LINE_STRIDE,
					sdp_surface->y_data.line_stride);
			sdp_rdma_reg_write(D_EW_SURFACE_STRIDE,
					sdp_surface->y_data.surf_stride);
		}
	}

	if (sdp_op->lut_index >= 0)
		update_lut(SDP_S_LUT_ACCESS_CFG_0, &lut,
				sdp_op->src_precision);

	sdp_reg_write(D_DATA_CUBE_WIDTH, sdp_surface->src_data.width - 1);
	sdp_reg_write(D_DATA_CUBE_HEIGHT, sdp_surface->src_data.height - 1);
	sdp_reg_write(D_DATA_CUBE_CHANNEL, sdp_surface->src_data.channel - 1);

	if (out_dma_ena) {
		high = HIGH32BITS(dst_addr);
		low = LOW32BITS(dst_addr);
		sdp_reg_write(D_DST_BASE_ADDR_HIGH, high);
		sdp_reg_write(D_DST_BASE_ADDR_LOW, low);
		sdp_reg_write(D_DST_LINE_STRIDE,
				sdp_surface->dst_data.line_stride);
		sdp_reg_write(D_DST_SURFACE_STRIDE,
				sdp_surface->dst_data.surf_stride);
	}

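	/*
	 * Datapath stage configuration. Each stage (BS, BN, EW) is an ALU
	 * plus a multiplier: the *_ALU_BYPASS bit is cleared only for
	 * ADD/BOTH operations and *_MUL_BYPASS only for MUL/BOTH, followed
	 * by ReLU/PReLU on BS and BN, or a LUT lookup on EW.
	 */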
	/* Config BS module */
	reg = (map_bypass[x1_op->enable] <<
			SHIFT(SDP_D_DP_BS_CFG_0, BS_BYPASS)) |
		(map_bypass[x1_op->type != SDP_OP_MUL &&
				x1_op->type != SDP_OP_NONE] <<
			SHIFT(SDP_D_DP_BS_CFG_0, BS_ALU_BYPASS)) |
		(map_alu_op[x1_op->alu_type] <<
			SHIFT(SDP_D_DP_BS_CFG_0, BS_ALU_ALGO)) |
		(map_bypass[x1_op->type != SDP_OP_ADD &&
				x1_op->type != SDP_OP_NONE] <<
			SHIFT(SDP_D_DP_BS_CFG_0, BS_MUL_BYPASS)) |
		(map_prelu[x1_op->act == ACTIVATION_PRELU] <<
			SHIFT(SDP_D_DP_BS_CFG_0, BS_MUL_PRELU)) |
		(map_bypass[x1_op->act == ACTIVATION_RELU] <<
			SHIFT(SDP_D_DP_BS_CFG_0, BS_RELU_BYPASS));
	sdp_reg_write(D_DP_BS_CFG, reg);

	if (x1_op->enable) {
		if (x1_op->type == SDP_OP_ADD ||
				x1_op->type == SDP_OP_BOTH) {
			reg = (map_alu_src[x1_op->mode == SDP_OP_PER_LAYER] <<
					SHIFT(SDP_D_DP_BS_ALU_CFG_0,
							BS_ALU_SRC)) |
				(x1_op->shift_value <<
					SHIFT(SDP_D_DP_BS_ALU_CFG_0,
							BS_ALU_SHIFT_VALUE));
			sdp_reg_write(D_DP_BS_ALU_CFG, reg);
		}

		if (x1_op->mode == SDP_OP_PER_LAYER) {
			sdp_reg_write(D_DP_BS_ALU_SRC_VALUE,
					x1_op->alu_operand);
			sdp_reg_write(D_DP_BS_MUL_SRC_VALUE,
					x1_op->mul_operand);
		}

		/**
		 * MUL truncate takes effect whether or not
		 * MUL is bypassed
		 */
		reg = (map_alu_src[x1_op->mode == SDP_OP_PER_LAYER] <<
				SHIFT(SDP_D_DP_BS_MUL_CFG_0,
						BS_MUL_SRC)) |
			(x1_op->truncate <<
				SHIFT(SDP_D_DP_BS_MUL_CFG_0,
						BS_MUL_SHIFT_VALUE));
		sdp_reg_write(D_DP_BS_MUL_CFG, reg);
	}

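	/* The BN stage mirrors BS, driven by the x2 operand. */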
	/* Config BN module */
	reg = (map_bypass[x2_op->enable] <<
			SHIFT(SDP_D_DP_BN_CFG_0, BN_BYPASS)) |
		(map_bypass[x2_op->type != SDP_OP_MUL &&
				x2_op->type != SDP_OP_NONE] <<
			SHIFT(SDP_D_DP_BN_CFG_0, BN_ALU_BYPASS)) |
		(map_alu_op[x2_op->alu_type] <<
			SHIFT(SDP_D_DP_BN_CFG_0, BN_ALU_ALGO)) |
		(map_bypass[x2_op->type != SDP_OP_ADD &&
				x2_op->type != SDP_OP_NONE] <<
			SHIFT(SDP_D_DP_BN_CFG_0, BN_MUL_BYPASS)) |
		(map_prelu[x2_op->act == ACTIVATION_PRELU] <<
			SHIFT(SDP_D_DP_BN_CFG_0, BN_MUL_PRELU)) |
		(map_bypass[x2_op->act == ACTIVATION_RELU] <<
			SHIFT(SDP_D_DP_BN_CFG_0, BN_RELU_BYPASS));
	sdp_reg_write(D_DP_BN_CFG, reg);

	if (x2_op->enable) {
		if (x2_op->type == SDP_OP_ADD ||
				x2_op->type == SDP_OP_BOTH) {
			reg = (map_alu_src[x2_op->mode == SDP_OP_PER_LAYER] <<
					SHIFT(SDP_D_DP_BN_ALU_CFG_0,
							BN_ALU_SRC)) |
				(x2_op->shift_value <<
					SHIFT(SDP_D_DP_BN_ALU_CFG_0,
							BN_ALU_SHIFT_VALUE));
			sdp_reg_write(D_DP_BN_ALU_CFG, reg);
		}

		if (x2_op->mode == SDP_OP_PER_LAYER) {
			sdp_reg_write(D_DP_BN_ALU_SRC_VALUE,
					x2_op->alu_operand);
			sdp_reg_write(D_DP_BN_MUL_SRC_VALUE,
					x2_op->mul_operand);
		}

		reg = (map_alu_src[x2_op->mode == SDP_OP_PER_LAYER] <<
				SHIFT(SDP_D_DP_BN_MUL_CFG_0,
						BN_MUL_SRC)) |
			(x2_op->truncate <<
				SHIFT(SDP_D_DP_BN_MUL_CFG_0,
						BN_MUL_SHIFT_VALUE));
		sdp_reg_write(D_DP_BN_MUL_CFG, reg);
	}

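	/*
	 * The EW stage additionally has per-operand converters
	 * (offset/scale/truncate) on both its ALU and MUL inputs, and its
	 * activation is a LUT lookup rather than ReLU.
	 */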
	/* Config EW module */
	reg = (map_bypass[y_op->enable] <<
			SHIFT(SDP_D_DP_EW_CFG_0, EW_BYPASS)) |
		(map_bypass[y_op->type != SDP_OP_MUL &&
				y_op->type != SDP_OP_NONE] <<
			SHIFT(SDP_D_DP_EW_CFG_0, EW_ALU_BYPASS)) |
		(map_alu_op[y_op->alu_type] <<
			SHIFT(SDP_D_DP_EW_CFG_0, EW_ALU_ALGO)) |
		(map_bypass[y_op->type != SDP_OP_ADD &&
				y_op->type != SDP_OP_NONE] <<
			SHIFT(SDP_D_DP_EW_CFG_0, EW_MUL_BYPASS)) |
		((map_prelu[y_op->act == ACTIVATION_PRELU]) <<
			SHIFT(SDP_D_DP_EW_CFG_0, EW_MUL_PRELU)) |
		(map_bypass[y_op->act == ACTIVATION_LUT] <<
			SHIFT(SDP_D_DP_EW_CFG_0, EW_LUT_BYPASS));
	sdp_reg_write(D_DP_EW_CFG, reg);

	if (y_op->enable) {
		if (y_op->type == SDP_OP_ADD || y_op->type == SDP_OP_BOTH) {
			reg = (map_alu_src[y_op->mode == SDP_OP_PER_LAYER] <<
					SHIFT(SDP_D_DP_EW_ALU_CFG_0,
							EW_ALU_SRC)) |
				(map_bypass[y_op->cvt.alu_cvt.enable] <<
					SHIFT(SDP_D_DP_EW_ALU_CFG_0,
							EW_ALU_CVT_BYPASS));
			sdp_reg_write(D_DP_EW_ALU_CFG, reg);

			if (y_op->mode == SDP_OP_PER_LAYER) {
				sdp_reg_write(D_DP_EW_ALU_SRC_VALUE,
						y_op->alu_operand);
			} else {
				sdp_reg_write(D_DP_EW_ALU_CVT_OFFSET_VALUE,
						y_op->cvt.alu_cvt.offset);
				sdp_reg_write(D_DP_EW_ALU_CVT_SCALE_VALUE,
						y_op->cvt.alu_cvt.scale);
				sdp_reg_write(D_DP_EW_ALU_CVT_TRUNCATE_VALUE,
						y_op->cvt.alu_cvt.truncate);
			}
		}

		if (y_op->type == SDP_OP_MUL || y_op->type == SDP_OP_BOTH) {
			reg = (map_alu_src[y_op->mode == SDP_OP_PER_LAYER] <<
					SHIFT(SDP_D_DP_EW_MUL_CFG_0,
							EW_MUL_SRC)) |
				(map_bypass[y_op->cvt.mul_cvt.enable] <<
					SHIFT(SDP_D_DP_EW_MUL_CFG_0,
							EW_MUL_CVT_BYPASS));
			sdp_reg_write(D_DP_EW_MUL_CFG, reg);

			if (y_op->mode == SDP_OP_PER_LAYER) {
				sdp_reg_write(D_DP_EW_MUL_SRC_VALUE,
						y_op->mul_operand);
			} else {
				sdp_reg_write(D_DP_EW_MUL_CVT_OFFSET_VALUE,
						y_op->cvt.mul_cvt.offset);
				sdp_reg_write(D_DP_EW_MUL_CVT_SCALE_VALUE,
						y_op->cvt.mul_cvt.scale);
				sdp_reg_write(D_DP_EW_MUL_CVT_TRUNCATE_VALUE,
						y_op->cvt.mul_cvt.truncate);
			}
		}

		sdp_reg_write(D_DP_EW_TRUNCATE_VALUE, y_op->truncate);
	}

	reg = (map_fly[sdp_surface->src_data.type == DLA_MEM_HW] <<
			SHIFT(SDP_D_FEATURE_MODE_CFG_0, FLYING_MODE)) |
		(map_dst[sdp_surface->dst_data.type == DLA_MEM_HW] <<
			SHIFT(SDP_D_FEATURE_MODE_CFG_0, OUTPUT_DST)) |
		(map_wg[sdp_op->conv_mode == CONV_MODE_WINOGRAD] <<
			SHIFT(SDP_D_FEATURE_MODE_CFG_0, WINOGRAD)) |
		((sdp_op->batch_num - 1) <<
			SHIFT(SDP_D_FEATURE_MODE_CFG_0, BATCH_NUMBER));
	sdp_reg_write(D_FEATURE_MODE_CFG, reg);
	sdp_reg_write(D_DST_DMA_CFG,
			map_ram_type[sdp_surface->dst_data.type]);
	if (sdp_op->batch_num > 1)
		sdp_reg_write(D_DST_BATCH_STRIDE, sdp_op->batch_stride);

	reg =
	(map_proc_precision[sdp_op->dst_precision][sdp_op->src_precision] <<
			SHIFT(SDP_D_DATA_FORMAT_0, PROC_PRECISION)) |
		(map_precision[sdp_op->dst_precision] <<
			SHIFT(SDP_D_DATA_FORMAT_0, OUT_PRECISION));
	sdp_reg_write(D_DATA_FORMAT, reg);
	sdp_reg_write(D_CVT_OFFSET, sdp_op->out_cvt.offset);
	sdp_reg_write(D_CVT_SCALE, sdp_op->out_cvt.scale);
	sdp_reg_write(D_CVT_SHIFT, sdp_op->out_cvt.truncate);

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}

int
dla_sdp_is_ready(struct dla_processor *processor,
			struct dla_processor_group *group)
{
	struct dla_processor_group *next_group;
	struct dla_sdp_op_desc *sdp_op;

	sdp_op = &group->operation_desc->sdp_op;
	next_group = &processor->groups[!group->id];

	/**
	 * A single LUT is shared between the two SDP groups, so make sure
	 * their usage does not conflict. LUT write access is also locked
	 * while the SDP sub-engine is active, so delay writing the LUT
	 * while the other group is active.
	 */

	/**
	 * if no LUT is required for the current group, it can be
	 * programmed without further checks
	 */
	if (sdp_op->lut_index == -1)
		return 1;

	/**
	 * if the same LUT is used for both groups, the current group can
	 * be programmed without further checks: even if the other group
	 * is active and the LUT is locked, it has already been programmed
	 * by that group.
	 */
	if (next_group->lut_index == sdp_op->lut_index)
		return 1;

	/**
	 * if the LUT index of the other group is not -1, some LUT is
	 * already programmed; do not program the current LUT, since we
	 * already know it is neither -1 nor the same as the other group's.
	 */
	if (next_group->lut_index != -1)
		return 0;

	/**
	 * the current group needs a LUT different from the other group's,
	 * and that group is not active, so program it.
	 */
	if (!next_group->active)
		return 1;

	/**
	 * reaching here means the current group uses a LUT different from
	 * the other group's while that group is active; wait for the other
	 * group to become idle.
	 */
	return 0;
}

void
dla_sdp_dump_config(struct dla_processor_group *group)
{
	struct dla_sdp_op_desc *sdp_op;
	struct dla_sdp_surface_desc *sdp_surface;

	sdp_surface = &group->surface_desc->sdp_surface;
	sdp_op = &group->operation_desc->sdp_op;

	dla_debug_sdp_surface_desc(sdp_surface, group->roi_index);
	dla_debug_sdp_op_desc(sdp_op, group->roi_index);
}

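/*
 * Top-level entry point: unmask the SDP done interrupts for both
 * register groups, then program the operation.
 */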
int
dla_sdp_program(struct dla_processor_group *group)
{
	int32_t ret;

	dla_trace("Enter: %s", __func__);
	dla_enable_intr(MASK(GLB_S_INTR_MASK_0, SDP_DONE_MASK1) |
			MASK(GLB_S_INTR_MASK_0, SDP_DONE_MASK0));

	ret = processor_sdp_program(group);
	if (ret)
		goto exit;

exit:
	dla_trace("Exit: %s", __func__);
	RETURN(ret);
}