6 changes: 6 additions & 0 deletions compiler/luci-value-test/luci_eval_verifier.py
@@ -119,6 +119,7 @@
if output_details["dtype"] == np.uint8:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolint,
atol=atolint) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
@@ -127,6 +128,7 @@
elif output_details["dtype"] == np.float32:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolf32,
atol=atolf32) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
@@ -135,6 +137,7 @@
elif output_details["dtype"] == np.int64:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolint,
atol=atolint) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
@@ -143,6 +146,7 @@
elif output_details["dtype"] == np.int32:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolint,
atol=atolint) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
@@ -151,13 +155,15 @@
elif output_details["dtype"] == np.int16:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolint,
atol=atolint) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
output_dtype = "int16"
elif output_details["dtype"] == np.bool_:
if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
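For reference, np.allclose(a, b, rtol, atol) passes when |a - b| <= atol + rtol * |b| holds elementwise, so each dtype branch above bounds both the absolute and the relative deviation of the luci-interpreter output from the TFLite interpreter output. Printing input_data on failure makes the failing case reproducible from the log.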
3 changes: 2 additions & 1 deletion compiler/luci-value-test/test.lst
@@ -90,7 +90,7 @@ addeval(MaxPool2D_000)
addeval(MaxPool2D_U8_000)
addeval(Mean_000)
addeval(Mean_001)
addeval(Mean_U8_000)
#addeval(Mean_U8_000) --> test with tolerance
addeval(Minimum_000)
#addeval(MirrorPad_000)
addeval(Mul_000)
@@ -207,3 +207,4 @@ addevaltol(SVDF_001 8e-3 8e-3)
addevaltol(Conv2D_U8_000 5 5)
# refer https://github.com/Samsung/ONE/issues/10438
addevaltol(YUV_TO_RGB_U8_000 1 1)
addevaltol(Mean_U8_000 8e-3 1)
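For reference, addevaltol entries appear to take the form addevaltol(MODEL rtol atol); under that reading, the atol of 1 for Mean_U8_000 allows a one-step difference in the uint8 quantized output, which is why the plain addeval entry above was commented out in favor of tolerance-based verification.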
7 changes: 7 additions & 0 deletions nnpackage/schema/circle_schema.fbs
@@ -87,6 +87,13 @@ enum TensorType : byte {
GGML_Q8_1 = -5,

// MX dtypes
// Current restrictions of MX dtypes
// - MX dtypes are not used for model I/O
// - MX dtypes are used for activations, not for constant inputs (ex. weight)
// - MX dtype's parameters (block size, exponent scale, etc) follows
// OCP Microscaling Formats Specification
// - Model does not have exponent scale data.
// Backend should define and use internally if needed
MXFP4 = -6,
MXINT8 = -7,
}
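To make the last restriction concrete, the sketch below (not part of this diff) shows how a backend might decode one MXINT8 element, assuming the OCP Microscaling defaults of an E8M0 shared scale per block and INT8 elements carrying an implicit 2^-6 scale; the block size (32 in the spec's MXINT8 profile) and these encodings are assumptions to verify against the specification.

```cpp
// Illustrative sketch only; assumes OCP Microscaling defaults (E8M0 shared
// scale with bias 127, INT8 elements with an implicit 2^-6 element scale).
#include <cmath>
#include <cstdint>

float decodeMxInt8(uint8_t shared_scale_e8m0, int8_t element)
{
  // E8M0: 8-bit biased exponent, no sign or mantissa bits; the all-ones
  // encoding is NaN in the spec and is skipped here for brevity.
  const float block_scale = std::ldexp(1.0f, static_cast<int>(shared_scale_e8m0) - 127);
  // Each INT8 element carries an implicit 2^-6 scale per the spec.
  return block_scale * static_cast<float>(element) * std::ldexp(1.0f, -6);
}
```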
7 changes: 7 additions & 0 deletions res/CircleSchema/0.10/circle_schema.fbs
@@ -87,6 +87,13 @@ enum TensorType : byte {
GGML_Q8_1 = -5,

// MX dtypes
// Current restrictions of MX dtypes
// - MX dtypes are not used for model I/O
// - MX dtypes are used for activations, not for constant inputs (ex. weight)
// - MX dtype's parameters (block size, exponent scale, etc) follows
// OCP Microscaling Formats Specification
// - Model does not have exponent scale data.
// Backend should define and use internally if needed
MXFP4 = -6,
MXINT8 = -7,
}
7 changes: 7 additions & 0 deletions runtime/libs/circle-schema/circle_schema.fbs
@@ -87,6 +87,13 @@ enum TensorType : byte {
GGML_Q8_1 = -5,

// MX dtypes
// Current restrictions of MX dtypes
// - MX dtypes are not used for model I/O
// - MX dtypes are used for activations, not for constant inputs (ex. weight)
// - MX dtype's parameters (block size, exponent scale, etc) follows
// OCP Microscaling Formats Specification
// - Model does not have exponent scale data.
// Backend should define and use internally if needed
MXFP4 = -6,
MXINT8 = -7,
}
23 changes: 23 additions & 0 deletions runtime/onert/backend/trix/CMakeLists.txt
@@ -9,6 +9,9 @@ message(STATUS "ONERT backend: Found TRIXEngine")

file(GLOB_RECURSE SOURCES "*.cc")

file(GLOB_RECURSE TESTS "*.test.cc")
list(REMOVE_ITEM SOURCES ${TESTS})

add_library(${LIB_ONERT_BACKEND_TRIX} SHARED ${SOURCES})

target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE onert_core)
@@ -24,3 +27,23 @@ set_target_properties(${LIB_ONERT_BACKEND_TRIX} PROPERTIES
INSTALL_RPATH ${ONERT_RPATH_PLUGIN})

install(TARGETS ${LIB_ONERT_BACKEND_TRIX} DESTINATION ${ONERT_INSTALL_BACKENDDIR})

if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)

# Unit Tests
set(TEST_ONERT_TRIX_BACKEND test_onert_trix_backend)

add_executable(${TEST_ONERT_TRIX_BACKEND} ${TESTS})
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} onert_core)
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} trix-engine)
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} ${LIB_ONERT_BACKEND_TRIX})
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} nnfw_common)
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} nnfw_coverage)
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} gtest gtest_main Threads::Threads)
set_target_properties(${TEST_ONERT_TRIX_BACKEND} PROPERTIES
INSTALL_RPATH "$ORIGIN/../${ONERT_INSTALL_COREDIR}:$ORIGIN/../${ONERT_INSTALL_BACKENDDIR}")

add_test(${TEST_ONERT_TRIX_BACKEND} ${TEST_ONERT_TRIX_BACKEND})
install(TARGETS ${TEST_ONERT_TRIX_BACKEND} DESTINATION unittest)
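For illustration, a file matched by the new *.test.cc glob would look roughly like the following hypothetical sketch (no test source is shown in this diff); REMOVE_ITEM keeps it out of the backend shared library, and it is compiled into test_onert_trix_backend instead.

```cpp
// Hypothetical "*.test.cc" file; excluded from ${SOURCES} by REMOVE_ITEM
// above and built into the gtest executable test_onert_trix_backend.
#include <gtest/gtest.h>

#include "ops/BulkPipelineBuffer.h"

TEST(BulkPipelineBuffer, NotReadyBeforeAllocate)
{
  using onert::backend::trix::ops::BulkPipelineBuffer;
  BulkPipelineBuffer buf(BulkPipelineBuffer::BufferType::DMABUF_IOMMU,
                         /*size=*/4096, /*device_id=*/0);
  EXPECT_EQ(buf.size(), 0u);   // no generic_buffer exists until allocate()
  EXPECT_FALSE(buf.isReady()); // addr is set only by a successful allocate()
}
```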
7 changes: 5 additions & 2 deletions runtime/onert/backend/trix/KernelGenerator.cc
@@ -16,6 +16,7 @@

#include "KernelGenerator.h"

#include "ops/BulkPipelineLayer.h"
#include "ops/BulkLayer.h"

#include <backend/Backend.h>
@@ -75,8 +76,10 @@ void KernelGenerator::visit(const ir::operation::Bulk &node)
}
else
{
// TODO: Implement multiple model execution
throw std::runtime_error("NYI: multiple model execution");
// For pipeline execution (multiple models)
auto fn = std::make_unique<ops::BulkPipelineLayer>();
fn->configure(input_tensors, output_tensors, binary_path);
_return_fn = std::move(fn);
}
}
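The configure(...) call implies an interface along these lines; this is a hypothetical sketch inferred from the call site, since ops/BulkPipelineLayer.h itself is not shown in this diff.

```cpp
// Hypothetical declaration inferred from the call site above; the actual
// header is ops/BulkPipelineLayer.h (not shown in this diff).
#include <string>
#include <vector>

#include <backend/IPortableTensor.h>
#include <exec/IFunction.h>

namespace onert::backend::trix::ops
{

class BulkPipelineLayer : public exec::IFunction
{
public:
  // Binds the model I/O tensors and the compiled binary path for
  // pipelined execution of multiple models on the TRIX NPU.
  void configure(const std::vector<const IPortableTensor *> &inputs,
                 const std::vector<IPortableTensor *> &outputs,
                 const std::string &binary_path);

  void run() override; // would drive the per-model pipeline stages
};

} // namespace onert::backend::trix::ops
```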

167 changes: 167 additions & 0 deletions runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc
@@ -0,0 +1,167 @@
/*
* Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "BulkPipelineBuffer.h"

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cstring>
#include <iostream>

namespace onert::backend::trix::ops
{

// FIXME: Using higher level API instead of raw API
struct trix_ioctl_hwmem
{
int32_t type;
uint64_t size;
int32_t dbuf_fd;
} __attribute__((packed));

#define TRIX_IOCTL_HWMEM_ALLOC _IOW(136, 21, struct trix_ioctl_hwmem)
#define TRIX_IOCTL_HWMEM_DEALLOC _IOW(136, 22, struct trix_ioctl_hwmem)
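// Note: _IOW(type, nr, size) builds a write-direction (user -> kernel) ioctl
// request number from the driver magic (136), a command number (21/22), and
// the payload size the driver validates against.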

BulkPipelineBuffer::BulkPipelineBuffer(BufferType type, size_t size, int device_id)
: _type(type), _size(size), _device_id(device_id)
{
// DO NOTHING
}

BulkPipelineBuffer::~BulkPipelineBuffer() { deallocate(); }

size_t BulkPipelineBuffer::size() const { return _buffer ? _buffer->size : 0; }

bool BulkPipelineBuffer::isReady() const { return _buffer && _buffer->addr != nullptr; }

void BulkPipelineBuffer::allocate()
{
if (_buffer && _buffer->addr != nullptr)
{
// Already allocated
return;
}

if (!_buffer)
{
_buffer = new generic_buffer{};
}

// Open the device
char devname[16];
snprintf(devname, sizeof(devname), "/dev/triv2-%d", _device_id);
_dev_fd = open(devname, O_RDWR);
if (_dev_fd < 0)
{
throw std::runtime_error("Failed to open NPU device: " + std::string(devname));
}

// Allocate a buffer
struct trix_ioctl_hwmem hwmem;
hwmem.type = (_type == BufferType::DMABUF_CONT) ? 0 : 1;
hwmem.size = getAlignedSize(_size);

_buffer->dmabuf = ioctl(_dev_fd, TRIX_IOCTL_HWMEM_ALLOC, &hwmem);
if (_buffer->dmabuf < 0)
{
close(_dev_fd);
_dev_fd = -1;
throw std::runtime_error("Failed to allocate DMA buffer, size: " + std::to_string(hwmem.size));
}
Comment on lines +79 to +85 (runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc):

logic: Return value from ioctl() not checked correctly

The code assigns the return value of ioctl() to _buffer->dmabuf and checks if it's negative. However, ioctl() returns -1 on error (not the fd). The actual dmabuf fd should come from the hwmem.dbuf_fd field after a successful call.

```suggestion
  int ret = ioctl(_dev_fd, TRIX_IOCTL_HWMEM_ALLOC, &hwmem);
  if (ret < 0)
  {
    close(_dev_fd);
    _dev_fd = -1;
    throw std::runtime_error("Failed to allocate DMA buffer, size: " + std::to_string(hwmem.size));
  }
  _buffer->dmabuf = hwmem.dbuf_fd;
```
// Mapping the buffer
_buffer->addr = mmap(nullptr, hwmem.size, PROT_READ | PROT_WRITE, MAP_SHARED, _buffer->dmabuf, 0);
if (_buffer->addr == MAP_FAILED)
{
close(_buffer->dmabuf);
close(_dev_fd);
_buffer->dmabuf = -1;
_dev_fd = -1;
_buffer->addr = nullptr;
throw std::runtime_error("Failed to mmap DMA buffer");
}

_buffer->size = _size;
_buffer->type = BUFFER_DMABUF;
}

void BulkPipelineBuffer::deallocate()
{
if (!_buffer)
{
return;
}

if (_buffer->addr != nullptr)
{
size_t aligned_sz = getAlignedSize(_buffer->size);
munmap(_buffer->addr, aligned_sz);
_buffer->addr = nullptr;
}

if (_buffer->dmabuf >= 0)
{
struct trix_ioctl_hwmem hwmem;
hwmem.dbuf_fd = _buffer->dmabuf;
ioctl(_dev_fd, TRIX_IOCTL_HWMEM_DEALLOC, &hwmem);
close(_buffer->dmabuf);
_buffer->dmabuf = -1;
}

if (_dev_fd >= 0)
{
close(_dev_fd);
_dev_fd = -1;
}

delete _buffer;
_buffer = nullptr;
}

void BulkPipelineBuffer::fillFromFile(FILE *fp, size_t offset)
{
if (!isReady())
{
throw std::runtime_error("Buffer is not allocated");
}

if (!fp)
{
throw std::runtime_error("Invalid file pointer");
}

if (fseek(fp, static_cast<long>(offset), SEEK_SET) != 0)
{
throw std::runtime_error("Failed to seek file to offset: " + std::to_string(offset));
}

if (fread(_buffer->addr, _buffer->size, 1, fp) != 1)
{
throw std::runtime_error("Failed to read " + std::to_string(_buffer->size) +
" bytes from file");
}
}

size_t BulkPipelineBuffer::getAlignedSize(size_t size) const
{
// 4 KB (= Page size) aligned size
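// e.g. getAlignedSize(5000) = (5000 + 4095) & ~4095 = 8192; exact
// multiples of 4096 are returned unchanged.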
constexpr size_t _4KB_M_1 = (1 << 12) - 1;
return (size + _4KB_M_1) & ~_4KB_M_1;
}

} // namespace onert::backend::trix::ops
69 changes: 69 additions & 0 deletions runtime/onert/backend/trix/ops/BulkPipelineBuffer.h
@@ -0,0 +1,69 @@
/*
* Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_BUFFER_H__
#define __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_BUFFER_H__

#include <memory>
#include <cstdio>
#include <stdexcept>
#include <libnpuhost.h>

namespace onert::backend::trix::ops
{

class BulkPipelineBuffer
{
public:
enum class BufferType
{
DMABUF_CONT, // Contiguous DMA buffer
DMABUF_IOMMU // IOMMU DMA buffer
};

public:
BulkPipelineBuffer(BufferType type, size_t size, int device_id);
~BulkPipelineBuffer();

// Disallow copying
BulkPipelineBuffer(const BulkPipelineBuffer &) = delete;
BulkPipelineBuffer &operator=(const BulkPipelineBuffer &) = delete;

// Buffer management functions
void allocate();
void deallocate();
size_t size() const;

generic_buffer *getGenericBuffer() { return _buffer; }

// Data manipulation functions
void fillFromFile(FILE *fp, size_t offset = 0);
bool isReady() const;

private:
size_t getAlignedSize(size_t size) const;

private:
BufferType _type;
size_t _size;
int _device_id;
int _dev_fd{-1};
generic_buffer *_buffer{nullptr};
};

} // namespace onert::backend::trix::ops

#endif // __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_BUFFER_H__
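Taken together, the header supports a usage pattern like the following illustrative sketch, assuming a reachable /dev/triv2-0 device; the model file name and sizes are placeholders.

```cpp
// Illustrative only; "model.tvn" and the sizes are placeholders.
#include <cstdio>

#include "BulkPipelineBuffer.h"

using onert::backend::trix::ops::BulkPipelineBuffer;

void loadSegmentExample()
{
  // 1 MiB IOMMU-backed DMA buffer on NPU device 0.
  BulkPipelineBuffer buf(BulkPipelineBuffer::BufferType::DMABUF_IOMMU,
                         /*size=*/1 << 20, /*device_id=*/0);
  buf.allocate(); // opens /dev/triv2-0, ioctl-allocates, and mmaps

  if (FILE *fp = std::fopen("model.tvn", "rb"))
  {
    buf.fillFromFile(fp, /*offset=*/0); // reads size() bytes at the offset
    std::fclose(fp);
  }

  // buf.getGenericBuffer() can now be handed to libnpuhost APIs.
} // destructor unmaps, ioctl-deallocates, and closes the fds
```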