From d84669e077cd727e2d96f953d46714006a65a627 Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Wed, 3 Dec 2025 13:08:31 +0900 Subject: [PATCH 01/13] [onert] Add BulkPipelineBuffer for NPU buffer management (#16330) This commit introduces the BulkPipelineBuffer class to manage the buffers in the TRIX backend. ONE-DCO-1.0-Signed-off-by: Jonghwa Lee --- runtime/onert/backend/trix/CMakeLists.txt | 23 +++ .../backend/trix/ops/BulkPipelineBuffer.cc | 176 ++++++++++++++++++ .../backend/trix/ops/BulkPipelineBuffer.h | 78 ++++++++ .../trix/ops/test/BulkPipelineBuffer.test.cc | 73 ++++++++ .../backend/trix/ops/test/mock_syscalls.h | 33 ++++ 5 files changed, 383 insertions(+) create mode 100644 runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc create mode 100644 runtime/onert/backend/trix/ops/BulkPipelineBuffer.h create mode 100644 runtime/onert/backend/trix/ops/test/BulkPipelineBuffer.test.cc create mode 100644 runtime/onert/backend/trix/ops/test/mock_syscalls.h diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt index 649ae7dba29..db22817dd30 100644 --- a/runtime/onert/backend/trix/CMakeLists.txt +++ b/runtime/onert/backend/trix/CMakeLists.txt @@ -9,6 +9,9 @@ message(STATUS "ONERT backend: Found TRIXEngine") file(GLOB_RECURSE SOURCES "*.cc") +file(GLOB_RECURSE TESTS "*.test.cc") +list(REMOVE_ITEM SOURCES ${TESTS}) + add_library(${LIB_ONERT_BACKEND_TRIX} SHARED ${SOURCES}) target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE onert_core) @@ -24,3 +27,23 @@ set_target_properties(${LIB_ONERT_BACKEND_TRIX} PROPERTIES INSTALL_RPATH ${ONERT_RPATH_PLUGIN}) install(TARGETS ${LIB_ONERT_BACKEND_TRIX} DESTINATION ${ONERT_INSTALL_BACKENDDIR}) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +# Unit Tests +set(TEST_ONERT_TRIX_BACKEND test_onert_trix_backend) + +add_executable(${TEST_ONERT_TRIX_BACKEND} ${TESTS}) +target_link_libraries(${TEST_ONERT_TRIX_BACKEND} onert_core) 
+target_link_libraries(${TEST_ONERT_TRIX_BACKEND} trix-engine) +target_link_libraries(${TEST_ONERT_TRIX_BACKEND} ${LIB_ONERT_BACKEND_TRIX}) +target_link_libraries(${TEST_ONERT_TRIX_BACKEND} nnfw_common) +target_link_libraries(${TEST_ONERT_TRIX_BACKEND} nnfw_coverage) +target_link_libraries(${TEST_ONERT_TRIX_BACKEND} gtest gtest_main Threads::Threads) +set_target_properties(${TEST_ONERT_TRIX_BACKEND} PROPERTIES +INSTALL_RPATH "$ORIGIN/../${ONERT_INSTALL_COREDIR}:$ORIGIN/../${ONERT_INSTALL_BACKENDDIR}") + +add_test(${TEST_ONERT_TRIX_BACKEND} ${TEST_ONERT_TRIX_BACKEND}) +install(TARGETS ${TEST_ONERT_TRIX_BACKEND} DESTINATION unittest) diff --git a/runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc b/runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc new file mode 100644 index 00000000000..7a997b95378 --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BulkPipelineBuffer.h" + +#include +#include +#include +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace trix +{ +namespace ops +{ + +// FIXME: Using higher level API instead of raw API +struct trix_ioctl_hwmem +{ + int32_t type; + uint64_t size; + int32_t dbuf_fd; +} __attribute__((packed)); + +#define TRIX_IOCTL_HWMEM_ALLOC _IOW(136, 21, struct trix_ioctl_hwmem) +#define TRIX_IOCTL_HWMEM_DEALLOC _IOW(136, 22, struct trix_ioctl_hwmem) + +BulkPipelineBuffer::BulkPipelineBuffer(BufferType type, size_t size, int device_id) + : _type(type), _size(size), _device_id(device_id) +{ + // DO NOTHING +} + +BulkPipelineBuffer::~BulkPipelineBuffer() { deallocate(); } + +size_t BulkPipelineBuffer::size() const { return _buffer ? _buffer->size : 0; } + +bool BulkPipelineBuffer::isReady() const { return _buffer && _buffer->addr != nullptr; } + +void BulkPipelineBuffer::allocate() +{ + if (_buffer && _buffer->addr != nullptr) + { + // Already allocated + return; + } + + if (!_buffer) + { + _buffer = new generic_buffer{}; + } + + // Open the device + char devname[16]; + snprintf(devname, sizeof(devname), "/dev/triv2-%d", _device_id); + _dev_fd = open(devname, O_RDWR); + if (_dev_fd < 0) + { + throw std::runtime_error("Failed to open NPU device: " + std::string(devname)); + } + + // Allocate a buffer + struct trix_ioctl_hwmem hwmem; + hwmem.type = (_type == BufferType::DMABUF_CONT) ? 
0 : 1; + hwmem.size = getAlignedSize(_size); + + _buffer->dmabuf = ioctl(_dev_fd, TRIX_IOCTL_HWMEM_ALLOC, &hwmem); + if (_buffer->dmabuf < 0) + { + close(_dev_fd); + _dev_fd = -1; + throw std::runtime_error("Failed to allocate DMA buffer, size: " + std::to_string(hwmem.size)); + } + + // Mapping the buffer + _buffer->addr = mmap(nullptr, hwmem.size, PROT_READ | PROT_WRITE, MAP_SHARED, _buffer->dmabuf, 0); + if (_buffer->addr == MAP_FAILED) + { + close(_buffer->dmabuf); + close(_dev_fd); + _buffer->dmabuf = -1; + _dev_fd = -1; + _buffer->addr = nullptr; + throw std::runtime_error("Failed to mmap DMA buffer"); + } + + _buffer->size = _size; + _buffer->type = BUFFER_DMABUF; +} + +void BulkPipelineBuffer::deallocate() +{ + if (!_buffer) + { + return; + } + + if (_buffer->addr != nullptr) + { + size_t aligned_sz = getAlignedSize(_buffer->size); + munmap(_buffer->addr, aligned_sz); + _buffer->addr = nullptr; + } + + if (_buffer->dmabuf >= 0) + { + struct trix_ioctl_hwmem hwmem; + hwmem.dbuf_fd = _buffer->dmabuf; + ioctl(_dev_fd, TRIX_IOCTL_HWMEM_DEALLOC, &hwmem); + close(_buffer->dmabuf); + _buffer->dmabuf = -1; + } + + if (_dev_fd >= 0) + { + close(_dev_fd); + _dev_fd = -1; + } + + delete _buffer; + _buffer = nullptr; +} + +void BulkPipelineBuffer::fillFromFile(FILE *fp, size_t offset) +{ + if (!isReady()) + { + throw std::runtime_error("Buffer is not allocated"); + } + + if (!fp) + { + throw std::runtime_error("Invalid file pointer"); + } + + if (fseek(fp, static_cast(offset), SEEK_SET) != 0) + { + throw std::runtime_error("Failed to seek file to offset: " + std::to_string(offset)); + } + + if (fread(_buffer->addr, _buffer->size, 1, fp) != 1) + { + throw std::runtime_error("Failed to read " + std::to_string(_buffer->size) + + " bytes from file"); + } +} + +size_t BulkPipelineBuffer::getAlignedSize(size_t size) const +{ + // 4 KB (= Page size) aligned size + constexpr size_t _4KB_M_1 = (1 << 12) - 1; + return (size + _4KB_M_1) & ~_4KB_M_1; +} + +} // namespace ops +} // 
namespace trix +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/trix/ops/BulkPipelineBuffer.h b/runtime/onert/backend/trix/ops/BulkPipelineBuffer.h new file mode 100644 index 00000000000..30314e4b544 --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkPipelineBuffer.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEBUFFER_H__ +#define __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEBUFFER_H__ + +#include +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace trix +{ +namespace ops +{ + +class BulkPipelineBuffer +{ +public: + enum class BufferType + { + DMABUF_CONT, // Contiguous DMA buffer + DMABUF_IOMMU // IOMMU DMA buffer + }; + +public: + BulkPipelineBuffer(BufferType type, size_t size, int device_id); + ~BulkPipelineBuffer(); + + // Disallow copying + BulkPipelineBuffer(const BulkPipelineBuffer &) = delete; + BulkPipelineBuffer &operator=(const BulkPipelineBuffer &) = delete; + + // Buffer management functions + void allocate(); + void deallocate(); + size_t size() const; + + generic_buffer *getGenericBuffer() { return _buffer; } + + // Data manipulation functions + void fillFromFile(FILE *fp, size_t offset = 0); + bool isReady() const; + +private: + size_t getAlignedSize(size_t size) const; + +private: + BufferType _type; + size_t _size; + int _device_id; 
+ int _dev_fd{-1}; + generic_buffer *_buffer{nullptr}; +}; + +} // namespace ops +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEBUFFER_H__ diff --git a/runtime/onert/backend/trix/ops/test/BulkPipelineBuffer.test.cc b/runtime/onert/backend/trix/ops/test/BulkPipelineBuffer.test.cc new file mode 100644 index 00000000000..91fb6b4b1ad --- /dev/null +++ b/runtime/onert/backend/trix/ops/test/BulkPipelineBuffer.test.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../BulkPipelineBuffer.h" +#include + +#include "mock_syscalls.h" + +using namespace onert::backend::trix::ops; + +class BulkPipelineBufferTest : public ::testing::Test +{ +protected: + void SetUp() override + { + // Create a standard buffer for testing + buffer = + std::make_unique(BulkPipelineBuffer::BufferType::DMABUF_CONT, 1024, 0); + } + + void TearDown() override + { + // Ensure buffer is properly deallocated + if (buffer && buffer->isReady()) + { + buffer->deallocate(); + } + } + + std::unique_ptr buffer; +}; + +TEST_F(BulkPipelineBufferTest, test_allocate) +{ + EXPECT_NO_THROW(buffer->allocate()); + EXPECT_TRUE(buffer->isReady()); + EXPECT_EQ(buffer->size(), 1024); +} + +TEST_F(BulkPipelineBufferTest, test_deallocate) +{ + buffer->allocate(); + buffer->deallocate(); + EXPECT_FALSE(buffer->isReady()); + EXPECT_EQ(buffer->size(), 0); +} + +TEST_F(BulkPipelineBufferTest, test_fillFromFile) +{ + auto dummy_fp = fopen("/dev/null", "r"); + ASSERT_NE(dummy_fp, nullptr) << "Failed to open /dev/null for testing"; + + EXPECT_ANY_THROW(buffer->fillFromFile(nullptr, 0)); + + buffer->allocate(); + EXPECT_NO_THROW(buffer->fillFromFile(dummy_fp, 0)); + buffer->deallocate(); + + fclose(dummy_fp); +} diff --git a/runtime/onert/backend/trix/ops/test/mock_syscalls.h b/runtime/onert/backend/trix/ops/test/mock_syscalls.h new file mode 100644 index 00000000000..22e85ff71ee --- /dev/null +++ b/runtime/onert/backend/trix/ops/test/mock_syscalls.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _MOCK_SYSCALLS_H_ +#define _MOCK_SYSCALLS_H_ + +#include +#include +#include +#include + +int open(const char *, int, ...) { return 0; } +void *mmap(void *, size_t, int, int, int, off_t) { return (void *)0x1; } +int munmap(void *, size_t) { return 0; } +int close(int) { return 0; } +int ioctl(int, unsigned long, ...) { return 0; } +size_t fread(void *, size_t, size_t, FILE *) { return 1; } +int fseek(FILE *, long, int) { return 0; } + +#endif From 31acaa411b9dd6dc023eb4942f5fb95bf5d86b1d Mon Sep 17 00:00:00 2001 From: Arkadiusz Bokowy Date: Thu, 4 Dec 2025 02:50:26 +0100 Subject: [PATCH 02/13] [onert] Fix Python typing for the benchmark_inference() function (#16334) This commits fixes the typing by allowing passing None as a value for the input_shapes parameter in the benchmark_inference() function. In case of input_shapes being None, the function uses the shape retrieved from the initialized session. 
ONE-DCO-1.0-Signed-off-by: Arkadiusz Bokowy --- runtime/onert/sample/minimal-python/inference_benchmark.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/onert/sample/minimal-python/inference_benchmark.py b/runtime/onert/sample/minimal-python/inference_benchmark.py index aa265ce1c66..badbd066b65 100755 --- a/runtime/onert/sample/minimal-python/inference_benchmark.py +++ b/runtime/onert/sample/minimal-python/inference_benchmark.py @@ -4,7 +4,7 @@ import numpy as np import psutil import os -from typing import List +from typing import List, Optional from onert import infer, tensorinfo @@ -47,8 +47,8 @@ def get_validated_input_tensorinfos(sess: infer.session, return updated_infos -def benchmark_inference(nnpackage_path: str, backends: str, input_shapes: List[List[int]], - repeat: int): +def benchmark_inference(nnpackage_path: str, backends: str, + input_shapes: Optional[List[List[int]]], repeat: int): mem_before_kb = get_memory_usage_mb() * 1024 sess = infer.session(path=nnpackage_path, backends=backends) From e248efe47b558de91ab896f00d4f0b192e5729c0 Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Mon, 8 Dec 2025 10:14:32 +0900 Subject: [PATCH 03/13] [onert] Add MockSyscallsManager for customized system call mocking (#16333) It adds new MockSyscallsManager class to provide configurable hook system for mocking system calls in tests. 
ONE-DCO-1.0-Signed-off-by: Jonghwa Lee --- .../backend/trix/ops/test/mock_syscalls.h | 131 ++++++++++++++++-- .../trix/ops/test/mock_syscalls.test.cc | 127 +++++++++++++++++ 2 files changed, 250 insertions(+), 8 deletions(-) create mode 100644 runtime/onert/backend/trix/ops/test/mock_syscalls.test.cc diff --git a/runtime/onert/backend/trix/ops/test/mock_syscalls.h b/runtime/onert/backend/trix/ops/test/mock_syscalls.h index 22e85ff71ee..729f77f959c 100644 --- a/runtime/onert/backend/trix/ops/test/mock_syscalls.h +++ b/runtime/onert/backend/trix/ops/test/mock_syscalls.h @@ -21,13 +21,128 @@ #include #include #include +#include +#include +#include -int open(const char *, int, ...) { return 0; } -void *mmap(void *, size_t, int, int, int, off_t) { return (void *)0x1; } -int munmap(void *, size_t) { return 0; } -int close(int) { return 0; } -int ioctl(int, unsigned long, ...) { return 0; } -size_t fread(void *, size_t, size_t, FILE *) { return 1; } -int fseek(FILE *, long, int) { return 0; } +namespace onert +{ +namespace backend +{ +namespace trix +{ +namespace ops +{ +namespace test +{ -#endif +class MockSyscallsManager +{ +public: + // Function type definitions for each syscall + // Note: std::function doesn't work well with variadic functions, so we use specific signatures + using OpenHook = std::function; + using OpenCreatHook = std::function; + using MmapHook = std::function; + using MunmapHook = std::function; + using CloseHook = std::function; + using IoctlHook = std::function; + using FopenHook = std::function; + using FcloseHook = std::function; + using FreadHook = std::function; + using FseekHook = std::function; + + static MockSyscallsManager &getInstance() + { + static MockSyscallsManager instance; + return instance; + } + + // Hook registration functions + void setOpenHook(OpenHook hook) { _openHook = hook; } + void setOpenCreatHook(OpenCreatHook hook) { _openCreatHook = hook; } + void setMmapHook(MmapHook hook) { _mmapHook = hook; } + void 
setMunmapHook(MunmapHook hook) { _munmapHook = hook; } + void setCloseHook(CloseHook hook) { _closeHook = hook; } + void setIoctlHook(IoctlHook hook) { _ioctlHook = hook; } + void setFopenHook(FopenHook hook) { _fopenHook = hook; } + void setFcloseHook(FcloseHook hook) { _fcloseHook = hook; } + void setFreadHook(FreadHook hook) { _freadHook = hook; } + void setFseekHook(FseekHook hook) { _fseekHook = hook; } + + // Hook retrieval functions + OpenHook getOpenHook() const { return _openHook; } + OpenCreatHook getOpenCreatHook() const { return _openCreatHook; } + MmapHook getMmapHook() const { return _mmapHook; } + MunmapHook getMunmapHook() const { return _munmapHook; } + CloseHook getCloseHook() const { return _closeHook; } + IoctlHook getIoctlHook() const { return _ioctlHook; } + FopenHook getFopenHook() const { return _fopenHook; } + FcloseHook getFcloseHook() const { return _fcloseHook; } + FreadHook getFreadHook() const { return _freadHook; } + FseekHook getFseekHook() const { return _fseekHook; } + + // Hook clearing functions + void clearOpenHook() { _openHook = nullptr; } + void clearOpenCreatHook() { _openCreatHook = nullptr; } + void clearMmapHook() { _mmapHook = nullptr; } + void clearMunmapHook() { _munmapHook = nullptr; } + void clearCloseHook() { _closeHook = nullptr; } + void clearIoctlHook() { _ioctlHook = nullptr; } + void clearFopenHook() { _fopenHook = nullptr; } + void clearFcloseHook() { _fcloseHook = nullptr; } + void clearFreadHook() { _freadHook = nullptr; } + void clearFseekHook() { _fseekHook = nullptr; } + + // Reset all hooks + void resetAll() + { + clearOpenHook(); + clearOpenCreatHook(); + clearMmapHook(); + clearMunmapHook(); + clearCloseHook(); + clearIoctlHook(); + clearFopenHook(); + clearFcloseHook(); + clearFreadHook(); + clearFseekHook(); + } + +private: + MockSyscallsManager() = default; + ~MockSyscallsManager() = default; + MockSyscallsManager(const MockSyscallsManager &) = delete; + MockSyscallsManager &operator=(const 
MockSyscallsManager &) = delete; + + // Hook function pointers + OpenHook _openHook; + OpenCreatHook _openCreatHook; + MmapHook _mmapHook; + MunmapHook _munmapHook; + CloseHook _closeHook; + IoctlHook _ioctlHook; + FopenHook _fopenHook; + FcloseHook _fcloseHook; + FreadHook _freadHook; + FseekHook _fseekHook; +}; + +} // namespace test +} // namespace ops +} // namespace trix +} // namespace backend +} // namespace onert + +// Mock syscall implementations +int open(const char *pathname, int flags, ...); +void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); +int munmap(void *addr, size_t length); +int close(int fd); +int ioctl(int fd, unsigned long request, ...); +FILE *fopen(const char *path, const char *mode); +int fclose(FILE *stream); +size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); +int fseek(FILE *stream, long offset, int whence); + +#endif // _MOCK_SYSCALLS_H_ diff --git a/runtime/onert/backend/trix/ops/test/mock_syscalls.test.cc b/runtime/onert/backend/trix/ops/test/mock_syscalls.test.cc new file mode 100644 index 00000000000..2ae1dc171e1 --- /dev/null +++ b/runtime/onert/backend/trix/ops/test/mock_syscalls.test.cc @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mock_syscalls.h" + +int open(const char *pathname, int flags, ...) 
+{ + auto &manager = onert::backend::trix::ops::test::MockSyscallsManager::getInstance(); + + // Handle variable arguments for open() + if (flags & O_CREAT) + { + if (auto creatHook = manager.getOpenCreatHook()) + { + va_list args; + va_start(args, flags); + mode_t mode = va_arg(args, mode_t); + va_end(args); + return creatHook(pathname, flags, mode); + } + } + else + { + if (auto hook = manager.getOpenHook()) + { + return hook(pathname, flags); + } + } + return 0; // Default mock return value +} + +void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + auto &manager = onert::backend::trix::ops::test::MockSyscallsManager::getInstance(); + if (auto hook = manager.getMmapHook()) + { + return hook(addr, length, prot, flags, fd, offset); + } + return (void *)0x1; // Default mock return value +} + +int munmap(void *addr, size_t length) +{ + auto &manager = onert::backend::trix::ops::test::MockSyscallsManager::getInstance(); + if (auto hook = manager.getMunmapHook()) + { + return hook(addr, length); + } + return 0; // Default mock return value +} + +int close(int fd) +{ + auto &manager = onert::backend::trix::ops::test::MockSyscallsManager::getInstance(); + if (auto hook = manager.getCloseHook()) + { + return hook(fd); + } + return 0; // Default mock return value +} + +int ioctl(int fd, unsigned long request, ...) 
+{ + auto &manager = onert::backend::trix::ops::test::MockSyscallsManager::getInstance(); + if (auto hook = manager.getIoctlHook()) + { + va_list args; + va_start(args, request); + void *arg = va_arg(args, void *); + va_end(args); + return hook(fd, request, arg); + } + return 0; // Default mock return value +} + +FILE *fopen(const char *path, const char *mode) +{ + auto &manager = onert::backend::trix::ops::test::MockSyscallsManager::getInstance(); + if (auto hook = manager.getFopenHook()) + { + return hook(path, mode); + } + return (FILE *)0x1; // Default mock return value +} + +int fclose(FILE *stream) +{ + auto &manager = onert::backend::trix::ops::test::MockSyscallsManager::getInstance(); + if (auto hook = manager.getFcloseHook()) + { + return hook(stream); + } + return 0; +} + +size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + auto &manager = onert::backend::trix::ops::test::MockSyscallsManager::getInstance(); + if (auto hook = manager.getFreadHook()) + { + return hook(ptr, size, nmemb, stream); + } + return 1; // Default mock return value +} + +int fseek(FILE *stream, long offset, int whence) +{ + auto &manager = onert::backend::trix::ops::test::MockSyscallsManager::getInstance(); + if (auto hook = manager.getFseekHook()) + { + return hook(stream, offset, whence); + } + return 0; // Default mock return value +} From efee6c8897795a48e9794decb5b04252bcec8bbd Mon Sep 17 00:00:00 2001 From: Hyeongseok Oh Date: Fri, 19 Dec 2025 09:03:23 +0900 Subject: [PATCH 04/13] [luci-value-test] Add tolerance for Mean_U8_000 test (#16337) This commit adds Mean_U8_000 to tolerance-based evaluation with 1 absolute tolerance to handle precision issues in uint8 operations. 
ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh --- compiler/luci-value-test/test.lst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler/luci-value-test/test.lst b/compiler/luci-value-test/test.lst index 591421eb3c6..ea944fdf83e 100644 --- a/compiler/luci-value-test/test.lst +++ b/compiler/luci-value-test/test.lst @@ -90,7 +90,7 @@ addeval(MaxPool2D_000) addeval(MaxPool2D_U8_000) addeval(Mean_000) addeval(Mean_001) -addeval(Mean_U8_000) +#addeval(Mean_U8_000) --> test with tolerance addeval(Minimum_000) #addeval(MirrorPad_000) addeval(Mul_000) @@ -207,3 +207,4 @@ addevaltol(SVDF_001 8e-3 8e-3) addevaltol(Conv2D_U8_000 5 5) # refer https://github.com/Samsung/ONE/issues/10438 addevaltol(YUV_TO_RGB_U8_000 1 1) +addevaltol(Mean_U8_000 8e-3 1) From 413f9e2c0a7f9d6c3fad51eb93637e885f580f49 Mon Sep 17 00:00:00 2001 From: Hyeongseok Oh Date: Mon, 29 Dec 2025 15:56:56 +0900 Subject: [PATCH 05/13] [luci-value-test] Add input data logging for test failures (#16338) This commit adds debug print statements to log input data when model execution results differ between interpreter and luci outputs. This change helps diagnose test failures by providing complete context including input data, interpreter output, and luci output. 
ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh --- compiler/luci-value-test/luci_eval_verifier.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler/luci-value-test/luci_eval_verifier.py b/compiler/luci-value-test/luci_eval_verifier.py index e934260b869..ed49e6c269d 100755 --- a/compiler/luci-value-test/luci_eval_verifier.py +++ b/compiler/luci-value-test/luci_eval_verifier.py @@ -119,6 +119,7 @@ if output_details["dtype"] == np.uint8: if np.allclose(luci_output_data, intp_output_data, rtol=rtolint, atol=atolint) == False: + print("input data", input_data) print("intp_output_data", intp_output_data) print("luci_output_data", luci_output_data) raise SystemExit("Execution result of " + tflite_model + @@ -127,6 +128,7 @@ elif output_details["dtype"] == np.float32: if np.allclose(luci_output_data, intp_output_data, rtol=rtolf32, atol=atolf32) == False: + print("input data", input_data) print("intp_output_data", intp_output_data) print("luci_output_data", luci_output_data) raise SystemExit("Execution result of " + tflite_model + @@ -135,6 +137,7 @@ elif output_details["dtype"] == np.int64: if np.allclose(luci_output_data, intp_output_data, rtol=rtolint, atol=atolint) == False: + print("input data", input_data) print("intp_output_data", intp_output_data) print("luci_output_data", luci_output_data) raise SystemExit("Execution result of " + tflite_model + @@ -143,6 +146,7 @@ elif output_details["dtype"] == np.int32: if np.allclose(luci_output_data, intp_output_data, rtol=rtolint, atol=atolint) == False: + print("input data", input_data) print("intp_output_data", intp_output_data) print("luci_output_data", luci_output_data) raise SystemExit("Execution result of " + tflite_model + @@ -151,6 +155,7 @@ elif output_details["dtype"] == np.int16: if np.allclose(luci_output_data, intp_output_data, rtol=rtolint, atol=atolint) == False: + print("input data", input_data) print("intp_output_data", intp_output_data) print("luci_output_data", luci_output_data) raise 
SystemExit("Execution result of " + tflite_model + @@ -158,6 +163,7 @@ output_dtype = "int16" elif output_details["dtype"] == np.bool_: if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: + print("input data", input_data) print("intp_output_data", intp_output_data) print("luci_output_data", luci_output_data) raise SystemExit("Execution result of " + tflite_model + From 5b5a249ff7765c45f4ec70890229d4126c091bdc Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Wed, 31 Dec 2025 14:07:15 +0900 Subject: [PATCH 06/13] [onert] Implement BulkPipelineModel for Trix model execution (#16332) This implements new BulkPipelineModel class to handle NPU model loading. ONE-DCO-1.0-Signed-off-by: Jonghwa Lee --- .../backend/trix/ops/BulkPipelineModel.cc | 281 ++++++++++++++++++ .../backend/trix/ops/BulkPipelineModel.h | 104 +++++++ .../trix/ops/test/BulkPipelineModel.test.cc | 116 ++++++++ 3 files changed, 501 insertions(+) create mode 100644 runtime/onert/backend/trix/ops/BulkPipelineModel.cc create mode 100644 runtime/onert/backend/trix/ops/BulkPipelineModel.h create mode 100644 runtime/onert/backend/trix/ops/test/BulkPipelineModel.test.cc diff --git a/runtime/onert/backend/trix/ops/BulkPipelineModel.cc b/runtime/onert/backend/trix/ops/BulkPipelineModel.cc new file mode 100644 index 00000000000..ce7674c2cbf --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkPipelineModel.cc @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BulkPipelineModel.h" + +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace trix +{ +namespace ops +{ + +BulkPipelineModel::BulkPipelineModel(const std::string &model_path, int device_id) + : _model_path(model_path), _device_id(device_id) +{ + // DO NOTHING +} + +BulkPipelineModel::~BulkPipelineModel() { release(); } + +bool BulkPipelineModel::initialize() +{ + if (_initialized.load()) + { + return true; + } + + if (!loadMetadata()) + { + return false; + } + + _initialized = true; + return true; +} + +bool BulkPipelineModel::prepare() +{ + if (_prepared.load()) + { + return true; + } + + try + { + openDevice(); + allocateBuffers(); + fillBuffers(); + markBufferReady(); + registerModel(); + + _prepared = true; + return true; + } + catch (const std::exception &e) + { + std::cerr << "Failed to prepare model " << _model_path << ": " << e.what() << std::endl; + release(); + return false; + } +} + +void BulkPipelineModel::release() +{ + if (!_prepared.load()) + { + return; + } + + unregisterModel(); + closeDevice(); + + if (_fp) + { + fclose(_fp); + _fp = nullptr; + } + + _program_buffer.reset(); + _weight_buffer.reset(); + _meta.reset(); + _meta_size = 0; + _model_id = 0; + + _prepared = false; +} + +void BulkPipelineModel::run(const std::vector &inputs, + std::vector &outputs) +{ + if (!_prepared.load()) + { + throw std::runtime_error("Model is not prepared: " + _model_path); + } + + if (!_meta) + { + throw std::runtime_error("Model metadata is not loaded: " + _model_path); + } + + // Prepare input buffers + input_buffers input; + input.num_buffers = _meta->input_seg_num; + for (uint32_t i = 0; i < input.num_buffers; i++) + { + uint32_t idx = _meta->input_seg_idx[i]; + input.bufs[i].addr = inputs[i]->buffer(); + input.bufs[i].type = BUFFER_MAPPED; + input.bufs[i].size = _meta->segment_size[idx]; + } + + // Prepare 
output buffers + output_buffers output; + output.num_buffers = _meta->output_seg_num; + for (uint32_t i = 0; i < output.num_buffers; i++) + { + uint32_t idx = _meta->output_seg_idx[i]; + output.bufs[i].addr = outputs[i]->buffer(); + output.bufs[i].type = BUFFER_MAPPED; + output.bufs[i].size = _meta->segment_size[idx]; + } + + // Execute the model + int ret = runNPU_model(_dev, _model_id, NPU_INFER_BLOCKING, &input, &output, nullptr, nullptr); + if (ret < 0) + { + throw std::runtime_error("runNPU_model() failed for " + _model_path + + ", ret: " + std::to_string(ret)); + } +} + +void BulkPipelineModel::waitForBufferReady() +{ + std::unique_lock lock(_buffer_mutex); + _buffer_cv.wait(lock, [this] { return _buffer_ready.load(); }); +} + +void BulkPipelineModel::markBufferReady() +{ + { + std::lock_guard lock(_buffer_mutex); + _buffer_ready = true; + } + _buffer_cv.notify_all(); +} + +bool BulkPipelineModel::loadMetadata() +{ + _fp = fopen(_model_path.c_str(), "rb"); + if (!_fp) + { + throw std::runtime_error("Failed to open model file: " + _model_path); + } + + _meta = std::make_unique(); + if (fread(_meta.get(), NPUBIN_META_SIZE, 1, _fp) != 1) + { + throw std::runtime_error("Failed to read metadata from: " + _model_path); + } + + _meta_size = _meta->extended_metasize ? 
sizeof(npubin_meta) + _meta->extended_metasize + : NPUBIN_META_TOTAL_SIZE(_meta->magiccode); + + return true; +} + +void BulkPipelineModel::allocateBuffers() +{ + if (!_meta) + { + throw std::runtime_error("Metadata not loaded for: " + _model_path); + } + + _program_buffer = + std::make_shared(BulkPipelineBuffer::BufferType::DMABUF_CONT, + static_cast(_meta->program_size), _device_id); + + _weight_buffer = + std::make_shared(BulkPipelineBuffer::BufferType::DMABUF_IOMMU, + static_cast(_meta->weight_size), _device_id); + + _program_buffer->allocate(); + if (_meta->weight_size > 0) + { + _weight_buffer->allocate(); + } +} + +void BulkPipelineModel::fillBuffers() +{ + if (!_fp || !_program_buffer || !_weight_buffer) + { + throw std::runtime_error("Buffers not properly initialized for: " + _model_path); + } + + // Fill program buffer + _program_buffer->fillFromFile(_fp, _meta_size); + + // Fill weight buffer + if (_weight_buffer->size() > 0) + { + _weight_buffer->fillFromFile(_fp, _meta_size + _meta->program_size); + } +} + +void BulkPipelineModel::registerModel() +{ + if (!_dev || !_program_buffer || !_weight_buffer) + { + throw std::runtime_error("Device or buffers not ready for: " + _model_path); + } + + generic_buffer modelfile; + modelfile.type = BUFFER_FILE; + modelfile.filepath = _model_path.c_str(); + modelfile.size = _meta->size; + + int ret = registerNPUmodel_ext(_dev, &modelfile, _program_buffer->getGenericBuffer(), + _weight_buffer->getGenericBuffer(), &_model_id); + if (ret < 0) + { + throw std::runtime_error("Failed to register model: " + _model_path + + ", ret: " + std::to_string(ret)); + } +} + +void BulkPipelineModel::unregisterModel() +{ + if (_dev && _model_id > 0) + { + int ret = unregisterNPUmodel(_dev, _model_id); + if (ret < 0) + { + std::cerr << "Failed to unregister model: " << _model_path << ", ret: " << ret << std::endl; + } + _model_id = 0; + } +} + +void BulkPipelineModel::openDevice() +{ + int ret = getNPUdeviceByType(&_dev, 
NPUCOND_TRIV24_CONN_SOCIP, _device_id); + if (ret < 0) + { + throw std::runtime_error("Failed to open NPU device for: " + _model_path + + ", ret: " + std::to_string(ret)); + } +} + +void BulkPipelineModel::closeDevice() +{ + if (_dev) + { + putNPUdevice(_dev); + _dev = nullptr; + } +} + +} // namespace ops +} // namespace trix +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/trix/ops/BulkPipelineModel.h b/runtime/onert/backend/trix/ops/BulkPipelineModel.h new file mode 100644 index 00000000000..d39a49c0a3e --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkPipelineModel.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEMODEL_H__ +#define __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEMODEL_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "BulkPipelineBuffer.h" + +namespace onert +{ +namespace backend +{ +namespace trix +{ +namespace ops +{ + +class BulkPipelineModel +{ +public: + BulkPipelineModel(const std::string &model_path, int device_id); + ~BulkPipelineModel(); + + // Disallow copying + BulkPipelineModel(const BulkPipelineModel &) = delete; + BulkPipelineModel &operator=(const BulkPipelineModel &) = delete; + + bool initialize(); + bool prepare(); + void release(); + bool isPrepared() const { return _prepared; } + + void run(const std::vector &inputs, + std::vector &outputs); + + void waitForBufferReady(); + void markBufferReady(); + + const npubin_meta *metadata() const { return _meta.get(); } + uint64_t programSize() const { return _meta->program_size; } + uint64_t weightSize() const { return _meta->weight_size; } + uint32_t modelId() const { return _model_id; } + npudev_h device() const { return _dev; } + const std::string &modelPath() const { return _model_path; } + +private: + bool loadMetadata(); + void allocateBuffers(); + void fillBuffers(); + void registerModel(); + void unregisterModel(); + void openDevice(); + void closeDevice(); + +private: + std::string _model_path; + int _device_id; + std::atomic _initialized{false}; + std::atomic _prepared{false}; + + npudev_h _dev; + uint32_t _model_id{0}; + + std::unique_ptr _meta; + size_t _meta_size{0}; + FILE *_fp{nullptr}; + + std::shared_ptr _program_buffer; + std::shared_ptr _weight_buffer; + + std::mutex _buffer_mutex; + std::condition_variable _buffer_cv; + std::atomic _buffer_ready{false}; +}; + +} // namespace ops +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEMODEL_H__ diff --git a/runtime/onert/backend/trix/ops/test/BulkPipelineModel.test.cc 
b/runtime/onert/backend/trix/ops/test/BulkPipelineModel.test.cc new file mode 100644 index 00000000000..587b8367e0a --- /dev/null +++ b/runtime/onert/backend/trix/ops/test/BulkPipelineModel.test.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../BulkPipelineModel.h" +#include + +#include "mock_syscalls.h" + +using namespace onert::backend::trix::ops; +using namespace onert::backend::trix::ops::test; + +class BulkPipelineModelTest : public ::testing::Test +{ + +protected: + void SetUp() override + { + model = std::make_unique("model_path", 0); + + // Reset all mock syscalls before each test + MockSyscallsManager::getInstance().resetAll(); + + // Add a hook for fread() + MockSyscallsManager::getInstance().setFreadHook( + [](void *ptr, size_t size, size_t, FILE *) -> int { + if (size == NPUBIN_META_SIZE) + { + auto meta = reinterpret_cast(ptr); + meta->program_size = 1024; + meta->weight_size = 1024; + meta->size = 4096; + } + return 1; + }); + + MockSyscallsManager::getInstance().setIoctlHook( + [](int, unsigned long request, void *arg) -> int { + // Get Version + if (request == _IOR(0x88, 1, unsigned int)) + { + // Return version 3.2.X.X for trix backend sanity checking + *static_cast(arg) = 0x3020000; + } + return 0; + }); + } + + void TearDown() override + { + // Clear all mock syscalls after each test + 
MockSyscallsManager::getInstance().resetAll(); + } + + std::unique_ptr model; +}; + +// Mock trix-engine api +int registerNPUmodel_ext(npudev_h, generic_buffer *, generic_buffer *, generic_buffer *, + uint32_t *model_id) +{ + *model_id = 1; + return 0; +} + +int runNPU_model(npudev_h, uint32_t, npu_infer_mode, const input_buffers *, output_buffers *, + npuOutputNotify, void *) +{ + return 0; +} + +int unregisterNPUmodel(npudev_h, uint32_t) { return 0; } + +TEST_F(BulkPipelineModelTest, test_model_creation) +{ + EXPECT_TRUE(model->initialize()); + EXPECT_TRUE(model->prepare()); + + EXPECT_NE(model->metadata(), nullptr); + EXPECT_EQ(model->programSize(), 1024); + EXPECT_EQ(model->weightSize(), 1024); + EXPECT_NE(model->device(), nullptr); + EXPECT_NE(model->modelId(), 0); + EXPECT_EQ(model->modelPath(), "model_path"); +} + +TEST_F(BulkPipelineModelTest, test_model_run) +{ + EXPECT_TRUE(model->initialize()); + EXPECT_TRUE(model->prepare()); + const std::vector inputs; + std::vector outputs; + EXPECT_NO_THROW(model->run(inputs, outputs)); +} + +TEST_F(BulkPipelineModelTest, test_model_release) +{ + EXPECT_TRUE(model->initialize()); + EXPECT_TRUE(model->prepare()); + model->release(); + EXPECT_EQ(model->device(), nullptr); + EXPECT_EQ(model->modelId(), 0); + EXPECT_EQ(model->metadata(), nullptr); +} From dae2d8a494103ffe275d02b31db23c924467c0dd Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Mon, 12 Jan 2026 13:01:41 +0900 Subject: [PATCH 07/13] [onert] Add BulkPipelineManager for multi-model orchestration (#16339) Add new BulkPipelineManager class to coordinate execution of multiple models in sequence with proper resource management. 
ONE-DCO-1.0-Signed-off-by: Jonghwa Lee Signed-off-by: Jonghwa Lee --- .../backend/trix/ops/BulkPipelineManager.cc | 175 ++++++++++++++++++ .../backend/trix/ops/BulkPipelineManager.h | 80 ++++++++ .../trix/ops/test/BulkPipelineManager.test.cc | 85 +++++++++ 3 files changed, 340 insertions(+) create mode 100644 runtime/onert/backend/trix/ops/BulkPipelineManager.cc create mode 100644 runtime/onert/backend/trix/ops/BulkPipelineManager.h create mode 100644 runtime/onert/backend/trix/ops/test/BulkPipelineManager.test.cc diff --git a/runtime/onert/backend/trix/ops/BulkPipelineManager.cc b/runtime/onert/backend/trix/ops/BulkPipelineManager.cc new file mode 100644 index 00000000000..1109a24c55f --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkPipelineManager.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BulkPipelineManager.h" + +#include +#include +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace trix +{ +namespace ops +{ + +BulkPipelineManager::BulkPipelineManager(const PipelineConfig &config) : _config(config) +{ + // DO NOTHING +} + +BulkPipelineManager::~BulkPipelineManager() { shutdown(); } + +bool BulkPipelineManager::initialize() +{ + if (_initialized.load()) + { + // Already initialized + return true; + } + + try + { + createModels(); + prepareModels(); + + _initialized = true; + return true; + } + catch (const std::exception &e) + { + std::cerr << "Failed to initialize pipeline: " + std::string(e.what()) << std::endl; + shutdown(); + return false; + } +} + +void BulkPipelineManager::shutdown() +{ + if (!_initialized.load()) + { + return; + } + + _initialized = false; + + // Wait until all executions are finished + while (_executing.load()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + + // Release models and clear buffer pool + for (auto &model : _models) + { + if (model) + { + model->release(); + } + } + _models.clear(); +} + +void BulkPipelineManager::execute(const std::vector &inputs, + std::vector &outputs) +{ + if (!_initialized.load()) + { + throw std::runtime_error("Pipeline is not initialized"); + } + + if (_models.empty()) + { + throw std::runtime_error("No models in pipeline"); + } + + _executing = true; + + try + { + auto current_inputs = inputs; + auto current_outputs = outputs; + + for (size_t i = 0; i < _models.size(); ++i) + { + auto &model = _models[i]; + if (!model || !model->isPrepared()) + { + throw std::runtime_error("Model at index " + std::to_string(i) + " is not prepared"); + } + + // Wait for buffer ready before execution + model->waitForBufferReady(); + + // Execute model + model->run(current_inputs, current_outputs); + + // The input of the next model is the output of the current model + if (i < _models.size() - 1) + { + current_inputs.clear(); + for 
(const auto &output : current_outputs) + { + current_inputs.push_back(const_cast(output)); + } + } + } + } + catch (...) + { + _executing = false; + throw; + } + + _executing = false; +} + +void BulkPipelineManager::createModels() +{ + _models.clear(); + _models.reserve(_config.model_paths.size()); + + for (size_t i = 0; i < _config.model_paths.size(); ++i) + { + auto model = std::make_shared(_config.model_paths[i], _config.device_id); + if (!model->initialize()) + { + throw std::runtime_error("Failed to initialize model: " + model->modelPath()); + } + _models.push_back(model); + } +} + +void BulkPipelineManager::prepareModels() +{ + for (auto &model : _models) + { + if (!model->prepare()) + { + throw std::runtime_error("Failed to prepare model: " + model->modelPath()); + } + } +} + +} // namespace ops +} // namespace trix +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/trix/ops/BulkPipelineManager.h b/runtime/onert/backend/trix/ops/BulkPipelineManager.h new file mode 100644 index 00000000000..8f82c2e93a3 --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkPipelineManager.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_TRIX_OPS_BULK_PIPE_LINE_MANAGER_H__ +#define __ONERT_BACKEND_TRIX_OPS_BULK_PIPE_LINE_MANAGER_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "BulkPipelineModel.h" + +namespace onert +{ +namespace backend +{ +namespace trix +{ +namespace ops +{ + +class BulkPipelineManager +{ +public: + struct PipelineConfig + { + std::vector model_paths; + int device_id{0}; + }; + +public: + explicit BulkPipelineManager(const PipelineConfig &config); + ~BulkPipelineManager(); + + // Disallow copying + BulkPipelineManager(const BulkPipelineManager &) = delete; + BulkPipelineManager &operator=(const BulkPipelineManager &) = delete; + + bool initialize(); + void shutdown(); + bool isInitialized() const { return _initialized; } + + void execute(const std::vector &inputs, + std::vector &outputs); + +private: + void createModels(); + void prepareModels(); + +private: + PipelineConfig _config; + std::atomic _initialized{false}; + std::atomic _executing{false}; + + std::vector> _models; +}; + +} // namespace ops +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_OPS_BULK_PIPE_LINE_MANAGER_H__ diff --git a/runtime/onert/backend/trix/ops/test/BulkPipelineManager.test.cc b/runtime/onert/backend/trix/ops/test/BulkPipelineManager.test.cc new file mode 100644 index 00000000000..359a0df6ec6 --- /dev/null +++ b/runtime/onert/backend/trix/ops/test/BulkPipelineManager.test.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../BulkPipelineManager.h" +#include + +#include "mock_syscalls.h" + +using namespace onert::backend::trix::ops; +using namespace onert::backend::trix::ops::test; + +class BulkPipelineManagerTest : public ::testing::Test +{ +protected: + void SetUp() override + { + BulkPipelineManager::PipelineConfig config; + config.device_id = 0; + config.model_paths.push_back("model_path"); + manager = std::make_unique(config); + + // Reset all mock syscalls before each test + MockSyscallsManager::getInstance().resetAll(); + + MockSyscallsManager::getInstance().setFreadHook( + [](void *ptr, size_t size, size_t, FILE *) -> int { + if (size == NPUBIN_META_SIZE) + { + auto meta = reinterpret_cast(ptr); + meta->program_size = 1024; + meta->weight_size = 1024; + meta->size = 4096; + } + return 1; + }); + + MockSyscallsManager::getInstance().setIoctlHook( + [](int, unsigned long request, void *arg) -> int { + // Get Version + if (request == _IOR(0x88, 1, unsigned int)) + { + // Return version 3.2.X.X for trix backend sanity checking + *static_cast(arg) = 0x3020000; + } + return 0; + }); + } + void TearDown() override {} + + std::unique_ptr manager; +}; + +TEST_F(BulkPipelineManagerTest, test_initilize) +{ + EXPECT_TRUE(manager->initialize()); + EXPECT_TRUE(manager->isInitialized()); +} + +TEST_F(BulkPipelineManagerTest, test_shutdown) +{ + EXPECT_TRUE(manager->initialize()); + manager->shutdown(); + EXPECT_FALSE(manager->isInitialized()); +} + +TEST_F(BulkPipelineManagerTest, test_execute) +{ + EXPECT_TRUE(manager->initialize()); + const std::vector 
inputs; + std::vector outputs; + EXPECT_NO_THROW(manager->execute(inputs, outputs)); +} From e37d6e7b4635fea7c767b0a5b7e558a3d3b21c90 Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Tue, 13 Jan 2026 16:31:44 +0900 Subject: [PATCH 08/13] [onert] Refactor header guard and namespace formatting in BulkPipeline (#16342) This commit updates header guard names and nested namespace declarations in bulk pipeline headers. This improves code consistency and readability. ONE-DCO-1.0-Signed-off-by: Jonghwa Lee --- .../backend/trix/ops/BulkPipelineBuffer.cc | 13 ++----------- .../backend/trix/ops/BulkPipelineBuffer.h | 19 +++++-------------- .../backend/trix/ops/BulkPipelineManager.cc | 13 ++----------- .../backend/trix/ops/BulkPipelineManager.h | 19 +++++-------------- .../backend/trix/ops/BulkPipelineModel.cc | 13 ++----------- .../backend/trix/ops/BulkPipelineModel.h | 19 +++++-------------- 6 files changed, 21 insertions(+), 75 deletions(-) diff --git a/runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc b/runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc index 7a997b95378..fc59b517756 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc +++ b/runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc @@ -23,13 +23,7 @@ #include #include -namespace onert -{ -namespace backend -{ -namespace trix -{ -namespace ops +namespace onert::backend::trix::ops { // FIXME: Using higher level API instead of raw API @@ -170,7 +164,4 @@ size_t BulkPipelineBuffer::getAlignedSize(size_t size) const return (size + _4KB_M_1) & ~_4KB_M_1; } -} // namespace ops -} // namespace trix -} // namespace backend -} // namespace onert +} // namespace onert::backend::trix::ops diff --git a/runtime/onert/backend/trix/ops/BulkPipelineBuffer.h b/runtime/onert/backend/trix/ops/BulkPipelineBuffer.h index 30314e4b544..7170dc91cc1 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineBuffer.h +++ b/runtime/onert/backend/trix/ops/BulkPipelineBuffer.h @@ -14,21 +14,15 @@ * limitations under the 
License. */ -#ifndef __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEBUFFER_H__ -#define __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEBUFFER_H__ +#ifndef __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_BUFFER_H__ +#define __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_BUFFER_H__ #include #include #include #include -namespace onert -{ -namespace backend -{ -namespace trix -{ -namespace ops +namespace onert::backend::trix::ops { class BulkPipelineBuffer @@ -70,9 +64,6 @@ class BulkPipelineBuffer generic_buffer *_buffer{nullptr}; }; -} // namespace ops -} // namespace trix -} // namespace backend -} // namespace onert +} // namespace onert::backend::trix::ops -#endif // __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEBUFFER_H__ +#endif // __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_BUFFER_H__ diff --git a/runtime/onert/backend/trix/ops/BulkPipelineManager.cc b/runtime/onert/backend/trix/ops/BulkPipelineManager.cc index 1109a24c55f..13a712a3c63 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineManager.cc +++ b/runtime/onert/backend/trix/ops/BulkPipelineManager.cc @@ -22,13 +22,7 @@ #include #include -namespace onert -{ -namespace backend -{ -namespace trix -{ -namespace ops +namespace onert::backend::trix::ops { BulkPipelineManager::BulkPipelineManager(const PipelineConfig &config) : _config(config) @@ -169,7 +163,4 @@ void BulkPipelineManager::prepareModels() } } -} // namespace ops -} // namespace trix -} // namespace backend -} // namespace onert +} // namespace onert::backend::trix::ops diff --git a/runtime/onert/backend/trix/ops/BulkPipelineManager.h b/runtime/onert/backend/trix/ops/BulkPipelineManager.h index 8f82c2e93a3..04af5dd5255 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineManager.h +++ b/runtime/onert/backend/trix/ops/BulkPipelineManager.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_TRIX_OPS_BULK_PIPE_LINE_MANAGER_H__ -#define __ONERT_BACKEND_TRIX_OPS_BULK_PIPE_LINE_MANAGER_H__ +#ifndef __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_MANAGER_H__ +#define __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_MANAGER_H__ #include #include @@ -27,13 +27,7 @@ #include #include "BulkPipelineModel.h" -namespace onert -{ -namespace backend -{ -namespace trix -{ -namespace ops +namespace onert::backend::trix::ops { class BulkPipelineManager @@ -72,9 +66,6 @@ class BulkPipelineManager std::vector> _models; }; -} // namespace ops -} // namespace trix -} // namespace backend -} // namespace onert +} // namespace onert::backend::trix::ops -#endif // __ONERT_BACKEND_TRIX_OPS_BULK_PIPE_LINE_MANAGER_H__ +#endif // __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_MANAGER_H__ diff --git a/runtime/onert/backend/trix/ops/BulkPipelineModel.cc b/runtime/onert/backend/trix/ops/BulkPipelineModel.cc index ce7674c2cbf..91b2d5ea1c3 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineModel.cc +++ b/runtime/onert/backend/trix/ops/BulkPipelineModel.cc @@ -20,13 +20,7 @@ #include #include -namespace onert -{ -namespace backend -{ -namespace trix -{ -namespace ops +namespace onert::backend::trix::ops { BulkPipelineModel::BulkPipelineModel(const std::string &model_path, int device_id) @@ -275,7 +269,4 @@ void BulkPipelineModel::closeDevice() } } -} // namespace ops -} // namespace trix -} // namespace backend -} // namespace onert +} // namespace onert::backend::trix::ops diff --git a/runtime/onert/backend/trix/ops/BulkPipelineModel.h b/runtime/onert/backend/trix/ops/BulkPipelineModel.h index d39a49c0a3e..a3f9474429d 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineModel.h +++ b/runtime/onert/backend/trix/ops/BulkPipelineModel.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEMODEL_H__ -#define __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEMODEL_H__ +#ifndef __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_MODEL_H__ +#define __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_MODEL_H__ #include #include @@ -29,13 +29,7 @@ #include "BulkPipelineBuffer.h" -namespace onert -{ -namespace backend -{ -namespace trix -{ -namespace ops +namespace onert::backend::trix::ops { class BulkPipelineModel @@ -96,9 +90,6 @@ class BulkPipelineModel std::atomic _buffer_ready{false}; }; -} // namespace ops -} // namespace trix -} // namespace backend -} // namespace onert +} // namespace onert::backend::trix::ops -#endif // __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEMODEL_H__ +#endif // __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_MODEL_H__ From d3587cc6c04fde796827f4794f6b4af695cad84a Mon Sep 17 00:00:00 2001 From: Hyeongseok Oh Date: Wed, 14 Jan 2026 16:51:47 +0900 Subject: [PATCH 09/13] [circle-schema] Add MX dtype restrictions comment (#16344) This commit adds comprehensive comments documenting current restrictions for MX dtypes (MXFP4, MXINT8) in the circle schema. ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh --- nnpackage/schema/circle_schema.fbs | 7 +++++++ res/CircleSchema/0.10/circle_schema.fbs | 7 +++++++ runtime/libs/circle-schema/circle_schema.fbs | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/nnpackage/schema/circle_schema.fbs b/nnpackage/schema/circle_schema.fbs index 5584de0afa3..0f682d38f40 100644 --- a/nnpackage/schema/circle_schema.fbs +++ b/nnpackage/schema/circle_schema.fbs @@ -87,6 +87,13 @@ enum TensorType : byte { GGML_Q8_1 = -5, // MX dtypes + // Current restrictions of MX dtypes + // - MX dtypes are not used for model I/O + // - MX dtypes are used for activations, not for constant inputs (ex. weight) + // - MX dtype's parameters (block size, exponent scale, etc) follows + // OCP Microscaling Formats Specification + // - Model does not have exponent scale data. 
+ // Backend should define and use internally if needed MXFP4 = -6, MXINT8 = -7, } diff --git a/res/CircleSchema/0.10/circle_schema.fbs b/res/CircleSchema/0.10/circle_schema.fbs index 5584de0afa3..0f682d38f40 100644 --- a/res/CircleSchema/0.10/circle_schema.fbs +++ b/res/CircleSchema/0.10/circle_schema.fbs @@ -87,6 +87,13 @@ enum TensorType : byte { GGML_Q8_1 = -5, // MX dtypes + // Current restrictions of MX dtypes + // - MX dtypes are not used for model I/O + // - MX dtypes are used for activations, not for constant inputs (ex. weight) + // - MX dtype's parameters (block size, exponent scale, etc) follows + // OCP Microscaling Formats Specification + // - Model does not have exponent scale data. + // Backend should define and use internally if needed MXFP4 = -6, MXINT8 = -7, } diff --git a/runtime/libs/circle-schema/circle_schema.fbs b/runtime/libs/circle-schema/circle_schema.fbs index 5584de0afa3..0f682d38f40 100644 --- a/runtime/libs/circle-schema/circle_schema.fbs +++ b/runtime/libs/circle-schema/circle_schema.fbs @@ -87,6 +87,13 @@ enum TensorType : byte { GGML_Q8_1 = -5, // MX dtypes + // Current restrictions of MX dtypes + // - MX dtypes are not used for model I/O + // - MX dtypes are used for activations, not for constant inputs (ex. weight) + // - MX dtype's parameters (block size, exponent scale, etc) follows + // OCP Microscaling Formats Specification + // - Model does not have exponent scale data. + // Backend should define and use internally if needed MXFP4 = -6, MXINT8 = -7, } From bbd17bd24bd01cb1d30322ea90803d582c63c88e Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Thu, 15 Jan 2026 07:57:28 +0900 Subject: [PATCH 10/13] [onert] Add BulkPipelineLayer implementation for trix backend (#16343) It adds new BulkPipelineLayer class to handle bulk pipeline operations in the trix backend. 
ONE-DCO-1.0-Signed-off-by: Jonghwa Lee --- .../backend/trix/ops/BulkPipelineLayer.cc | 73 +++++++++++++++++++ .../backend/trix/ops/BulkPipelineLayer.h | 53 ++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 runtime/onert/backend/trix/ops/BulkPipelineLayer.cc create mode 100644 runtime/onert/backend/trix/ops/BulkPipelineLayer.h diff --git a/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc b/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc new file mode 100644 index 00000000000..dfdd95f32d3 --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BulkPipelineLayer.h" + +#include +#include + +namespace onert::backend::trix::ops +{ + +BulkPipelineLayer::BulkPipelineLayer() : _inputs(), _outputs() +{ + // DO NOTHING +} + +BulkPipelineLayer::~BulkPipelineLayer() +{ + // DO NOTHING - _pipeline_manager will be automatically cleaned up by unique_ptr +} + +void BulkPipelineLayer::configure(const std::vector &inputs, + std::vector &outputs, + const std::vector &binary_path) +{ + _inputs = inputs; + _outputs = outputs; + + // Configure BulkPipeLineManager + BulkPipelineManager::PipelineConfig config; + config.model_paths = binary_path; + config.device_id = 0; // default device id = 0 + + _pipeline_manager = std::make_unique(config); + + if (!_pipeline_manager->initialize()) + { + throw std::runtime_error("Failed to initialize BulkPipelineManager"); + } +} + +void BulkPipelineLayer::run() +{ + try + { + _pipeline_manager->execute(_inputs, _outputs); + } + catch (const std::exception &e) + { + std::cerr << "BulkPipelineLayer execution failed: " << e.what() << std::endl; + throw; + } +} + +void BulkPipelineLayer::prepare() +{ + // DO NOTHING +} + +} // namespace onert::backend::trix::ops diff --git a/runtime/onert/backend/trix/ops/BulkPipelineLayer.h b/runtime/onert/backend/trix/ops/BulkPipelineLayer.h new file mode 100644 index 00000000000..63f78bd2bb4 --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkPipelineLayer.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_LAYER_H__ +#define __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_LAYER_H__ + +#include +#include "../DevContext.h" +#include +#include "BulkPipelineManager.h" + +namespace onert::backend::trix::ops +{ + +class BulkPipelineLayer : public ::onert::exec::IFunction +{ +public: + BulkPipelineLayer(); + ~BulkPipelineLayer() override; + +public: + void configure(const std::vector &inputs, + std::vector &outputs, + const std::vector &binary_path); + + void run() override; + + void prepare() override; + +private: + std::vector _inputs; + std::vector _outputs; + + // Pipeline manager + std::unique_ptr _pipeline_manager; +}; + +} // namespace onert::backend::trix::ops + +#endif // __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_LAYER_H__ From 6816ad743b51a77a301f8652a662c27ff264d52b Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Tue, 13 Jan 2026 15:45:34 +0900 Subject: [PATCH 11/13] [onert] Implement pipeline execution for bulk operations This replaces the previous NYI exception with actual pipeline execution functionality for the trix backend. 
ONE-DCO-1.0-Signed-off-by: Jonghwa Lee --- runtime/onert/backend/trix/KernelGenerator.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/runtime/onert/backend/trix/KernelGenerator.cc b/runtime/onert/backend/trix/KernelGenerator.cc index 9a190685a4a..c5c85a669da 100644 --- a/runtime/onert/backend/trix/KernelGenerator.cc +++ b/runtime/onert/backend/trix/KernelGenerator.cc @@ -16,6 +16,7 @@ #include "KernelGenerator.h" +#include "ops/BulkPipelineLayer.h" #include "ops/BulkLayer.h" #include @@ -75,8 +76,10 @@ void KernelGenerator::visit(const ir::operation::Bulk &node) } else { - // TODO: Implement multiple model execution - throw std::runtime_error("NYI: multiple model execution"); + // For pipeline execution (multiple models) + auto fn = std::make_unique(); + fn->configure(input_tensors, output_tensors, binary_path); + _return_fn = std::move(fn); } } From 1dd4da229ae3074dc7c151e22a2c945eb3bf093d Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Tue, 13 Jan 2026 15:47:10 +0900 Subject: [PATCH 12/13] [onert] Implement buffer sharing optimization for BulkPipeline This commit adds buffer sharing mechanism to reduce memory usage in bulk pipeline execution. Link models for async buffer preparation and optimize execution performance when models have identical program and weight sizes. 
ONE-DCO-1.0-Signed-off-by: Jonghwa Lee --- .../backend/trix/ops/BulkPipelineLayer.cc | 3 +- .../backend/trix/ops/BulkPipelineManager.cc | 60 ++++++++++++++ .../backend/trix/ops/BulkPipelineManager.h | 3 + .../backend/trix/ops/BulkPipelineModel.cc | 78 ++++++++++++++++--- .../backend/trix/ops/BulkPipelineModel.h | 21 ++++- 5 files changed, 154 insertions(+), 11 deletions(-) diff --git a/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc b/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc index dfdd95f32d3..1d8b1a0f958 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc +++ b/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc @@ -42,7 +42,8 @@ void BulkPipelineLayer::configure(const std::vector &in // Configure BulkPipeLineManager BulkPipelineManager::PipelineConfig config; config.model_paths = binary_path; - config.device_id = 0; // default device id = 0 + config.device_id = 0; // default device id = 0 + config.n_owner_models = 2; // Use 2 owner models for buffer sharing _pipeline_manager = std::make_unique(config); diff --git a/runtime/onert/backend/trix/ops/BulkPipelineManager.cc b/runtime/onert/backend/trix/ops/BulkPipelineManager.cc index 13a712a3c63..92ec79eea01 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineManager.cc +++ b/runtime/onert/backend/trix/ops/BulkPipelineManager.cc @@ -44,6 +44,7 @@ bool BulkPipelineManager::initialize() { createModels(); prepareModels(); + linkModels(); _initialized = true; return true; @@ -125,6 +126,15 @@ void BulkPipelineManager::execute(const std::vector &in current_inputs.push_back(const_cast(output)); } } + + // Prepare next shared neighbor model + if (_use_buffer_sharing) + { + if (auto next = model->getNextModel()) + { + next->startAsyncBufferFill(); + } + } } } catch (...) 
@@ -150,6 +160,56 @@ void BulkPipelineManager::createModels() } _models.push_back(model); } + + auto first_values = + std::pair{_models.front()->programSize(), _models.front()->weightSize()}; + _use_buffer_sharing = + std::all_of(_models.begin(), _models.end(), [first_values](const auto &model) { + return model->programSize() == first_values.first && + model->weightSize() == first_values.second; + }); + + if (_use_buffer_sharing) + { + int model_idx = 0; + for (auto model : _models) + { + if (model_idx++ < _config.n_owner_models) + { + // First n_owner_models models are OWNERS + continue; + } + + // Other models are SHARED + model->setBufferOwnership(BulkPipelineModel::BufferOwnership::SHARED); + } + } +} + +void BulkPipelineManager::linkModels() +{ + // If models are not shared, no need to link them + if (!_use_buffer_sharing) + return; + + for (size_t i = 0; i < _models.size(); ++i) + { + if (i + _config.n_owner_models < _models.size()) + { + _models[i]->setNextModel(_models[i + _config.n_owner_models]); + } + else + { + _models[i]->setNextModel(nullptr); + } + + // Shared models share buffers from owners in buffer pool + if (_models[i]->ownership() == BulkPipelineModel::BufferOwnership::SHARED) + { + size_t owner_index = i % _config.n_owner_models; + _models[i]->shareBuffersFrom(*_models[owner_index]); + } + } } void BulkPipelineManager::prepareModels() diff --git a/runtime/onert/backend/trix/ops/BulkPipelineManager.h b/runtime/onert/backend/trix/ops/BulkPipelineManager.h index 04af5dd5255..7ee64a40168 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineManager.h +++ b/runtime/onert/backend/trix/ops/BulkPipelineManager.h @@ -37,6 +37,7 @@ class BulkPipelineManager { std::vector model_paths; int device_id{0}; + int n_owner_models{2}; // number of models that own the buffers shared with the rest }; public: @@ -56,11 +57,13 @@ class BulkPipelineManager private: void createModels(); + void linkModels(); void prepareModels(); private: PipelineConfig _config; std::atomic
_initialized{false}; + std::atomic _use_buffer_sharing; std::atomic _executing{false}; std::vector> _models; diff --git a/runtime/onert/backend/trix/ops/BulkPipelineModel.cc b/runtime/onert/backend/trix/ops/BulkPipelineModel.cc index 91b2d5ea1c3..3e716fd6d08 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineModel.cc +++ b/runtime/onert/backend/trix/ops/BulkPipelineModel.cc @@ -23,8 +23,9 @@ namespace onert::backend::trix::ops { -BulkPipelineModel::BulkPipelineModel(const std::string &model_path, int device_id) - : _model_path(model_path), _device_id(device_id) +BulkPipelineModel::BulkPipelineModel(const std::string &model_path, int device_id, + BufferOwnership ownership) + : _model_path(model_path), _device_id(device_id), _ownership(ownership) { // DO NOTHING } @@ -56,11 +57,14 @@ bool BulkPipelineModel::prepare() try { - openDevice(); - allocateBuffers(); - fillBuffers(); - markBufferReady(); - registerModel(); + if (_ownership == BufferOwnership::OWNER) + { + openDevice(); + allocateBuffers(); + fillBuffers(); + markBufferReady(); + registerModel(); + } _prepared = true; return true; @@ -80,8 +84,17 @@ void BulkPipelineModel::release() return; } - unregisterModel(); - closeDevice(); + // Wait for any in-flight asynchronous buffer fill to finish + if (_async_fill_future.valid()) + { + _async_fill_future.wait(); + } + + if (_ownership == BufferOwnership::OWNER) + { + unregisterModel(); + closeDevice(); + } if (_fp) { @@ -142,6 +155,32 @@ void BulkPipelineModel::run(const std::vector &inputs, } } +void BulkPipelineModel::shareBuffersFrom(const BulkPipelineModel &owner) +{ + if (_ownership == BufferOwnership::OWNER) + { + throw std::runtime_error("Cannot share buffers with owner model: " + _model_path); + } + + if (!owner.isPrepared()) + { + throw std::runtime_error("Owner model is not prepared: " + owner.modelPath()); + } + + // Sharing the buffers + _program_buffer = owner._program_buffer; + _weight_buffer = owner._weight_buffer; + + // Sharing the device and model id + _dev =
owner.device(); + _model_id = owner.modelId(); +} + +void BulkPipelineModel::setNextModel(std::shared_ptr next) +{ + _next_model = next; +} + void BulkPipelineModel::waitForBufferReady() { std::unique_lock lock(_buffer_mutex); @@ -157,6 +196,22 @@ void BulkPipelineModel::markBufferReady() _buffer_cv.notify_all(); } +void BulkPipelineModel::startAsyncBufferFill() +{ + _buffer_ready = false; + _async_fill_future = std::async(std::launch::async, [this] { + try + { + fillBuffers(); + markBufferReady(); + } + catch (const std::exception &e) + { + std::cerr << "Failed to fill buffers asynchronously: " << e.what() << std::endl; + } + }); +} + bool BulkPipelineModel::loadMetadata() { _fp = fopen(_model_path.c_str(), "rb"); @@ -179,6 +234,11 @@ bool BulkPipelineModel::loadMetadata() void BulkPipelineModel::allocateBuffers() { + if (_ownership != BufferOwnership::OWNER) + { + throw std::runtime_error("Not allowed to allocate buffers for non-owner model: " + _model_path); + } + if (!_meta) { throw std::runtime_error("Metadata not loaded for: " + _model_path); diff --git a/runtime/onert/backend/trix/ops/BulkPipelineModel.h b/runtime/onert/backend/trix/ops/BulkPipelineModel.h index a3f9474429d..9f0e7ea09e6 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineModel.h +++ b/runtime/onert/backend/trix/ops/BulkPipelineModel.h @@ -35,7 +35,15 @@ namespace onert::backend::trix::ops class BulkPipelineModel { public: - BulkPipelineModel(const std::string &model_path, int device_id); + enum class BufferOwnership + { + OWNER, + SHARED + }; + +public: + BulkPipelineModel(const std::string &model_path, int device_id, + BufferOwnership ownership = BufferOwnership::OWNER); ~BulkPipelineModel(); // Disallow copying @@ -50,8 +58,14 @@ class BulkPipelineModel void run(const std::vector &inputs, std::vector &outputs); + void shareBuffersFrom(const BulkPipelineModel &owner); + void setNextModel(std::shared_ptr next); + std::shared_ptr getNextModel() { return _next_model; }; + void 
setBufferOwnership(BufferOwnership ownership) { _ownership = ownership; } + void waitForBufferReady(); void markBufferReady(); + void startAsyncBufferFill(); const npubin_meta *metadata() const { return _meta.get(); } uint64_t programSize() const { return _meta->program_size; } @@ -59,6 +73,7 @@ class BulkPipelineModel uint32_t modelId() const { return _model_id; } npudev_h device() const { return _dev; } const std::string &modelPath() const { return _model_path; } + BufferOwnership ownership() const { return _ownership; } private: bool loadMetadata(); @@ -72,6 +87,7 @@ class BulkPipelineModel private: std::string _model_path; int _device_id; + BufferOwnership _ownership; std::atomic _initialized{false}; std::atomic _prepared{false}; @@ -85,9 +101,12 @@ class BulkPipelineModel std::shared_ptr _program_buffer; std::shared_ptr _weight_buffer; + std::shared_ptr _next_model; + std::mutex _buffer_mutex; std::condition_variable _buffer_cv; std::atomic _buffer_ready{false}; + std::future _async_fill_future; }; } // namespace onert::backend::trix::ops From 784f6ea9b843169bc3001903102dc69cd3687cb0 Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Tue, 13 Jan 2026 15:36:39 +0900 Subject: [PATCH 13/13] [onert] Add model verification for I/O consistency This commit adds verification step to ensure loaded models have matching input/output counts with the pipeline configuration. 
ONE-DCO-1.0-Signed-off-by: Jonghwa Lee --- .../backend/trix/ops/BulkPipelineLayer.cc | 2 + .../backend/trix/ops/BulkPipelineManager.cc | 14 +++++++ .../backend/trix/ops/BulkPipelineManager.h | 3 ++ .../trix/ops/test/BulkPipelineManager.test.cc | 41 ++++++++++++++++++- 4 files changed, 59 insertions(+), 1 deletion(-) diff --git a/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc b/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc index 1d8b1a0f958..9f30bdb96ad 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc +++ b/runtime/onert/backend/trix/ops/BulkPipelineLayer.cc @@ -44,6 +44,8 @@ void BulkPipelineLayer::configure(const std::vector &in config.model_paths = binary_path; config.device_id = 0; // default device id = 0 config.n_owner_models = 2; // Use 2 owner models for buffer sharing + config.n_inputs = inputs.size(); + config.n_outputs = outputs.size(); _pipeline_manager = std::make_unique(config); diff --git a/runtime/onert/backend/trix/ops/BulkPipelineManager.cc b/runtime/onert/backend/trix/ops/BulkPipelineManager.cc index 92ec79eea01..4ddd0a26fa8 100644 --- a/runtime/onert/backend/trix/ops/BulkPipelineManager.cc +++ b/runtime/onert/backend/trix/ops/BulkPipelineManager.cc @@ -43,6 +43,7 @@ bool BulkPipelineManager::initialize() try { createModels(); + verifyModels(); prepareModels(); linkModels(); @@ -212,6 +213,19 @@ void BulkPipelineManager::linkModels() } } +void BulkPipelineManager::verifyModels() +{ + for (auto &model : _models) + { + if ((static_cast(model->metadata()->input_seg_num) != _config.n_inputs) || + (static_cast(model->metadata()->output_seg_num) != _config.n_outputs)) + { + throw std::runtime_error("Model " + model->modelPath() + + " has different number of inputs/outputs"); + } + } +} + void BulkPipelineManager::prepareModels() { for (auto &model : _models) diff --git a/runtime/onert/backend/trix/ops/BulkPipelineManager.h b/runtime/onert/backend/trix/ops/BulkPipelineManager.h index 7ee64a40168..6801b26bf97 100644 --- 
a/runtime/onert/backend/trix/ops/BulkPipelineManager.h +++ b/runtime/onert/backend/trix/ops/BulkPipelineManager.h @@ -38,6 +38,8 @@ class BulkPipelineManager std::vector model_paths; int device_id{0}; int n_owner_models{2}; // number of models that share the buffers + uint32_t n_inputs{1}; + uint32_t n_outputs{1}; }; public: @@ -59,6 +61,7 @@ class BulkPipelineManager void createModels(); void linkModels(); void prepareModels(); + void verifyModels(); private: PipelineConfig _config; diff --git a/runtime/onert/backend/trix/ops/test/BulkPipelineManager.test.cc b/runtime/onert/backend/trix/ops/test/BulkPipelineManager.test.cc index 359a0df6ec6..8f1811c8fb1 100644 --- a/runtime/onert/backend/trix/ops/test/BulkPipelineManager.test.cc +++ b/runtime/onert/backend/trix/ops/test/BulkPipelineManager.test.cc @@ -29,12 +29,23 @@ class BulkPipelineManagerTest : public ::testing::Test { BulkPipelineManager::PipelineConfig config; config.device_id = 0; - config.model_paths.push_back("model_path"); + config.model_paths.push_back("model_path_0"); + config.model_paths.push_back("model_path_1"); + config.n_inputs = 0; + config.n_outputs = 0; manager = std::make_unique(config); // Reset all mock syscalls before each test MockSyscallsManager::getInstance().resetAll(); + MockSyscallsManager::getInstance().setFopenHook([](const char *path, const char *) -> FILE * { + if (strcmp(path, "model_path_0") == 0) + { + return (FILE *)1; + } + return (FILE *)2; + }); + MockSyscallsManager::getInstance().setFreadHook( [](void *ptr, size_t size, size_t, FILE *) -> int { if (size == NPUBIN_META_SIZE) @@ -43,6 +54,8 @@ class BulkPipelineManagerTest : public ::testing::Test meta->program_size = 1024; meta->weight_size = 1024; meta->size = 4096; + meta->input_seg_num = 0; + meta->output_seg_num = 0; } return 1; }); @@ -83,3 +96,29 @@ TEST_F(BulkPipelineManagerTest, test_execute) std::vector outputs; EXPECT_NO_THROW(manager->execute(inputs, outputs)); } + +TEST_F(BulkPipelineManagerTest, 
test_verify_models) +{ + MockSyscallsManager::getInstance().clearFreadHook(); + MockSyscallsManager::getInstance().setFreadHook( + [](void *ptr, size_t size, size_t, FILE *fp) -> int { + if (size == NPUBIN_META_SIZE) + { + auto meta = reinterpret_cast(ptr); + meta->program_size = 1024; + meta->weight_size = 1024; + meta->size = 4096; + meta->input_seg_num = 0; + if (fp == (FILE *)1) + { + meta->output_seg_num = 1; + } + else + { + meta->output_seg_num = 0; + } + } + return 1; + }); + EXPECT_FALSE(manager->initialize()); +}