6 changes: 6 additions & 0 deletions compiler/luci-value-test/luci_eval_verifier.py
@@ -119,6 +119,7 @@
if output_details["dtype"] == np.uint8:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolint,
atol=atolint) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
@@ -127,6 +128,7 @@
elif output_details["dtype"] == np.float32:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolf32,
atol=atolf32) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
@@ -135,6 +137,7 @@
elif output_details["dtype"] == np.int64:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolint,
atol=atolint) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
@@ -143,6 +146,7 @@
elif output_details["dtype"] == np.int32:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolint,
atol=atolint) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
@@ -151,13 +155,15 @@
elif output_details["dtype"] == np.int16:
if np.allclose(luci_output_data, intp_output_data, rtol=rtolint,
atol=atolint) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
output_dtype = "int16"
elif output_details["dtype"] == np.bool_:
if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
print("input data", input_data)
print("intp_output_data", intp_output_data)
print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
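For reference, np.allclose(a, b, rtol, atol) passes when |a - b| <= atol + rtol * |b| holds elementwise, so each dtype branch above bounds both the absolute and the relative deviation of the luci-interpreter output from the TFLite interpreter output. Printing input_data on failure makes the failing case reproducible from the log.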
3 changes: 2 additions & 1 deletion compiler/luci-value-test/test.lst
@@ -90,7 +90,7 @@ addeval(MaxPool2D_000)
addeval(MaxPool2D_U8_000)
addeval(Mean_000)
addeval(Mean_001)
addeval(Mean_U8_000)
#addeval(Mean_U8_000) --> test with tolerance
addeval(Minimum_000)
#addeval(MirrorPad_000)
addeval(Mul_000)
@@ -207,3 +207,4 @@ addevaltol(SVDF_001 8e-3 8e-3)
addevaltol(Conv2D_U8_000 5 5)
# refer https://github.com/Samsung/ONE/issues/10438
addevaltol(YUV_TO_RGB_U8_000 1 1)
addevaltol(Mean_U8_000 8e-3 1)
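For reference, addevaltol entries appear to take the form addevaltol(MODEL rtol atol); under that reading, the atol of 1 for Mean_U8_000 allows a one-step difference in the uint8 quantized output, which is why the plain addeval entry above was commented out in favor of tolerance-based verification.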
7 changes: 7 additions & 0 deletions nnpackage/schema/circle_schema.fbs
@@ -87,6 +87,13 @@ enum TensorType : byte {
GGML_Q8_1 = -5,

// MX dtypes
// Current restrictions of MX dtypes
// - MX dtypes are not used for model I/O
// - MX dtypes are used for activations, not for constant inputs (ex. weight)
// - MX dtype's parameters (block size, exponent scale, etc) follows
// OCP Microscaling Formats Specification
// - Model does not have exponent scale data.
// Backend should define and use internally if needed
MXFP4 = -6,
MXINT8 = -7,
}
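To make the last restriction concrete, the sketch below (not part of this diff) shows how a backend might decode one MXINT8 element, assuming the OCP Microscaling defaults of an E8M0 shared scale per block and INT8 elements carrying an implicit 2^-6 scale; the block size (32 in the spec's MXINT8 profile) and these encodings are assumptions to verify against the specification.

```cpp
// Illustrative sketch only; assumes OCP Microscaling defaults (E8M0 shared
// scale with bias 127, INT8 elements with an implicit 2^-6 element scale).
#include <cmath>
#include <cstdint>

float decodeMxInt8(uint8_t shared_scale_e8m0, int8_t element)
{
  // E8M0: 8-bit biased exponent, no sign or mantissa bits; the all-ones
  // encoding is NaN in the spec and is skipped here for brevity.
  const float block_scale = std::ldexp(1.0f, static_cast<int>(shared_scale_e8m0) - 127);
  // Each INT8 element carries an implicit 2^-6 scale per the spec.
  return block_scale * static_cast<float>(element) * std::ldexp(1.0f, -6);
}
```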
7 changes: 7 additions & 0 deletions res/CircleSchema/0.10/circle_schema.fbs
@@ -87,6 +87,13 @@ enum TensorType : byte {
GGML_Q8_1 = -5,

// MX dtypes
// Current restrictions of MX dtypes
// - MX dtypes are not used for model I/O
// - MX dtypes are used for activations, not for constant inputs (ex. weight)
// - MX dtype's parameters (block size, exponent scale, etc) follows
// OCP Microscaling Formats Specification
// - Model does not have exponent scale data.
// Backend should define and use internally if needed
MXFP4 = -6,
MXINT8 = -7,
}
7 changes: 7 additions & 0 deletions runtime/libs/circle-schema/circle_schema.fbs
@@ -87,6 +87,13 @@ enum TensorType : byte {
GGML_Q8_1 = -5,

// MX dtypes
// Current restrictions of MX dtypes
// - MX dtypes are not used for model I/O
// - MX dtypes are used for activations, not for constant inputs (ex. weight)
// - MX dtype's parameters (block size, exponent scale, etc) follows
// OCP Microscaling Formats Specification
// - Model does not have exponent scale data.
// Backend should define and use internally if needed
MXFP4 = -6,
MXINT8 = -7,
}
23 changes: 23 additions & 0 deletions runtime/onert/backend/trix/CMakeLists.txt
@@ -9,6 +9,9 @@ message(STATUS "ONERT backend: Found TRIXEngine")

file(GLOB_RECURSE SOURCES "*.cc")

file(GLOB_RECURSE TESTS "*.test.cc")
list(REMOVE_ITEM SOURCES ${TESTS})

add_library(${LIB_ONERT_BACKEND_TRIX} SHARED ${SOURCES})

target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE onert_core)
@@ -24,3 +27,23 @@ set_target_properties(${LIB_ONERT_BACKEND_TRIX} PROPERTIES
INSTALL_RPATH ${ONERT_RPATH_PLUGIN})

install(TARGETS ${LIB_ONERT_BACKEND_TRIX} DESTINATION ${ONERT_INSTALL_BACKENDDIR})

if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)

# Unit Tests
set(TEST_ONERT_TRIX_BACKEND test_onert_trix_backend)

add_executable(${TEST_ONERT_TRIX_BACKEND} ${TESTS})
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} onert_core)
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} trix-engine)
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} ${LIB_ONERT_BACKEND_TRIX})
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} nnfw_common)
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} nnfw_coverage)
target_link_libraries(${TEST_ONERT_TRIX_BACKEND} gtest gtest_main Threads::Threads)
set_target_properties(${TEST_ONERT_TRIX_BACKEND} PROPERTIES
INSTALL_RPATH "$ORIGIN/../${ONERT_INSTALL_COREDIR}:$ORIGIN/../${ONERT_INSTALL_BACKENDDIR}")

add_test(${TEST_ONERT_TRIX_BACKEND} ${TEST_ONERT_TRIX_BACKEND})
install(TARGETS ${TEST_ONERT_TRIX_BACKEND} DESTINATION unittest)
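For illustration, a file matched by the new *.test.cc glob would look roughly like the following hypothetical sketch (no test source is shown in this diff); REMOVE_ITEM keeps it out of the backend shared library, and it is compiled into test_onert_trix_backend instead.

```cpp
// Hypothetical "*.test.cc" file; excluded from ${SOURCES} by REMOVE_ITEM
// above and built into the gtest executable test_onert_trix_backend.
#include <gtest/gtest.h>

#include "ops/BulkPipelineBuffer.h"

TEST(BulkPipelineBuffer, NotReadyBeforeAllocate)
{
  using onert::backend::trix::ops::BulkPipelineBuffer;
  BulkPipelineBuffer buf(BulkPipelineBuffer::BufferType::DMABUF_IOMMU,
                         /*size=*/4096, /*device_id=*/0);
  EXPECT_EQ(buf.size(), 0u);   // no generic_buffer exists until allocate()
  EXPECT_FALSE(buf.isReady()); // addr is set only by a successful allocate()
}
```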
7 changes: 5 additions & 2 deletions runtime/onert/backend/trix/KernelGenerator.cc
@@ -16,6 +16,7 @@

#include "KernelGenerator.h"

#include "ops/BulkPipelineLayer.h"
#include "ops/BulkLayer.h"

#include <backend/Backend.h>
@@ -75,8 +76,10 @@ void KernelGenerator::visit(const ir::operation::Bulk &node)
}
else
{
// TODO: Implement multiple model execution
throw std::runtime_error("NYI: multiple model execution");
// For pipeline execution (multiple models)
auto fn = std::make_unique<ops::BulkPipelineLayer>();
fn->configure(input_tensors, output_tensors, binary_path);
_return_fn = std::move(fn);
}
}
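The configure(...) call implies an interface along these lines; this is a hypothetical sketch inferred from the call site, since ops/BulkPipelineLayer.h itself is not shown in this diff.

```cpp
// Hypothetical declaration inferred from the call site above; the actual
// header is ops/BulkPipelineLayer.h (not shown in this diff).
#include <string>
#include <vector>

#include <backend/IPortableTensor.h>
#include <exec/IFunction.h>

namespace onert::backend::trix::ops
{

class BulkPipelineLayer : public exec::IFunction
{
public:
  // Binds the model I/O tensors and the compiled binary path for
  // pipelined execution of multiple models on the TRIX NPU.
  void configure(const std::vector<const IPortableTensor *> &inputs,
                 const std::vector<IPortableTensor *> &outputs,
                 const std::string &binary_path);

  void run() override; // would drive the per-model pipeline stages
};

} // namespace onert::backend::trix::ops
```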

167 changes: 167 additions & 0 deletions runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc
@@ -0,0 +1,167 @@
/*
* Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "BulkPipelineBuffer.h"

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cstring>
#include <iostream>

namespace onert::backend::trix::ops
{

// FIXME: Using higher level API instead of raw API
struct trix_ioctl_hwmem
{
int32_t type;
uint64_t size;
int32_t dbuf_fd;
} __attribute__((packed));

#define TRIX_IOCTL_HWMEM_ALLOC _IOW(136, 21, struct trix_ioctl_hwmem)
#define TRIX_IOCTL_HWMEM_DEALLOC _IOW(136, 22, struct trix_ioctl_hwmem)
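// Note: _IOW(type, nr, size) builds a write-direction (user -> kernel) ioctl
// request number from the driver magic (136), a command number (21/22), and
// the payload size the driver validates against.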

BulkPipelineBuffer::BulkPipelineBuffer(BufferType type, size_t size, int device_id)
: _type(type), _size(size), _device_id(device_id)
{
// DO NOTHING
}

BulkPipelineBuffer::~BulkPipelineBuffer() { deallocate(); }

size_t BulkPipelineBuffer::size() const { return _buffer ? _buffer->size : 0; }

bool BulkPipelineBuffer::isReady() const { return _buffer && _buffer->addr != nullptr; }

void BulkPipelineBuffer::allocate()
{
if (_buffer && _buffer->addr != nullptr)
{
// Already allocated
return;
}

if (!_buffer)
{
_buffer = new generic_buffer{};
}

// Open the device
char devname[16];
snprintf(devname, sizeof(devname), "/dev/triv2-%d", _device_id);
_dev_fd = open(devname, O_RDWR);
if (_dev_fd < 0)
{
throw std::runtime_error("Failed to open NPU device: " + std::string(devname));
}

// Allocate a buffer
struct trix_ioctl_hwmem hwmem;
hwmem.type = (_type == BufferType::DMABUF_CONT) ? 0 : 1;
hwmem.size = getAlignedSize(_size);

_buffer->dmabuf = ioctl(_dev_fd, TRIX_IOCTL_HWMEM_ALLOC, &hwmem);
if (_buffer->dmabuf < 0)
{
close(_dev_fd);
_dev_fd = -1;
throw std::runtime_error("Failed to allocate DMA buffer, size: " + std::to_string(hwmem.size));
}
Comment on lines +79 to +85 (runtime/onert/backend/trix/ops/BulkPipelineBuffer.cc):

logic: Return value from ioctl() not checked correctly

The code assigns the return value of ioctl() to _buffer->dmabuf and checks if it's negative. However, ioctl() returns -1 on error (not the fd). The actual dmabuf fd should come from the hwmem.dbuf_fd field after a successful call.

```suggestion
  int ret = ioctl(_dev_fd, TRIX_IOCTL_HWMEM_ALLOC, &hwmem);
  if (ret < 0)
  {
    close(_dev_fd);
    _dev_fd = -1;
    throw std::runtime_error("Failed to allocate DMA buffer, size: " + std::to_string(hwmem.size));
  }
  _buffer->dmabuf = hwmem.dbuf_fd;
```
// Mapping the buffer
_buffer->addr = mmap(nullptr, hwmem.size, PROT_READ | PROT_WRITE, MAP_SHARED, _buffer->dmabuf, 0);
if (_buffer->addr == MAP_FAILED)
{
close(_buffer->dmabuf);
close(_dev_fd);
_buffer->dmabuf = -1;
_dev_fd = -1;
_buffer->addr = nullptr;
throw std::runtime_error("Failed to mmap DMA buffer");
}

_buffer->size = _size;
_buffer->type = BUFFER_DMABUF;
}

void BulkPipelineBuffer::deallocate()
{
if (!_buffer)
{
return;
}

if (_buffer->addr != nullptr)
{
size_t aligned_sz = getAlignedSize(_buffer->size);
munmap(_buffer->addr, aligned_sz);
_buffer->addr = nullptr;
}

if (_buffer->dmabuf >= 0)
{
struct trix_ioctl_hwmem hwmem;
hwmem.dbuf_fd = _buffer->dmabuf;
ioctl(_dev_fd, TRIX_IOCTL_HWMEM_DEALLOC, &hwmem);
close(_buffer->dmabuf);
_buffer->dmabuf = -1;
}

if (_dev_fd >= 0)
{
close(_dev_fd);
_dev_fd = -1;
}

delete _buffer;
_buffer = nullptr;
}

void BulkPipelineBuffer::fillFromFile(FILE *fp, size_t offset)
{
if (!isReady())
{
throw std::runtime_error("Buffer is not allocated");
}

if (!fp)
{
throw std::runtime_error("Invalid file pointer");
}

if (fseek(fp, static_cast<long>(offset), SEEK_SET) != 0)
{
throw std::runtime_error("Failed to seek file to offset: " + std::to_string(offset));
}

if (fread(_buffer->addr, _buffer->size, 1, fp) != 1)
{
throw std::runtime_error("Failed to read " + std::to_string(_buffer->size) +
" bytes from file");
}
}

size_t BulkPipelineBuffer::getAlignedSize(size_t size) const
{
// 4 KB (= Page size) aligned size
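// e.g. getAlignedSize(5000) = (5000 + 4095) & ~4095 = 8192; exact
// multiples of 4096 are returned unchanged.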
constexpr size_t _4KB_M_1 = (1 << 12) - 1;
return (size + _4KB_M_1) & ~_4KB_M_1;
}

} // namespace onert::backend::trix::ops
69 changes: 69 additions & 0 deletions runtime/onert/backend/trix/ops/BulkPipelineBuffer.h
@@ -0,0 +1,69 @@
/*
* Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_BUFFER_H__
#define __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_BUFFER_H__

#include <memory>
#include <cstdio>
#include <stdexcept>
#include <libnpuhost.h>

namespace onert::backend::trix::ops
{

class BulkPipelineBuffer
{
public:
enum class BufferType
{
DMABUF_CONT, // Contiguous DMA buffer
DMABUF_IOMMU // IOMMU DMA buffer
};

public:
BulkPipelineBuffer(BufferType type, size_t size, int device_id);
~BulkPipelineBuffer();

// Disallow copying
BulkPipelineBuffer(const BulkPipelineBuffer &) = delete;
BulkPipelineBuffer &operator=(const BulkPipelineBuffer &) = delete;

// Buffer management functions
void allocate();
void deallocate();
size_t size() const;

generic_buffer *getGenericBuffer() { return _buffer; }

// Data manipulation functions
void fillFromFile(FILE *fp, size_t offset = 0);
bool isReady() const;

private:
size_t getAlignedSize(size_t size) const;

private:
BufferType _type;
size_t _size;
int _device_id;
int _dev_fd{-1};
generic_buffer *_buffer{nullptr};
};

} // namespace onert::backend::trix::ops

#endif // __ONERT_BACKEND_TRIX_OPS_BULK_PIPELINE_BUFFER_H__
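Taken together, the header supports a usage pattern like the following illustrative sketch, assuming a reachable /dev/triv2-0 device; the model file name and sizes are placeholders.

```cpp
// Illustrative only; "model.tvn" and the sizes are placeholders.
#include <cstdio>

#include "BulkPipelineBuffer.h"

using onert::backend::trix::ops::BulkPipelineBuffer;

void loadSegmentExample()
{
  // 1 MiB IOMMU-backed DMA buffer on NPU device 0.
  BulkPipelineBuffer buf(BulkPipelineBuffer::BufferType::DMABUF_IOMMU,
                         /*size=*/1 << 20, /*device_id=*/0);
  buf.allocate(); // opens /dev/triv2-0, ioctl-allocates, and mmaps

  if (FILE *fp = std::fopen("model.tvn", "rb"))
  {
    buf.fillFromFile(fp, /*offset=*/0); // reads size() bytes at the offset
    std::fclose(fp);
  }

  // buf.getGenericBuffer() can now be handed to libnpuhost APIs.
} // destructor unmaps, ioctl-deallocates, and closes the fds
```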