From 51bf88d9ad63930e5637a77f09ac50c95630929f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:09:44 -0500 Subject: [PATCH 01/15] __init__: import warn once at the top --- pyopencl/__init__.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 6420e5afb..016ad4d82 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -21,6 +21,7 @@ """ from sys import intern +from warnings import warn from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa @@ -43,7 +44,6 @@ import os from os.path import dirname, join, realpath if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)): - from warnings import warn warn("It looks like you are importing PyOpenCL from " "its source directory. This likely won't work.") raise @@ -267,7 +267,6 @@ class CommandQueueUsedAfterExit(UserWarning): def compiler_output(text): import os - from warnings import warn if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): warn(text, CompilerWarning) else: @@ -389,7 +388,6 @@ def enable_debugging(platform_or_context): import os os.environ["CPU_MAX_COMPUTE_UNITS"] = "1" else: - from warnings import warn warn("do not know how to enable debugging on '%s'" % platform.name) @@ -428,7 +426,6 @@ def _get_prg(self): return self._prg else: # "no program" can only happen in from-source case. - from warnings import warn warn("Pre-build attribute access defeats compiler caching.", stacklevel=3) @@ -662,7 +659,6 @@ def device_hashable_model_and_version_identifier(self): return ("v1", self.vendor, self.vendor_id, self.name, self.version) def device_persistent_unique_id(self): - from warnings import warn warn("Device.persistent_unique_id is deprecated. " "Use Device.hashable_model_and_version_identifier instead.", DeprecationWarning, stacklevel=2) @@ -684,7 +680,6 @@ def device_persistent_unique_id(self): def context_init(self, devices, properties, dev_type, cache_dir=None): if cache_dir is not None: - from warnings import warn warn("The 'cache_dir' argument to the Context constructor " "is deprecated and no longer has an effect. " "It was removed because it only applied to the wrapper " @@ -970,7 +965,6 @@ def image_init(self, context, flags, format, shape=None, pitches=None, if hostbuf is not None and not \ (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): - from warnings import warn warn("'hostbuf' was passed, but no memory flags to make use of it.") if hostbuf is None and pitches is not None: @@ -1043,7 +1037,6 @@ def image_init(self, context, flags, format, shape=None, pitches=None, class _ImageInfoGetter: def __init__(self, event): - from warnings import warn warn("Image.image.attr is deprecated and will go away in 2021. 
" "Use Image.attr directly, instead.") @@ -1927,7 +1920,6 @@ def enqueue_barrier(queue, wait_for=None): def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): if not (queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2)): - from warnings import warn warn("The context for this queue does not declare OpenCL 1.2 support, so " "the next thing you might see is a crash") From 3446d215118b36acbe95998ca67b1faed09ae248 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:14:58 -0500 Subject: [PATCH 02/15] Add is_queue_in_order --- src/wrap_cl.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 98964056f..8de8cd0df 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -496,6 +496,19 @@ namespace pyopencl // }}} + // {{{ utility functions + + inline bool is_queue_in_order(cl_command_queue queue) + { + cl_command_queue_properties param_value; + PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, + (queue, CL_QUEUE_PROPERTIES, sizeof(param_value), ¶m_value, 0)); + return !(param_value & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); + } + + // }}} + + // {{{ buffer interface helper From 9722b58779d78599bed789a5ed5637ff0dce03b5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:15:32 -0500 Subject: [PATCH 03/15] Add command_queue_ref --- src/wrap_cl.hpp | 70 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 8de8cd0df..8147cb183 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1668,6 +1668,76 @@ namespace pyopencl // }}} + // {{{ command_queue_ref + + // In contrast to command_queue, command_queue_ref is "nullable", i.e. + // it is a RAII *optional* reference to a command queue. 
+ + class command_queue_ref + { + private: + bool m_valid; + cl_command_queue m_queue; + + public: + command_queue_ref() + : m_valid(false) + {} + + command_queue_ref(cl_command_queue queue) + : m_valid(true), m_queue(queue) + { + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); + } + + command_queue_ref(command_queue_ref &&src) + : m_valid(src.m_valid), m_queue(src.m_queue) + { + src.m_valid = false; + } + + command_queue_ref(const command_queue_ref &) = delete; + command_queue_ref &operator=(const command_queue_ref &) = delete; + + ~command_queue_ref() + { + reset(); + } + + bool is_valid() const + { + return m_valid; + } + + cl_command_queue data() const + { + if (m_valid) + return m_queue; + else + throw error("command_queue_ref.data", CL_INVALID_VALUE, + "command_queue_ref is not valid"); + } + + void reset() + { + if (m_valid) + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue, (m_queue)); + m_valid = false; + } + + void set(cl_command_queue queue) + { + if (m_valid) + PYOPENCL_CALL_GUARDED(clReleaseCommandQueue, (m_queue)); + m_queue = queue; + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); + m_valid = true; + } + }; + + // }}} + + // {{{ event/synchronization class event : noncopyable From b65cbe4c9700ae42d6ee8136682530adb6cdd4e2 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 30 Jun 2022 23:18:16 -0500 Subject: [PATCH 04/15] Add, use enqueue_fill --- doc/runtime_memory.rst | 2 ++ pyopencl/__init__.py | 21 +++++++++++++++++++++ pyopencl/array.py | 4 ++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/doc/runtime_memory.rst b/doc/runtime_memory.rst index f4e01f266..ffe6661e8 100644 --- a/doc/runtime_memory.rst +++ b/doc/runtime_memory.rst @@ -281,6 +281,8 @@ Transfers .. autofunction:: enqueue_copy(queue, dest, src, **kwargs) +.. autofunction:: enqueue_fill(queue, dest, src, **kwargs) + Mapping Memory into Host Address Space -------------------------------------- diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 016ad4d82..86c77f05b 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -22,6 +22,7 @@ from sys import intern from warnings import warn +from typing import Union, Any from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa @@ -1815,6 +1816,26 @@ def enqueue_copy(queue, dest, src, **kwargs): # }}} +# {{{ enqueue_fill + +def enqueue_fill(queue: CommandQueue, dest: Union[MemoryObjectHolder, SVM], + pattern: Any, size: int, *, offset: int = 0, wait_for=None) -> Event: + """ + .. 
versionadded:: 2022.2 + """ + if isinstance(dest, MemoryObjectHolder): + return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for) + elif isinstance(dest, SVM): + if offset: + raise NotImplementedError("enqueue_fill with SVM does not yet support " + "offsets") + return enqueue_svm_memfill(queue, dest, pattern, size, wait_for) + else: + raise TypeError(f"enqueue_fill does not know how to fill '{type(dest)}'") + +# }}} + + # {{{ image creation DTYPE_TO_CHANNEL_TYPE = { diff --git a/pyopencl/array.py b/pyopencl/array.py index cb66121a5..28c253c21 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -1458,8 +1458,8 @@ def _zero_fill(self, queue=None, wait_for=None): # https://github.com/inducer/pyopencl/issues/395 if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31): self.add_event( - cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0), - self.offset, self.nbytes, wait_for=wait_for)) + cl.enqueue_fill(queue, self.base_data, np.int8(0), + self.nbytes, offset=self.offset, wait_for=wait_for)) else: zero = np.zeros((), self.dtype) self.fill(zero, queue=queue) From fe0d49e32dbf6d8c5d1843e6ac20190632b8f617 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 29 Mar 2021 00:21:04 -0500 Subject: [PATCH 05/15] Experiment with SVM backing arrays --- examples/demo_array_svm.py | 57 ++++++++++++++++++++++++++++++++++++++ pyopencl/array.py | 22 +++++++++++---- 2 files changed, 73 insertions(+), 6 deletions(-) create mode 100644 examples/demo_array_svm.py diff --git a/examples/demo_array_svm.py b/examples/demo_array_svm.py new file mode 100644 index 000000000..07454f1fe --- /dev/null +++ b/examples/demo_array_svm.py @@ -0,0 +1,57 @@ +import pyopencl as cl +import pyopencl.array as cl_array +import numpy as np +import numpy.linalg as la + +n = 5000000 +a = np.random.rand(n).astype(np.float32) +b = np.random.rand(n).astype(np.float32) + + +class SVMAllocator: + def __init__(self, ctx, flags, alignment): + self._context = ctx + self._flags = flags + self._alignment = alignment + + def __call__(self, nbytes): + return cl.SVM(cl.svm_empty( + ctx, self._flags, (nbytes,), np.int8, "C", self._alignment)) + + +ctx = cl.create_some_context() +queue = cl.CommandQueue(ctx) + +alloc = SVMAllocator(ctx, + cl.svm_mem_flags.READ_WRITE | cl.svm_mem_flags.SVM_FINE_GRAIN_BUFFER, + 0) + +a_dev = cl_array.to_device(queue, a, allocator=alloc) +print("A_DEV", a_dev.data.mem.nbytes, a_dev.data.mem.__array_interface__) +b_dev = cl_array.to_device(queue, b, allocator=alloc) +dest_dev = cl_array.empty_like(a_dev) +print("DEST", dest_dev.data.mem.__array_interface__) + +prg = cl.Program(ctx, """ + __kernel void sum(__global const float *a, + __global const float *b, __global float *c) + { + int gid = get_global_id(0); + c[gid] = a[gid] + b[gid]; + } + """).build() + +knl = prg.sum # Use this Kernel object for repeated calls +knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data) + +# PROBLEM: numpy frees the temporary out of (a_dev+b_dev) before +# we're done with it +diff = dest_dev - (a_dev+b_dev) +if 0: + diff = diff.get() + np.set_printoptions(linewidth=400) + print(dest_dev) + print((a_dev+b_dev).get()) + print(diff) + print(la.norm(diff)) + print("A_DEV", a_dev.data.mem.__array_interface__) diff --git a/pyopencl/array.py b/pyopencl/array.py index 28c253c21..540e43ad5 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -720,9 +720,14 @@ def set(self, ary, queue=None, async_=None, **kwargs): stacklevel=2) if self.size: - event1 = cl.enqueue_copy(queue or self.queue, 
self.base_data, ary, - device_offset=self.offset, - is_blocking=not async_) + if self.offset: + event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary, + device_offset=self.offset, + is_blocking=not async_) + else: + event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary, + is_blocking=not async_) + self.add_event(event1) def _get(self, queue=None, ary=None, async_=None, **kwargs): @@ -770,9 +775,14 @@ def _get(self, queue=None, ary=None, async_=None, **kwargs): "to associate one.") if self.size: - event1 = cl.enqueue_copy(queue, ary, self.base_data, - device_offset=self.offset, - wait_for=self.events, is_blocking=not async_) + if self.offset: + event1 = cl.enqueue_copy(queue, ary, self.base_data, + device_offset=self.offset, + wait_for=self.events, is_blocking=not async_) + else: + event1 = cl.enqueue_copy(queue, ary, self.base_data, + wait_for=self.events, is_blocking=not async_) + self.add_event(event1) else: event1 = None From f3684b8905a2b5734980fd1d7d3f3028617c9b43 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 27 Jun 2022 01:12:54 -0500 Subject: [PATCH 06/15] Allow tying an svm_allocation to a queue --- pyopencl/__init__.py | 14 +++++++++---- src/wrap_cl.hpp | 45 ++++++++++++++++++++++++++++++++++++++---- src/wrap_cl_part_2.cpp | 10 +++++++++- 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 86c77f05b..5b8d8b72e 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -1146,12 +1146,13 @@ def memory_map_exit(self, exc_type, exc_val, exc_tb): if get_cl_header_version() >= (2, 0): svmallocation_old_init = SVMAllocation.__init__ - def svmallocation_init(self, ctx, size, alignment, flags, _interface=None): + def svmallocation_init(self, ctx, size, alignment, flags, _interface=None, + queue=None): """ :arg ctx: a :class:`Context` :arg flags: some of :class:`svm_mem_flags`. """ - svmallocation_old_init(self, ctx, size, alignment, flags) + svmallocation_old_init(self, ctx, size, alignment, flags, queue) # mem_flags.READ_ONLY applies to kernels, not the host read_write = True @@ -1996,7 +1997,7 @@ def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): wait_for) -def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): +def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None): """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype* and *order*. (See :func:`numpy.empty` for the meaning of these arguments.) The array will be allocated in shared virtual memory belonging @@ -2014,6 +2015,10 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): will likely want to wrap the returned array in an :class:`SVM` tag. .. versionadded:: 2016.2 + + .. versionchanged:: 2022.2 + + *queue* argument added. 
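+
+    A minimal usage sketch (illustrative only; *ctx* is assumed to be a
+    :class:`Context` on an OpenCL 2.0 platform)::
+
+        ary = svm_empty(ctx, svm_mem_flags.READ_WRITE, 10, np.float32)
+        svm_ary = SVM(ary)   # wrap for use as a kernel argument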
""" dtype = np.dtype(dtype) @@ -2060,7 +2065,8 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): if alignment is None: alignment = itemsize - svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface) + svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface, + queue=queue) return np.asarray(svm_alloc) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 8147cb183..2582a5103 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -3562,16 +3562,28 @@ namespace pyopencl }; - class svm_allocation : noncopyable + class svm_allocation { private: std::shared_ptr m_context; void *m_allocation; + command_queue_ref m_queue; + // FIXME Should keep a list of events so that we can wait for users + // to finish in the case of out-of-order queues. public: - svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, cl_svm_mem_flags flags) + svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, + cl_svm_mem_flags flags, const command_queue *queue = nullptr) : m_context(ctx) { + if (queue) + { + m_queue.set(queue->data()); + if (!is_queue_in_order(m_queue.data())) + throw error("SVMAllocation.__init__", CL_INVALID_VALUE, + "supplying an out-of-order queue to SVMAllocation is invalid"); + } + PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); m_allocation = clSVMAlloc( ctx->data(), @@ -3581,6 +3593,9 @@ namespace pyopencl throw pyopencl::error("clSVMAlloc", CL_OUT_OF_RESOURCES); } + svm_allocation(const svm_allocation &) = delete; + svm_allocation &operator=(const svm_allocation &) = delete; + ~svm_allocation() { if (m_allocation) @@ -3593,8 +3608,20 @@ namespace pyopencl throw error("SVMAllocation.release", CL_INVALID_VALUE, "trying to double-unref svm allocation"); - clSVMFree(m_context->data(), m_allocation); - m_allocation = nullptr; + if (m_queue.is_valid()) + { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, ( + m_queue.data(), 1, &m_allocation, + nullptr, nullptr, + 0, nullptr, nullptr)); + m_queue.reset(); + } + else + { + PYOPENCL_PRINT_CALL_TRACE("clSVMFree"); + clSVMFree(m_context->data(), m_allocation); + m_allocation = nullptr; + } } void enqueue_release(command_queue &queue, py::object py_wait_for) @@ -3634,6 +3661,16 @@ namespace pyopencl { return m_allocation != other.m_allocation; } + + void bind_to_queue(command_queue const &queue) + { + m_queue.set(queue.data()); + } + + void unbind_from_queue() + { + m_queue.reset(); + } }; diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 0c9a0d1b1..ace5e9777 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -305,7 +305,13 @@ void pyopencl_expose_part_2(py::module &m) { typedef svm_allocation cls; py::class_(m, "SVMAllocation", py::dynamic_attr()) - .def(py::init, size_t, cl_uint, cl_svm_mem_flags>()) + .def(py::init, size_t, cl_uint, cl_svm_mem_flags, const command_queue *>(), + py::arg("context"), + py::arg("size"), + py::arg("alignment"), + py::arg("flags"), + py::arg("queue").none(true)=py::none() + ) .DEF_SIMPLE_METHOD(release) .def("enqueue_release", &cls::enqueue_release, ":returns: a :class:`pyopencl.Event`\n\n" @@ -314,6 +320,8 @@ void pyopencl_expose_part_2(py::module &m) .def(py::self == py::self) .def(py::self != py::self) .def("__hash__", &cls::ptr_as_int) + .DEF_SIMPLE_METHOD(bind_to_queue) + .DEF_SIMPLE_METHOD(unbind_from_queue) ; } From 18a52ae3cfad2a7e87923cef0261340c8e7c36d0 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 27 Jun 2022 01:13:52 -0500 Subject: [PATCH 07/15] Add fold markers in 
wrap_mempool --- src/wrap_mempool.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 6b014ba5e..2738ccca2 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -70,6 +70,8 @@ namespace }; + // {{{ cl allocators + class cl_allocator_base { protected: @@ -210,8 +212,10 @@ namespace } }; + // }}} + // {{{ allocator_call inline pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size) @@ -256,8 +260,10 @@ namespace } } + // }}} + // {{{ pooled_buffer class pooled_buffer : public pyopencl::pooled_allocation >, @@ -278,8 +284,10 @@ namespace { return ptr(); } }; + // }}} + // {{{{ device_pool_allocate pooled_buffer *device_pool_allocate( std::shared_ptr > pool, @@ -288,6 +296,9 @@ namespace return new pooled_buffer(pool, sz); } + // }}} + + @@ -398,3 +409,5 @@ void pyopencl_expose_mempool(py::module &m) ; } } + +// vim: foldmethod=marker From 130527217a651bc1ffd8785fc68c2d7bb5b08576 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 27 Jun 2022 01:14:15 -0500 Subject: [PATCH 08/15] Draft svm_allocator for mempool --- src/wrap_mempool.cpp | 54 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 2738ccca2..35630b036 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -299,6 +299,60 @@ namespace // }}} + // {{{ svm allocator + + // FIXME: Does this need deferred and immediate just like the buffer-level + // allocators? (I.e. can I tell whether I am out of memory just from allocations?) + class svm_allocator + { + protected: + std::shared_ptr m_context; + cl_uint m_alignment; + cl_mem_flags m_flags; + + public: + svm_allocator(std::shared_ptr const &ctx, + cl_uint alignment, cl_mem_flags flags=CL_MEM_READ_WRITE) + : m_context(ctx), m_alignment(alignment), m_flags(flags) + { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + throw pyopencl::error("Allocator", CL_INVALID_VALUE, + "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); + } + + svm_allocator(svm_allocator const &src) + : m_context(src.m_context), m_alignment(src.m_alignment), + m_flags(src.m_flags) + { } + + virtual ~svm_allocator() + { } + + typedef void *pointer_type; + typedef size_t size_type; + + pointer_type allocate(size_type size) + { + if (size == 0) + return nullptr; + + PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); + return clSVMAlloc(m_context->data(), m_flags, size, m_alignment); + } + + void free(pointer_type p) + { + clSVMFree(m_context->data(), p); + } + + void try_release_blocks() + { + pyopencl::run_python_gc(); + } + }; + + // }}} + From 008ac676bd03fdfa6dd7e60afd594cf14cbfa728 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:11:50 -0500 Subject: [PATCH 09/15] enqueue_copy for SVM: Accept no-op {src,dest}_offset, byte_count parameters --- pyopencl/__init__.py | 3 +++ src/wrap_cl_part_2.cpp | 1 + 2 files changed, 4 insertions(+) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 5b8d8b72e..12ae3be04 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -1779,6 +1779,9 @@ def enqueue_copy(queue, dest, src, **kwargs): src = SVM(src) is_blocking = kwargs.pop("is_blocking", True) + assert kwargs.pop("src_offset", 0) == 0 + assert kwargs.pop("dest_offset", 0) == 0 + assert "byte_count" not in kwargs or kwargs.pop("byte_count") == src._size() return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs) else: diff --git a/src/wrap_cl_part_2.cpp 
b/src/wrap_cl_part_2.cpp index ace5e9777..a6f604ee7 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -299,6 +299,7 @@ void pyopencl_expose_part_2(py::module &m) typedef svm_arg_wrapper cls; py::class_(m, "SVM", py::dynamic_attr()) .def(py::init()) + .def("_size", &cls::size) ; } From a71ba747a473b383dee1a5ccd5954865538250b7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:17:38 -0500 Subject: [PATCH 10/15] Add SVMAllocator (in Python) --- pyopencl/tools.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 27adac75b..1b027f0ff 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -67,6 +67,25 @@ def _register_types(): # }}} +# {{{ svm allocator + +# FIXME: Replace me with C++ +class SVMAllocator: + def __init__(self, ctx, flags, *, alignment=0, queue=None): + self._context = ctx + self._flags = flags + self._alignment = alignment + self._queue = queue + + def __call__(self, nbytes): + import pyopencl as cl + return cl.SVM(cl.svm_empty( + self._context, self._flags, (nbytes,), np.int8, "C", self._alignment, + self._queue)) + +# }}} + + # {{{ first-arg caches _first_arg_dependent_caches = [] From 04b9bb22cf9fa1a5d38db2b54822f74371a11f29 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 1 Jul 2022 01:18:13 -0500 Subject: [PATCH 11/15] Jostle some section headers in wrap_cl.hpp --- src/wrap_cl.hpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 2582a5103..ec6089ccc 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -137,10 +137,8 @@ // }}} +// {{{ macros and typedefs for wrappers - - -// {{{ tools #if PY_VERSION_HEX >= 0x02050000 typedef Py_ssize_t PYOPENCL_BUFFER_SIZE_T; #else @@ -262,6 +260,7 @@ // }}} + // {{{ tracing and error reporting #ifdef PYOPENCL_TRACE #define PYOPENCL_PRINT_CALL_TRACE(NAME) \ @@ -329,6 +328,7 @@ // }}} + // {{{ get_info helpers #define PYOPENCL_GET_OPAQUE_INFO(WHAT, FIRST_ARG, SECOND_ARG, CL_TYPE, TYPE) \ { \ @@ -383,6 +383,7 @@ // }}} + // {{{ event helpers -------------------------------------------------------------- #define PYOPENCL_PARSE_WAIT_FOR \ cl_uint num_events_in_wait_list = 0; \ @@ -424,7 +425,9 @@ // }}} + // {{{ equality testing + #define PYOPENCL_EQUALITY_TESTS(cls) \ bool operator==(cls const &other) const \ { return data() == other.data(); } \ @@ -432,8 +435,8 @@ { return data() != other.data(); } \ long hash() const \ { return (long) (intptr_t) data(); } -// }}} +// }}} namespace pyopencl From c6685de8ca224382c8afdc41740e2ade3ecf736d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 2 Jul 2022 00:07:27 -0500 Subject: [PATCH 12/15] Add SVM._ptr_as_int() --- src/wrap_cl_part_2.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index a6f604ee7..a2b5cde17 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -299,6 +299,7 @@ void pyopencl_expose_part_2(py::module &m) typedef svm_arg_wrapper cls; py::class_(m, "SVM", py::dynamic_attr()) .def(py::init()) + .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.ptr(); }) .def("_size", &cls::size) ; } From 0dcd53e5aca9ab3c9d2a2fc4dd012cba8b47c03c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 7 Jul 2022 17:52:07 +0200 Subject: [PATCH 13/15] Move from SVM(np.ndarray(SVMAllocation)) to bare SVMAllocation --- examples/demo_array_svm.py | 23 ++--- pyopencl/__init__.py | 60 +++++------ pyopencl/tools.py | 6 +- src/wrap_cl.hpp | 
200 ++++++++++++++++++++++++++++++++----- src/wrap_cl_part_2.cpp | 33 ++++-- 5 files changed, 234 insertions(+), 88 deletions(-) diff --git a/examples/demo_array_svm.py b/examples/demo_array_svm.py index 07454f1fe..054e582c0 100644 --- a/examples/demo_array_svm.py +++ b/examples/demo_array_svm.py @@ -1,36 +1,24 @@ import pyopencl as cl import pyopencl.array as cl_array +from pyopencl.tools import SVMAllocator import numpy as np import numpy.linalg as la -n = 5000000 +n = 500000 a = np.random.rand(n).astype(np.float32) b = np.random.rand(n).astype(np.float32) -class SVMAllocator: - def __init__(self, ctx, flags, alignment): - self._context = ctx - self._flags = flags - self._alignment = alignment - - def __call__(self, nbytes): - return cl.SVM(cl.svm_empty( - ctx, self._flags, (nbytes,), np.int8, "C", self._alignment)) - - ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) -alloc = SVMAllocator(ctx, - cl.svm_mem_flags.READ_WRITE | cl.svm_mem_flags.SVM_FINE_GRAIN_BUFFER, - 0) +alloc = SVMAllocator(ctx, cl.svm_mem_flags.READ_WRITE, queue=queue) a_dev = cl_array.to_device(queue, a, allocator=alloc) -print("A_DEV", a_dev.data.mem.nbytes, a_dev.data.mem.__array_interface__) +print("A_DEV", a_dev.data) b_dev = cl_array.to_device(queue, b, allocator=alloc) dest_dev = cl_array.empty_like(a_dev) -print("DEST", dest_dev.data.mem.__array_interface__) +print("DEST", dest_dev.data) prg = cl.Program(ctx, """ __kernel void sum(__global const float *a, @@ -47,6 +35,7 @@ def __call__(self, nbytes): # PROBLEM: numpy frees the temporary out of (a_dev+b_dev) before # we're done with it diff = dest_dev - (a_dev+b_dev) + if 0: diff = diff.get() np.set_printoptions(linewidth=400) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 12ae3be04..22562d1f9 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -200,11 +200,9 @@ if get_cl_header_version() >= (2, 0): from pyopencl._cl import ( # noqa - SVMAllocation, + SVMPointer, SVM, - - # FIXME - #enqueue_svm_migratemem, + SVMAllocation, ) if _cl.have_gl(): @@ -1144,25 +1142,19 @@ def memory_map_exit(self, exc_type, exc_val, exc_tb): """ if get_cl_header_version() >= (2, 0): - svmallocation_old_init = SVMAllocation.__init__ - - def svmallocation_init(self, ctx, size, alignment, flags, _interface=None, - queue=None): - """ - :arg ctx: a :class:`Context` - :arg flags: some of :class:`svm_mem_flags`. - """ - svmallocation_old_init(self, ctx, size, alignment, flags, queue) - - # mem_flags.READ_ONLY applies to kernels, not the host - read_write = True - _interface["data"] = ( - int(self._ptr_as_int()), not read_write) - - self.__array_interface__ = _interface - - if get_cl_header_version() >= (2, 0): - SVMAllocation.__init__ = svmallocation_init + class _ArrayInterfaceSVMAllocation(SVMAllocation): + def __init__(self, ctx, size, alignment, flags, _interface=None, + queue=None): + """ + :arg ctx: a :class:`Context` + :arg flags: some of :class:`svm_mem_flags`. 
+ """ + super().__init__(ctx, size, alignment, flags, queue) + + # mem_flags.READ_ONLY applies to kernels, not the host + read_write = True + _interface["data"] = ( + int(self._ptr_as_int()), not read_write) # }}} @@ -1773,15 +1765,14 @@ def enqueue_copy(queue, dest, src, **kwargs): else: raise ValueError("invalid dest mem object type") - elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVM): + elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVMPointer): # to SVM - if not isinstance(src, SVM): + if not isinstance(src, SVMPointer): src = SVM(src) is_blocking = kwargs.pop("is_blocking", True) assert kwargs.pop("src_offset", 0) == 0 assert kwargs.pop("dest_offset", 0) == 0 - assert "byte_count" not in kwargs or kwargs.pop("byte_count") == src._size() return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs) else: @@ -1807,7 +1798,7 @@ def enqueue_copy(queue, dest, src, **kwargs): queue, src, origin, region, dest, **kwargs) else: raise ValueError("invalid src mem object type") - elif isinstance(src, SVM): + elif isinstance(src, SVMPointer): # from svm # dest is not a SVM instance, otherwise we'd be in the branch above is_blocking = kwargs.pop("is_blocking", True) @@ -1822,14 +1813,14 @@ def enqueue_copy(queue, dest, src, **kwargs): # {{{ enqueue_fill -def enqueue_fill(queue: CommandQueue, dest: Union[MemoryObjectHolder, SVM], +def enqueue_fill(queue: CommandQueue, dest: Union[MemoryObjectHolder, SVMPointer], pattern: Any, size: int, *, offset: int = 0, wait_for=None) -> Event: """ .. versionadded:: 2022.2 """ if isinstance(dest, MemoryObjectHolder): return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for) - elif isinstance(dest, SVM): + elif isinstance(dest, SVMPointer): if offset: raise NotImplementedError("enqueue_fill with SVM does not yet support " "offsets") @@ -1961,7 +1952,7 @@ def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): """Fill shared virtual memory with a pattern. - :arg dest: a Python buffer object, optionally wrapped in an :class:`SVM` object + :arg dest: a Python buffer object, or any implementation of :class:`SVMPointer`. :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray` with the fill pattern to be used. :arg byte_count: The size of the memory to be fill. Defaults to the @@ -1972,8 +1963,8 @@ def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): .. versionadded:: 2016.2 """ - if not isinstance(dest, SVM): - dest = SVM(dest) + if not isinstance(dest, SVMPointer): + dest = SVMPointer(dest) return _cl._enqueue_svm_memfill( queue, dest, pattern, byte_count=None, wait_for=None) @@ -1982,7 +1973,7 @@ def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): """ :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy` - arrays), optionally wrapped in :class:`SVM` objects. + arrays), or any implementation of :class:`SVMPointer`. 
:arg flags: a combination of :class:`mem_migration_flags` |std-enqueue-blurb| @@ -2068,7 +2059,8 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None): if alignment is None: alignment = itemsize - svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface, + svm_alloc = _ArrayInterfaceSVMAllocation( + ctx, nbytes, alignment, flags, _interface=interface, queue=queue) return np.asarray(svm_alloc) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 1b027f0ff..09e4d0566 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -79,9 +79,9 @@ def __init__(self, ctx, flags, *, alignment=0, queue=None): def __call__(self, nbytes): import pyopencl as cl - return cl.SVM(cl.svm_empty( - self._context, self._flags, (nbytes,), np.int8, "C", self._alignment, - self._queue)) + return cl.SVMAllocation( + self._context, nbytes, self._alignment, self._flags, + queue=self._queue) # }}} diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index ec6089ccc..76477f55f 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -227,8 +227,6 @@ } - - #define PYOPENCL_RETRY_IF_MEM_ERROR(OPERATION) \ { \ bool failed_with_mem_error = false; \ @@ -258,6 +256,17 @@ } \ } + +#define PYOPENCL_GET_SVM_SIZE(NAME) \ + size_t NAME##_size; \ + bool NAME##_has_size = false; \ + try \ + { \ + NAME##_size = NAME.size(); \ + NAME##_has_size = true; \ + } \ + catch (size_not_available) { } + // }}} @@ -501,12 +510,12 @@ namespace pyopencl // {{{ utility functions - inline bool is_queue_in_order(cl_command_queue queue) + inline bool is_queue_out_of_order(cl_command_queue queue) { cl_command_queue_properties param_value; PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, (queue, CL_QUEUE_PROPERTIES, sizeof(param_value), ¶m_value, 0)); - return !(param_value & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); + return param_value & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; } // }}} @@ -3527,11 +3536,26 @@ namespace pyopencl // }}} - // {{{ svm - #if PYOPENCL_CL_VERSION >= 0x2000 - class svm_arg_wrapper + // {{{ svm pointer + + class size_not_available { }; + + class svm_pointer + { + public: + virtual void *ptr() const = 0; + // may throw size_not_available + virtual size_t size() const = 0; + }; + + // }}} + + + // {{{ svm_arg_wrapper + + class svm_arg_wrapper : public svm_pointer { private: void *m_ptr; @@ -3564,12 +3588,17 @@ namespace pyopencl } }; + // }}} + - class svm_allocation + // {{{ svm_allocation + + class svm_allocation : public svm_pointer { private: std::shared_ptr m_context; void *m_allocation; + size_t m_size; command_queue_ref m_queue; // FIXME Should keep a list of events so that we can wait for users // to finish in the case of out-of-order queues. 
@@ -3577,12 +3606,12 @@ namespace pyopencl public: svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, cl_svm_mem_flags flags, const command_queue *queue = nullptr) - : m_context(ctx) + : m_context(ctx), m_size(size) { if (queue) { m_queue.set(queue->data()); - if (!is_queue_in_order(m_queue.data())) + if (is_queue_out_of_order(m_queue.data())) throw error("SVMAllocation.__init__", CL_INVALID_VALUE, "supplying an out-of-order queue to SVMAllocation is invalid"); } @@ -3650,6 +3679,11 @@ namespace pyopencl return m_allocation; } + size_t size() const + { + return m_size; + } + intptr_t ptr_as_int() const { return (intptr_t) m_allocation; @@ -3667,29 +3701,92 @@ namespace pyopencl void bind_to_queue(command_queue const &queue) { + if (is_queue_out_of_order(m_queue.data())) + throw error("SVMAllocation.bind_to_queue", CL_INVALID_VALUE, + "supplying an out-of-order queue to SVMAllocation is invalid"); + + if (m_queue.is_valid()) + { + // make sure synchronization promises stay valid in new queue + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, (m_queue.data(), &evt)); + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (queue.data(), 1, &evt)); + } + m_queue.set(queue.data()); } void unbind_from_queue() { + // NOTE: This absolves the allocation from any synchronization promises + // made. Keeping those before calling this method is the responsibility + // of the user. m_queue.reset(); } }; + // }}} + + + // {{{ svm operations inline event *enqueue_svm_memcpy( command_queue &cq, cl_bool is_blocking, - svm_arg_wrapper &dst, svm_arg_wrapper &src, - py::object py_wait_for + svm_pointer &dst, svm_pointer &src, + py::object py_wait_for, + py::object byte_count_py ) { PYOPENCL_PARSE_WAIT_FOR; - if (src.size() != dst.size()) + // {{{ process size + + PYOPENCL_GET_SVM_SIZE(src); + PYOPENCL_GET_SVM_SIZE(dst); + + size_t size; + bool have_size = false; + + if (src_has_size) + { + size = src_size; + have_size = true; + } + if (dst_has_size) + { + if (have_size) + { + if (!byte_count_py.is_none()) + size = std::min(size, dst_size); + else if (size != dst_size) + throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, + "sizes of source and destination buffer do not match"); + } + else + { + size = dst_size; + have_size = true; + } + } + + if (!byte_count_py.is_none()) + { + size_t byte_count = byte_count_py.cast(); + if (have_size && byte_count > size) + throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, + "specified byte_count larger than size of source or destination buffers"); + size = byte_count; + have_size = true; + } + + if (!have_size) throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, - "sizes of source and destination buffer do not match"); + "size not passed and could not be determined"); + + // }}} cl_event evt; PYOPENCL_CALL_GUARDED( @@ -3698,7 +3795,7 @@ namespace pyopencl cq.data(), is_blocking, dst.ptr(), src.ptr(), - dst.size(), + size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3710,7 +3807,7 @@ namespace pyopencl inline event *enqueue_svm_memfill( command_queue &cq, - svm_arg_wrapper &dst, py::object py_pattern, + svm_pointer &dst, py::object py_pattern, py::object byte_count, py::object py_wait_for ) @@ -3727,9 +3824,32 @@ namespace pyopencl pattern_ptr = pattern_ward->m_buf.buf; pattern_len = pattern_ward->m_buf.len; - size_t fill_size = dst.size(); + // {{{ process size + + PYOPENCL_GET_SVM_SIZE(dst); + + size_t size; + bool have_size = false; + if (dst_has_size) + { + size = dst_size; + have_size = true; + } if (!byte_count.is_none()) - fill_size = 
py::cast(byte_count); + { + size_t user_size = py::cast(byte_count); + if (have_size && user_size > size) + throw error("enqueue_svm_memfill", CL_INVALID_VALUE, + "byte_count too large for specified SVM buffer"); + } + + if (!have_size) + { + throw error("enqueue_svm_memfill", CL_INVALID_VALUE, + "byte_count not passed and could not be determined"); + } + + // }}} cl_event evt; PYOPENCL_CALL_GUARDED( @@ -3738,7 +3858,7 @@ namespace pyopencl cq.data(), dst.ptr(), pattern_ptr, pattern_len, - fill_size, + size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3752,12 +3872,40 @@ namespace pyopencl command_queue &cq, cl_bool is_blocking, cl_map_flags flags, - svm_arg_wrapper &svm, - py::object py_wait_for + svm_pointer &svm, + py::object py_wait_for, + py::object user_size_py ) { PYOPENCL_PARSE_WAIT_FOR; + // {{{ process size + + PYOPENCL_GET_SVM_SIZE(svm); + + size_t size; + bool have_size = false; + if (svm_has_size) + { + size = svm_size; + have_size = true; + } + if (!user_size_py.is_none()) + { + size_t user_size = py::cast(user_size_py); + if (have_size && user_size > size) + throw error("enqueue_svm_memfill", CL_INVALID_VALUE, + "user-provided size too large for specified SVM buffer"); + } + + if (!have_size) + { + throw error("enqueue_svm_mem_map", CL_INVALID_VALUE, + "size not passed and could not be determined"); + } + + // }}} + cl_event evt; PYOPENCL_CALL_GUARDED( clEnqueueSVMMap, @@ -3765,7 +3913,7 @@ namespace pyopencl cq.data(), is_blocking, flags, - svm.ptr(), svm.size(), + svm.ptr(), size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3777,7 +3925,7 @@ namespace pyopencl inline event *enqueue_svm_unmap( command_queue &cq, - svm_arg_wrapper &svm, + svm_pointer &svm, py::object py_wait_for ) { @@ -3814,7 +3962,7 @@ namespace pyopencl for (py::handle py_svm: svms) { - svm_arg_wrapper &svm(py::cast(py_svm)); + svm_pointer &svm(py::cast(py_svm)); svm_pointers.push_back(svm.ptr()); sizes.push_back(svm.size()); @@ -4609,7 +4757,7 @@ namespace pyopencl } #if PYOPENCL_CL_VERSION >= 0x2000 - void set_arg_svm(cl_uint arg_index, svm_arg_wrapper const &wrp) + void set_arg_svm(cl_uint arg_index, svm_pointer const &wrp) { PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer, (m_kernel, arg_index, wrp.ptr())); @@ -4634,7 +4782,7 @@ namespace pyopencl #if PYOPENCL_CL_VERSION >= 0x2000 try { - set_arg_svm(arg_index, arg.cast()); + set_arg_svm(arg_index, arg.cast()); return; } catch (py::cast_error &) { } diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index a2b5cde17..453f34c38 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -295,18 +295,34 @@ void pyopencl_expose_part_2(py::module &m) // {{{ svm #if PYOPENCL_CL_VERSION >= 0x2000 + { + typedef svm_pointer cls; + py::class_(m, "SVMPointer", py::dynamic_attr()) + .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.ptr(); }) + .def("_size", [](cls &self) -> py::object + { + try + { + return py::cast(self.size()); + } + catch (size_not_available) + { + return py::none(); + } + }) + ; + } + { typedef svm_arg_wrapper cls; - py::class_(m, "SVM", py::dynamic_attr()) + py::class_(m, "SVM", py::dynamic_attr()) .def(py::init()) - .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.ptr(); }) - .def("_size", &cls::size) ; } { typedef svm_allocation cls; - py::class_(m, "SVMAllocation", py::dynamic_attr()) + py::class_(m, "SVMAllocation", py::dynamic_attr()) .def(py::init, size_t, cl_uint, cl_svm_mem_flags, const command_queue *>(), py::arg("context"), py::arg("size"), @@ -318,10 +334,9 @@ void pyopencl_expose_part_2(py::module &m) 
.def("enqueue_release", &cls::enqueue_release, ":returns: a :class:`pyopencl.Event`\n\n" "|std-enqueue-blurb|") - .def("_ptr_as_int", &cls::ptr_as_int) .def(py::self == py::self) .def(py::self != py::self) - .def("__hash__", &cls::ptr_as_int) + .def("__hash__", [](cls &self) { return (intptr_t) self.ptr(); }) .DEF_SIMPLE_METHOD(bind_to_queue) .DEF_SIMPLE_METHOD(unbind_from_queue) ; @@ -332,7 +347,8 @@ void pyopencl_expose_part_2(py::module &m) py::arg("is_blocking"), py::arg("dst"), py::arg("src"), - py::arg("wait_for")=py::none() + py::arg("wait_for")=py::none(), + py::arg("byte_count")=py::none() ); m.def("_enqueue_svm_memfill", enqueue_svm_memfill, @@ -348,7 +364,8 @@ void pyopencl_expose_part_2(py::module &m) py::arg("is_blocking"), py::arg("flags"), py::arg("svm"), - py::arg("wait_for")=py::none() + py::arg("wait_for")=py::none(), + py::arg("size")=py::none() ); m.def("_enqueue_svm_unmap", enqueue_svm_unmap, From 1f4f7699d38aade3aeaea71436f7a484eace5449 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 22 Jul 2022 15:59:45 +0200 Subject: [PATCH 14/15] WIP: Towards SVM memory pool --- pyopencl/tools.py | 15 +- src/mempool.hpp | 30 ++-- src/wrap_cl.hpp | 28 ++-- src/wrap_cl_part_2.cpp | 4 +- src/wrap_mempool.cpp | 320 ++++++++++++++++++++++++++++++++++------- 5 files changed, 312 insertions(+), 85 deletions(-) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 09e4d0566..831d445ce 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -33,7 +33,7 @@ import numpy as np from pytools import memoize, memoize_method -from pyopencl._cl import bitlog2 # noqa: F401 +from pyopencl._cl import bitlog2, get_cl_header_version # noqa: F401 from pytools.persistent_dict import KeyBuilder as KeyBuilderBase import re @@ -59,10 +59,19 @@ def _register_types(): # {{{ imported names from pyopencl._cl import ( # noqa - PooledBuffer as PooledBuffer, + _tools_PooledBuffer as PooledBuffer, _tools_DeferredAllocator as DeferredAllocator, _tools_ImmediateAllocator as ImmediateAllocator, - MemoryPool as MemoryPool) + _tools_MemoryPool as MemoryPool, + ) + + +if get_cl_header_version() >= (2, 0): + from pyopencl._cl import ( # noqa + _tools_SVMemoryPool as SVMemoryPool, + _tools_PooledSVM as PooledSVM, + _tools_SVMAllocator as SVMAllocator, + ) # }}} diff --git a/src/mempool.hpp b/src/mempool.hpp index 44f0fd643..812bd2f1c 100644 --- a/src/mempool.hpp +++ b/src/mempool.hpp @@ -233,7 +233,8 @@ namespace PYGPU_PACKAGE std::cout << "[pool] allocation of size " << size << " served from bin " << bin_nr << " which contained " << bin.size() << " entries" << std::endl; - return pop_block_from_bin(bin, size); + return m_allocator->hand_out_existing_block( + pop_block_from_bin(bin, size)); } size_type alloc_sz = alloc_size(bin_nr); @@ -256,7 +257,8 @@ namespace PYGPU_PACKAGE m_allocator->try_release_blocks(); if (bin.size()) - return pop_block_from_bin(bin, size); + return m_allocator->hand_out_existing_block( + pop_block_from_bin(bin, size)); if (m_trace) std::cout << "[pool] allocation still OOM after GC" << std::endl; @@ -282,7 +284,7 @@ namespace PYGPU_PACKAGE "failed to free memory for allocation"); } - void free(pointer_type p, size_type size) + void free(pointer_type &&p, size_type size) { --m_active_blocks; m_active_bytes -= size; @@ -291,7 +293,7 @@ namespace PYGPU_PACKAGE if (!m_stop_holding) { inc_held_blocks(); - get_bin(bin_nr).push_back(p); + get_bin(bin_nr).push_back(std::move(p)); if (m_trace) std::cout << "[pool] block of size " << size << " returned to bin " @@ -300,7 +302,7 @@ namespace 
PYGPU_PACKAGE } else { - m_allocator->free(p); + m_allocator->free(std::move(p)); m_managed_bytes -= alloc_size(bin_nr); } } @@ -313,7 +315,7 @@ namespace PYGPU_PACKAGE while (bin.size()) { - m_allocator->free(bin.back()); + m_allocator->free(std::move(bin.back())); m_managed_bytes -= alloc_size(bin_pair.first); bin.pop_back(); @@ -353,7 +355,7 @@ namespace PYGPU_PACKAGE if (bin.size()) { - m_allocator->free(bin.back()); + m_allocator->free(std::move(bin.back())); m_managed_bytes -= alloc_size(bin_pair.first); bin.pop_back(); @@ -379,7 +381,7 @@ namespace PYGPU_PACKAGE pointer_type pop_block_from_bin(bin_t &bin, size_type size) { - pointer_type result = bin.back(); + pointer_type result(std::move(bin.back())); bin.pop_back(); dec_held_blocks(); @@ -399,7 +401,7 @@ namespace PYGPU_PACKAGE typedef typename Pool::pointer_type pointer_type; typedef typename Pool::size_type size_type; - private: + protected: PYGPU_SHARED_PTR m_pool; pointer_type m_ptr; @@ -421,7 +423,7 @@ namespace PYGPU_PACKAGE { if (m_valid) { - m_pool->free(m_ptr, m_size); + m_pool->free(std::move(m_ptr), m_size); m_valid = false; } else @@ -435,16 +437,8 @@ namespace PYGPU_PACKAGE #endif ); } - - pointer_type ptr() const - { return m_ptr; } - - size_type size() const - { return m_size; } }; } - - #endif diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 76477f55f..ad60372db 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1708,7 +1708,13 @@ namespace pyopencl src.m_valid = false; } - command_queue_ref(const command_queue_ref &) = delete; + command_queue_ref(const command_queue_ref &) + { + throw error("command_queue_ref", CL_INVALID_VALUE, + "command_queue_ref copy constructor is never supposed to be called; " + "all notional invocations should be eliminated because of NRVO"); + } + command_queue_ref &operator=(const command_queue_ref &) = delete; ~command_queue_ref() @@ -3545,7 +3551,7 @@ namespace pyopencl class svm_pointer { public: - virtual void *ptr() const = 0; + virtual void *svm_ptr() const = 0; // may throw size_not_available virtual size_t size() const = 0; }; @@ -3578,7 +3584,7 @@ namespace pyopencl m_size = ward->m_buf.len; } - void *ptr() const + void *svm_ptr() const { return m_ptr; } @@ -3674,7 +3680,7 @@ namespace pyopencl m_allocation = nullptr; } - void *ptr() const + void *svm_ptr() const { return m_allocation; } @@ -3701,7 +3707,7 @@ namespace pyopencl void bind_to_queue(command_queue const &queue) { - if (is_queue_out_of_order(m_queue.data())) + if (is_queue_out_of_order(queue.data())) throw error("SVMAllocation.bind_to_queue", CL_INVALID_VALUE, "supplying an out-of-order queue to SVMAllocation is invalid"); @@ -3794,7 +3800,7 @@ namespace pyopencl ( cq.data(), is_blocking, - dst.ptr(), src.ptr(), + dst.svm_ptr(), src.svm_ptr(), size, PYOPENCL_WAITLIST_ARGS, &evt @@ -3856,7 +3862,7 @@ namespace pyopencl clEnqueueSVMMemFill, ( cq.data(), - dst.ptr(), pattern_ptr, + dst.svm_ptr(), pattern_ptr, pattern_len, size, PYOPENCL_WAITLIST_ARGS, @@ -3913,7 +3919,7 @@ namespace pyopencl cq.data(), is_blocking, flags, - svm.ptr(), size, + svm.svm_ptr(), size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3936,7 +3942,7 @@ namespace pyopencl clEnqueueSVMUnmap, ( cq.data(), - svm.ptr(), + svm.svm_ptr(), PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3964,7 +3970,7 @@ namespace pyopencl { svm_pointer &svm(py::cast(py_svm)); - svm_pointers.push_back(svm.ptr()); + svm_pointers.push_back(svm.svm_ptr()); sizes.push_back(svm.size()); } @@ -4760,7 +4766,7 @@ namespace pyopencl void set_arg_svm(cl_uint arg_index, svm_pointer const &wrp) 
{ PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer, - (m_kernel, arg_index, wrp.ptr())); + (m_kernel, arg_index, wrp.svm_ptr())); } #endif diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 453f34c38..3c40e1c90 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -298,7 +298,7 @@ void pyopencl_expose_part_2(py::module &m) { typedef svm_pointer cls; py::class_(m, "SVMPointer", py::dynamic_attr()) - .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.ptr(); }) + .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.svm_ptr(); }) .def("_size", [](cls &self) -> py::object { try @@ -336,7 +336,7 @@ void pyopencl_expose_part_2(py::module &m) "|std-enqueue-blurb|") .def(py::self == py::self) .def(py::self != py::self) - .def("__hash__", [](cls &self) { return (intptr_t) self.ptr(); }) + .def("__hash__", [](cls &self) { return (intptr_t) self.svm_ptr(); }) .DEF_SIMPLE_METHOD(bind_to_queue) .DEF_SIMPLE_METHOD(unbind_from_queue) ; diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 35630b036..429e3e1b9 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -57,12 +57,18 @@ namespace { return false; } + virtual pointer_type allocate(size_type s) { return nullptr; } - void free(pointer_type p) + virtual pointer_type hand_out_existing_block(pointer_type &&p) + { + return p; + } + + void free(pointer_type &&p) { } void try_release_blocks() @@ -70,16 +76,16 @@ namespace }; - // {{{ cl allocators + // {{{ buffer allocators - class cl_allocator_base + class buffer_allocator_base { protected: std::shared_ptr m_context; cl_mem_flags m_flags; public: - cl_allocator_base(std::shared_ptr const &ctx, + buffer_allocator_base(std::shared_ptr const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : m_context(ctx), m_flags(flags) { @@ -88,21 +94,26 @@ namespace "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); } - cl_allocator_base(cl_allocator_base const &src) + buffer_allocator_base(buffer_allocator_base const &src) : m_context(src.m_context), m_flags(src.m_flags) { } - virtual ~cl_allocator_base() + virtual ~buffer_allocator_base() { } typedef cl_mem pointer_type; typedef size_t size_type; - virtual cl_allocator_base *copy() const = 0; + virtual buffer_allocator_base *copy() const = 0; virtual bool is_deferred() const = 0; virtual pointer_type allocate(size_type s) = 0; - void free(pointer_type p) + virtual pointer_type hand_out_existing_block(pointer_type p) + { + return p; + } + + void free(pointer_type &&p) { PYOPENCL_CALL_GUARDED(clReleaseMemObject, (p)); } @@ -113,20 +124,21 @@ namespace } }; - class cl_deferred_allocator : public cl_allocator_base + + class deferred_buffer_allocator : public buffer_allocator_base { private: - typedef cl_allocator_base super; + typedef buffer_allocator_base super; public: - cl_deferred_allocator(std::shared_ptr const &ctx, + deferred_buffer_allocator(std::shared_ptr const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : super(ctx, flags) { } - cl_allocator_base *copy() const + buffer_allocator_base *copy() const { - return new cl_deferred_allocator(*this); + return new deferred_buffer_allocator(*this); } bool is_deferred() const @@ -143,26 +155,26 @@ namespace const unsigned zero = 0; - class cl_immediate_allocator : public cl_allocator_base + class immediate_buffer_allocator : public buffer_allocator_base { private: - typedef cl_allocator_base super; + typedef buffer_allocator_base super; pyopencl::command_queue m_queue; public: - cl_immediate_allocator(pyopencl::command_queue &queue, + 
immediate_buffer_allocator(pyopencl::command_queue &queue, cl_mem_flags flags=CL_MEM_READ_WRITE) : super(std::shared_ptr(queue.get_context()), flags), m_queue(queue.data(), /*retain*/ true) { } - cl_immediate_allocator(cl_immediate_allocator const &src) + immediate_buffer_allocator(immediate_buffer_allocator const &src) : super(src), m_queue(src.m_queue) { } - cl_allocator_base *copy() const + buffer_allocator_base *copy() const { - return new cl_immediate_allocator(*this); + return new immediate_buffer_allocator(*this); } bool is_deferred() const @@ -215,10 +227,10 @@ namespace // }}} - // {{{ allocator_call + // {{{ buffer_allocator_call inline - pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size) + pyopencl::buffer *buffer_allocator_call(buffer_allocator_base &alloc, size_t size) { cl_mem mem; int try_count = 0; @@ -266,12 +278,12 @@ namespace // {{{ pooled_buffer class pooled_buffer - : public pyopencl::pooled_allocation >, + : public pyopencl::pooled_allocation >, public pyopencl::memory_object_holder { private: typedef - pyopencl::pooled_allocation > + pyopencl::pooled_allocation > super; public: @@ -281,17 +293,22 @@ namespace { } const super::pointer_type data() const - { return ptr(); } + { return m_ptr; } + + size_t size() const + { + return m_size; + } }; // }}} - // {{{{ device_pool_allocate + // {{{ buffer_pool_allocate - pooled_buffer *device_pool_allocate( - std::shared_ptr > pool, - pyopencl::memory_pool::size_type sz) + pooled_buffer *buffer_pool_allocate( + std::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) { return new pooled_buffer(pool, sz); } @@ -299,25 +316,40 @@ namespace // }}} +#if PYOPENCL_CL_VERSION >= 0x2000 + // {{{ svm allocator // FIXME: Does this need deferred and immediate just like the buffer-level // allocators? (I.e. can I tell whether I am out of memory just from allocations?) + struct svm_held_pointer + { + void *ptr; + pyopencl::command_queue_ref queue; + }; + + class svm_allocator { + public: + typedef svm_held_pointer pointer_type; + typedef size_t size_type; + protected: std::shared_ptr m_context; cl_uint m_alignment; - cl_mem_flags m_flags; + cl_svm_mem_flags m_flags; + pyopencl::command_queue_ref m_queue; + public: svm_allocator(std::shared_ptr const &ctx, - cl_uint alignment, cl_mem_flags flags=CL_MEM_READ_WRITE) + cl_uint alignment, cl_svm_mem_flags flags=CL_MEM_READ_WRITE, + pyopencl::command_queue *queue=nullptr) : m_context(ctx), m_alignment(alignment), m_flags(flags) { - if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) - throw pyopencl::error("Allocator", CL_INVALID_VALUE, - "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); + if (queue) + m_queue.set(m_queue.data()); } svm_allocator(svm_allocator const &src) @@ -328,21 +360,61 @@ namespace virtual ~svm_allocator() { } - typedef void *pointer_type; - typedef size_t size_type; + virtual svm_allocator *copy() const + { + return new svm_allocator(m_context, m_alignment, m_flags); + } + + virtual bool is_deferred() const + { + // FIXME: I don't know whether that's true. 
+ return false; + } pointer_type allocate(size_type size) { if (size == 0) - return nullptr; + return { nullptr, nullptr }; PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); - return clSVMAlloc(m_context->data(), m_flags, size, m_alignment); + return { + clSVMAlloc(m_context->data(), m_flags, size, m_alignment), + pyopencl::command_queue_ref(m_queue.data()) + }; + } + + virtual pointer_type hand_out_existing_block(pointer_type &&p) + { + if (m_queue.is_valid()) + { + if (p.queue.is_valid()) + { + // make sure synchronization promises stay valid in new queue + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, (p.queue.data(), &evt)); + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (m_queue.data(), 1, &evt)); + } + p.queue.set(m_queue.data()); + } + return p; } - void free(pointer_type p) + void free(pointer_type &&p) { - clSVMFree(m_context->data(), p); + if (p.queue.is_valid()) + { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, ( + p.queue.data(), 1, &p.ptr, + nullptr, nullptr, + 0, nullptr, nullptr)); + p.queue.reset(); + } + else + { + PYOPENCL_PRINT_CALL_TRACE("clSVMFree"); + clSVMFree(m_context->data(), p.ptr); + } } void try_release_blocks() @@ -354,7 +426,99 @@ namespace // }}} + // {{{ svm_allocator_call + + inline + svm_held_pointer svm_allocator_call(svm_allocator &alloc, size_t size) + { + svm_held_pointer mem; + int try_count = 0; + while (true) + { + try + { + return alloc.allocate(size); + } + catch (pyopencl::error &e) + { + if (!e.is_out_of_memory()) + throw; + if (++try_count == 2) + throw; + } + alloc.try_release_blocks(); + } + } + + // }}} + + + // {{{ pooled_svm + + class pooled_svm + : public pyopencl::pooled_allocation>, + public pyopencl::svm_pointer + { + private: + typedef + pyopencl::pooled_allocation> + super; + + public: + pooled_svm( + std::shared_ptr p, super::size_type s) + : super(p, s) + { } + + void *svm_ptr() const + { return m_ptr.ptr; } + + size_t size() const + { return m_size; } + + void bind_to_queue(pyopencl::command_queue const &queue) + { + if (pyopencl::is_queue_out_of_order(queue.data())) + throw pyopencl::error("PooledSVM.bind_to_queue", CL_INVALID_VALUE, + "supplying an out-of-order queue to SVMAllocation is invalid"); + + if (m_ptr.queue.is_valid()) + { + // make sure synchronization promises stay valid in new queue + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, (m_ptr.queue.data(), &evt)); + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (queue.data(), 1, &evt)); + } + + m_ptr.queue.set(queue.data()); + } + + void unbind_from_queue() + { + // NOTE: This absolves the allocation from any synchronization promises + // made. Keeping those before calling this method is the responsibility + // of the user. 
+ m_ptr.queue.reset(); + } + }; + + // }}} + + + // {{{ svm_pool_allocate + + pooled_svm *svm_pool_allocate( + std::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) + { + return new pooled_svm(pool, sz); + } + + // }}} + +#endif template void expose_memory_pool(Wrapper &wrapper) @@ -381,11 +545,11 @@ void pyopencl_expose_mempool(py::module &m) m.def("bitlog2", pyopencl::bitlog2); { - typedef cl_allocator_base cls; + typedef buffer_allocator_base cls; py::class_ wrapper( m, "_tools_AllocatorBase"/*, py::no_init */); wrapper - .def("__call__", allocator_call) + .def("__call__", buffer_allocator_call) ; } @@ -410,8 +574,8 @@ void pyopencl_expose_mempool(py::module &m) } { - typedef cl_deferred_allocator cls; - py::class_ wrapper( + typedef deferred_buffer_allocator cls; + py::class_ wrapper( m, "_tools_DeferredAllocator"); wrapper .def(py::init< @@ -424,8 +588,8 @@ void pyopencl_expose_mempool(py::module &m) } { - typedef cl_immediate_allocator cls; - py::class_ wrapper( + typedef immediate_buffer_allocator cls; + py::class_ wrapper( m, "_tools_ImmediateAllocator"); wrapper .def(py::init()) @@ -435,18 +599,18 @@ void pyopencl_expose_mempool(py::module &m) } { - typedef pyopencl::memory_pool cls; + typedef pyopencl::memory_pool cls; py::class_< cls, /* boost::noncopyable, */ - std::shared_ptr> wrapper( m, "MemoryPool"); + std::shared_ptr> wrapper( m, "_tools_MemoryPool"); wrapper - .def(py::init(), + .def(py::init(), py::arg("allocator"), py::arg("leading_bits_in_bin_id")=4 ) - .def("allocate", device_pool_allocate) - .def("__call__", device_pool_allocate) + .def("allocate", buffer_pool_allocate) + .def("__call__", buffer_pool_allocate) // undoc for now .DEF_SIMPLE_METHOD(set_trace) ; @@ -458,10 +622,64 @@ void pyopencl_expose_mempool(py::module &m) typedef pooled_buffer cls; py::class_( - m, "PooledBuffer"/* , py::no_init */) + m, "_tools_PooledBuffer"/* , py::no_init */) + .def("release", &cls::free) + // undocumented for now, for consistency with SVM + .def("bind_to_queue", [](cls &self, pyopencl::command_queue &queue) { /* no-op */ }) + .def("unbind_from_queue", [](cls &self) { /* no-op */ }) + ; + } + +#if PYOPENCL_CL_VERSION >= 0x2000 + { + typedef pyopencl::memory_pool cls; + + py::class_< + cls, /* boost::noncopyable, */ + std::shared_ptr> wrapper( m, "_tools_SVMemoryPool"); + wrapper + .def(py::init(), + py::arg("allocator"), + py::arg("leading_bits_in_bin_id")=4 + ) + .def("allocate", svm_pool_allocate) + .def("__call__", svm_pool_allocate) + // undoc for now + .DEF_SIMPLE_METHOD(set_trace) + ; + + expose_memory_pool(wrapper); + } + + { + typedef pooled_svm cls; + py::class_( + m, "_tools_PooledSVM"/* , py::no_init */) .def("release", &cls::free) + .def("__eq__", [](const cls &self, const cls &other) + { return self.svm_ptr() == other.svm_ptr(); }) + .def("__hash__", [](cls &self) { return (intptr_t) self.svm_ptr(); }) + .DEF_SIMPLE_METHOD(bind_to_queue) + .DEF_SIMPLE_METHOD(unbind_from_queue) + ; + } + + { + typedef svm_allocator cls; + py::class_ wrapper( + m, "_tools_SVMAllocator"); + wrapper + .def(py::init const &, cl_uint, cl_uint, pyopencl::command_queue *>(), + py::arg("context"), + py::arg("alignment"), + py::arg("flags")=CL_MEM_READ_WRITE, + py::arg("command_queue").none(true)=nullptr + ) + .def("__call__", svm_allocator_call) ; } +#endif } // vim: foldmethod=marker From f721a96f07a0316cc73a2c2805b96c04c4a7af38 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 27 Jul 2022 12:32:11 -0500 Subject: [PATCH 15/15] Some SVM fixes --- examples/demo_array_svm.py | 
2 +- pyopencl/tools.py | 19 ------------------- src/wrap_mempool.cpp | 10 ++++++++-- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/examples/demo_array_svm.py b/examples/demo_array_svm.py index 054e582c0..eaba23209 100644 --- a/examples/demo_array_svm.py +++ b/examples/demo_array_svm.py @@ -12,7 +12,7 @@ ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) -alloc = SVMAllocator(ctx, cl.svm_mem_flags.READ_WRITE, queue=queue) +alloc = SVMAllocator(ctx, 0, cl.svm_mem_flags.READ_WRITE, queue) a_dev = cl_array.to_device(queue, a, allocator=alloc) print("A_DEV", a_dev.data) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 831d445ce..b414168c6 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -76,25 +76,6 @@ def _register_types(): # }}} -# {{{ svm allocator - -# FIXME: Replace me with C++ -class SVMAllocator: - def __init__(self, ctx, flags, *, alignment=0, queue=None): - self._context = ctx - self._flags = flags - self._alignment = alignment - self._queue = queue - - def __call__(self, nbytes): - import pyopencl as cl - return cl.SVMAllocation( - self._context, nbytes, self._alignment, self._flags, - queue=self._queue) - -# }}} - - # {{{ first-arg caches _first_arg_dependent_caches = [] diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 429e3e1b9..34b5e1cea 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -1,4 +1,4 @@ -// Warp memory pool +// Wrap memory pool // // Copyright (C) 2009 Andreas Kloeckner // @@ -349,7 +349,7 @@ namespace : m_context(ctx), m_alignment(alignment), m_flags(flags) { if (queue) - m_queue.set(m_queue.data()); + m_queue.set(queue->data()); } svm_allocator(svm_allocator const &src) @@ -679,6 +679,12 @@ void pyopencl_expose_mempool(py::module &m) .def("__call__", svm_allocator_call) ; } + + { + typedef svm_held_pointer cls; + py::class_ wrapper( + m, "_tools_SVMHeldPointer"); + } #endif }
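Taken together, this series lets pyopencl.array arrays be backed by shared virtual
memory: SVMPointer/SVMAllocation on the C++ side, the C++-backed SVMAllocator (and
memory-pool variants) in pyopencl.tools, and enqueue_fill as a uniform fill entry
point. The sketch below is a minimal end-to-end example modeled on
examples/demo_array_svm.py as it stands after the last patch; it assumes a device
and platform with OpenCL 2.0 coarse-grain buffer SVM support, and uses only names
introduced or touched in the patches above.

    import numpy as np

    import pyopencl as cl
    import pyopencl.array as cl_array
    from pyopencl.tools import SVMAllocator

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # SVMAllocator(context, alignment, flags, queue), as in the updated demo
    alloc = SVMAllocator(ctx, 0, cl.svm_mem_flags.READ_WRITE, queue)

    a = cl_array.to_device(queue, np.arange(8, dtype=np.float32), allocator=alloc)
    b = cl_array.empty_like(a)

    # enqueue_fill dispatches to clEnqueueSVMMemFill for SVM-backed memory;
    # this mirrors the byte-wise zero fill used by Array._zero_fill.
    cl.enqueue_fill(queue, b.base_data, np.int8(0), b.nbytes, offset=b.offset)

    print((a + b).get())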