diff --git a/doc/runtime_memory.rst b/doc/runtime_memory.rst index f4e01f266..ffe6661e8 100644 --- a/doc/runtime_memory.rst +++ b/doc/runtime_memory.rst @@ -281,6 +281,8 @@ Transfers .. autofunction:: enqueue_copy(queue, dest, src, **kwargs) +.. autofunction:: enqueue_fill(queue, dest, src, **kwargs) + Mapping Memory into Host Address Space -------------------------------------- diff --git a/examples/demo_array_svm.py b/examples/demo_array_svm.py new file mode 100644 index 000000000..eaba23209 --- /dev/null +++ b/examples/demo_array_svm.py @@ -0,0 +1,46 @@ +import pyopencl as cl +import pyopencl.array as cl_array +from pyopencl.tools import SVMAllocator +import numpy as np +import numpy.linalg as la + +n = 500000 +a = np.random.rand(n).astype(np.float32) +b = np.random.rand(n).astype(np.float32) + + +ctx = cl.create_some_context() +queue = cl.CommandQueue(ctx) + +alloc = SVMAllocator(ctx, 0, cl.svm_mem_flags.READ_WRITE, queue) + +a_dev = cl_array.to_device(queue, a, allocator=alloc) +print("A_DEV", a_dev.data) +b_dev = cl_array.to_device(queue, b, allocator=alloc) +dest_dev = cl_array.empty_like(a_dev) +print("DEST", dest_dev.data) + +prg = cl.Program(ctx, """ + __kernel void sum(__global const float *a, + __global const float *b, __global float *c) + { + int gid = get_global_id(0); + c[gid] = a[gid] + b[gid]; + } + """).build() + +knl = prg.sum # Use this Kernel object for repeated calls +knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data) + +# PROBLEM: numpy frees the temporary out of (a_dev+b_dev) before +# we're done with it +diff = dest_dev - (a_dev+b_dev) + +if 0: + diff = diff.get() + np.set_printoptions(linewidth=400) + print(dest_dev) + print((a_dev+b_dev).get()) + print(diff) + print(la.norm(diff)) + print("A_DEV", a_dev.data.mem.__array_interface__) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 6420e5afb..22562d1f9 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -21,6 +21,8 @@ """ from sys import intern +from warnings import warn +from typing import Union, Any from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa @@ -43,7 +45,6 @@ import os from os.path import dirname, join, realpath if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)): - from warnings import warn warn("It looks like you are importing PyOpenCL from " "its source directory. This likely won't work.") raise @@ -199,11 +200,9 @@ if get_cl_header_version() >= (2, 0): from pyopencl._cl import ( # noqa - SVMAllocation, + SVMPointer, SVM, - - # FIXME - #enqueue_svm_migratemem, + SVMAllocation, ) if _cl.have_gl(): @@ -267,7 +266,6 @@ class CommandQueueUsedAfterExit(UserWarning): def compiler_output(text): import os - from warnings import warn if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): warn(text, CompilerWarning) else: @@ -389,7 +387,6 @@ def enable_debugging(platform_or_context): import os os.environ["CPU_MAX_COMPUTE_UNITS"] = "1" else: - from warnings import warn warn("do not know how to enable debugging on '%s'" % platform.name) @@ -428,7 +425,6 @@ def _get_prg(self): return self._prg else: # "no program" can only happen in from-source case. - from warnings import warn warn("Pre-build attribute access defeats compiler caching.", stacklevel=3) @@ -662,7 +658,6 @@ def device_hashable_model_and_version_identifier(self): return ("v1", self.vendor, self.vendor_id, self.name, self.version) def device_persistent_unique_id(self): - from warnings import warn warn("Device.persistent_unique_id is deprecated. 
" "Use Device.hashable_model_and_version_identifier instead.", DeprecationWarning, stacklevel=2) @@ -684,7 +679,6 @@ def device_persistent_unique_id(self): def context_init(self, devices, properties, dev_type, cache_dir=None): if cache_dir is not None: - from warnings import warn warn("The 'cache_dir' argument to the Context constructor " "is deprecated and no longer has an effect. " "It was removed because it only applied to the wrapper " @@ -970,7 +964,6 @@ def image_init(self, context, flags, format, shape=None, pitches=None, if hostbuf is not None and not \ (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): - from warnings import warn warn("'hostbuf' was passed, but no memory flags to make use of it.") if hostbuf is None and pitches is not None: @@ -1043,7 +1036,6 @@ def image_init(self, context, flags, format, shape=None, pitches=None, class _ImageInfoGetter: def __init__(self, event): - from warnings import warn warn("Image.image.attr is deprecated and will go away in 2021. " "Use Image.attr directly, instead.") @@ -1150,24 +1142,19 @@ def memory_map_exit(self, exc_type, exc_val, exc_tb): """ if get_cl_header_version() >= (2, 0): - svmallocation_old_init = SVMAllocation.__init__ - - def svmallocation_init(self, ctx, size, alignment, flags, _interface=None): - """ - :arg ctx: a :class:`Context` - :arg flags: some of :class:`svm_mem_flags`. - """ - svmallocation_old_init(self, ctx, size, alignment, flags) - - # mem_flags.READ_ONLY applies to kernels, not the host - read_write = True - _interface["data"] = ( - int(self._ptr_as_int()), not read_write) - - self.__array_interface__ = _interface - - if get_cl_header_version() >= (2, 0): - SVMAllocation.__init__ = svmallocation_init + class _ArrayInterfaceSVMAllocation(SVMAllocation): + def __init__(self, ctx, size, alignment, flags, _interface=None, + queue=None): + """ + :arg ctx: a :class:`Context` + :arg flags: some of :class:`svm_mem_flags`. + """ + super().__init__(ctx, size, alignment, flags, queue) + + # mem_flags.READ_ONLY applies to kernels, not the host + read_write = True + _interface["data"] = ( + int(self._ptr_as_int()), not read_write) # }}} @@ -1778,12 +1765,14 @@ def enqueue_copy(queue, dest, src, **kwargs): else: raise ValueError("invalid dest mem object type") - elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVM): + elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVMPointer): # to SVM - if not isinstance(src, SVM): + if not isinstance(src, SVMPointer): src = SVM(src) is_blocking = kwargs.pop("is_blocking", True) + assert kwargs.pop("src_offset", 0) == 0 + assert kwargs.pop("dest_offset", 0) == 0 return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs) else: @@ -1809,7 +1798,7 @@ def enqueue_copy(queue, dest, src, **kwargs): queue, src, origin, region, dest, **kwargs) else: raise ValueError("invalid src mem object type") - elif isinstance(src, SVM): + elif isinstance(src, SVMPointer): # from svm # dest is not a SVM instance, otherwise we'd be in the branch above is_blocking = kwargs.pop("is_blocking", True) @@ -1822,6 +1811,26 @@ def enqueue_copy(queue, dest, src, **kwargs): # }}} +# {{{ enqueue_fill + +def enqueue_fill(queue: CommandQueue, dest: Union[MemoryObjectHolder, SVMPointer], + pattern: Any, size: int, *, offset: int = 0, wait_for=None) -> Event: + """ + .. 
versionadded:: 2022.2 + """ + if isinstance(dest, MemoryObjectHolder): + return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for) + elif isinstance(dest, SVMPointer): + if offset: + raise NotImplementedError("enqueue_fill with SVM does not yet support " + "offsets") + return enqueue_svm_memfill(queue, dest, pattern, size, wait_for) + else: + raise TypeError(f"enqueue_fill does not know how to fill '{type(dest)}'") + +# }}} + + # {{{ image creation DTYPE_TO_CHANNEL_TYPE = { @@ -1927,7 +1936,6 @@ def enqueue_barrier(queue, wait_for=None): def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): if not (queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2)): - from warnings import warn warn("The context for this queue does not declare OpenCL 1.2 support, so " "the next thing you might see is a crash") @@ -1944,7 +1952,7 @@ def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): """Fill shared virtual memory with a pattern. - :arg dest: a Python buffer object, optionally wrapped in an :class:`SVM` object + :arg dest: a Python buffer object, or any implementation of :class:`SVMPointer`. :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray` with the fill pattern to be used. :arg byte_count: The size of the memory to be fill. Defaults to the @@ -1955,8 +1963,8 @@ def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): .. versionadded:: 2016.2 """ - if not isinstance(dest, SVM): - dest = SVM(dest) + if not isinstance(dest, SVMPointer): + dest = SVMPointer(dest) return _cl._enqueue_svm_memfill( queue, dest, pattern, byte_count=None, wait_for=None) @@ -1965,7 +1973,7 @@ def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): """ :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy` - arrays), optionally wrapped in :class:`SVM` objects. + arrays), or any implementation of :class:`SVMPointer`. :arg flags: a combination of :class:`mem_migration_flags` |std-enqueue-blurb| @@ -1983,7 +1991,7 @@ def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): wait_for) -def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): +def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None): """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype* and *order*. (See :func:`numpy.empty` for the meaning of these arguments.) The array will be allocated in shared virtual memory belonging @@ -2001,6 +2009,10 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): will likely want to wrap the returned array in an :class:`SVM` tag. .. versionadded:: 2016.2 + + .. versionchanged:: 2022.2 + + *queue* argument added. 
""" dtype = np.dtype(dtype) @@ -2047,7 +2059,9 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): if alignment is None: alignment = itemsize - svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface) + svm_alloc = _ArrayInterfaceSVMAllocation( + ctx, nbytes, alignment, flags, _interface=interface, + queue=queue) return np.asarray(svm_alloc) diff --git a/pyopencl/array.py b/pyopencl/array.py index cb66121a5..540e43ad5 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -720,9 +720,14 @@ def set(self, ary, queue=None, async_=None, **kwargs): stacklevel=2) if self.size: - event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary, - device_offset=self.offset, - is_blocking=not async_) + if self.offset: + event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary, + device_offset=self.offset, + is_blocking=not async_) + else: + event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary, + is_blocking=not async_) + self.add_event(event1) def _get(self, queue=None, ary=None, async_=None, **kwargs): @@ -770,9 +775,14 @@ def _get(self, queue=None, ary=None, async_=None, **kwargs): "to associate one.") if self.size: - event1 = cl.enqueue_copy(queue, ary, self.base_data, - device_offset=self.offset, - wait_for=self.events, is_blocking=not async_) + if self.offset: + event1 = cl.enqueue_copy(queue, ary, self.base_data, + device_offset=self.offset, + wait_for=self.events, is_blocking=not async_) + else: + event1 = cl.enqueue_copy(queue, ary, self.base_data, + wait_for=self.events, is_blocking=not async_) + self.add_event(event1) else: event1 = None @@ -1458,8 +1468,8 @@ def _zero_fill(self, queue=None, wait_for=None): # https://github.com/inducer/pyopencl/issues/395 if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31): self.add_event( - cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0), - self.offset, self.nbytes, wait_for=wait_for)) + cl.enqueue_fill(queue, self.base_data, np.int8(0), + self.nbytes, offset=self.offset, wait_for=wait_for)) else: zero = np.zeros((), self.dtype) self.fill(zero, queue=queue) diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 27adac75b..b414168c6 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -33,7 +33,7 @@ import numpy as np from pytools import memoize, memoize_method -from pyopencl._cl import bitlog2 # noqa: F401 +from pyopencl._cl import bitlog2, get_cl_header_version # noqa: F401 from pytools.persistent_dict import KeyBuilder as KeyBuilderBase import re @@ -59,10 +59,19 @@ def _register_types(): # {{{ imported names from pyopencl._cl import ( # noqa - PooledBuffer as PooledBuffer, + _tools_PooledBuffer as PooledBuffer, _tools_DeferredAllocator as DeferredAllocator, _tools_ImmediateAllocator as ImmediateAllocator, - MemoryPool as MemoryPool) + _tools_MemoryPool as MemoryPool, + ) + + +if get_cl_header_version() >= (2, 0): + from pyopencl._cl import ( # noqa + _tools_SVMemoryPool as SVMemoryPool, + _tools_PooledSVM as PooledSVM, + _tools_SVMAllocator as SVMAllocator, + ) # }}} diff --git a/src/mempool.hpp b/src/mempool.hpp index 44f0fd643..812bd2f1c 100644 --- a/src/mempool.hpp +++ b/src/mempool.hpp @@ -233,7 +233,8 @@ namespace PYGPU_PACKAGE std::cout << "[pool] allocation of size " << size << " served from bin " << bin_nr << " which contained " << bin.size() << " entries" << std::endl; - return pop_block_from_bin(bin, size); + return m_allocator->hand_out_existing_block( + pop_block_from_bin(bin, size)); } size_type alloc_sz = alloc_size(bin_nr); @@ -256,7 +257,8 
@@ namespace PYGPU_PACKAGE m_allocator->try_release_blocks(); if (bin.size()) - return pop_block_from_bin(bin, size); + return m_allocator->hand_out_existing_block( + pop_block_from_bin(bin, size)); if (m_trace) std::cout << "[pool] allocation still OOM after GC" << std::endl; @@ -282,7 +284,7 @@ namespace PYGPU_PACKAGE "failed to free memory for allocation"); } - void free(pointer_type p, size_type size) + void free(pointer_type &&p, size_type size) { --m_active_blocks; m_active_bytes -= size; @@ -291,7 +293,7 @@ namespace PYGPU_PACKAGE if (!m_stop_holding) { inc_held_blocks(); - get_bin(bin_nr).push_back(p); + get_bin(bin_nr).push_back(std::move(p)); if (m_trace) std::cout << "[pool] block of size " << size << " returned to bin " @@ -300,7 +302,7 @@ namespace PYGPU_PACKAGE } else { - m_allocator->free(p); + m_allocator->free(std::move(p)); m_managed_bytes -= alloc_size(bin_nr); } } @@ -313,7 +315,7 @@ namespace PYGPU_PACKAGE while (bin.size()) { - m_allocator->free(bin.back()); + m_allocator->free(std::move(bin.back())); m_managed_bytes -= alloc_size(bin_pair.first); bin.pop_back(); @@ -353,7 +355,7 @@ namespace PYGPU_PACKAGE if (bin.size()) { - m_allocator->free(bin.back()); + m_allocator->free(std::move(bin.back())); m_managed_bytes -= alloc_size(bin_pair.first); bin.pop_back(); @@ -379,7 +381,7 @@ namespace PYGPU_PACKAGE pointer_type pop_block_from_bin(bin_t &bin, size_type size) { - pointer_type result = bin.back(); + pointer_type result(std::move(bin.back())); bin.pop_back(); dec_held_blocks(); @@ -399,7 +401,7 @@ namespace PYGPU_PACKAGE typedef typename Pool::pointer_type pointer_type; typedef typename Pool::size_type size_type; - private: + protected: PYGPU_SHARED_PTR m_pool; pointer_type m_ptr; @@ -421,7 +423,7 @@ namespace PYGPU_PACKAGE { if (m_valid) { - m_pool->free(m_ptr, m_size); + m_pool->free(std::move(m_ptr), m_size); m_valid = false; } else @@ -435,16 +437,8 @@ namespace PYGPU_PACKAGE #endif ); } - - pointer_type ptr() const - { return m_ptr; } - - size_type size() const - { return m_size; } }; } - - #endif diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 98964056f..ad60372db 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -137,10 +137,8 @@ // }}} +// {{{ macros and typedefs for wrappers - - -// {{{ tools #if PY_VERSION_HEX >= 0x02050000 typedef Py_ssize_t PYOPENCL_BUFFER_SIZE_T; #else @@ -229,8 +227,6 @@ } - - #define PYOPENCL_RETRY_IF_MEM_ERROR(OPERATION) \ { \ bool failed_with_mem_error = false; \ @@ -260,8 +256,20 @@ } \ } + +#define PYOPENCL_GET_SVM_SIZE(NAME) \ + size_t NAME##_size; \ + bool NAME##_has_size = false; \ + try \ + { \ + NAME##_size = NAME.size(); \ + NAME##_has_size = true; \ + } \ + catch (size_not_available) { } + // }}} + // {{{ tracing and error reporting #ifdef PYOPENCL_TRACE #define PYOPENCL_PRINT_CALL_TRACE(NAME) \ @@ -329,6 +337,7 @@ // }}} + // {{{ get_info helpers #define PYOPENCL_GET_OPAQUE_INFO(WHAT, FIRST_ARG, SECOND_ARG, CL_TYPE, TYPE) \ { \ @@ -383,6 +392,7 @@ // }}} + // {{{ event helpers -------------------------------------------------------------- #define PYOPENCL_PARSE_WAIT_FOR \ cl_uint num_events_in_wait_list = 0; \ @@ -424,7 +434,9 @@ // }}} + // {{{ equality testing + #define PYOPENCL_EQUALITY_TESTS(cls) \ bool operator==(cls const &other) const \ { return data() == other.data(); } \ @@ -432,8 +444,8 @@ { return data() != other.data(); } \ long hash() const \ { return (long) (intptr_t) data(); } -// }}} +// }}} namespace pyopencl @@ -496,6 +508,19 @@ namespace pyopencl // }}} + // {{{ utility functions + + inline bool 
is_queue_out_of_order(cl_command_queue queue) + { + cl_command_queue_properties param_value; + PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, + (queue, CL_QUEUE_PROPERTIES, sizeof(param_value), ¶m_value, 0)); + return param_value & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + } + + // }}} + + // {{{ buffer interface helper @@ -1655,6 +1680,82 @@ namespace pyopencl // }}} + // {{{ command_queue_ref + + // In contrast to command_queue, command_queue_ref is "nullable", i.e. + // it is a RAII *optional* reference to a command queue. + + class command_queue_ref + { + private: + bool m_valid; + cl_command_queue m_queue; + + public: + command_queue_ref() + : m_valid(false) + {} + + command_queue_ref(cl_command_queue queue) + : m_valid(true), m_queue(queue) + { + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); + } + + command_queue_ref(command_queue_ref &&src) + : m_valid(src.m_valid), m_queue(src.m_queue) + { + src.m_valid = false; + } + + command_queue_ref(const command_queue_ref &) + { + throw error("command_queue_ref", CL_INVALID_VALUE, + "command_queue_ref copy constructor is never supposed to be called; " + "all notional invocations should be eliminated because of NRVO"); + } + + command_queue_ref &operator=(const command_queue_ref &) = delete; + + ~command_queue_ref() + { + reset(); + } + + bool is_valid() const + { + return m_valid; + } + + cl_command_queue data() const + { + if (m_valid) + return m_queue; + else + throw error("command_queue_ref.data", CL_INVALID_VALUE, + "command_queue_ref is not valid"); + } + + void reset() + { + if (m_valid) + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue, (m_queue)); + m_valid = false; + } + + void set(cl_command_queue queue) + { + if (m_valid) + PYOPENCL_CALL_GUARDED(clReleaseCommandQueue, (m_queue)); + m_queue = queue; + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); + m_valid = true; + } + }; + + // }}} + + // {{{ event/synchronization class event : noncopyable @@ -3441,11 +3542,26 @@ namespace pyopencl // }}} - // {{{ svm - #if PYOPENCL_CL_VERSION >= 0x2000 - class svm_arg_wrapper + // {{{ svm pointer + + class size_not_available { }; + + class svm_pointer + { + public: + virtual void *svm_ptr() const = 0; + // may throw size_not_available + virtual size_t size() const = 0; + }; + + // }}} + + + // {{{ svm_arg_wrapper + + class svm_arg_wrapper : public svm_pointer { private: void *m_ptr; @@ -3468,7 +3584,7 @@ namespace pyopencl m_size = ward->m_buf.len; } - void *ptr() const + void *svm_ptr() const { return m_ptr; } @@ -3478,17 +3594,34 @@ namespace pyopencl } }; + // }}} + + + // {{{ svm_allocation - class svm_allocation : noncopyable + class svm_allocation : public svm_pointer { private: std::shared_ptr m_context; void *m_allocation; + size_t m_size; + command_queue_ref m_queue; + // FIXME Should keep a list of events so that we can wait for users + // to finish in the case of out-of-order queues. 
public: - svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, cl_svm_mem_flags flags) - : m_context(ctx) + svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, + cl_svm_mem_flags flags, const command_queue *queue = nullptr) + : m_context(ctx), m_size(size) { + if (queue) + { + m_queue.set(queue->data()); + if (is_queue_out_of_order(m_queue.data())) + throw error("SVMAllocation.__init__", CL_INVALID_VALUE, + "supplying an out-of-order queue to SVMAllocation is invalid"); + } + PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); m_allocation = clSVMAlloc( ctx->data(), @@ -3498,6 +3631,9 @@ namespace pyopencl throw pyopencl::error("clSVMAlloc", CL_OUT_OF_RESOURCES); } + svm_allocation(const svm_allocation &) = delete; + svm_allocation &operator=(const svm_allocation &) = delete; + ~svm_allocation() { if (m_allocation) @@ -3510,8 +3646,20 @@ namespace pyopencl throw error("SVMAllocation.release", CL_INVALID_VALUE, "trying to double-unref svm allocation"); - clSVMFree(m_context->data(), m_allocation); - m_allocation = nullptr; + if (m_queue.is_valid()) + { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, ( + m_queue.data(), 1, &m_allocation, + nullptr, nullptr, + 0, nullptr, nullptr)); + m_queue.reset(); + } + else + { + PYOPENCL_PRINT_CALL_TRACE("clSVMFree"); + clSVMFree(m_context->data(), m_allocation); + m_allocation = nullptr; + } } void enqueue_release(command_queue &queue, py::object py_wait_for) @@ -3532,11 +3680,16 @@ namespace pyopencl m_allocation = nullptr; } - void *ptr() const + void *svm_ptr() const { return m_allocation; } + size_t size() const + { + return m_size; + } + intptr_t ptr_as_int() const { return (intptr_t) m_allocation; @@ -3551,22 +3704,95 @@ namespace pyopencl { return m_allocation != other.m_allocation; } + + void bind_to_queue(command_queue const &queue) + { + if (is_queue_out_of_order(queue.data())) + throw error("SVMAllocation.bind_to_queue", CL_INVALID_VALUE, + "supplying an out-of-order queue to SVMAllocation is invalid"); + + if (m_queue.is_valid()) + { + // make sure synchronization promises stay valid in new queue + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, (m_queue.data(), &evt)); + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (queue.data(), 1, &evt)); + } + + m_queue.set(queue.data()); + } + + void unbind_from_queue() + { + // NOTE: This absolves the allocation from any synchronization promises + // made. Keeping those before calling this method is the responsibility + // of the user. 
+ m_queue.reset(); + } }; + // }}} + + + // {{{ svm operations inline event *enqueue_svm_memcpy( command_queue &cq, cl_bool is_blocking, - svm_arg_wrapper &dst, svm_arg_wrapper &src, - py::object py_wait_for + svm_pointer &dst, svm_pointer &src, + py::object py_wait_for, + py::object byte_count_py ) { PYOPENCL_PARSE_WAIT_FOR; - if (src.size() != dst.size()) + // {{{ process size + + PYOPENCL_GET_SVM_SIZE(src); + PYOPENCL_GET_SVM_SIZE(dst); + + size_t size; + bool have_size = false; + + if (src_has_size) + { + size = src_size; + have_size = true; + } + if (dst_has_size) + { + if (have_size) + { + if (!byte_count_py.is_none()) + size = std::min(size, dst_size); + else if (size != dst_size) + throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, + "sizes of source and destination buffer do not match"); + } + else + { + size = dst_size; + have_size = true; + } + } + + if (!byte_count_py.is_none()) + { + size_t byte_count = byte_count_py.cast(); + if (have_size && byte_count > size) + throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, + "specified byte_count larger than size of source or destination buffers"); + size = byte_count; + have_size = true; + } + + if (!have_size) throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, - "sizes of source and destination buffer do not match"); + "size not passed and could not be determined"); + + // }}} cl_event evt; PYOPENCL_CALL_GUARDED( @@ -3574,8 +3800,8 @@ namespace pyopencl ( cq.data(), is_blocking, - dst.ptr(), src.ptr(), - dst.size(), + dst.svm_ptr(), src.svm_ptr(), + size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3587,7 +3813,7 @@ namespace pyopencl inline event *enqueue_svm_memfill( command_queue &cq, - svm_arg_wrapper &dst, py::object py_pattern, + svm_pointer &dst, py::object py_pattern, py::object byte_count, py::object py_wait_for ) @@ -3604,18 +3830,41 @@ namespace pyopencl pattern_ptr = pattern_ward->m_buf.buf; pattern_len = pattern_ward->m_buf.len; - size_t fill_size = dst.size(); + // {{{ process size + + PYOPENCL_GET_SVM_SIZE(dst); + + size_t size; + bool have_size = false; + if (dst_has_size) + { + size = dst_size; + have_size = true; + } if (!byte_count.is_none()) - fill_size = py::cast(byte_count); + { + size_t user_size = py::cast(byte_count); + if (have_size && user_size > size) + throw error("enqueue_svm_memfill", CL_INVALID_VALUE, + "byte_count too large for specified SVM buffer"); + } + + if (!have_size) + { + throw error("enqueue_svm_memfill", CL_INVALID_VALUE, + "byte_count not passed and could not be determined"); + } + + // }}} cl_event evt; PYOPENCL_CALL_GUARDED( clEnqueueSVMMemFill, ( cq.data(), - dst.ptr(), pattern_ptr, + dst.svm_ptr(), pattern_ptr, pattern_len, - fill_size, + size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3629,12 +3878,40 @@ namespace pyopencl command_queue &cq, cl_bool is_blocking, cl_map_flags flags, - svm_arg_wrapper &svm, - py::object py_wait_for + svm_pointer &svm, + py::object py_wait_for, + py::object user_size_py ) { PYOPENCL_PARSE_WAIT_FOR; + // {{{ process size + + PYOPENCL_GET_SVM_SIZE(svm); + + size_t size; + bool have_size = false; + if (svm_has_size) + { + size = svm_size; + have_size = true; + } + if (!user_size_py.is_none()) + { + size_t user_size = py::cast(user_size_py); + if (have_size && user_size > size) + throw error("enqueue_svm_memfill", CL_INVALID_VALUE, + "user-provided size too large for specified SVM buffer"); + } + + if (!have_size) + { + throw error("enqueue_svm_mem_map", CL_INVALID_VALUE, + "size not passed and could not be determined"); + } + + // }}} + cl_event evt; 
PYOPENCL_CALL_GUARDED( clEnqueueSVMMap, @@ -3642,7 +3919,7 @@ namespace pyopencl cq.data(), is_blocking, flags, - svm.ptr(), svm.size(), + svm.svm_ptr(), size, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3654,7 +3931,7 @@ namespace pyopencl inline event *enqueue_svm_unmap( command_queue &cq, - svm_arg_wrapper &svm, + svm_pointer &svm, py::object py_wait_for ) { @@ -3665,7 +3942,7 @@ namespace pyopencl clEnqueueSVMUnmap, ( cq.data(), - svm.ptr(), + svm.svm_ptr(), PYOPENCL_WAITLIST_ARGS, &evt )); @@ -3691,9 +3968,9 @@ namespace pyopencl for (py::handle py_svm: svms) { - svm_arg_wrapper &svm(py::cast(py_svm)); + svm_pointer &svm(py::cast(py_svm)); - svm_pointers.push_back(svm.ptr()); + svm_pointers.push_back(svm.svm_ptr()); sizes.push_back(svm.size()); } @@ -4486,10 +4763,10 @@ namespace pyopencl } #if PYOPENCL_CL_VERSION >= 0x2000 - void set_arg_svm(cl_uint arg_index, svm_arg_wrapper const &wrp) + void set_arg_svm(cl_uint arg_index, svm_pointer const &wrp) { PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer, - (m_kernel, arg_index, wrp.ptr())); + (m_kernel, arg_index, wrp.svm_ptr())); } #endif @@ -4511,7 +4788,7 @@ namespace pyopencl #if PYOPENCL_CL_VERSION >= 0x2000 try { - set_arg_svm(arg_index, arg.cast()); + set_arg_svm(arg_index, arg.cast()); return; } catch (py::cast_error &) { } diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 0c9a0d1b1..3c40e1c90 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -295,25 +295,50 @@ void pyopencl_expose_part_2(py::module &m) // {{{ svm #if PYOPENCL_CL_VERSION >= 0x2000 + { + typedef svm_pointer cls; + py::class_(m, "SVMPointer", py::dynamic_attr()) + .def("_ptr_as_int", [](cls &self) { return (intptr_t) self.svm_ptr(); }) + .def("_size", [](cls &self) -> py::object + { + try + { + return py::cast(self.size()); + } + catch (size_not_available) + { + return py::none(); + } + }) + ; + } + { typedef svm_arg_wrapper cls; - py::class_(m, "SVM", py::dynamic_attr()) + py::class_(m, "SVM", py::dynamic_attr()) .def(py::init()) ; } { typedef svm_allocation cls; - py::class_(m, "SVMAllocation", py::dynamic_attr()) - .def(py::init, size_t, cl_uint, cl_svm_mem_flags>()) + py::class_(m, "SVMAllocation", py::dynamic_attr()) + .def(py::init, size_t, cl_uint, cl_svm_mem_flags, const command_queue *>(), + py::arg("context"), + py::arg("size"), + py::arg("alignment"), + py::arg("flags"), + py::arg("queue").none(true)=py::none() + ) .DEF_SIMPLE_METHOD(release) .def("enqueue_release", &cls::enqueue_release, ":returns: a :class:`pyopencl.Event`\n\n" "|std-enqueue-blurb|") - .def("_ptr_as_int", &cls::ptr_as_int) .def(py::self == py::self) .def(py::self != py::self) - .def("__hash__", &cls::ptr_as_int) + .def("__hash__", [](cls &self) { return (intptr_t) self.svm_ptr(); }) + .DEF_SIMPLE_METHOD(bind_to_queue) + .DEF_SIMPLE_METHOD(unbind_from_queue) ; } @@ -322,7 +347,8 @@ void pyopencl_expose_part_2(py::module &m) py::arg("is_blocking"), py::arg("dst"), py::arg("src"), - py::arg("wait_for")=py::none() + py::arg("wait_for")=py::none(), + py::arg("byte_count")=py::none() ); m.def("_enqueue_svm_memfill", enqueue_svm_memfill, @@ -338,7 +364,8 @@ void pyopencl_expose_part_2(py::module &m) py::arg("is_blocking"), py::arg("flags"), py::arg("svm"), - py::arg("wait_for")=py::none() + py::arg("wait_for")=py::none(), + py::arg("size")=py::none() ); m.def("_enqueue_svm_unmap", enqueue_svm_unmap, diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 6b014ba5e..34b5e1cea 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -1,4 +1,4 @@ -// Warp 
memory pool +// Wrap memory pool // // Copyright (C) 2009 Andreas Kloeckner // @@ -57,12 +57,18 @@ namespace { return false; } + virtual pointer_type allocate(size_type s) { return nullptr; } - void free(pointer_type p) + virtual pointer_type hand_out_existing_block(pointer_type &&p) + { + return p; + } + + void free(pointer_type &&p) { } void try_release_blocks() @@ -70,14 +76,16 @@ namespace }; - class cl_allocator_base + // {{{ buffer allocators + + class buffer_allocator_base { protected: std::shared_ptr m_context; cl_mem_flags m_flags; public: - cl_allocator_base(std::shared_ptr const &ctx, + buffer_allocator_base(std::shared_ptr const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : m_context(ctx), m_flags(flags) { @@ -86,21 +94,26 @@ namespace "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); } - cl_allocator_base(cl_allocator_base const &src) + buffer_allocator_base(buffer_allocator_base const &src) : m_context(src.m_context), m_flags(src.m_flags) { } - virtual ~cl_allocator_base() + virtual ~buffer_allocator_base() { } typedef cl_mem pointer_type; typedef size_t size_type; - virtual cl_allocator_base *copy() const = 0; + virtual buffer_allocator_base *copy() const = 0; virtual bool is_deferred() const = 0; virtual pointer_type allocate(size_type s) = 0; - void free(pointer_type p) + virtual pointer_type hand_out_existing_block(pointer_type p) + { + return p; + } + + void free(pointer_type &&p) { PYOPENCL_CALL_GUARDED(clReleaseMemObject, (p)); } @@ -111,20 +124,21 @@ namespace } }; - class cl_deferred_allocator : public cl_allocator_base + + class deferred_buffer_allocator : public buffer_allocator_base { private: - typedef cl_allocator_base super; + typedef buffer_allocator_base super; public: - cl_deferred_allocator(std::shared_ptr const &ctx, + deferred_buffer_allocator(std::shared_ptr const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : super(ctx, flags) { } - cl_allocator_base *copy() const + buffer_allocator_base *copy() const { - return new cl_deferred_allocator(*this); + return new deferred_buffer_allocator(*this); } bool is_deferred() const @@ -141,26 +155,26 @@ namespace const unsigned zero = 0; - class cl_immediate_allocator : public cl_allocator_base + class immediate_buffer_allocator : public buffer_allocator_base { private: - typedef cl_allocator_base super; + typedef buffer_allocator_base super; pyopencl::command_queue m_queue; public: - cl_immediate_allocator(pyopencl::command_queue &queue, + immediate_buffer_allocator(pyopencl::command_queue &queue, cl_mem_flags flags=CL_MEM_READ_WRITE) : super(std::shared_ptr(queue.get_context()), flags), m_queue(queue.data(), /*retain*/ true) { } - cl_immediate_allocator(cl_immediate_allocator const &src) + immediate_buffer_allocator(immediate_buffer_allocator const &src) : super(src), m_queue(src.m_queue) { } - cl_allocator_base *copy() const + buffer_allocator_base *copy() const { - return new cl_immediate_allocator(*this); + return new immediate_buffer_allocator(*this); } bool is_deferred() const @@ -210,11 +224,13 @@ namespace } }; + // }}} + // {{{ buffer_allocator_call inline - pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size) + pyopencl::buffer *buffer_allocator_call(buffer_allocator_base &alloc, size_t size) { cl_mem mem; int try_count = 0; @@ -256,16 +272,18 @@ namespace } } + // }}} + // {{{ pooled_buffer class pooled_buffer - : public pyopencl::pooled_allocation >, + : public pyopencl::pooled_allocation >, public pyopencl::memory_object_holder { private: typedef - pyopencl::pooled_allocation > + 
pyopencl::pooled_allocation > super; public: @@ -275,22 +293,233 @@ namespace { } const super::pointer_type data() const - { return ptr(); } + { return m_ptr; } + + size_t size() const + { + return m_size; + } }; + // }}} + // {{{ buffer_pool_allocate - pooled_buffer *device_pool_allocate( - std::shared_ptr > pool, - pyopencl::memory_pool::size_type sz) + pooled_buffer *buffer_pool_allocate( + std::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) { return new pooled_buffer(pool, sz); } + // }}} + + +#if PYOPENCL_CL_VERSION >= 0x2000 + + // {{{ svm allocator + // FIXME: Does this need deferred and immediate just like the buffer-level + // allocators? (I.e. can I tell whether I am out of memory just from allocations?) + struct svm_held_pointer + { + void *ptr; + pyopencl::command_queue_ref queue; + }; + class svm_allocator + { + public: + typedef svm_held_pointer pointer_type; + typedef size_t size_type; + + protected: + std::shared_ptr m_context; + cl_uint m_alignment; + cl_svm_mem_flags m_flags; + pyopencl::command_queue_ref m_queue; + + + public: + svm_allocator(std::shared_ptr const &ctx, + cl_uint alignment, cl_svm_mem_flags flags=CL_MEM_READ_WRITE, + pyopencl::command_queue *queue=nullptr) + : m_context(ctx), m_alignment(alignment), m_flags(flags) + { + if (queue) + m_queue.set(queue->data()); + } + + svm_allocator(svm_allocator const &src) + : m_context(src.m_context), m_alignment(src.m_alignment), + m_flags(src.m_flags) + { } + + virtual ~svm_allocator() + { } + + virtual svm_allocator *copy() const + { + return new svm_allocator(m_context, m_alignment, m_flags); + } + + virtual bool is_deferred() const + { + // FIXME: I don't know whether that's true. + return false; + } + + pointer_type allocate(size_type size) + { + if (size == 0) + return { nullptr, nullptr }; + + PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); + return { + clSVMAlloc(m_context->data(), m_flags, size, m_alignment), + pyopencl::command_queue_ref(m_queue.data()) + }; + } + + virtual pointer_type hand_out_existing_block(pointer_type &&p) + { + if (m_queue.is_valid()) + { + if (p.queue.is_valid()) + { + // make sure synchronization promises stay valid in new queue + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, (p.queue.data(), &evt)); + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (m_queue.data(), 1, &evt)); + } + p.queue.set(m_queue.data()); + } + return p; + } + + void free(pointer_type &&p) + { + if (p.queue.is_valid()) + { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, ( + p.queue.data(), 1, &p.ptr, + nullptr, nullptr, + 0, nullptr, nullptr)); + p.queue.reset(); + } + else + { + PYOPENCL_PRINT_CALL_TRACE("clSVMFree"); + clSVMFree(m_context->data(), p.ptr); + } + } + + void try_release_blocks() + { + pyopencl::run_python_gc(); + } + }; + + // }}} + + + // {{{ svm_allocator_call + + inline + svm_held_pointer svm_allocator_call(svm_allocator &alloc, size_t size) + { + svm_held_pointer mem; + int try_count = 0; + while (true) + { + try + { + return alloc.allocate(size); + } + catch (pyopencl::error &e) + { + if (!e.is_out_of_memory()) + throw; + if (++try_count == 2) + throw; + } + + alloc.try_release_blocks(); + } + } + + // }}} + + + // {{{ pooled_svm + + class pooled_svm + : public pyopencl::pooled_allocation>, + public pyopencl::svm_pointer + { + private: + typedef + pyopencl::pooled_allocation> + super; + + public: + pooled_svm( + std::shared_ptr p, super::size_type s) + : super(p, s) + { } + + void *svm_ptr() const + { return m_ptr.ptr; } + + size_t size() const + { return m_size; } + + void 
bind_to_queue(pyopencl::command_queue const &queue) + { + if (pyopencl::is_queue_out_of_order(queue.data())) + throw pyopencl::error("PooledSVM.bind_to_queue", CL_INVALID_VALUE, + "supplying an out-of-order queue to SVMAllocation is invalid"); + + if (m_ptr.queue.is_valid()) + { + // make sure synchronization promises stay valid in new queue + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, (m_ptr.queue.data(), &evt)); + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (queue.data(), 1, &evt)); + } + + m_ptr.queue.set(queue.data()); + } + + void unbind_from_queue() + { + // NOTE: This absolves the allocation from any synchronization promises + // made. Keeping those before calling this method is the responsibility + // of the user. + m_ptr.queue.reset(); + } + }; + + // }}} + + + // {{{ svm_pool_allocate + + pooled_svm *svm_pool_allocate( + std::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) + { + return new pooled_svm(pool, sz); + } + + // }}} + +#endif + template void expose_memory_pool(Wrapper &wrapper) { @@ -316,11 +545,11 @@ void pyopencl_expose_mempool(py::module &m) m.def("bitlog2", pyopencl::bitlog2); { - typedef cl_allocator_base cls; + typedef buffer_allocator_base cls; py::class_ wrapper( m, "_tools_AllocatorBase"/*, py::no_init */); wrapper - .def("__call__", allocator_call) + .def("__call__", buffer_allocator_call) ; } @@ -345,8 +574,8 @@ void pyopencl_expose_mempool(py::module &m) } { - typedef cl_deferred_allocator cls; - py::class_ wrapper( + typedef deferred_buffer_allocator cls; + py::class_ wrapper( m, "_tools_DeferredAllocator"); wrapper .def(py::init< @@ -359,8 +588,8 @@ void pyopencl_expose_mempool(py::module &m) } { - typedef cl_immediate_allocator cls; - py::class_ wrapper( + typedef immediate_buffer_allocator cls; + py::class_ wrapper( m, "_tools_ImmediateAllocator"); wrapper .def(py::init()) @@ -370,18 +599,18 @@ void pyopencl_expose_mempool(py::module &m) } { - typedef pyopencl::memory_pool cls; + typedef pyopencl::memory_pool cls; py::class_< cls, /* boost::noncopyable, */ - std::shared_ptr> wrapper( m, "MemoryPool"); + std::shared_ptr> wrapper( m, "_tools_MemoryPool"); wrapper - .def(py::init(), + .def(py::init(), py::arg("allocator"), py::arg("leading_bits_in_bin_id")=4 ) - .def("allocate", device_pool_allocate) - .def("__call__", device_pool_allocate) + .def("allocate", buffer_pool_allocate) + .def("__call__", buffer_pool_allocate) // undoc for now .DEF_SIMPLE_METHOD(set_trace) ; @@ -393,8 +622,70 @@ void pyopencl_expose_mempool(py::module &m) typedef pooled_buffer cls; py::class_( - m, "PooledBuffer"/* , py::no_init */) + m, "_tools_PooledBuffer"/* , py::no_init */) + .def("release", &cls::free) + // undocumented for now, for consistency with SVM + .def("bind_to_queue", [](cls &self, pyopencl::command_queue &queue) { /* no-op */ }) + .def("unbind_from_queue", [](cls &self) { /* no-op */ }) + ; + } + +#if PYOPENCL_CL_VERSION >= 0x2000 + { + typedef pyopencl::memory_pool cls; + + py::class_< + cls, /* boost::noncopyable, */ + std::shared_ptr> wrapper( m, "_tools_SVMemoryPool"); + wrapper + .def(py::init(), + py::arg("allocator"), + py::arg("leading_bits_in_bin_id")=4 + ) + .def("allocate", svm_pool_allocate) + .def("__call__", svm_pool_allocate) + // undoc for now + .DEF_SIMPLE_METHOD(set_trace) + ; + + expose_memory_pool(wrapper); + } + + { + typedef pooled_svm cls; + py::class_( + m, "_tools_PooledSVM"/* , py::no_init */) .def("release", &cls::free) + .def("__eq__", [](const cls &self, const cls &other) + { return self.svm_ptr() == 
other.svm_ptr(); }) + .def("__hash__", [](cls &self) { return (intptr_t) self.svm_ptr(); }) + .DEF_SIMPLE_METHOD(bind_to_queue) + .DEF_SIMPLE_METHOD(unbind_from_queue) ; } + + { + typedef svm_allocator cls; + py::class_ wrapper( + m, "_tools_SVMAllocator"); + wrapper + .def(py::init const &, cl_uint, cl_uint, pyopencl::command_queue *>(), + py::arg("context"), + py::arg("alignment"), + py::arg("flags")=CL_MEM_READ_WRITE, + py::arg("command_queue").none(true)=nullptr + ) + .def("__call__", svm_allocator_call) + ; + } + + { + typedef svm_held_pointer cls; + py::class_ wrapper( + m, "_tools_SVMHeldPointer"); + } +#endif } + +// vim: foldmethod=marker
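
A rough usage sketch of the entry points this patch introduces (`enqueue_fill`, queue-bound `SVMAllocation`, and the SVM memory pool). It is not part of the diff above; it assumes an OpenCL 2.0 device with coarse-grain SVM support, and the variable names and sizes are illustrative only:

    import numpy as np
    import pyopencl as cl

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Buffer destination: enqueue_fill dispatches to clEnqueueFillBuffer
    # (requires OpenCL 1.2).
    buf = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, size=4096)
    cl.enqueue_fill(queue, buf, np.float32(0), size=4096)

    # SVM destination: the allocation remembers its (in-order) queue so
    # that freeing it can be ordered against still-enqueued work;
    # enqueue_fill dispatches to clEnqueueSVMMemFill.
    svm = cl.SVMAllocation(ctx, 4096, alignment=0,
                           flags=cl.svm_mem_flags.READ_WRITE, queue=queue)
    cl.enqueue_fill(queue, svm, np.float32(7), size=4096)
    queue.finish()

The pooled variant follows the same pattern; a sketch of the pool and of re-binding an allocation to a different in-order queue, again with illustrative names:

    from pyopencl.tools import SVMAllocator, SVMemoryPool

    svm_alloc = SVMAllocator(ctx, 0, cl.svm_mem_flags.READ_WRITE, queue)
    svm_pool = SVMemoryPool(svm_alloc)

    chunk = svm_pool(4096)        # a PooledSVM, which is an SVMPointer
    cl.enqueue_fill(queue, chunk, np.float32(1), size=4096)

    queue2 = cl.CommandQueue(ctx)
    chunk.bind_to_queue(queue2)   # later frees are ordered against queue2
    chunk.release()               # returns the block to the pool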