From ba7c2056034f93d1eb6f5594b6aa4185dbf8cffe Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 30 Aug 2022 22:01:37 -0500 Subject: [PATCH 01/19] LazilyPyOpenCLCompilingFunctionCaller: limit arg size for GPUs --- arraycontext/impl/pytato/compile.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 07cb57b9..6392fa2f 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -415,12 +415,22 @@ def _dag_to_transformed_pytato_prg(self, dict_of_named_arrays, *, prg_id=None): prg_id, "pre_generate_loopy", pt_dict_of_named_arrays) with ProcessLogger(logger, f"generate_loopy for '{prg_id}'"): + import pyopencl as cl + dev = self.actx.context.devices[0] + limit = dev.max_parameter_size + target = None + if dev.type & cl.device_type.GPU: + # Leave some extra space since our sizes are estimates + target = lp.PyOpenCLTarget(limit_arg_size_nbytes=limit//2) + pytato_program = pt.generate_loopy( pt_dict_of_named_arrays, options=lp.Options( return_dict=True, no_numpy=True), - function_name=_prg_id_to_kernel_name(prg_id)) + function_name=_prg_id_to_kernel_name(prg_id), + target=target, + ) assert isinstance(pytato_program, BoundPyOpenCLProgram) self.actx._compile_trace_callback( From 07be5602990af78628f8a4525cf7c477e899fe68 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 30 Aug 2022 22:23:45 -0500 Subject: [PATCH 02/19] move limit --- arraycontext/impl/pytato/compile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 6392fa2f..a9a84f5b 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -417,9 +417,9 @@ def _dag_to_transformed_pytato_prg(self, dict_of_named_arrays, *, prg_id=None): with ProcessLogger(logger, f"generate_loopy for '{prg_id}'"): import pyopencl as cl dev = self.actx.context.devices[0] - limit = dev.max_parameter_size target = None if dev.type & cl.device_type.GPU: + limit = dev.max_parameter_size # Leave some extra space since our sizes are estimates target = lp.PyOpenCLTarget(limit_arg_size_nbytes=limit//2) From 620ac829ad99065b73a0a49235192dbdfa380006 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 31 Aug 2022 12:18:26 -0500 Subject: [PATCH 03/19] also check for SVM presence --- arraycontext/impl/pytato/compile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index a9a84f5b..24848073 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -418,7 +418,8 @@ def _dag_to_transformed_pytato_prg(self, dict_of_named_arrays, *, prg_id=None): import pyopencl as cl dev = self.actx.context.devices[0] target = None - if dev.type & cl.device_type.GPU: + if (dev.type & cl.device_type.GPU + and cl.characterize.has_coarse_grain_buffer_svm(dev)): limit = dev.max_parameter_size # Leave some extra space since our sizes are estimates target = lp.PyOpenCLTarget(limit_arg_size_nbytes=limit//2) From 1e54ce49945684008ab48a1a7f2793fc5590a702 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 7 Sep 2022 16:05:07 -0500 Subject: [PATCH 04/19] get_target() --- arraycontext/impl/pytato/__init__.py | 15 ++++++++++++++- arraycontext/impl/pytato/compile.py | 11 +---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index afbe7ce9..65350443 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -321,6 +321,18 @@ def _to_numpy(ary): self._rec_map_container(_to_numpy, self.freeze(array)), actx=None) + def get_target(self): + import pyopencl as cl + from pytato.target.loopy import LoopyPyOpenCLTarget + dev = self.queue.device + target = None + if (dev.type & cl.device_type.GPU + and cl.characterize.has_coarse_grain_buffer_svm(dev)): + target = LoopyPyOpenCLTarget( + limit_arg_size_nbytes=dev.max_parameter_size) + + return target + def freeze(self, array): if np.isscalar(array): return array @@ -415,7 +427,8 @@ def _record_leaf_ary_in_dict( pt_prg = pt.generate_loopy(transformed_dag, options=_DEFAULT_LOOPY_OPTIONS, cl_device=self.queue.device, - function_name=function_name) + function_name=function_name, + target=self.get_target()) pt_prg = pt_prg.with_transformed_program(self.transform_loopy_program) self._freeze_prg_cache[normalized_expr] = pt_prg else: diff --git a/arraycontext/impl/pytato/compile.py b/arraycontext/impl/pytato/compile.py index 24848073..ac4c01d7 100644 --- a/arraycontext/impl/pytato/compile.py +++ b/arraycontext/impl/pytato/compile.py @@ -415,22 +415,13 @@ def _dag_to_transformed_pytato_prg(self, dict_of_named_arrays, *, prg_id=None): prg_id, "pre_generate_loopy", pt_dict_of_named_arrays) with ProcessLogger(logger, f"generate_loopy for '{prg_id}'"): - import pyopencl as cl - dev = self.actx.context.devices[0] - target = None - if (dev.type & cl.device_type.GPU - and cl.characterize.has_coarse_grain_buffer_svm(dev)): - limit = dev.max_parameter_size - # Leave some extra space since our sizes are estimates - target = lp.PyOpenCLTarget(limit_arg_size_nbytes=limit//2) - pytato_program = pt.generate_loopy( pt_dict_of_named_arrays, options=lp.Options( return_dict=True, no_numpy=True), function_name=_prg_id_to_kernel_name(prg_id), - target=target, + target=self.actx.get_target(), ) assert isinstance(pytato_program, BoundPyOpenCLProgram) From f82ba6751b101f02052c3377cc01c8996e22b08e Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 7 Sep 2022 16:27:14 -0500 Subject: [PATCH 05/19] memoize get_target --- arraycontext/impl/pytato/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 65350443..31afbcc0 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -54,6 +54,7 @@ from arraycontext.container.traversal import (rec_map_array_container, with_array_context) from arraycontext.metadata import NameHint +from pytools import memoize_method if TYPE_CHECKING: import pytato @@ -321,6 +322,7 @@ def _to_numpy(ary): self._rec_map_container(_to_numpy, self.freeze(array)), actx=None) + @memoize_method def get_target(self): import pyopencl as cl from pytato.target.loopy import LoopyPyOpenCLTarget From 11924bc6a53c6c914f417bdc865e34b4b4c2872f Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 7 Sep 2022 16:34:12 -0500 Subject: [PATCH 06/19] UNDO BEFORE MERGE: use dev branches --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a4cb4025..9f2aa044 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/islpy.git#egg=islpy -git+https://github.com/inducer/loopy.git#egg=loopy -git+https://github.com/inducer/pytato.git#egg=pytato +git+https://github.com/inducer/loopy.git@svm-args#egg=loopy +git+https://github.com/inducer/pytato.git@limit_arg_size#egg=pytato From faba326cd2142d4072c1132c523e6c82553621df Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 12 Sep 2022 15:02:26 -0500 Subject: [PATCH 07/19] Hackety hack: SVM detection in actx constructor --- arraycontext/impl/pytato/__init__.py | 36 +++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 31afbcc0..c720951e 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -233,6 +233,8 @@ class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext): """ def __init__( self, queue: "cl.CommandQueue", allocator=None, *, + use_memory_pool: Optional[bool] = None, + allocator_uses_svm: Optional[bool] = None, compile_trace_callback: Optional[Callable[[Any, str, Any], None]] = None ) -> None: """ @@ -243,10 +245,39 @@ def __init__( representation. This interface should be considered unstable. """ + if allocator is not None and use_memory_pool is not None: + raise TypeError("may not specify both allocator and use_memory_pool") + + from pyopencl.characterize import has_coarse_grain_buffer_svm + has_svm = has_coarse_grain_buffer_svm(queue.device) + + self.using_svm = None + + if allocator is None: + if has_svm: + self.using_svm = True + + from pyopencl.tools import SVMAllocator + allocator = SVMAllocator(queue.context, queue=queue) + + if use_memory_pool: + from pyopencl.tools import SVMPool + allocator = SVMPool(allocator) + else: + self.using_svm = False + + from pyopencl.tools import ImmediateAllocator + allocator = ImmediateAllocator(queue.context) + + if use_memory_pool: + from pyopencl.tools import MemoryPool + allocator = MemoryPool(allocator) + import pytato as pt import pyopencl.array as cla super().__init__(compile_trace_callback=compile_trace_callback) self.queue = queue + self.allocator = allocator self.array_types = (pt.Array, cla.Array) @@ -330,8 +361,11 @@ def get_target(self): target = None if (dev.type & cl.device_type.GPU and cl.characterize.has_coarse_grain_buffer_svm(dev)): + + from loopy import PyOpenCLTarget target = LoopyPyOpenCLTarget( - limit_arg_size_nbytes=dev.max_parameter_size) + target=PyOpenCLTarget( + limit_arg_size_nbytes=dev.max_parameter_size)) return target From 61038f97575019bc95903995f778ff1283a2c014 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Mon, 12 Sep 2022 18:01:35 -0500 Subject: [PATCH 08/19] check whether passed allocator supports SVM --- arraycontext/impl/pytato/__init__.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index c720951e..a178e496 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -248,12 +248,11 @@ def __init__( if allocator is not None and use_memory_pool is not None: raise TypeError("may not specify both allocator and use_memory_pool") - from pyopencl.characterize import has_coarse_grain_buffer_svm - has_svm = has_coarse_grain_buffer_svm(queue.device) - self.using_svm = None if allocator is None: + from pyopencl.characterize import has_coarse_grain_buffer_svm + has_svm = has_coarse_grain_buffer_svm(queue.device) if has_svm: self.using_svm = True @@ -261,8 +260,8 @@ def __init__( allocator = SVMAllocator(queue.context, queue=queue) if use_memory_pool: - from pyopencl.tools import SVMPool - allocator = SVMPool(allocator) + from pyopencl.tools import SVMPool + allocator = SVMPool(allocator) else: self.using_svm = False @@ -272,6 +271,17 @@ def __init__( if use_memory_pool: from pyopencl.tools import MemoryPool allocator = MemoryPool(allocator) + else: + # Check whether the passed allocator allocates SVM + try: + from pyopencl import SVMPointer + mem = allocator(4) + if isinstance(mem, SVMPointer): + self.using_svm = True + else: + self.using_svm = False + except ImportError: + self.using_svm = False import pytato as pt import pyopencl.array as cla @@ -359,12 +369,12 @@ def get_target(self): from pytato.target.loopy import LoopyPyOpenCLTarget dev = self.queue.device target = None - if (dev.type & cl.device_type.GPU + if (self.using_svm and dev.type & cl.device_type.GPU and cl.characterize.has_coarse_grain_buffer_svm(dev)): from loopy import PyOpenCLTarget target = LoopyPyOpenCLTarget( - target=PyOpenCLTarget( + loopy_target=PyOpenCLTarget( limit_arg_size_nbytes=dev.max_parameter_size)) return target From c769de1ac6bc7aff76284008cc8d2d25427aef07 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Mon, 12 Sep 2022 18:03:00 -0500 Subject: [PATCH 09/19] undo loopy branch --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9f2aa044..27e7e857 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/islpy.git#egg=islpy -git+https://github.com/inducer/loopy.git@svm-args#egg=loopy +git+https://github.com/inducer/loopy.git#egg=loopy git+https://github.com/inducer/pytato.git@limit_arg_size#egg=pytato From 6e912a95a6a5a0106f74f68106d948ab0da631a4 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Mon, 12 Sep 2022 19:01:36 -0500 Subject: [PATCH 10/19] implement it for the base class --- arraycontext/impl/pytato/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index a178e496..69c1c324 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -204,6 +204,9 @@ def supports_nonscalar_broadcasting(self): def permits_advanced_indexing(self): return True + def get_target(self): + return None + # }}} # }}} From 2768feea8d7e4846d3c6df731d46238c8f776375 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 13 Sep 2022 08:33:54 -0500 Subject: [PATCH 11/19] subclass LoopyPyOpenCLTarget --- arraycontext/impl/pytato/__init__.py | 37 ++++++++++++++++++---------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 69c1c324..c40debd4 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -214,6 +214,29 @@ def get_target(self): # {{{ PytatoPyOpenCLArrayContext +from pytato.target.loopy import LoopyPyOpenCLTarget + + +class PytatoLoopyPyOpenCLTarget(LoopyPyOpenCLTarget): + def __init__(self, dev, using_svm) -> None: + super().__init__() + self.dev = dev + self.using_svm = using_svm + + @memoize_method + def get_loopy_target(self): + import pyopencl as cl + target = None + if (self.using_svm and self.dev.type & cl.device_type.GPU + and cl.characterize.has_coarse_grain_buffer_svm(self.dev)): + + from loopy import PyOpenCLTarget + target = PyOpenCLTarget( + limit_arg_size_nbytes=42) + + return target + + class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext): """ A :class:`ArrayContext` that uses :mod:`pytato` data types to represent @@ -368,19 +391,7 @@ def _to_numpy(ary): @memoize_method def get_target(self): - import pyopencl as cl - from pytato.target.loopy import LoopyPyOpenCLTarget - dev = self.queue.device - target = None - if (self.using_svm and dev.type & cl.device_type.GPU - and cl.characterize.has_coarse_grain_buffer_svm(dev)): - - from loopy import PyOpenCLTarget - target = LoopyPyOpenCLTarget( - loopy_target=PyOpenCLTarget( - limit_arg_size_nbytes=dev.max_parameter_size)) - - return target + return PytatoLoopyPyOpenCLTarget(self.queue.device, self.using_svm) def freeze(self, array): if np.isscalar(array): From 05a75bf363d3255f43f8d09ba43119cff54973c1 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 13 Sep 2022 08:35:42 -0500 Subject: [PATCH 12/19] set actual limit --- arraycontext/impl/pytato/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index c40debd4..17ac2385 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -232,7 +232,7 @@ def get_loopy_target(self): from loopy import PyOpenCLTarget target = PyOpenCLTarget( - limit_arg_size_nbytes=42) + limit_arg_size_nbytes=self.dev.max_parameter_size) return target From 5e3bed2e54d10d6e9b371ce2cab3bc51e379111c Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 13 Sep 2022 18:02:27 -0500 Subject: [PATCH 13/19] undo pytato branch --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 27e7e857..a4cb4025 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ git+https://github.com/inducer/pyopencl.git#egg=pyopencl git+https://github.com/inducer/islpy.git#egg=islpy git+https://github.com/inducer/loopy.git#egg=loopy -git+https://github.com/inducer/pytato.git@limit_arg_size#egg=pytato +git+https://github.com/inducer/pytato.git#egg=pytato From fe407cb16536ddca3ac0d3c58f1956155d13bba5 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 14 Sep 2022 10:02:42 -0500 Subject: [PATCH 14/19] remove unused argument --- arraycontext/impl/pytato/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 17ac2385..39729d23 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -260,7 +260,6 @@ class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext): def __init__( self, queue: "cl.CommandQueue", allocator=None, *, use_memory_pool: Optional[bool] = None, - allocator_uses_svm: Optional[bool] = None, compile_trace_callback: Optional[Callable[[Any, str, Any], None]] = None ) -> None: """ From e14df924d47df3633424c7eb4ced736e44347324 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 14 Sep 2022 11:48:26 -0500 Subject: [PATCH 15/19] add type annotations --- arraycontext/impl/pytato/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 39729d23..e5d44eb2 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -59,6 +59,7 @@ if TYPE_CHECKING: import pytato import pyopencl as cl + import loopy as lp if getattr(sys, "_BUILDING_SPHINX_DOCS", False): import pyopencl as cl # noqa: F811 @@ -218,13 +219,13 @@ def get_target(self): class PytatoLoopyPyOpenCLTarget(LoopyPyOpenCLTarget): - def __init__(self, dev, using_svm) -> None: + def __init__(self, dev: "cl.Device", using_svm: bool) -> None: super().__init__() self.dev = dev self.using_svm = using_svm @memoize_method - def get_loopy_target(self): + def get_loopy_target(self) -> Optional["lp.PyOpenCLTarget"]: import pyopencl as cl target = None if (self.using_svm and self.dev.type & cl.device_type.GPU From 4271e2118a18e4061db4104e50f7f956893cb052 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 14 Sep 2022 11:59:42 -0500 Subject: [PATCH 16/19] add logging --- arraycontext/impl/pytato/__init__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index e5d44eb2..d20875f2 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -65,6 +65,10 @@ import pyopencl as cl # noqa: F811 +import logging +logger = logging.getLogger(__name__) + + # {{{ tag conversion def _preprocess_array_tags(tags: ToTagSetConvertible) -> FrozenSet[Tag]: @@ -231,9 +235,11 @@ def get_loopy_target(self) -> Optional["lp.PyOpenCLTarget"]: if (self.using_svm and self.dev.type & cl.device_type.GPU and cl.characterize.has_coarse_grain_buffer_svm(self.dev)): + limit = self.dev.max_parameter_size + logger.info(f"PytatoLoopyPyOpenCLTarget: limit_arg_size_nbytes={limit}") + from loopy import PyOpenCLTarget - target = PyOpenCLTarget( - limit_arg_size_nbytes=self.dev.max_parameter_size) + target = PyOpenCLTarget(limit_arg_size_nbytes=limit) return target From bf459d6cf802cbdb7fee681bd2928d9127dcd210 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 19 Sep 2022 15:04:55 -0500 Subject: [PATCH 17/19] Refactor arg size passing to put less logic in the target --- arraycontext/impl/pytato/__init__.py | 48 +++++++++++++++++----------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index d20875f2..d3270660 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -222,26 +222,14 @@ def get_target(self): from pytato.target.loopy import LoopyPyOpenCLTarget -class PytatoLoopyPyOpenCLTarget(LoopyPyOpenCLTarget): - def __init__(self, dev: "cl.Device", using_svm: bool) -> None: +class _ArgSizeLimitingPytatoLoopyPyOpenCLTarget(LoopyPyOpenCLTarget): + def __init__(self, limit_arg_size_nbytes: int) -> None: super().__init__() - self.dev = dev - self.using_svm = using_svm + self.limit_arg_size_nbytes = limit_arg_size_nbytes @memoize_method def get_loopy_target(self) -> Optional["lp.PyOpenCLTarget"]: - import pyopencl as cl - target = None - if (self.using_svm and self.dev.type & cl.device_type.GPU - and cl.characterize.has_coarse_grain_buffer_svm(self.dev)): - - limit = self.dev.max_parameter_size - logger.info(f"PytatoLoopyPyOpenCLTarget: limit_arg_size_nbytes={limit}") - - from loopy import PyOpenCLTarget - target = PyOpenCLTarget(limit_arg_size_nbytes=limit) - - return target + return PyOpenCLTarget(limit_arg_size_nbytes=self.limit_arg_size_nbytes) class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext): @@ -267,7 +255,10 @@ class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext): def __init__( self, queue: "cl.CommandQueue", allocator=None, *, use_memory_pool: Optional[bool] = None, - compile_trace_callback: Optional[Callable[[Any, str, Any], None]] = None + compile_trace_callback: Optional[Callable[[Any, str, Any], None]] = None, + + # do not use: only for testing + _force_svm_arg_limit: Optional[int] = None, ) -> None: """ :arg compile_trace_callback: A function of three arguments @@ -326,6 +317,8 @@ def __init__( # unused, but necessary to keep the context alive self.context = self.queue.context + self._force_svm_arg_limit = _force_svm_arg_limit + @property def _frozen_array_types(self) -> Tuple[Type, ...]: import pyopencl.array as cla @@ -397,7 +390,26 @@ def _to_numpy(ary): @memoize_method def get_target(self): - return PytatoLoopyPyOpenCLTarget(self.queue.device, self.using_svm) + import pyopencl as cl + import pyopencl.characterize as cl_char + + dev = self.queue.device + + if ( + self._force_svm_arg_limit is not None + or ( + self.using_svm and dev.type & cl.device_type.GPU + and cl_char.has_coarse_grain_buffer_svm(dev))): + + limit = dev.max_parameter_size + if self._force_svm_arg_limit is not None: + limit = self._force_svm_arg_limit + + logger.info(f"limiting argument buffer size for {dev} to {limit} bytes") + + return _ArgSizeLimitingPytatoLoopyPyOpenCLTarget(limit) + else: + return super().get_target() def freeze(self, array): if np.isscalar(array): From 025b1cf3ab7f5aa51852f7f0bb07ef23740c37ef Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Mon, 19 Sep 2022 15:11:10 -0500 Subject: [PATCH 18/19] flake8 --- arraycontext/impl/pytato/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index d3270660..ec13738a 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -229,6 +229,7 @@ def __init__(self, limit_arg_size_nbytes: int) -> None: @memoize_method def get_loopy_target(self) -> Optional["lp.PyOpenCLTarget"]: + from loopy import PyOpenCLTarget return PyOpenCLTarget(limit_arg_size_nbytes=self.limit_arg_size_nbytes) From 29fe79310d1fad6ef3fa6757f7b55e3e92066442 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Mon, 19 Sep 2022 16:43:36 -0500 Subject: [PATCH 19/19] add a test --- test/test_pytato_arraycontext.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/test_pytato_arraycontext.py b/test/test_pytato_arraycontext.py index f4d132ca..7dc76c4c 100644 --- a/test/test_pytato_arraycontext.py +++ b/test/test_pytato_arraycontext.py @@ -100,6 +100,27 @@ def test_tags_preserved_after_freeze(actx_factory): assert foo.axes[1].tags_of_type(BazTag) +def test_arg_size_limit(actx_factory): + ran_callback = False + + def my_ctc(what, stage, ir): + if stage == "final": + assert ir.target.limit_arg_size_nbytes == 42 + nonlocal ran_callback + ran_callback = True + + def twice(x): + return 2 * x + + actx = _PytatoPyOpenCLArrayContextForTests( + actx_factory().queue, compile_trace_callback=my_ctc, _force_svm_arg_limit=42) + + f = actx.compile(twice) + f(99) + + assert ran_callback + + if __name__ == "__main__": import sys if len(sys.argv) > 1: