diff --git a/loopy/__init__.py b/loopy/__init__.py index b6008df3c..8a7d082ba 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -67,7 +67,8 @@ from loopy.version import VERSION, MOST_RECENT_LANGUAGE_VERSION from loopy.transform.iname import ( - set_loop_priority, prioritize_loops, untag_inames, + set_loop_priority, prioritize_loops, constrain_loop_nesting, + untag_inames, split_iname, chunk_iname, join_inames, tag_inames, duplicate_inames, rename_iname, remove_unused_inames, split_reduction_inward, split_reduction_outward, @@ -184,7 +185,8 @@ # {{{ transforms - "set_loop_priority", "prioritize_loops", "untag_inames", + "set_loop_priority", "prioritize_loops", "constrain_loop_nesting", + "untag_inames", "split_iname", "chunk_iname", "join_inames", "tag_inames", "duplicate_inames", "rename_iname", "remove_unused_inames", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 9b022936b..5aec9c752 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -272,6 +272,7 @@ def __init__(self, domains, instructions, args=None, iname_slab_increments=None, loop_priority=frozenset(), + loop_nest_constraints=None, silenced_warnings=None, applied_iname_rewrites=None, @@ -417,6 +418,7 @@ def __init__(self, domains, instructions, args=None, assumptions=assumptions, iname_slab_increments=iname_slab_increments, loop_priority=loop_priority, + loop_nest_constraints=loop_nest_constraints, silenced_warnings=silenced_warnings, temporary_variables=temporary_variables, local_sizes=local_sizes, @@ -1550,6 +1552,7 @@ def __setstate__(self, state): "substitutions", "iname_slab_increments", "loop_priority", + "loop_nest_constraints", "silenced_warnings", "options", "state", diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index c6a9ec3ac..3b572e310 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -23,7 +23,6 @@ from pytools import ImmutableRecord import sys -import islpy as isl from loopy.diagnostic import 
warn_with_kernel, LoopyError # noqa from pytools import MinRecursionLimit, ProcessLogger @@ -195,12 +194,11 @@ def find_loop_nest_with_map(kernel): """ result = {} - from loopy.kernel.data import ConcurrentTag, IlpBaseTag + from loopy.kernel.data import ConcurrentTag all_nonpar_inames = { iname for iname in kernel.all_inames() - if not kernel.iname_tags_of_type(iname, - (ConcurrentTag, IlpBaseTag))} + if not kernel.iname_tags_of_type(iname, ConcurrentTag)} iname_to_insns = kernel.iname_to_insns() @@ -216,40 +214,24 @@ def find_loop_nest_around_map(kernel): """Returns a dictionary mapping inames to other inames that are always nested around them. """ - result = {} - - all_inames = kernel.all_inames() - - iname_to_insns = kernel.iname_to_insns() + from collections import defaultdict + from loopy.schedule.tools import get_loop_nest_tree - # examine pairs of all inames--O(n**2), I know. - from loopy.kernel.data import IlpBaseTag - for inner_iname in all_inames: - result[inner_iname] = set() - for outer_iname in all_inames: - if inner_iname == outer_iname: - continue + tree = get_loop_nest_tree(kernel) - if kernel.iname_tags_of_type(outer_iname, IlpBaseTag): - # ILP tags are special because they are parallel tags - # and therefore 'in principle' nest around everything. - # But they're realized by the scheduler as a loop - # at the innermost level, so we'll cut them some - # slack here. 
- continue + loop_nest_around_map = defaultdict(frozenset) - if iname_to_insns[inner_iname] < iname_to_insns[outer_iname]: - result[inner_iname].add(outer_iname) + for node in tree.all_nodes_itr(): + if node.identifier == tree.root: + continue + iname = node.identifier + depth = tree.depth(iname) + all_ancestors = frozenset(tree.ancestor(iname, d).identifier + for d in range(1, depth)) - for dom_idx, dom in enumerate(kernel.domains): - for outer_iname in dom.get_var_names(isl.dim_type.param): - if outer_iname not in all_inames: - continue + loop_nest_around_map[iname] = all_ancestors - for inner_iname in dom.get_var_names(isl.dim_type.set): - result[inner_iname].add(outer_iname) - - return result + return loop_nest_around_map def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): @@ -804,8 +786,117 @@ def is_similar_to_template(insn): # {{{ scheduling algorithm -def generate_loop_schedules_internal( - sched_state, debug=None): +def _get_dep_equivalent_nests(tree, within1, within2): + common_ancestors = (within1 & within2) | {""} + + innermost_parent = max(common_ancestors, + key=lambda k: tree.depth(k)) + iname1, = [iname.identifier + for iname in tree.children(innermost_parent) + if iname.identifier in within1] + + iname2, = [iname.identifier + for iname in tree.children(innermost_parent) + if iname.identifier in within2] + + return iname1, iname2 + + +def generate_loop_schedules_v2(kernel): + from loopy.schedule.tools import get_loop_nest_tree + from functools import reduce + from pytools.graph import compute_topological_order + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag + + if any(insn.priority != 0 for insn in kernel.instructions): + raise NotImplementedError + + if kernel.schedule is not None: + raise NotImplementedError + + concurrent_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, ConcurrentTag)} + ilp_inames = {iname for iname in kernel.all_inames() + if 
kernel.iname_tags_of_type(iname, IlpBaseTag)} + vec_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, VectorizeTag)} + parallel_inames = (concurrent_inames - ilp_inames - vec_inames) + + # the first step is to figure out the loop nest trees + # I would rather get the loop nest tree first + loop_nest_tree = get_loop_nest_tree(kernel) + + # loop_inames: inames that are realized as loops. Concurrent inames aren't + # realized as a loop in the generated code for a loopy.TargetBase. + loop_inames = (reduce(frozenset.union, (insn.within_inames + for insn in kernel.instructions), + frozenset()) + - parallel_inames) + + dag = {} + dag.update({EnterLoop(iname=iname): frozenset({LeaveLoop(iname=iname)}) + for iname in loop_inames}) + dag.update({LeaveLoop(iname=iname): frozenset() + for iname in loop_inames}) + dag.update({RunInstruction(insn_id=insn.id): frozenset() + for insn in kernel.instructions}) + + for parent in loop_nest_tree.all_nodes_itr(): + outer_loop = parent.identifier + if outer_loop == "": + continue + + for child in loop_nest_tree.children(outer_loop): + inner_loop = child.identifier + dag[EnterLoop(iname=outer_loop)] |= {EnterLoop(iname=inner_loop)} + dag[LeaveLoop(iname=inner_loop)] |= {LeaveLoop(iname=outer_loop)} + + for insn in kernel.instructions: + insn_loop_inames = insn.within_inames & loop_inames + for dep_id in insn.depends_on: + dep = kernel.id_to_insn[dep_id] + dep_loop_inames = dep.within_inames & loop_inames + dag[RunInstruction(insn_id=dep_id)] |= {RunInstruction(insn_id=insn.id)} + if dep_loop_inames < insn_loop_inames: + for iname in insn_loop_inames - dep_loop_inames: + dag[RunInstruction(insn_id=dep.id)] |= {EnterLoop(iname=iname)} + elif insn_loop_inames < dep_loop_inames: + for iname in dep_loop_inames - insn_loop_inames: + dag[LeaveLoop(iname=iname)] |= {RunInstruction(insn_id=insn.id)} + elif dep_loop_inames != insn_loop_inames: + insn_iname, dep_iname = _get_dep_equivalent_nests(loop_nest_tree, + 
insn_loop_inames, + dep_loop_inames) + dag[LeaveLoop(iname=dep_iname)] |= {EnterLoop(iname=insn_iname)} + else: + pass + + for iname in insn_loop_inames: + dag[EnterLoop(iname=iname)] |= {RunInstruction(insn_id=insn.id)} + dag[RunInstruction(insn_id=insn.id)] |= {LeaveLoop(iname=iname)} + + def iname_key(iname): + all_ancestors = [loop_nest_tree.ancestor(iname, i).identifier + for i in range(1, loop_nest_tree.depth(iname))] + return ",".join(all_ancestors+[iname]) + + def key(x): + if isinstance(x, RunInstruction): + iname = max((kernel.id_to_insn[x.insn_id].within_inames & loop_inames), + key=lambda k: loop_nest_tree.depth(k), + default="") + result = (iname_key(iname), x.insn_id) + elif isinstance(x, (EnterLoop, LeaveLoop)): + result = (iname_key(x.iname),) + else: + raise NotImplementedError + + return result + + return compute_topological_order(dag, key=key) + + +def generate_loop_schedules_internal(sched_state, debug=None): # allow_insn is set to False initially and after entering each loop # to give loops containing high-priority instructions a chance. 
kernel = sched_state.kernel @@ -1942,171 +2033,195 @@ def generate_loop_schedules(kernel, debug_args={}): yield from generate_loop_schedules_inner(kernel, debug_args=debug_args) -def generate_loop_schedules_inner(kernel, debug_args={}): +def postprocess_schedule(kernel, gen_sched): from loopy.kernel import KernelState - if kernel.state not in (KernelState.PREPROCESSED, KernelState.LINEARIZED): - raise LoopyError("cannot schedule a kernel that has not been " - "preprocessed") + gen_sched = convert_barrier_instructions_to_barriers( + kernel, gen_sched) - from loopy.check import pre_schedule_checks - pre_schedule_checks(kernel) - - schedule_count = 0 + gsize, lsize = kernel.get_grid_size_upper_bounds() - debug = ScheduleDebugger(**debug_args) + if (gsize or lsize): + if not kernel.options.disable_global_barriers: + logger.debug("%s: barrier insertion: global" % kernel.name) + gen_sched = insert_barriers(kernel, gen_sched, + synchronization_kind="global", verify_only=True) - preschedule = kernel.schedule if kernel.state == KernelState.LINEARIZED else () + logger.debug("%s: barrier insertion: local" % kernel.name) + gen_sched = insert_barriers(kernel, gen_sched, + synchronization_kind="local", verify_only=False) + logger.debug("%s: barrier insertion: done" % kernel.name) - prescheduled_inames = { - insn.iname - for insn in preschedule - if isinstance(insn, EnterLoop)} + new_kernel = kernel.copy( + schedule=gen_sched, + state=KernelState.LINEARIZED) - prescheduled_insn_ids = { - insn_id - for item in preschedule - for insn_id in sched_item_to_insn_id(item)} - - from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, - filter_iname_tags_by_type) - ilp_inames = { - name - for name, iname in kernel.inames.items() - if filter_iname_tags_by_type(iname.tags, IlpBaseTag)} - vec_inames = { - name - for name, iname in kernel.inames.items() - if filter_iname_tags_by_type(iname.tags, VectorizeTag)} - parallel_inames = { - name - for name, iname in 
kernel.inames.items() - if filter_iname_tags_by_type(iname.tags, ConcurrentTag)} - - loop_nest_with_map = find_loop_nest_with_map(kernel) - loop_nest_around_map = find_loop_nest_around_map(kernel) - sched_state = SchedulerState( - kernel=kernel, - loop_nest_around_map=loop_nest_around_map, - loop_insn_dep_map=find_loop_insn_dep_map( - kernel, - loop_nest_with_map=loop_nest_with_map, - loop_nest_around_map=loop_nest_around_map), - breakable_inames=ilp_inames, - ilp_inames=ilp_inames, - vec_inames=vec_inames, - - prescheduled_inames=prescheduled_inames, - prescheduled_insn_ids=prescheduled_insn_ids, - - # time-varying part - active_inames=(), - entered_inames=frozenset(), - enclosing_subkernel_inames=(), - - schedule=(), - - unscheduled_insn_ids={insn.id for insn in kernel.instructions}, - scheduled_insn_ids=frozenset(), - within_subkernel=kernel.state != KernelState.LINEARIZED, - may_schedule_global_barriers=True, + from loopy.schedule.device_mapping import \ + map_schedule_onto_host_or_device + if kernel.state != KernelState.LINEARIZED: + # Device mapper only gets run once. 
+ new_kernel = map_schedule_onto_host_or_device(new_kernel) - preschedule=preschedule, - insn_ids_to_try=None, + from loopy.schedule.tools import add_extra_args_to_schedule + return add_extra_args_to_schedule(new_kernel) - # ilp and vec are not parallel for the purposes of the scheduler - parallel_inames=parallel_inames - ilp_inames - vec_inames, - group_insn_counts=group_insn_counts(kernel), - active_group_counts={}, - - insns_in_topologically_sorted_order=( - get_insns_in_topologically_sorted_order(kernel)), - ) - - schedule_gen_kwargs = {} - - def print_longest_dead_end(): - if debug.interactive: - print("Loopy will now show you the scheduler state at the point") - print("where the longest (dead-end) schedule was generated, in the") - print("the hope that some of this makes sense and helps you find") - print("the issue.") - print() - print("To disable this interactive behavior, pass") - print(" debug_args=dict(interactive=False)") - print("to generate_loop_schedules().") - print(75*"-") - input("Enter:") - print() - print() - - debug.debug_length = len(debug.longest_rejected_schedule) - while True: - try: - for _ in generate_loop_schedules_internal( - sched_state, debug=debug, **schedule_gen_kwargs): - pass - - except ScheduleDebugInput as e: - debug.debug_length = int(str(e)) - continue - - break - - try: - for gen_sched in generate_loop_schedules_internal( - sched_state, debug=debug, **schedule_gen_kwargs): - debug.stop() - - gen_sched = convert_barrier_instructions_to_barriers( - kernel, gen_sched) - - gsize, lsize = kernel.get_grid_size_upper_bounds() +def generate_loop_schedules_inner(kernel, debug_args={}): + from loopy.kernel import KernelState + if kernel.state not in (KernelState.PREPROCESSED, KernelState.LINEARIZED): + raise LoopyError("cannot schedule a kernel that has not been " + "preprocessed") - if (gsize or lsize): - if not kernel.options.disable_global_barriers: - logger.debug("%s: barrier insertion: global" % kernel.name) - gen_sched = 
insert_barriers(kernel, gen_sched, - synchronization_kind="global", verify_only=True) + from loopy.check import pre_schedule_checks + pre_schedule_checks(kernel) - logger.debug("%s: barrier insertion: local" % kernel.name) - gen_sched = insert_barriers(kernel, gen_sched, - synchronization_kind="local", verify_only=False) - logger.debug("%s: barrier insertion: done" % kernel.name) + can_v2_scheduler_handle = ( + # v2-scheduler cannot handle insn groups + all(len(insn.conflicts_with_groups) == 0 + for insn in kernel.instructions) + # v2-scheduler cannot handle prescheduled kernel + and (not kernel.schedule) + # v2-scheduler cannot handle instruction priorities + and all(insn.priority == 0 + for insn in kernel.instructions) + ) + + if can_v2_scheduler_handle: + gen_sched = generate_loop_schedules_v2(kernel) + yield postprocess_schedule(kernel, gen_sched) + else: + schedule_count = 0 + + debug = ScheduleDebugger(**debug_args) + + preschedule = (kernel.schedule + + if kernel.state == KernelState.LINEARIZED + + else ()) + + prescheduled_inames = { + insn.iname + for insn in preschedule + if isinstance(insn, EnterLoop)} + + prescheduled_insn_ids = { + insn_id + for item in preschedule + for insn_id in sched_item_to_insn_id(item)} + + from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, + filter_iname_tags_by_type) + ilp_inames = { + name + for name, iname in kernel.inames.items() + if filter_iname_tags_by_type(iname.tags, IlpBaseTag)} + vec_inames = { + name + for name, iname in kernel.inames.items() + if filter_iname_tags_by_type(iname.tags, VectorizeTag)} + parallel_inames = { + name + for name, iname in kernel.inames.items() + if filter_iname_tags_by_type(iname.tags, ConcurrentTag)} + + loop_nest_with_map = find_loop_nest_with_map(kernel) + loop_nest_around_map = find_loop_nest_around_map(kernel) + sched_state = SchedulerState( + kernel=kernel, + loop_nest_around_map=loop_nest_around_map, + loop_insn_dep_map=find_loop_insn_dep_map( + kernel, + 
loop_nest_with_map=loop_nest_with_map, + loop_nest_around_map=loop_nest_around_map), + breakable_inames=ilp_inames, + ilp_inames=ilp_inames, + vec_inames=vec_inames, + + prescheduled_inames=prescheduled_inames, + prescheduled_insn_ids=prescheduled_insn_ids, + + # time-varying part + active_inames=(), + entered_inames=frozenset(), + enclosing_subkernel_inames=(), + + schedule=(), + + unscheduled_insn_ids={insn.id for insn in kernel.instructions}, + scheduled_insn_ids=frozenset(), + within_subkernel=kernel.state != KernelState.LINEARIZED, + may_schedule_global_barriers=True, + + preschedule=preschedule, + insn_ids_to_try=None, + + # ilp and vec are not parallel for the purposes of the scheduler + parallel_inames=parallel_inames - ilp_inames - vec_inames, + + group_insn_counts=group_insn_counts(kernel), + active_group_counts={}, + + insns_in_topologically_sorted_order=( + get_insns_in_topologically_sorted_order(kernel)), + ) + + schedule_gen_kwargs = {} + + def print_longest_dead_end(): + if debug.interactive: + print("Loopy will now show you the scheduler state at the point") + print("where the longest (dead-end) schedule was generated, in the") + print("the hope that some of this makes sense and helps you find") + print("the issue.") + print() + print("To disable this interactive behavior, pass") + print(" debug_args=dict(interactive=False)") + print("to generate_loop_schedules().") + print(75*"-") + input("Enter:") + print() + print() + + debug.debug_length = len(debug.longest_rejected_schedule) + while True: + try: + for _ in generate_loop_schedules_internal( + sched_state, debug=debug, **schedule_gen_kwargs): + pass + + except ScheduleDebugInput as e: + debug.debug_length = int(str(e)) + continue - new_kernel = kernel.copy( - schedule=gen_sched, - state=KernelState.LINEARIZED) + break - from loopy.schedule.device_mapping import \ - map_schedule_onto_host_or_device - if kernel.state != KernelState.LINEARIZED: - # Device mapper only gets run once. 
- new_kernel = map_schedule_onto_host_or_device(new_kernel) + try: + for gen_sched in generate_loop_schedules_internal( + sched_state, debug=debug, **schedule_gen_kwargs): + debug.stop() - from loopy.schedule.tools import add_extra_args_to_schedule - new_kernel = add_extra_args_to_schedule(new_kernel) - yield new_kernel + new_kernel = postprocess_schedule(kernel, gen_sched) + yield new_kernel - debug.start() + debug.start() - schedule_count += 1 + schedule_count += 1 - except KeyboardInterrupt: - print() - print(75*"-") - print("Interrupted during scheduling") - print(75*"-") - print_longest_dead_end() - raise + except KeyboardInterrupt: + print() + print(75*"-") + print("Interrupted during scheduling") + print(75*"-") + print_longest_dead_end() + raise - debug.done_scheduling() - if not schedule_count: - print(75*"-") - print("ERROR: Sorry--loopy did not find a schedule for your kernel.") - print(75*"-") - print_longest_dead_end() - raise RuntimeError("no valid schedules found") + debug.done_scheduling() + if not schedule_count: + print(75*"-") + print("ERROR: Sorry--loopy did not find a schedule for your kernel.") + print(75*"-") + print_longest_dead_end() + raise RuntimeError("no valid schedules found") logger.info("%s: schedule done" % kernel.name) diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index afcdfb07b..29b92dcfa 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -21,6 +21,10 @@ """ from loopy.kernel.data import AddressSpace +from loopy.diagnostic import LoopyError +from treelib import Tree +from collections import defaultdict +from functools import reduce # {{{ block boundary finder @@ -104,3 +108,295 @@ def add_extra_args_to_schedule(kernel): return kernel.copy(schedule=new_schedule) # }}} + + +class _not_seen: # noqa: N801 + pass + + +def pull_out_loop_nest(tree, loop_nests, inames_to_pull_out): + """ + Updates *tree* to make *inames_to_pull_out* a loop nesting level in + *loop_nests* + + :returns: a :class:`tuple` 
``(outer_loop_nest, inner_loop_nest)``, where + outer_loop_nest is the identifier for the new outer and inner loop + nests so that *inames_to_pull_out* is a valid nesting. + """ + assert all(isinstance(loop_nest, frozenset) for loop_nest in loop_nests) + assert inames_to_pull_out <= reduce(frozenset.union, loop_nests, frozenset()) + + # {{{ sanity check to ensure the loop nest *inames_to_pull_out* is possible + + loop_nests = sorted(loop_nests, key=lambda nest: tree.depth(nest)) + + for outer, inner in zip(loop_nests[:-1], loop_nests[1:]): + if outer != tree.parent(inner).identifier: + raise LoopyError(f"Cannot schedule loop nest {inames_to_pull_out} " + f" in the nesting tree:\n{tree}") + + assert tree.depth(loop_nests[0]) == 0 + + # }}} + + innermost_loop_nest = loop_nests[-1] + new_outer_loop_nest = inames_to_pull_out - reduce(frozenset.union, + loop_nests[:-1], + frozenset()) + new_inner_loop_nest = innermost_loop_nest - inames_to_pull_out + + if new_outer_loop_nest == innermost_loop_nest: + # such a loop nesting already exists => do nothing + return new_outer_loop_nest, None + + # add the outer loop to our loop nest tree + tree.create_node(identifier=new_outer_loop_nest, + parent=tree.parent(innermost_loop_nest).identifier) + + # rename the old loop to the inner loop + tree.update_node(innermost_loop_nest, + identifier=new_inner_loop_nest, + tag=new_inner_loop_nest) + + # set the parent of inner loop to be the outer loop + tree.move_node(new_inner_loop_nest, new_outer_loop_nest) + + return new_outer_loop_nest, new_inner_loop_nest + + +def add_inner_loops(tree, outer_loop_nest, inner_loop_nest): + """ + Update *tree* to nest *inner_loop_nest* inside *outer_loop_nest*. 
+ """ + # add the outer loop to our loop nest tree + tree.create_node(identifier=inner_loop_nest, parent=outer_loop_nest) + + +def _order_loop_nests(loop_nest_tree, + strict_priorities, + relaxed_priorities, + iname_to_tree_node_id): + """ + Returns a loop nest where all nodes in the tree are instances of + :class:`str` denoting inames. Unlike *loop_nest_tree* which corresponds to + multiple loop nesting, this routine returns a unique loop nest that is + obtained after constraining *loop_nest_tree* with the constraints enforced + by *priorities*. + """ + from pytools.graph import compute_topological_order as toposort + from warnings import warn + + loop_nests = set(iname_to_tree_node_id.values()) + + flow_requirements = {loop_nest: {iname: frozenset() + for iname in loop_nest} + for loop_nest in loop_nests} + + def _update_flow_requirements(priorities, cannot_satisfy_callback): + for priority in priorities: + for outer_iname, inner_iname in zip(priority[:-1], priority[1:]): + inner_iname_nest = iname_to_tree_node_id[inner_iname] + outer_iname_nest = iname_to_tree_node_id[outer_iname] + if inner_iname_nest == outer_iname_nest: + flow_requirements[inner_iname_nest][outer_iname] |= {inner_iname} + else: + ancestors_of_inner_iname = reduce( + frozenset.union, + (loop_nest_tree.ancestor(inner_iname_nest, k).identifier + for k in range(loop_nest_tree.depth(inner_iname_nest))), + frozenset()) + ancestors_of_outer_iname = reduce( + frozenset.union, + (loop_nest_tree.ancestor(outer_iname_nest, k).identifier + for k in range(loop_nest_tree.depth(outer_iname_nest))), + frozenset()) + if outer_iname in ancestors_of_inner_iname: + # constraint already satisfied => do nothing + pass + elif inner_iname in ancestors_of_outer_iname: + cannot_satisfy_callback("Cannot satisfy constraint that" + f" iname '{inner_iname}' must be" + f" nested within '{outer_iname}''.") + else: + # inner iname and outer iname are indirect family members + # => must be realized via dependencies in the 
linearization + # phase + raise NotImplementedError + + def _raise_loopy_err(x): + raise LoopyError(x) + + _update_flow_requirements(strict_priorities, _raise_loopy_err) + _update_flow_requirements(relaxed_priorities, warn) + + ordered_loop_nests = {unordered_nest: toposort(flow, + key=lambda x: x) + for unordered_nest, flow in flow_requirements.items()} + + # {{{ just choose one of the possible loop nestings + + assert loop_nest_tree.root == frozenset() + + # Either all of these loop nestings would be valid or all would invalid => + # we aren't marking any schedulable kernel as unschedulable. + + new_tree = Tree() + + old_to_new_parent = {} + + new_tree.create_node(identifier="") + old_to_new_parent[loop_nest_tree.root] = "" + + # traversing 'tree' in an BFS fashion to create 'new_tree' + queue = [node.identifier + for node in loop_nest_tree.children(loop_nest_tree.root)] + + while queue: + current_nest = queue.pop(0) + + ordered_nest = ordered_loop_nests[current_nest] + new_tree.create_node(identifier=ordered_nest[0], + parent=old_to_new_parent[loop_nest_tree + .parent(current_nest) + .identifier]) + for new_parent, new_child in zip(ordered_nest[:-1], ordered_nest[1:]): + new_tree.create_node(identifier=new_child, parent=new_parent) + + old_to_new_parent[current_nest] = ordered_nest[-1] + + queue.extend([child.identifier + for child in loop_nest_tree.children(current_nest)]) + + # }}} + + return new_tree + + +def get_loop_nest_tree(kernel): + """ + Returns an instance of :class:`treelib.Tree` denoting the kernel's loop + nestings. + + Each node of the returned tree has a :class:`frozenset` of inames. + All the inames in the identifier of a parent node of a loop nest in the + tree must be nested outside all the iname in identifier of the loop nest. + + .. note:: + + This routine only takes into account the nesting dependency + constraints of :attr:`loopy.InstructionBase.within_inames` of all the + *kernel*'s instructions and the iname tags. 
This routine does *NOT* + include the nesting constraints imposed by the dependencies between the + instructions and the dependencies imposed by the kernel's domain tree. + """ + from islpy import dim_type + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag + + concurrent_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, ConcurrentTag)} + ilp_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, IlpBaseTag)} + vec_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, VectorizeTag)} + parallel_inames = (concurrent_inames - ilp_inames - vec_inames) + + # figuring the possible loop nestings minus the concurrent_inames as they + # are never realized as actual loops + iname_chains = {insn.within_inames - parallel_inames + for insn in kernel.instructions} + + tree = Tree() + root = frozenset() + + # mapping from iname to the innermost loop nest they are part of in *tree*. + iname_to_tree_node_id = defaultdict(lambda: _not_seen) + + tree.create_node(identifier=root) + + # if there were any loop with no inames, those have been already account + # for as the root. 
+ iname_chains = iname_chains - {root} + + for iname_chain in iname_chains: + not_seen_inames = frozenset(iname for iname in iname_chain + if iname_to_tree_node_id[iname] is _not_seen) + seen_inames = iname_chain - not_seen_inames + + all_nests = {iname_to_tree_node_id[iname] for iname in seen_inames} + + outer_loop, inner_loop = pull_out_loop_nest(tree, + (all_nests | {frozenset()}), + seen_inames) + if not_seen_inames: + add_inner_loops(tree, outer_loop, not_seen_inames) + + # {{{ update iname to node id + + for iname in outer_loop: + iname_to_tree_node_id[iname] = outer_loop + + if inner_loop is not None: + for iname in inner_loop: + iname_to_tree_node_id[iname] = inner_loop + + for iname in not_seen_inames: + iname_to_tree_node_id[iname] = not_seen_inames + + # }}} + + # {{{ make ILP tagged inames innermost + + for iname_chain in iname_chains: + for ilp_iname in (ilp_inames & iname_chains): + # pull out other loops so that ilp_iname is the innermost + all_nests = {iname_to_tree_node_id[iname] for iname in seen_inames} + outer_loop, inner_loop = pull_out_loop_nest(tree, + (all_nests | {frozenset()}), + iname_chain - {ilp_iname}) + + for iname in outer_loop: + iname_to_tree_node_id[iname] = outer_loop + + if inner_loop is not None: + for iname in inner_loop: + iname_to_tree_node_id[iname] = inner_loop + + # }}} + + strict_loop_priorities = frozenset() + + # {{{ impose constraints by the domain tree + + loop_inames = kernel.all_inames() - parallel_inames + + for dom_idx, dom in enumerate(kernel.domains): + for outer_iname in set(dom.get_var_names(dim_type.param)): + if outer_iname not in loop_inames: + continue + + for inner_iname in dom.get_var_names(dim_type.set): + if inner_iname not in loop_inames: + continue + + # either outer_iname and inner_iname should belong to the same + # loop nest level or outer should be strictly outside inner + # iname + inner_iname_nest = iname_to_tree_node_id[inner_iname] + outer_iname_nest = iname_to_tree_node_id[outer_iname] + + 
if inner_iname_nest == outer_iname_nest: + strict_loop_priorities |= {(outer_iname, inner_iname)} + else: + ancestors_of_inner_iname = { + tree.ancestor(inner_iname_nest, k).identifier + for k in range(tree.depth(inner_iname_nest))} + if outer_iname_nest not in ancestors_of_inner_iname: + raise LoopyError(f"Loop '{outer_iname}' cannot be nested" + f" outside '{inner_iname}'.") + + # }}} + + return _order_loop_nests(tree, + strict_loop_priorities, + kernel.loop_priority, + iname_to_tree_node_id) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 32c56a5a3..4bee180c3 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -28,6 +28,7 @@ RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError +from pytools import Record __doc__ = """ @@ -112,6 +113,675 @@ def prioritize_loops(kernel, loop_priority): # }}} +# {{{ Handle loop nest constraints + +# {{{ Classes to house loop nest constraints + +# {{{ UnexpandedInameSet + +class UnexpandedInameSet(Record): + def __init__(self, inames, complement=False): + Record.__init__( + self, + inames=inames, + complement=complement, + ) + + def contains(self, inames): + if isinstance(inames, set): + return (not (inames & self.inames) if self.complement + else inames.issubset(self.inames)) + else: + return (inames not in self.inames if self.complement + else inames in self.inames) + + def get_inames_represented(self, iname_universe=None): + """Return the set of inames represented by the UnexpandedInameSet + """ + if self.complement: + if not iname_universe: + raise ValueError( + "Cannot expand UnexpandedInameSet %s without " + "iname_universe." % (self)) + return iname_universe-self.inames + else: + return self.inames.copy() + + def __lt__(self, other): + # FIXME is this function really necessary? If so, what should it return? 
+ return self.__hash__() < other.__hash__() + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.inames) + key_builder.rec(key_hash, self.complement) + + def __str__(self): + return "%s{%s}" % ("~" if self.complement else "", + ",".join(i for i in sorted(self.inames))) + +# }}} + + +# {{{ LoopNestConstraints + +class LoopNestConstraints(Record): + def __init__(self, must_nest=None, must_not_nest=None, + must_nest_graph=None): + Record.__init__( + self, + must_nest=must_nest, + must_not_nest=must_not_nest, + must_nest_graph=must_nest_graph, + ) + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.must_nest) + key_builder.rec(key_hash, self.must_not_nest) + key_builder.rec(key_hash, self.must_nest_graph) + + def __str__(self): + return "LoopNestConstraints(\n" \ + " must_nest = " + str(self.must_nest) + "\n" \ + " must_not_nest = " + str(self.must_not_nest) + "\n" \ + " must_nest_graph = " + str(self.must_nest_graph) + "\n" \ + ")" + +# }}} + +# }}} + + +# {{{ Initial loop nest constraint creation + +# {{{ process_loop_nest_specification + +def process_loop_nest_specification( + nesting, + max_tuple_size=None, + complement_sets_allowed=True, + ): + + # Ensure that user-supplied nesting conforms to syntax rules, and + # convert string representations of nestings to tuple of UnexpandedInameSets + + import re + + def _raise_loop_nest_input_error(msg): + valid_prio_rules = ( + "Valid `must_nest` description formats: " # noqa + "\"iname, iname, ...\" or (str, str, str, ...), " # noqa + "where str can be of form " # noqa + "\"iname\" or \"{iname, iname, ...}\". 
" # noqa + "No set complements allowed.\n" # noqa + "Valid `must_not_nest` description tuples must have length 2: " # noqa + "\"iname, iname\", \"iname, ~iname\", or " # noqa + "(str, str), where str can be of form " # noqa + "\"iname\", \"~iname\", \"{iname, iname, ...}\", or " # noqa + "\"~{iname, iname, ...}\"." # noqa + ) + raise ValueError( + "Invalid loop nest prioritization: %s\n" + "Loop nest prioritization formatting rules:\n%s" + % (msg, valid_prio_rules)) + + def _error_on_regex_match(match_str, target_str): + if re.findall(match_str, target_str): + _raise_loop_nest_input_error( + "Unrecognized character(s) %s in nest string %s" + % (re.findall(match_str, target_str), target_str)) + + def _process_iname_set_str(iname_set_str): + # Convert something like ~{i,j} or ~i or "i,j" to an UnexpandedInameSet + + # Remove leading/trailing whitespace + iname_set_str_stripped = iname_set_str.strip() + + if not iname_set_str_stripped: + _raise_loop_nest_input_error( + "Found 0 inames in string %s." + % (iname_set_str)) + + # Process complement sets + if iname_set_str_stripped[0] == "~": + # Make sure compelement is allowed + if not complement_sets_allowed: + _raise_loop_nest_input_error( + "Complement (~) not allowed in this loop nest string %s. " + "If you have a use-case where allowing a currently " + "disallowed set complement would be helpful, and the " + "desired nesting constraint cannot easily be expressed " + "another way, " + "please contact the Loo.py maintainers." 
+ % (iname_set_str)) + + # Remove tilde + iname_set_str_stripped = iname_set_str_stripped[1:] + if "~" in iname_set_str_stripped: + _raise_loop_nest_input_error( + "Multiple complement symbols found in iname set string %s" + % (iname_set_str)) + + # Make sure that braces are included if multiple inames present + if "," in iname_set_str_stripped and not ( + iname_set_str_stripped.startswith("{") and + iname_set_str_stripped.endswith("}")): + _raise_loop_nest_input_error( + "Complements of sets containing multiple inames must " + "enclose inames in braces: %s is not valid." + % (iname_set_str)) + + complement = True + else: + complement = False + + # Remove leading/trailing spaces + iname_set_str_stripped = iname_set_str_stripped.strip(" ") + + # Make sure braces are valid and strip them + if iname_set_str_stripped[0] == "{": + if not iname_set_str_stripped[-1] == "}": + _raise_loop_nest_input_error( + "Invalid braces: %s" % (iname_set_str)) + else: + # Remove enclosing braces + iname_set_str_stripped = iname_set_str_stripped[1:-1] + # (If there are dangling braces around, they will be caught next) + + # Remove any more spaces + iname_set_str_stripped = iname_set_str_stripped.strip() + + # Should be no remaining special characters besides comma and space + _error_on_regex_match(r"([^,\w ])", iname_set_str_stripped) + + # Split by commas or spaces to get inames + inames = re.findall(r"([\w]+)(?:[ |,]*|$)", iname_set_str_stripped) + + # Make sure iname count matches what we expect from comma count + if len(inames) != iname_set_str_stripped.count(",") + 1: + _raise_loop_nest_input_error( + "Found %d inames but expected %d in string %s." + % (len(inames), iname_set_str_stripped.count(",") + 1, + iname_set_str)) + + if len(inames) == 0: + _raise_loop_nest_input_error( + "Found empty set in string %s." 
+                % (iname_set_str))
+
+        # NOTE this won't catch certain cases of bad syntax, e.g., ("{h i j,,}", "k")
+
+        return UnexpandedInameSet(
+            set([s.strip() for s in iname_set_str_stripped.split(",")]),
+            complement=complement)
+
+    if isinstance(nesting, str):
+        # Enforce that constraints involving iname sets be passed as tuple.
+        # Iname sets defined negatively with a *single* iname are allowed here.
+
+        # Check for any special characters besides comma, space, and tilde.
+        # E.g., curly braces would indicate that an iname set was NOT
+        # passed as a tuple, which is not allowed.
+        _error_on_regex_match(r"([^,\w~ ])", nesting)
+
+        # Split by comma and process each tier
+        nesting_as_tuple = tuple(
+            _process_iname_set_str(set_str) for set_str in nesting.split(","))
+    else:
+        assert isinstance(nesting, (tuple, list))
+        # Process each tier
+        nesting_as_tuple = tuple(
+            _process_iname_set_str(set_str) for set_str in nesting)
+
+    # Check max_tuple_size
+    if max_tuple_size and len(nesting_as_tuple) > max_tuple_size:
+        _raise_loop_nest_input_error(
+            "Loop nest prioritization tuple %s exceeds max tuple size %d."
+            % (nesting_as_tuple, max_tuple_size))
+
+    # Make sure nesting has len > 1
+    if len(nesting_as_tuple) <= 1:
+        _raise_loop_nest_input_error(
+            "Loop nest prioritization tuple %s must have length > 1."
+            % (nesting_as_tuple))
+
+    # Return tuple of UnexpandedInameSets
+    return nesting_as_tuple
+
+# }}}
+
+
+# {{{ constrain_loop_nesting
+
+def constrain_loop_nesting(
+        kernel, must_nest=None, must_not_nest=None):
+    r"""Add the provided constraints to the kernel.
+
+    :arg must_nest: A tuple or comma-separated string representing
+        an ordering of loop nesting tiers that must appear in the
+        linearized kernel. Each item in the tuple represents a
+        :class:`UnexpandedInameSet`\ s.
+
+    :arg must_not_nest: A two-tuple or comma-separated string representing
+        an ordering of loop nesting tiers that must not appear in the
+        linearized kernel. 
Each item in the tuple represents a + :class:`UnexpandedInameSet`\ s. + """ + + # {{{ Get any current constraints, if they exist + if kernel.loop_nest_constraints: + if kernel.loop_nest_constraints.must_nest: + must_nest_constraints_old = kernel.loop_nest_constraints.must_nest + else: + must_nest_constraints_old = set() + + if kernel.loop_nest_constraints.must_not_nest: + must_not_nest_constraints_old = \ + kernel.loop_nest_constraints.must_not_nest + else: + must_not_nest_constraints_old = set() + + if kernel.loop_nest_constraints.must_nest_graph: + must_nest_graph_old = kernel.loop_nest_constraints.must_nest_graph + else: + must_nest_graph_old = {} + else: + must_nest_constraints_old = set() + must_not_nest_constraints_old = set() + must_nest_graph_old = {} + + # }}} + + # {{{ Process must_nest + + if must_nest: + # {{{ Parse must_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_nest (no complements allowed) + must_nest_tuple = process_loop_nest_specification( + must_nest, complement_sets_allowed=False) + # }}} + + # {{{ Error if someone prioritizes concurrent iname + + from loopy.kernel.data import ConcurrentTag + for iname_set in must_nest_tuple: + for iname in iname_set.inames: + if kernel.iname_tags_of_type(iname, ConcurrentTag): + raise ValueError( + "iname %s tagged with ConcurrentTag, " + "cannot use iname in must-nest constraint %s." 
+ % (iname, must_nest_tuple)) + + # }}} + + # {{{ Update must_nest graph (and check for cycles) + + must_nest_graph_new = update_must_nest_graph( + must_nest_graph_old, must_nest_tuple, kernel.all_inames()) + + # }}} + + # {{{ Make sure must_nest constraints don't violate must_not_nest + # (this may not catch all problems) + check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints_old, must_nest_graph_new) + # }}} + + # {{{ Check for conflicts with inames tagged 'vec' (must be innermost) + + from loopy.kernel.data import VectorizeTag + for iname in kernel.all_inames(): + if kernel.iname_tags_of_type(iname, VectorizeTag) and ( + must_nest_graph_new.get(iname, set())): + # Must-nest graph doesn't allow iname to be a leaf, error + raise ValueError( + "Iname %s tagged as 'vec', but loop nest constraints " + "%s require that iname %s nest outside of inames %s. " + "Vectorized inames must nest innermost; cannot " + "impose loop nest specification." + % (iname, must_nest, iname, + must_nest_graph_new.get(iname, set()))) + + # }}} + + # {{{ Add new must_nest constraints to existing must_nest constraints + must_nest_constraints_new = must_nest_constraints_old | set( + [must_nest_tuple, ]) + # }}} + + # }}} + else: + # {{{ No new must_nest constraints, just keep the old ones + + must_nest_constraints_new = must_nest_constraints_old + must_nest_graph_new = must_nest_graph_old + + # }}} + + # }}} + + # {{{ Process must_not_nest + + if must_not_nest: + # {{{ Parse must_not_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_not_nest; complements allowed; max_tuple_size=2 + + must_not_nest_tuple = process_loop_nest_specification( + must_not_nest, max_tuple_size=2) + + # }}} + + # {{{ Make sure must_not_nest constraints don't violate must_nest + + # (cycles are allowed in must_not_nest constraints) + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph_new.items(): + 
must_pairs.extend(list(itertools.product([iname_before], inames_after)))
+
+        if not check_must_not_nest(must_pairs, must_not_nest_tuple):
+            raise ValueError(
+                "constrain_loop_nesting: nest constraint conflict detected. "
+                "must_not_nest constraints %s inconsistent with "
+                "must_nest constraints %s."
+                % (must_not_nest_tuple, must_nest_constraints_new))
+
+        # }}}
+
+        # {{{ Add new must_not_nest constraints to existing must_not_nest constraints
+        must_not_nest_constraints_new = must_not_nest_constraints_old | set([
+            must_not_nest_tuple, ])
+        # }}}
+
+    # }}}
+    else:
+        # {{{ No new must_not_nest constraints, just keep the old ones
+
+        must_not_nest_constraints_new = must_not_nest_constraints_old
+
+        # }}}
+
+    # }}}
+
+    nest_constraints = LoopNestConstraints(
+        must_nest=must_nest_constraints_new,
+        must_not_nest=must_not_nest_constraints_new,
+        must_nest_graph=must_nest_graph_new,
+    )
+
+    return kernel.copy(loop_nest_constraints=nest_constraints)
+
+# }}}
+
+
+# {{{ update_must_nest_graph
+
+def update_must_nest_graph(must_nest_graph, must_nest, all_inames):
+    # Note: there should *not* be any complements in the must_nest tuples
+
+    from copy import deepcopy
+    new_graph = deepcopy(must_nest_graph)
+
+    # First, each iname must be a node in the graph
+    for missing_iname in all_inames - new_graph.keys():
+        new_graph[missing_iname] = set()
+
+    # Expand must_nest into (before, after) pairs
+    must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames)
+
+    # Update must_nest_graph with new pairs
+    for before, after in must_nest_expanded:
+        new_graph[before].add(after)
+
+    # Compute transitive closure
+    from pytools.graph import compute_transitive_closure, contains_cycle
+    new_graph_closure = compute_transitive_closure(new_graph)
+    # Note: compute_transitive_closure now allows cycles, will not error
+
+    # Check for inconsistent must_nest constraints by checking for cycle:
+    if contains_cycle(new_graph_closure):
+        raise ValueError(
+            "update_must_nest_graph: 
Nest constraint cycle detected. " + "must_nest constraints %s inconsistent with existing " + "must_nest constraints %s." + % (must_nest, must_nest_graph)) + + return new_graph_closure + +# }}} + + +# {{{ _expand_iname_sets_in_tuple + +def _expand_iname_sets_in_tuple( + iname_sets_tuple, + iname_universe=None, + ): + + # First convert UnexpandedInameSets to sets. + # Note that must_nest constraints cannot be negatively defined. + positively_defined_iname_sets = [ + iname_set.get_inames_represented(iname_universe) + for iname_set in iname_sets_tuple] + + # Now expand all priority tuples into (before, after) pairs using + # Cartesian product of all pairs of sets + # (Assumes prio_sets length > 1) + import itertools + loop_priority_pairs = set() + for i, before_set in enumerate(positively_defined_iname_sets[:-1]): + for after_set in positively_defined_iname_sets[i+1:]: + loop_priority_pairs.update( + list(itertools.product(before_set, after_set))) + + # Make sure no priority tuple contains an iname twice + for prio_tuple in loop_priority_pairs: + if len(set(prio_tuple)) != len(prio_tuple): + raise ValueError( + "Loop nesting %s contains cycle: %s. " + % (iname_sets_tuple, prio_tuple)) + + return loop_priority_pairs + +# }}} + +# }}} + + +# {{{ Checking constraints + +# {{{ check_must_nest + +def check_must_nest(all_loop_nests, must_nest, all_inames): + r"""Determine whether must_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. + + :arg must_nest: A tuple of :class:`UnexpandedInameSet`\ s describing + nestings that must appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must nest constraints + are satisfied by the provided loop nesting. 
+ """ + + # In order to make sure must_nest is satisfied, we + # need to expand all must_nest tiers + + # FIXME instead of expanding tiers into all pairs up front, + # create these pairs one at a time so that we can stop as soon as we fail + + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest) + + # must_nest_expanded contains pairs + for before, after in must_nest_expanded: + found = False + for nesting in all_loop_nests: + if before in nesting and after in nesting and ( + nesting.index(before) < nesting.index(after)): + found = True + break + if not found: + return False + return True + +# }}} + + +# {{{ check_must_not_nest + +def check_must_not_nest(all_loop_nests, must_not_nest): + r"""Determine whether must_not_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. + + :arg must_not_nest: A two-tuple of :class:`UnexpandedInameSet`\ s + describing nestings that must not appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must_not_nest constraints + are satisfied by the provided loop nesting. + """ + + # Note that must_not_nest may only contain two tiers + + for nesting in all_loop_nests: + + # Go through each pair in all_loop_nests + for i, iname_before in enumerate(nesting): + for iname_after in nesting[i+1:]: + + # Check whether it violates must not nest + if (must_not_nest[0].contains(iname_before) + and must_not_nest[1].contains(iname_after)): + # Stop as soon as we fail + return False + return True + +# }}} + + +# {{{ loop_nest_constraints_satisfied + +def loop_nest_constraints_satisfied( + all_loop_nests, + must_nest_constraints=None, + must_not_nest_constraints=None, + all_inames=None): + r"""Determine whether must_not_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A set of lists of inames, each representing + the nesting order of loops. 
+ + :arg must_nest_constraints: An iterable of tuples of + :class:`UnexpandedInameSet`\ s, each describing nestings that must + appear in all_loop_nests. + + :arg must_not_nest_constraints: An iterable of two-tuples of + :class:`UnexpandedInameSet`\ s, each describing nestings that must not + appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the constraints + are satisfied by the provided loop nesting. + """ + + # Check must-nest constraints + if must_nest_constraints: + for must_nest in must_nest_constraints: + if not check_must_nest( + all_loop_nests, must_nest, all_inames): + return False + + # Check must-not-nest constraints + if must_not_nest_constraints: + for must_not_nest in must_not_nest_constraints: + if not check_must_not_nest( + all_loop_nests, must_not_nest): + return False + + return True + +# }}} + + +# {{{ check_must_not_nest_against_must_nest_graph + +def check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints, must_nest_graph): + r"""Ensure none of the must_not_nest constraints are violated by + nestings represented in the must_nest_graph + + :arg must_not_nest_constraints: A set of two-tuples of + :class:`UnexpandedInameSet`\ s describing nestings that must not appear + in loop nestings. + + :arg must_nest_graph: A :class:`dict` mapping each iname to other inames + that must be nested inside it. + """ + + if must_not_nest_constraints and must_nest_graph: + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph.items(): + must_pairs.extend( + list(itertools.product([iname_before], inames_after))) + if any(not check_must_not_nest(must_pairs, must_not_nest_tuple) + for must_not_nest_tuple in must_not_nest_constraints): + raise ValueError( + "Nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest relationships (must_nest graph: %s)." 
+ % (must_not_nest_constraints, must_nest_graph)) + +# }}} + + +# {{{ get_iname_nestings + +def get_iname_nestings(linearization): + """Return a list of iname tuples representing the deepest loop nestings + in a kernel linearization. + """ + from loopy.schedule import EnterLoop, LeaveLoop + nestings = [] + current_tiers = [] + already_exiting_loops = False + for lin_item in linearization: + if isinstance(lin_item, EnterLoop): + already_exiting_loops = False + current_tiers.append(lin_item.iname) + elif isinstance(lin_item, LeaveLoop): + if not already_exiting_loops: + nestings.append(tuple(current_tiers)) + already_exiting_loops = True + del current_tiers[-1] + return nestings + +# }}} + +# }}} + +# }}} + + # {{{ split/chunk inames # {{{ backend diff --git a/setup.py b/setup.py index 89927f28c..0002e37cf 100644 --- a/setup.py +++ b/setup.py @@ -92,6 +92,7 @@ def write_git_revision(package_name): "codepy>=2017.1", "colorama", "Mako", + "treelib", ], extras_require={ diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py new file mode 100644 index 000000000..6bc1e8ef4 --- /dev/null +++ b/test/test_nest_constraints.py @@ -0,0 +1,376 @@ +__copyright__ = "Copyright (C) 2021 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import sys +import loopy as lp +import numpy as np +import pyopencl as cl + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests + +__all__ = [ + "pytest_generate_tests", + "cl" # "cl.create_some_context" + ] + + +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + + +# {{{ test_loop_constraint_string_parsing + +def test_loop_constraint_string_parsing(): + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k,xx]: 0<=g,h,i,j,k,xx 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: foldmethod=marker