diff --git a/loopy/__init__.py b/loopy/__init__.py index a73f83bb9..01bee01b0 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -69,7 +69,8 @@ from loopy.version import VERSION, MOST_RECENT_LANGUAGE_VERSION from loopy.transform.iname import ( - set_loop_priority, prioritize_loops, untag_inames, + set_loop_priority, prioritize_loops, constrain_loop_nesting, + untag_inames, split_iname, chunk_iname, join_inames, tag_inames, duplicate_inames, rename_iname, remove_unused_inames, split_reduction_inward, split_reduction_outward, @@ -194,7 +195,8 @@ # {{{ transforms - "set_loop_priority", "prioritize_loops", "untag_inames", + "set_loop_priority", "prioritize_loops", "constrain_loop_nesting", + "untag_inames", "split_iname", "chunk_iname", "join_inames", "tag_inames", "duplicate_inames", "rename_iname", "remove_unused_inames", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 2f39614b8..021712443 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -268,6 +268,7 @@ def __init__(self, domains, instructions, args=None, iname_slab_increments=None, loop_priority=frozenset(), + loop_nest_constraints=None, silenced_warnings=None, applied_iname_rewrites=None, @@ -380,6 +381,7 @@ def __init__(self, domains, instructions, args=None, assumptions=assumptions, iname_slab_increments=iname_slab_increments, loop_priority=loop_priority, + loop_nest_constraints=loop_nest_constraints, silenced_warnings=silenced_warnings, temporary_variables=temporary_variables, local_sizes=local_sizes, @@ -1543,6 +1545,7 @@ def __setstate__(self, state): "substitutions", "iname_slab_increments", "loop_priority", + "loop_nest_constraints", "silenced_warnings", "options", "state", diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 5822f44ed..229bd4d68 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -632,7 +632,7 @@ class SchedulerState(ImmutableRecord): """ @property - def last_entered_loop(self): + def 
deepest_active_iname(self): if self.active_inames: return self.active_inames[-1] else: @@ -1088,40 +1088,40 @@ def insn_sort_key(insn_id): # {{{ see if we're ready to leave the innermost loop - last_entered_loop = sched_state.last_entered_loop + deepest_active_iname = sched_state.deepest_active_iname - if last_entered_loop is not None: + if deepest_active_iname is not None: can_leave = True if ( - last_entered_loop in sched_state.prescheduled_inames + deepest_active_iname in sched_state.prescheduled_inames and not ( isinstance(next_preschedule_item, LeaveLoop) - and next_preschedule_item.iname == last_entered_loop)): + and next_preschedule_item.iname == deepest_active_iname)): # A prescheduled loop can only be left if the preschedule agrees. if debug_mode: print("cannot leave '%s' because of preschedule constraints" - % last_entered_loop) + % deepest_active_iname) can_leave = False - elif last_entered_loop not in sched_state.breakable_inames: + elif deepest_active_iname not in sched_state.breakable_inames: # If the iname is not breakable, then check that we've # scheduled all the instructions that require it. for insn_id in sched_state.unscheduled_insn_ids: insn = kernel.id_to_insn[insn_id] - if last_entered_loop in insn.within_inames: + if deepest_active_iname in insn.within_inames: if debug_mode: print("cannot leave '%s' because '%s' still depends on it" - % (last_entered_loop, format_insn(kernel, insn.id))) + % (deepest_active_iname, format_insn(kernel, insn.id))) # check if there's a dependency of insn that needs to be - # outside of last_entered_loop. + # outside of deepest_active_iname. 
for subdep_id in gen_dependencies_except(kernel, insn_id, sched_state.scheduled_insn_ids): want = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( - last_entered_loop not in want): + deepest_active_iname not in want): print( "%(warn)swarning:%(reset_all)s '%(iname)s', " "which the schedule is " @@ -1135,7 +1135,7 @@ def insn_sort_key(insn_id): % { "warn": Fore.RED + Style.BRIGHT, "reset_all": Style.RESET_ALL, - "iname": last_entered_loop, + "iname": deepest_active_iname, "subdep": format_insn_id(kernel, subdep_id), "dep": format_insn_id(kernel, insn_id), "subdep_i": format_insn(kernel, subdep_id), @@ -1162,23 +1162,72 @@ def insn_sort_key(insn_id): if ignore_count: ignore_count -= 1 else: - assert sched_item.iname == last_entered_loop + assert sched_item.iname == deepest_active_iname if seen_an_insn: can_leave = True break + # {{{ Don't leave if doing so would violate must_nest constraints + + # Don't leave if must_nest constraints require that + # additional inames be nested inside the current iname. + # (Check for these inames in the must_nest graph.) + if can_leave: + must_nest_graph = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if sched_state.kernel.loop_nest_constraints else None) + + if must_nest_graph: + + # Get inames that must nest inside the current iname + must_nest_inside = must_nest_graph[deepest_active_iname] + + if must_nest_inside: + # {{{ Get inames that are scheduled inside current iname + + # Iterate through already scheduled loop inames until + # we find deepest_active_iname, then create the set of + # inames that are already scheduled inside it. 
+ already_nested_inside = set() + + # Switch to flip when we encounter deepest_active_iname + within_deepest_active_iname = False + + for sched_item in sched_state.schedule: + if isinstance(sched_item, EnterLoop): + if within_deepest_active_iname: + # Found iname nested inside deepest_active_iname + already_nested_inside.add(sched_item.iname) + elif sched_item.iname == deepest_active_iname: + # Found deepest_active_iname + within_deepest_active_iname = True + elif (isinstance(sched_item, LeaveLoop) and + sched_item.iname == deepest_active_iname): + # We're leaving deepest_active_iname, and have + # found all deeper inames + break + + # }}} + + # Don't leave if must_nest constraints require that + # additional inames be nested inside the current iname + if not must_nest_inside.issubset(already_nested_inside): + can_leave = False + + # }}} + if can_leave and not debug_mode: for sub_sched in generate_loop_schedules_internal( sched_state.copy( schedule=( sched_state.schedule - + (LeaveLoop(iname=last_entered_loop),)), + + (LeaveLoop(iname=deepest_active_iname),)), active_inames=sched_state.active_inames[:-1], insn_ids_to_try=insn_ids_to_try, preschedule=( sched_state.preschedule - if last_entered_loop + if deepest_active_iname not in sched_state.prescheduled_inames else sched_state.preschedule[1:]), ), @@ -1316,72 +1365,152 @@ def insn_sort_key(insn_id): # {{{ tier building - # Build priority tiers. If a schedule is found in the first tier, then - # loops in the second are not even tried (and so on). 
- loop_priority_set = set().union(*[set(prio) - for prio in - sched_state.kernel.loop_priority]) - useful_loops_set = set(iname_to_usefulness.keys()) - useful_and_desired = useful_loops_set & loop_priority_set - - if useful_and_desired: - wanted = ( - useful_and_desired - - sched_state.ilp_inames - - sched_state.vec_inames - ) - priority_tiers = [t for t in - get_priority_tiers(wanted, - sched_state.kernel.loop_priority - ) - ] + # Keys of iname_to_usefulness are now inames that get us closer to + # scheduling an insn + + if sched_state.kernel.loop_nest_constraints: + + # {{{ Use loop_nest_constraints in determining next_iname_candidates + # (ensure that candidates don't violate nest constraints) - # Update the loop priority set, because some constraints may have - # have been contradictary. - loop_priority_set = set().union(*[set(t) for t in priority_tiers]) + # Inames not yet entered that would get us closer to scheduling an insn: + useful_loops_set = set(iname_to_usefulness.keys()) - priority_tiers.append( + from loopy.transform.iname import ( + loop_nest_constraints_satisfied, + get_graph_sources, + ) + from pytools.graph import compute_induced_subgraph + + # Since vec_inames must be innermost, + # they are not valid canidates unless only vec_inames remain + if useful_loops_set - sched_state.vec_inames: + useful_loops_set -= sched_state.vec_inames + + # {{{ Remove iname candidates that would violate must_nest + + # To enter an iname without violating must_nest constraints, + # iname must be a source in the induced subgraph of must_nest_graph + # containing inames in useful_loops_set (graph has a key for every + # iname; inames without children are still sources) + complete_must_nest_graph = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if sched_state.kernel.loop_nest_constraints else None) + if complete_must_nest_graph: + must_nest_graph_useful = compute_induced_subgraph( + complete_must_nest_graph, useful_loops_set - - loop_priority_set - - 
sched_state.ilp_inames - - sched_state.vec_inames ) + source_inames = get_graph_sources(must_nest_graph_useful) + else: + # No must_nest constraints were provided, all inames are + # childless sources in the non-existant must_nest graph + source_inames = useful_loops_set + + # }}} + + # {{{ Remove iname candidates that would violate must_not_nest + + must_not_nest_constraints = ( + sched_state.kernel.loop_nest_constraints.must_not_nest + if sched_state.kernel.loop_nest_constraints else None) + if must_not_nest_constraints: + next_iname_candidates = set() + for next_iname in source_inames: + iname_orders_to_check = [ + (active_iname, next_iname) + for active_iname in active_inames_set] + + if loop_nest_constraints_satisfied( + iname_orders_to_check, + must_not_nest_constraints=must_not_nest_constraints): + next_iname_candidates.add(next_iname) + else: + # No must_not_nest constraints were provided + next_iname_candidates = source_inames + + # }}} + + # }}} + else: - priority_tiers = [ - useful_loops_set + + # {{{ Old tier building with loop_priority + + # Build priority tiers. If a schedule is found in the first tier, then + # loops in the second are not even tried (and so on). + loop_priority_set = set().union(*[set(prio) + for prio in + sched_state.kernel.loop_priority]) + useful_loops_set = set(iname_to_usefulness.keys()) + useful_and_desired = useful_loops_set & loop_priority_set + + if useful_and_desired: + wanted = ( + useful_and_desired - sched_state.ilp_inames - sched_state.vec_inames - ] - - # vectorization must be the absolute innermost loop - priority_tiers.extend([ - [iname] - for iname in sched_state.ilp_inames - if iname in useful_loops_set - ]) + ) + priority_tiers = [t for t in + get_priority_tiers(wanted, + sched_state.kernel.loop_priority + ) + ] + + # Update the loop priority set, because some constraints may have + # have been contradictary. 
+ loop_priority_set = set().union(*[set(t) for t in priority_tiers]) + + priority_tiers.append( + useful_loops_set + - loop_priority_set + - sched_state.ilp_inames + - sched_state.vec_inames + ) + else: + priority_tiers = [ + useful_loops_set + - sched_state.ilp_inames + - sched_state.vec_inames + ] + + # vectorization must be the absolute innermost loop + priority_tiers.extend([ + [iname] + for iname in sched_state.ilp_inames + if iname in useful_loops_set + ]) + + priority_tiers.extend([ + [iname] + for iname in sched_state.vec_inames + if iname in useful_loops_set + ]) - priority_tiers.extend([ - [iname] - for iname in sched_state.vec_inames - if iname in useful_loops_set - ]) + # }}} # }}} - if debug_mode: - print("useful inames: %s" % ",".join(useful_loops_set)) - else: - for tier in priority_tiers: + if sched_state.kernel.loop_nest_constraints: + # {{{ Enter inames in next_iname_candidates + # (which were curtailed by nest constraints) + + if debug_mode: + print("useful inames: %s" % ",".join(useful_loops_set)) + else: found_viable_schedule = False - for iname in sorted(tier, + # Loop over iname candidates; enter inames and recurse + + # Sort by iname to achieve deterministic ordering of generated + # schedules + for iname in sorted(next_iname_candidates, key=lambda iname: ( iname_to_usefulness.get(iname, 0), - # Sort by iname to achieve deterministic - # ordering of generated schedules. 
iname), reverse=True): + # Enter the loop and recurse for sub_sched in generate_loop_schedules_internal( sched_state.copy( schedule=( @@ -1395,16 +1524,61 @@ def insn_sort_key(insn_id): insn_ids_to_try=insn_ids_to_try, preschedule=( sched_state.preschedule - if iname not in sched_state.prescheduled_inames + if iname not in + sched_state.prescheduled_inames else sched_state.preschedule[1:]), ), debug=debug): + found_viable_schedule = True yield sub_sched if found_viable_schedule: return + # }}} + else: + # {{{ Old looping over tiers (ignores nest constraints) + + if debug_mode: + print("useful inames: %s" % ",".join(useful_loops_set)) + else: + for tier in priority_tiers: + found_viable_schedule = False + + for iname in sorted(tier, + key=lambda iname: ( + iname_to_usefulness.get(iname, 0), + # Sort by iname to achieve deterministic + # ordering of generated schedules. + iname), + reverse=True): + + for sub_sched in generate_loop_schedules_internal( + sched_state.copy( + schedule=( + sched_state.schedule + + (EnterLoop(iname=iname),)), + active_inames=( + sched_state.active_inames + (iname,)), + entered_inames=( + sched_state.entered_inames + | frozenset((iname,))), + insn_ids_to_try=insn_ids_to_try, + preschedule=( + sched_state.preschedule + if iname not in + sched_state.prescheduled_inames + else sched_state.preschedule[1:]), + ), + debug=debug): + found_viable_schedule = True + yield sub_sched + + if found_viable_schedule: + return + # }}} + # }}} if debug_mode: @@ -1415,10 +1589,31 @@ def insn_sort_key(insn_id): if inp: raise ScheduleDebugInputError(inp) + # {{{ Make sure ALL must_nest_constraints are satisfied + + # (The check above avoids entering loops that would contradict must_nest + # constraints, but we don't know if all required nestings are present) + must_constraints_satisfied = True + if sched_state.kernel.loop_nest_constraints: + must_nest_constraints = sched_state.kernel.loop_nest_constraints.must_nest + if must_nest_constraints: + from 
loopy.transform.iname import ( + get_iname_nestings, + loop_nest_constraints_satisfied, + ) + sched_tiers = get_iname_nestings(sched_state.schedule) + must_constraints_satisfied = loop_nest_constraints_satisfied( + sched_tiers, must_nest_constraints, + must_not_nest_constraints=None, # (checked upon loop creation) + ) + + # }}} + if ( not sched_state.active_inames and not sched_state.unscheduled_insn_ids - and not sched_state.preschedule): + and not sched_state.preschedule + and must_constraints_satisfied): # if done, yield result debug.log_success(sched_state.schedule) @@ -2133,7 +2328,7 @@ def print_longest_dead_end(): key_builder=LoopyKeyBuilder()) -def _get_one_scheduled_kernel_inner(kernel, callables_table): +def _get_one_scheduled_kernel_inner(kernel, callables_table, debug_args=None): # This helper function exists to ensure that the generator chain is fully # out of scope after the function returns. This allows it to be # garbage-collected in the exit handler of the @@ -2143,7 +2338,8 @@ def _get_one_scheduled_kernel_inner(kernel, callables_table): # # See https://gitlab.tiker.net/inducer/sumpy/issues/31 for context. 
class UnexpandedInameSet(Record):
    """A set of inames that may be defined positively or as a complement.

    .. attribute:: inames

        The explicitly listed inames (a :class:`set` of :class:`str`).

    .. attribute:: complement

        If *True*, the object stands for every iname that is *not* in
        :attr:`inames`; expanding it then requires an iname universe.
    """

    def __init__(self, inames, complement=False):
        Record.__init__(
            self,
            inames=inames,
            complement=complement,
            )

    def contains(self, inames):
        """Return whether *inames* is covered by this iname set.

        :arg inames: Either a single iname, or a :class:`set` /
            :class:`frozenset` of inames. For a collection, *every* member
            must be covered.
        """
        # frozenset is accepted alongside set: otherwise a frozenset would be
        # treated as one (hashable) element and silently never match.
        if isinstance(inames, (set, frozenset)):
            if self.complement:
                return not (inames & self.inames)
            return inames.issubset(self.inames)

        if self.complement:
            return inames not in self.inames
        return inames in self.inames

    def get_inames_represented(self, iname_universe=None):
        """Return the set of inames represented by the UnexpandedInameSet.

        :arg iname_universe: The set of all inames; required (and non-empty)
            when this set is a complement.
        :raises ValueError: if this is a complement and no universe is given.
        """
        if self.complement:
            if not iname_universe:
                raise ValueError(
                    "Cannot expand UnexpandedInameSet %s without "
                    "iname_universe." % (self))
            return iname_universe-self.inames
        else:
            return self.inames.copy()

    def __lt__(self, other):
        # FIXME is this function really necessary? (for caching?)
        # If so, what should it return?
        # The resulting order is arbitrary; it is only a consistent
        # tie-breaker within one process.
        return self.__hash__() < other.__hash__()

    def __hash__(self):
        # Hash the *contents* rather than repr(self): the repr of a set
        # depends on insertion order, so two equal instances could otherwise
        # hash differently.
        # NOTE(review): assumes Record.__eq__ compares field values - confirm.
        return hash((frozenset(self.inames), self.complement))

    def update_persistent_hash(self, key_hash, key_builder):
        """Custom hash computation function for use with
        :class:`pytools.persistent_dict.PersistentDict`.
        """

        key_builder.rec(key_hash, self.inames)
        key_builder.rec(key_hash, self.complement)

    def __str__(self):
        return "%s{%s}" % ("~" if self.complement else "",
                           ",".join(sorted(self.inames)))
def process_loop_nest_specification(
        nesting,
        max_tuple_size=None,
        complement_sets_allowed=True,
        ):
    """Validate a user-supplied loop nesting and convert it to a tuple of
    :class:`UnexpandedInameSet` objects, one per nesting tier.

    :arg nesting: Either a comma-separated string of tiers (each tier a single
        iname, optionally prefixed by ``~``), or a tuple/list of strings where
        each string is ``"iname"``, ``"~iname"``, ``"{iname, iname, ...}"`` or
        ``"~{iname, iname, ...}"``.
    :arg max_tuple_size: If given (and non-zero), the maximum number of tiers.
    :arg complement_sets_allowed: Whether ``~`` (set complement) may be used.

    :returns: A tuple of :class:`UnexpandedInameSet` with length > 1.
    :raises ValueError: if *nesting* does not conform to the syntax rules.
    """

    import re

    def _raise_loop_nest_input_error(msg):
        valid_prio_rules = (
            "Valid `must_nest` description formats: "
            "\"iname, iname, ...\" or (str, str, str, ...), "
            "where str can be of form "
            "\"iname\" or \"{iname, iname, ...}\". "
            "No set complements allowed.\n"
            "Valid `must_not_nest` description tuples must have length 2: "
            "\"iname, iname\", \"iname, ~iname\", or "
            "(str, str), where str can be of form "
            "\"iname\", \"~iname\", \"{iname, iname, ...}\", or "
            "\"~{iname, iname, ...}\"."
            )
        raise ValueError(
                "Invalid loop nest prioritization: %s\n"
                "Loop nest prioritization formatting rules:\n%s"
                % (msg, valid_prio_rules))

    def _error_on_regex_match(match_str, target_str):
        offending = re.findall(match_str, target_str)
        if offending:
            _raise_loop_nest_input_error(
                "Unrecognized character(s) %s in nest string %s"
                % (offending, target_str))

    def _process_iname_set_str(iname_set_str):
        # Convert something like ~{i,j} or ~i or "i,j" to an UnexpandedInameSet

        iname_set_str_stripped = iname_set_str.strip()

        if not iname_set_str_stripped:
            _raise_loop_nest_input_error(
                "Found 0 inames in string %s."
                % (iname_set_str))

        # Process complement sets
        complement = iname_set_str_stripped.startswith("~")
        if complement:
            if not complement_sets_allowed:
                _raise_loop_nest_input_error(
                    "Complement (~) not allowed in this loop nest string %s. "
                    "If you have a use-case where allowing a currently "
                    "disallowed set complement would be helpful, and the "
                    "desired nesting constraint cannot easily be expressed "
                    "another way, "
                    "please contact the Loo.py maintainers."
                    % (iname_set_str))

            # Remove tilde
            iname_set_str_stripped = iname_set_str_stripped[1:]
            if "~" in iname_set_str_stripped:
                _raise_loop_nest_input_error(
                    "Multiple complement symbols found in iname set string %s"
                    % (iname_set_str))

            # Make sure that braces are included if multiple inames present
            if "," in iname_set_str_stripped and not (
                    iname_set_str_stripped.startswith("{") and
                    iname_set_str_stripped.endswith("}")):
                _raise_loop_nest_input_error(
                    "Complements of sets containing multiple inames must "
                    "enclose inames in braces: %s is not valid."
                    % (iname_set_str))

        # Remove leading/trailing spaces
        iname_set_str_stripped = iname_set_str_stripped.strip(" ")

        # Make sure braces are valid and strip them. startswith() rather than
        # indexing: the string is empty here for the input "~", and indexing
        # would raise IndexError instead of a proper input error.
        if iname_set_str_stripped.startswith("{"):
            if not iname_set_str_stripped.endswith("}"):
                _raise_loop_nest_input_error(
                    "Invalid braces: %s" % (iname_set_str))
            # Remove enclosing braces
            iname_set_str_stripped = iname_set_str_stripped[1:-1]
        # (If there are dangling braces around, they will be caught next)

        iname_set_str_stripped = iname_set_str_stripped.strip()

        # Should be no remaining special characters besides comma and space
        _error_on_regex_match(r"([^,\w ])", iname_set_str_stripped)

        # Count the inames and compare against the comma count
        inames = re.findall(r"([\w]+)(?:[ |,]*|$)", iname_set_str_stripped)
        expected_count = iname_set_str_stripped.count(",") + 1
        if len(inames) != expected_count:
            _raise_loop_nest_input_error(
                "Found %d inames but expected %d in string %s."
                % (len(inames), expected_count, iname_set_str))

        # The count check alone is fooled by, e.g., "{h i j,,}" (three words,
        # two commas), so also require every comma-separated piece to be
        # exactly one iname.
        iname_pieces = [
            piece.strip() for piece in iname_set_str_stripped.split(",")]
        for piece in iname_pieces:
            if not re.fullmatch(r"\w+", piece):
                _raise_loop_nest_input_error(
                    "Invalid iname '%s' in string %s."
                    % (piece, iname_set_str))

        return UnexpandedInameSet(set(iname_pieces), complement=complement)

    if isinstance(nesting, str):
        # Enforce that constraints involving iname sets be passed as tuple.
        # Iname sets defined negatively with a *single* iname are allowed here.

        # Check for any special characters besides comma, space, and tilde.
        # E.g., curly braces would indicate that an iname set was NOT
        # passed as a tuple, which is not allowed.
        _error_on_regex_match(r"([^,\w~ ])", nesting)

        # Split by comma and process each tier
        nesting_as_tuple = tuple(
            _process_iname_set_str(set_str) for set_str in nesting.split(","))
    elif isinstance(nesting, (tuple, list)):
        # Process each tier
        nesting_as_tuple = tuple(
            _process_iname_set_str(set_str) for set_str in nesting)
    else:
        # Raised explicitly (not asserted) so that the check survives -O.
        _raise_loop_nest_input_error(
            "Loop nest specification must be a str, tuple, or list, got %s."
            % (type(nesting).__name__))

    # Check max_tuple_size
    if max_tuple_size and len(nesting_as_tuple) > max_tuple_size:
        # Both values are passed explicitly in a tuple; formatting with the
        # bare nesting tuple would unpack it as the argument list.
        _raise_loop_nest_input_error(
            "Loop nest prioritization tuple %s exceeds max tuple size %d."
            % (nesting_as_tuple, max_tuple_size))

    # Make sure nesting has len > 1
    if len(nesting_as_tuple) <= 1:
        _raise_loop_nest_input_error(
            "Loop nest prioritization tuple %s must have length > 1."
            % (nesting_as_tuple,))

    # Return tuple of UnexpandedInameSets
    return nesting_as_tuple
+ """ + + # {{{ Get any current constraints, if they exist + if kernel.loop_nest_constraints: + if kernel.loop_nest_constraints.must_nest: + must_nest_constraints_old = kernel.loop_nest_constraints.must_nest + else: + must_nest_constraints_old = set() + + if kernel.loop_nest_constraints.must_not_nest: + must_not_nest_constraints_old = \ + kernel.loop_nest_constraints.must_not_nest + else: + must_not_nest_constraints_old = set() + + if kernel.loop_nest_constraints.must_nest_graph: + must_nest_graph_old = kernel.loop_nest_constraints.must_nest_graph + else: + must_nest_graph_old = {} + else: + must_nest_constraints_old = set() + must_not_nest_constraints_old = set() + must_nest_graph_old = {} + + # }}} + + # {{{ Process must_nest + + if must_nest: + # {{{ Parse must_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_nest (no complements allowed) + must_nest_tuple = process_loop_nest_specification( + must_nest, complement_sets_allowed=False) + # }}} + + # {{{ Error if someone prioritizes concurrent iname + + from loopy.kernel.data import ConcurrentTag + for iname_set in must_nest_tuple: + for iname in iname_set.inames: + if kernel.iname_tags_of_type(iname, ConcurrentTag): + raise ValueError( + "iname %s tagged with ConcurrentTag, " + "cannot use iname in must-nest constraint %s." 
+ % (iname, must_nest_tuple)) + + # }}} + + # {{{ Update must_nest graph (and check for cycles) + + must_nest_graph_new = add_to_must_nest_graph( + must_nest_graph_old, must_nest_tuple, kernel.all_inames()) + + # }}} + + # {{{ Make sure must_nest constraints don't violate must_not_nest + # (this may not catch all problems) + check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints_old, must_nest_graph_new) + # }}} + + # {{{ Check for conflicts with inames tagged 'vec' (must be innermost) + + from loopy.kernel.data import VectorizeTag + for iname in kernel.all_inames(): + if kernel.iname_tags_of_type(iname, VectorizeTag) and ( + must_nest_graph_new.get(iname, set())): + # Must-nest graph doesn't allow iname to be a leaf, error + raise ValueError( + "Iname %s tagged as 'vec', but loop nest constraints " + "%s require that iname %s nest outside of inames %s. " + "Vectorized inames must nest innermost; cannot " + "impose loop nest specification." + % (iname, must_nest, iname, + must_nest_graph_new.get(iname, set()))) + + # }}} + + # {{{ Add new must_nest constraints to existing must_nest constraints + must_nest_constraints_new = must_nest_constraints_old | set( + [must_nest_tuple, ]) + # }}} + + # }}} + else: + # {{{ No new must_nest constraints, just keep the old ones + + must_nest_constraints_new = must_nest_constraints_old + must_nest_graph_new = must_nest_graph_old + + # }}} + + # }}} + + # {{{ Process must_not_nest + + if must_not_nest: + # {{{ Parse must_not_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_not_nest; complements allowed; max_tuple_size=2 + + must_not_nest_tuple = process_loop_nest_specification( + must_not_nest, max_tuple_size=2) + + # }}} + + # {{{ Make sure must_not_nest constraints don't violate must_nest + + # (cycles are allowed in must_not_nest constraints) + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph_new.items(): + 
must_pairs.extend(list(itertools.product([iname_before], inames_after))) + + if not check_must_not_nest(must_pairs, must_not_nest_tuple): + raise ValueError( + "constrain_loop_nesting: nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest constraints %s." + % (must_not_nest_tuple, must_nest_constraints_new)) + + # }}} + + # {{{ Add new must_not_nest constraints to exisitng must_not_nest constraints + must_not_nest_constraints_new = must_not_nest_constraints_old | set([ + must_not_nest_tuple, ]) + # }}} + + # }}} + else: + # {{{ No new must_not_nest constraints, just keep the old ones + + must_not_nest_constraints_new = must_not_nest_constraints_old + + # }}} + + # }}} + + nest_constraints = LoopNestConstraints( + must_nest=must_nest_constraints_new, + must_not_nest=must_not_nest_constraints_new, + must_nest_graph=must_nest_graph_new, + ) + + return kernel.copy(loop_nest_constraints=nest_constraints) + +# }}} + + +# {{{ add_to_must_nest_graph + +def add_to_must_nest_graph(must_nest_graph, new_must_nest, all_inames): + # Note: there should not be any complements in the new_must_nest tuples + + from copy import deepcopy + new_graph = deepcopy(must_nest_graph) + + # First, each iname must be a node in the graph + for missing_iname in all_inames - new_graph.keys(): + new_graph[missing_iname] = set() + + # Expand new_must_nest into (before, after) pairs + must_nest_expanded = _expand_iname_sets_in_tuple(new_must_nest, all_inames) + + # Update must_nest_graph with new pairs + for before, after in must_nest_expanded: + new_graph[before].add(after) + + # Compute transitive closure + from pytools.graph import compute_transitive_closure, contains_cycle + new_graph_closure = compute_transitive_closure(new_graph) + # Note: compute_transitive_closure now allows cycles, will not error + + # Check for inconsistent must_nest constraints by checking for cycle: + if contains_cycle(new_graph_closure): + raise ValueError( + 
"add_to_must_nest_graph: Nest constraint cycle detected. " + "must_nest constraints %s inconsistent with existing " + "must_nest constraints %s." + % (new_must_nest, must_nest_graph)) + + return new_graph_closure + +# }}} + + +# {{{ _expand_iname_sets_in_tuple + +def _expand_iname_sets_in_tuple( + iname_sets_tuple, + iname_universe=None, + ): + + # First convert UnexpandedInameSets to sets. + # Note that must_nest constraints cannot be negatively defined. + positively_defined_iname_sets = [ + iname_set.get_inames_represented(iname_universe) + for iname_set in iname_sets_tuple] + + # Now expand all priority tuples into (before, after) pairs using + # Cartesian product of all pairs of sets + # (Assumes prio_sets length > 1, which is enforced elsewhere) + import itertools + loop_priority_pairs = set() + for i, before_set in enumerate(positively_defined_iname_sets[:-1]): + for after_set in positively_defined_iname_sets[i+1:]: + loop_priority_pairs.update( + list(itertools.product(before_set, after_set))) + + # Make sure no priority tuple contains an iname twice (cycle) + for prio_tuple in loop_priority_pairs: + if len(set(prio_tuple)) != len(prio_tuple): + raise ValueError( + "Loop nesting %s contains cycle: %s. " + % (iname_sets_tuple, prio_tuple)) + + return loop_priority_pairs + +# }}} + +# }}} + + +# {{{ Checking constraints + +# {{{ check_must_nest + +def check_must_nest(all_loop_nests, must_nest): + r"""Determine whether must_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A sequence of sequences of inames, each representing + the order of nested loops. + + :arg must_nest: A tuple of :class:`UnexpandedInameSet`\ s describing + nestings that must appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must nest constraints + are satisfied by the provided loop nesting. 
# {{{ check_must_not_nest

def check_must_not_nest(all_loop_nests, must_not_nest):
    r"""Report whether *all_loop_nests* respects a must_not_nest constraint.

    :arg all_loop_nests: A sequence of sequences of inames, each representing
        the order of nested loops.

    :arg must_not_nest: A two-tuple of :class:`UnexpandedInameSet`\ s
        describing nestings that must not appear in all_loop_nests. Only the
        first two tiers are consulted.

    :returns: ``False`` as soon as some nesting places an iname contained in
        the first tier outside of an iname contained in the second tier,
        ``True`` otherwise.
    """
    from itertools import combinations

    # combinations(nesting, 2) yields every (outer, inner) pair in nesting
    # order, i.e. outer always appears before inner in the loop nest.
    return not any(
        must_not_nest[0].contains(outer_iname)
        and must_not_nest[1].contains(inner_iname)
        for nesting in all_loop_nests
        for outer_iname, inner_iname in combinations(nesting, 2))

# }}}
# {{{ loop_nest_constraints_satisfied

def loop_nest_constraints_satisfied(
        all_loop_nests,
        must_nest_constraints=None,
        must_not_nest_constraints=None,
        ):
    r"""Report whether *all_loop_nests* satisfies every given constraint.

    :arg all_loop_nests: A sequence of sequences of inames, each representing
        the order of nested loops.

    :arg must_nest_constraints: An iterable of tuples of
        :class:`UnexpandedInameSet`\ s, each describing nestings that must
        appear in all_loop_nests. Skipped when empty or *None*.

    :arg must_not_nest_constraints: An iterable of two-tuples of
        :class:`UnexpandedInameSet`\ s, each describing nestings that must not
        appear in all_loop_nests. Skipped when empty or *None*.

    :returns: A :class:`bool`, *True* only if no constraint is violated.
    """

    # Must-nest constraints are examined first; stop at the first failure
    must_nest_ok = all(
        check_must_nest(all_loop_nests, must_nest)
        for must_nest in must_nest_constraints or ())
    if not must_nest_ok:
        return False

    # Then the must-not-nest constraints, again stopping at the first failure
    return all(
        check_must_not_nest(all_loop_nests, must_not_nest)
        for must_not_nest in must_not_nest_constraints or ())

# }}}
# {{{ check_must_not_nest_against_must_nest_graph

def check_must_not_nest_against_must_nest_graph(
        must_not_nest_constraints, must_nest_graph):
    r"""Raise if a must_not_nest constraint contradicts the must_nest_graph.

    :arg must_not_nest_constraints: A set of two-tuples of
        :class:`UnexpandedInameSet`\ s describing nestings that must not appear
        in loop nestings.

    :arg must_nest_graph: A :class:`dict` mapping each iname to other inames
        that must be nested inside it.

    :raises ValueError: If any edge of the graph, viewed as a two-iname
        nesting, violates one of the must_not_nest constraints.

    Nothing is checked when either argument is empty or *None*.
    """

    if not (must_not_nest_constraints and must_nest_graph):
        return

    # Every graph edge is a required (outer, inner) nesting of two inames
    must_pairs = [
        (iname_before, iname_after)
        for iname_before, inames_after in must_nest_graph.items()
        for iname_after in inames_after]

    for must_not_nest_tuple in must_not_nest_constraints:
        if not check_must_not_nest(must_pairs, must_not_nest_tuple):
            raise ValueError(
                "Nest constraint conflict detected. "
                "must_not_nest constraints %s inconsistent with "
                "must_nest relationships (must_nest graph: %s)."
                % (must_not_nest_constraints, must_nest_graph))

# }}}


# {{{ get_iname_nestings

def get_iname_nestings(linearization):
    """Collect the deepest loop nestings found in a kernel linearization.

    :arg linearization: An iterable of linearization items; only
        ``EnterLoop`` and ``LeaveLoop`` items are inspected.

    :returns: A list of tuples of inames. Each tuple holds the inames of the
        loops that are open at the moment a loop is left directly after
        entering one (or after the start), ordered outermost first.
    """
    from loopy.schedule import EnterLoop, LeaveLoop

    nestings = []
    open_loops = []
    # True while consecutive loops are being closed without a new one opened
    unwinding = False

    for lin_item in linearization:
        if isinstance(lin_item, EnterLoop):
            open_loops.append(lin_item.iname)
            unwinding = False
        elif isinstance(lin_item, LeaveLoop):
            # Only the first LeaveLoop of a run marks a deepest nesting
            if not unwinding:
                nestings.append(tuple(open_loops))
            unwinding = True
            open_loops.pop()

    return nestings

# }}}
# {{{ get_graph_sources

def get_graph_sources(graph):
    """Return the set of nodes of *graph* that no node points to.

    :arg graph: A :class:`dict` mapping each node to the set of its
        successor nodes.

    :returns: A :class:`set` of the keys of *graph* that do not appear in any
        of its values.
    """
    pointed_to = set().union(*graph.values())
    return {node for node in graph if node not in pointed_to}

# }}}

# }}}

# }}}
# {{{ Helper functions

def _process_and_linearize(prog, knl_name="loopy_kernel"):
    """Preprocess *prog* and return one linearization of a kernel in it.

    :arg prog: The object handed to ``preprocess_kernel``.

    :arg knl_name: Name used to look up the kernel in the preprocessed
        result.

    :returns: Whatever ``get_one_linearized_kernel`` returns for that kernel
        and the preprocessed ``callables_table``. Only this linearized kernel
        is returned, not the preprocessed program.
    """
    preprocessed = preprocess_kernel(prog)
    return get_one_linearized_kernel(
        preprocessed[knl_name], preprocessed.callables_table)

# }}}