diff --git a/mathics/core/atoms/strings.py b/mathics/core/atoms/strings.py index efce92df0..5a0ea9974 100644 --- a/mathics/core/atoms/strings.py +++ b/mathics/core/atoms/strings.py @@ -9,7 +9,7 @@ import sympy from mathics.core.element import BoxElementMixin -from mathics.core.keycomparable import BASIC_ATOM_STRING_ELT_ORDER +from mathics.core.keycomparable import BASIC_ATOM_STRING_ELT_ORDER, wma_str_sort_key from mathics.core.symbols import Atom, Symbol, SymbolFalse, SymbolTrue, symbol_set from mathics.core.systemsymbols import SymbolFullForm, SymbolInputForm @@ -70,7 +70,7 @@ def element_order(self) -> tuple: """ return ( BASIC_ATOM_STRING_ELT_ORDER, - self.value, + wma_str_sort_key(self.value), 0, 1, ) diff --git a/mathics/core/expression.py b/mathics/core/expression.py index 851bccf24..58f1f744a 100644 --- a/mathics/core/expression.py +++ b/mathics/core/expression.py @@ -45,6 +45,7 @@ GENERAL_EXPRESSION_ELT_ORDER, GENERAL_NUMERIC_EXPRESSION_ELT_ORDER, Monomial, + wma_str_sort_key, ) from mathics.core.structure import LinkedStructure from mathics.core.symbols import ( @@ -893,8 +894,9 @@ def element_order(self) -> tuple: 3: tuple: list of Elements 4: 1: No clue... 
""" - exps: Dict[str, Union[float, complex]] = {} + exps: Dict[Tuple[str, str], Union[float, complex]] = {} head = self._head + if head is SymbolTimes: for element in self.elements: name = element.get_name() @@ -904,8 +906,10 @@ def element_order(self) -> tuple: assert isinstance(expr, (Expression, NumericOperators)) exp = expr.round_to_float() if var and exp is not None: + var = wma_str_sort_key(var) exps[var] = exps.get(var, 0) + exp elif name: + name = wma_str_sort_key(name) exps[name] = exps.get(name, 0) + 1 elif self.has_form("Power", 2): var = self.elements[0].get_name() @@ -917,6 +921,7 @@ def element_order(self) -> tuple: except AttributeError: exp = None if var and exp is not None: + var = wma_str_sort_key(var) exps[var] = exps.get(var, 0) + exp if exps: return ( diff --git a/mathics/core/keycomparable.py b/mathics/core/keycomparable.py index dac5c4901..ba30cd031 100644 --- a/mathics/core/keycomparable.py +++ b/mathics/core/keycomparable.py @@ -3,6 +3,8 @@ """ +from typing import Tuple + class KeyComparable: """Mathics3/WL defines a "canonical ordering" between elements, @@ -287,3 +289,38 @@ def __ne__(self, other) -> bool: BASIC_EXPRESSION_ELT_ORDER = 0x22 GENERAL_EXPRESSION_ELT_ORDER = 0x23 + + +def wma_str_sort_key(s: str) -> Tuple[str, str]: + """ + Return a tuple providing a sort key that + reproduces the order of strings and symbols + in WMA. + For example, the following is a list of + strings sorted in the WMA order: + `{Abeja, ABEJA, AVe, ave de paso, Ave de paso, Ave de Paso}` + The ordering criterion is: first sort case-insensitively, then, + at the first character that differs in the original strings, + lower case comes before upper case. + """ + # An alternative to this implementation would be to map the + # characters in a way that + # a -> A + # A -> B + # b -> C + # B -> D + # ... + # m -> Y + # M -> Z + # n -> a + # N -> b + # ... + # z -> y + # Z -> z + # so the result is again a string. 
Another possibility would be + # to return a wrapper class that implements this special comparison + # on the fly through the method `__lt__`. + return ( + s.lower(), + s.swapcase(), + ) diff --git a/mathics/core/symbols.py b/mathics/core/symbols.py index 0426e0e3e..afae3ace5 100644 --- a/mathics/core/symbols.py +++ b/mathics/core/symbols.py @@ -19,6 +19,7 @@ BASIC_EXPRESSION_ELT_ORDER, BASIC_NUMERIC_EXPRESSION_ELT_ORDER, Monomial, + wma_str_sort_key, ) from mathics.eval.tracing import trace_evaluate @@ -556,15 +557,17 @@ def element_order(self) -> tuple: Return a tuple value that is used in ordering elements of an expression. The tuple is ultimately compared lexicographically. """ + name = self.name + name_key = wma_str_sort_key(name) return ( ( BASIC_NUMERIC_EXPRESSION_ELT_ORDER if self.is_numeric() else BASIC_EXPRESSION_ELT_ORDER ), - Monomial({self.name: 1}), + Monomial({name_key: 1}), 0, - self.name, + name, 1, ) diff --git a/test/builtin/test_sort.py b/test/builtin/test_sort.py index 8a2f35508..fe70f0645 100644 --- a/test/builtin/test_sort.py +++ b/test/builtin/test_sort.py @@ -1,10 +1,49 @@ # -*- coding: utf-8 -*- +from test.helper import check_evaluation from mathics.core.expression import Expression from mathics.core.symbols import Symbol, SymbolPlus, SymbolTimes +def test_sort_wma(): + """Test the alphabetic order in WMA for Strings and Symbols""" + # In Python, str values are ordered as tuples of + # the code points of their characters. So, + # + # "Abeja" < "Ave" < "aVe" < "abeja" + # + # In WMA, strings and symbols are sorted in alphabetical order, with + # lowercase characters coming before the corresponding uppercase ones. + # Then, the same words are sorted in WMA as + # + # "abeja" < "Abeja" < "aVe" < "Ave" + # + # Such an order is equivalent to using + # `lambda s: (s.lower(), s.swapcase(),)` as the sort key. + # + # Finally, String atoms come before Symbols. The following test + # enforces this order. 
+ str_expr = ( + '{"Ave", "aVe", "abeja", AVe, ave, aVe, "Abeja", "ABEJA", ' + '"AVe", "ave del paraíso", "Ave del paraíso", ' + '"Ave del Paraíso"} // Sort // InputForm' + ) + str_expected = ( + '{"abeja", "Abeja", "ABEJA", "aVe", "Ave", "AVe", ' + '"ave del paraíso", "Ave del paraíso", "Ave del Paraíso", ' + "ave, aVe, AVe}//InputForm" + ) + check_evaluation( + str_expr, + str_expected, + # to_string_expr=True, + # to_string_expected=True, + # hold_expected=True, + failure_message="WMA order", + ) + + def test_Expression_sameQ(): """ Test Expression.SameQ