diff --git a/doc/source/api.rst b/doc/source/api.rst index 73e13c8c7..a8327f90f 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -64,8 +64,7 @@ Modifying/Selecting Axis.rename Axis.extend Axis.insert - Axis.replace - Axis.apply + Axis.set_labels Axis.union Axis.intersection Axis.difference diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 410d88c99..f53ee9a5c 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -1,6 +1,20 @@ Change log ########## +Version 0.36 +============ + +In development. + +CORE +---- +.. include:: ./changes/version_0_36.rst.inc + +EDITOR +------ +.. include:: ./changes/editor/version_0_36.rst.inc + + Version 0.35 ============ diff --git a/doc/source/changes/version_0_36.rst.inc b/doc/source/changes/version_0_36.rst.inc new file mode 100644 index 000000000..6820fb727 --- /dev/null +++ b/doc/source/changes/version_0_36.rst.inc @@ -0,0 +1,80 @@ +.. py:currentmodule:: larray + + +Syntax changes +^^^^^^^^^^^^^^ + +* ``Axis.apply()`` and ``Axis.replace()`` are deprecated in favor of + :py:obj:`Axis.set_labels()`. + +* renamed ``Array.old_method_name()`` to :py:obj:`Array.new_method_name()` (closes :issue:`1`). + +* renamed ``old_argument_name`` argument of :py:obj:`Array.method_name()` to ``new_argument_name``. + + +Backward incompatible changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* other backward incompatible changes + + +New features +^^^^^^^^^^^^ + +* added a feature (see the :ref:`miscellaneous section <misc>` for details). It works on :ref:`api-axis` and + :ref:`api-group` objects. + + Here is an example of the new feature: + + >>> arr = ndtest((2, 3)) + >>> arr + a\b b0 b1 b2 + a0 0 1 2 + a1 3 4 5 + + And it can also be used like this: + + >>> arr = ndtest("a=a0..a2") + >>> arr + a a0 a1 a2 + 0 1 2 + +* added another feature in the editor (closes :editor_issue:`1`). + + .. note:: + + - It works for foo bar ! + - It does not work for foo baz ! + + +..
_misc: + +Miscellaneous improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* :py:obj:`Array.set_labels()` and :py:obj:`Axis.set_labels()` (formerly + ``Axis.replace()`` and ``Axis.apply()``) now accept slices, Groups or + selection strings as the labels to change, and callables and + "creation strings" as the new labels. This makes it easier to change + only a subset of labels or to change several labels in the same way + (closes :issue:`906`). + + >>> arr = ndtest((2, 3)) + >>> arr + a\b b0 b1 b2 + a0 0 1 2 + a1 3 4 5 + >>> arr.set_labels({'b1:': str.upper, 'a1': 'A-ONE'}) + a\b b0 B1 B2 + a0 0 1 2 + A-ONE 3 4 5 + >>> arr.set_labels('b1:', 'B1..B2') + a\b b0 B1 B2 + a0 0 1 2 + a1 3 4 5 + + +Fixes +^^^^^ + +* fixed something (closes :issue:`1`). diff --git a/larray/__init__.py b/larray/__init__.py index 1377bf777..8efb9536d 100644 --- a/larray/__init__.py +++ b/larray/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.35' +__version__ = '0.36-dev' from larray.core.axis import Axis, AxisCollection, X diff --git a/larray/core/array.py b/larray/core/array.py index a88920346..2dcc2e686 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -283,7 +283,7 @@ def concat(arrays, axis=0, dtype=None): # switch to object dtype if labels are of incompatible types, so that we do not implicitly convert numeric types to # strings (numpy should not do this in the first place but that is another story). This can happen for example when - # we want to add a "total" tick to a numeric axis (eg age). + # we want to add a "total" label to a numeric axis (eg age). combined_axis = Axis(concatenate_ndarrays(arrays_labels), name) # combine all axes (using labels from any side if any) @@ -2147,7 +2147,7 @@ def sort_values(self, key=None, axis=None, ascending=True) -> 'Array': # 1 2 0, axis='nat') # which sorts the *data* correctly, but the labels on the nat axis are not sorted # (because the __getitem__ in that case reuse the key axis as-is -- like it should).
- # Both use cases have value, but I think reordering the ticks should be the default. + # Both use cases have value, but I think reordering the labels should be the default. # Now, I am unsure where to change this. Probably in IGroupMaker.__getitem__, # but then how do I get the "not reordering labels" behavior that I have now? # FWIW, using .data, I get IGroup([1, 2, 0], axis='nat'), which works. @@ -2684,19 +2684,19 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam # get list of labels for each axis (except the last one if wide=True) labels = [ensure_no_numpy_type(axis.labels) for axis in axes] - # creates vertical lines (ticks is a list of list) + # creates vertical lines (labels is a list of list) if self.ndim == 1 and wide: if dump_axes_names is True: # There is no vertical axis, so the axis name should not have - # any "tick" below it and we add an empty "tick". - ticks = [['']] + # any "label" below it and we add an empty "label". + labels = [['']] else: # There is no vertical axis but no axis name either - ticks = [[]] + labels = [[]] elif light: - ticks = light_product(*labels) + labels = light_product(*labels) else: - ticks = Product(labels) + labels = Product(labels) # computes the first line other_colnames = ensure_no_numpy_type(self.axes[-1].labels) if wide else [value_name] @@ -2706,14 +2706,14 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam if maxlines != -1 and height > maxlines: # replace middle lines of the table by '...'. # We show only the first and last edgeitems lines. 
- res2d.extend([list(tick) + dataline - for tick, dataline in zip(ticks[:edgeitems], ensure_no_numpy_type(data[:edgeitems]))]) + res2d.extend([list(label) + dataline + for label, dataline in zip(labels[:edgeitems], ensure_no_numpy_type(data[:edgeitems]))]) res2d.append(["..."] * (self.ndim - 1 + width)) - res2d.extend([list(tick) + dataline - for tick, dataline in zip(ticks[-edgeitems:], ensure_no_numpy_type(data[-edgeitems:]))]) + res2d.extend([list(label) + dataline + for label, dataline in zip(labels[-edgeitems:], ensure_no_numpy_type(data[-edgeitems:]))]) else: # all other lines (labels of N-1 first axes + data) - res2d.extend([list(tick) + ensure_no_numpy_type(dataline) for tick, dataline in zip(ticks, data)]) + res2d.extend([list(label) + ensure_no_numpy_type(dataline) for label, dataline in zip(labels, data)]) if na_repr != 'as_is': res2d = [[na_repr if value != value else value @@ -7513,7 +7513,6 @@ def __array__(self, dtype=None, copy=None): __array_priority__ = 100 - # TODO: this should be a thin wrapper around a method in AxisCollection def set_labels(self, axis=None, labels=None, inplace=False, **kwargs) -> 'Array': r"""Replace the labels of one or several axes of the array. 
@@ -7611,13 +7610,18 @@ def set_labels(self, axis=None, labels=None, inplace=False, **kwargs) -> 'Array' nat\sex Men F Belgian 0 1 FO 2 3 + + >>> a.set_labels({'M:F': str.lower, 'BE': 'Belgian', 'FO': 'Foreigner'}) + nat\sex m f + Belgian 0 1 + Foreigner 2 3 """ - axes = self.axes.set_labels(axis, labels, **kwargs) + new_axes = self.axes.set_labels(axis, labels, **kwargs) if inplace: - self.axes = axes + self.axes = new_axes return self else: - return Array(self.data, axes) + return Array(self.data, new_axes) def astype(self, dtype, order='K', casting='unsafe', subok=True, copy=True) -> 'Array': return Array(self.data.astype(dtype, order, casting, subok, copy), self.axes) diff --git a/larray/core/axis.py b/larray/core/axis.py index ed1c52b8f..9941982e0 100644 --- a/larray/core/axis.py +++ b/larray/core/axis.py @@ -11,8 +11,9 @@ from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCArray from larray.core.expr import ExprNode -from larray.core.group import (Group, LGroup, IGroup, IGroupMaker, _to_tick, _to_ticks, _to_key, _seq_summary, - _idx_seq_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups) +from larray.core.group import (Group, LGroup, IGroup, IGroupMaker, _to_label, _to_labels, _to_key, _seq_summary, + _idx_seq_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups, + _to_label_or_labels) from larray.util.oset import OrderedSet from larray.util.misc import (duplicates, array_lookup2, ReprString, index_by_id, renamed_to, LHDFStore, lazy_attribute, _isnoneslice, unique_list, unique_multi, Product, argsort, has_duplicates, @@ -115,15 +116,15 @@ def _retarget_warn_msg(key, real_axis, current_eval=None, future_eval=None): "Using a Group object which was used to create an aggregate to " \ "target its aggregated label is deprecated. " \ "Please use the aggregated label directly instead. 
" \ - "In this case, you should use {potential_tick!r} instead of " \ + "In this case, you should use {potential_label!r} instead of " \ "using {key!r}." -def _group_as_aggregated_label_msg(key, potential_tick=None): - if potential_tick is None: - potential_tick = _to_tick(key) +def _group_as_aggregated_label_msg(key, potential_label=None): + if potential_label is None: + potential_label = _to_label(key) return _GROUP_AS_AGGREGATED_LABEL_MSG_TEMPLATE.format( - potential_tick=potential_tick, + potential_label=potential_label, key=key ) @@ -304,7 +305,7 @@ def labels(self, labels): labels = np.arange(length) iswildcard = True else: - labels = _to_ticks(labels, parse_single_int=True) + labels = _to_labels(labels, parse_single_int=True) length = len(labels) iswildcard = False @@ -975,8 +976,8 @@ def _ipython_key_completions_(self) -> List[Scalar]: return list(self.labels) def __contains__(self, key) -> bool: - # TODO: ideally, _to_tick shouldn't be necessary, the __hash__ and __eq__ of Group should include this - return _to_tick(key) in self._mapping + # TODO: ideally, _to_label shouldn't be necessary, the __hash__ and __eq__ of Group should include this + return _to_label(key) in self._mapping # use the default hash. 
We have to specify it explicitly because we define __eq__ __hash__ = object.__hash__ @@ -1012,6 +1013,9 @@ def index(self, key) -> Union[int, np.ndarray, slice]: 3 >>> people.index(people.containing('Bruce')) array([1, 2]) + >>> a = Axis('a0..a5', 'a') + >>> a.index('a1,a3,a2..a4') + array([1, 3, 2, 3, 4]) """ mapping = self._mapping @@ -1027,18 +1031,17 @@ def index(self, key) -> Union[int, np.ndarray, slice]: # TODO: remove this as it is potentially very expensive # if key.key is an array or list and should be tried # as a last resort - potential_tick = _to_tick(key) - + potential_label = _to_label(key) # avoid matching 0 against False or 0.0, note that None has # object dtype and so always pass this test - if self._is_key_type_compatible(potential_tick): + if self._is_key_type_compatible(potential_label): try: - res_idx = mapping[potential_tick] - if potential_tick != key.key: + res_idx = mapping[potential_label] + if potential_label != key.key: raise ValueError( _group_as_aggregated_label_msg( key, - potential_tick + potential_label ) ) return res_idx @@ -1050,7 +1053,7 @@ def index(self, key) -> Union[int, np.ndarray, slice]: pass if isinstance(key, str): - # try the key as-is to allow getting at ticks with special characters (",", ":", ...) + # try the key as-is to allow getting at labels with special characters (",", ":", ...) try: # avoid matching 0 against False or 0.0, note that Group keys have object dtype and so always pass this # test @@ -1246,73 +1249,91 @@ def copy(self) -> 'Axis': new_axis.__sorted_values = self.__sorted_values return new_axis - def replace(self, old, new=None) -> 'Axis': + def set_labels(self, old_or_changes, new=None) -> 'Axis': r""" - Return a new axis with some labels replaced. + Return a new axis with some labels changed. - Parameters - ---------- - old : any scalar (bool, int, str, ...), tuple/list/array of scalars, or a mapping. - the label(s) to be replaced. 
Old can be a mapping {old1: new1, old2: new2, ...} - new : any scalar (bool, int, str, ...) or tuple/list/array of scalars, optional - the new label(s). This is argument must not be used if old is a mapping. + It supports three distinct syntax variants: - Returns - ------- - Axis - a new Axis with the old labels replaced by new labels. - - Examples - -------- - >>> sex = Axis('sex=M,F') - >>> sex - Axis(['M', 'F'], 'sex') - >>> sex.replace('M', 'Male') - Axis(['Male', 'F'], 'sex') - >>> sex.replace({'M': 'Male', 'F': 'Female'}) - Axis(['Male', 'Female'], 'sex') - >>> sex.replace(['M', 'F'], ['Male', 'Female']) - Axis(['Male', 'Female'], 'sex') - """ - if isinstance(old, dict): - new = list(old.values()) - old = list(old.keys()) - elif np.isscalar(old): - assert new is not None and np.isscalar(new), f"{new} is not a scalar but a {type(new).__name__}" - old = [old] - new = [new] - else: - seq = (tuple, list, np.ndarray) - assert isinstance(old, seq), f"{old} is not a sequence but a {type(old).__name__}" - assert isinstance(new, seq), f"{new} is not a sequence but a {type(new).__name__}" - assert len(old) == len(new) - # using object dtype because new labels length can be larger than the fixed str length in the self.labels array - labels = self.labels.astype(object) - indices = self.index(old) - labels[indices] = new - return Axis(labels, self.name) + * Axis.set_labels(new_labels) -> replace all Axis labels by `new_labels` + * Axis.set_labels(label_selection, new_labels) -> replace selection of labels by `new_labels` + * Axis.set_labels({old1: new1, old2: new2}) -> replace each selection of labels by corresponding new labels - def apply(self, func) -> 'Axis': - r""" - Return a new axis with the labels transformed by func. + Additionally, new labels in any of the above forms can be a function which transforms the existing + labels to produce the actual new labels. 
Parameters ---------- - func : callable - A callable which takes a single argument and returns a single value. + old_or_changes : any scalar (bool, int, str, ...), tuple/list/array of scalars, Group, callable or mapping. + This can be either: + + * A selection of label(s) to be replaced. This can take several forms: + - a single label (e.g. 'France') + - a list of labels (e.g. ['France', 'Germany']) + - a comma-separated string of labels (e.g. 'France,Germany') + - a Group (e.g. country['France']) + * A mapping {selection1: new_labels1, selection2: new_labels2, ...} + * New labels, in which case all the axis labels will be replaced by these new labels and + the `new` argument must not be used. + new : any scalar (bool, int, str, ...) or tuple/list/array of scalars or callable, optional + The new label(s) or function to apply to old labels to get the new labels. This argument must not be + used if `old_or_changes` contains the new labels or if it is a mapping. Returns ------- Axis - a new Axis with the transformed labels. + a new Axis with the old labels replaced by new labels.
Examples -------- - >>> sex = Axis('sex=MALE,FEMALE') - >>> sex.apply(str.capitalize) - Axis(['Male', 'Female'], 'sex') - """ - return Axis(np_frompyfunc(func, 1, 1)(self.labels), self.name) + >>> country = Axis('country=be,de,fr') + >>> country + Axis(['be', 'de', 'fr'], 'country') + >>> country.set_labels('be', 'Belgium') + Axis(['Belgium', 'de', 'fr'], 'country') + >>> country.set_labels({'de': 'Germany', 'fr': 'France'}) + Axis(['be', 'Germany', 'France'], 'country') + >>> country.set_labels(['be', 'fr'], ['Belgium', 'France']) + Axis(['Belgium', 'de', 'France'], 'country') + >>> country.set_labels('be,de', 'Belgium-Germany') + Axis(['Belgium-Germany', 'Belgium-Germany', 'fr'], 'country') + >>> country.set_labels('be,de', ['Belgium', 'Germany']) + Axis(['Belgium', 'Germany', 'fr'], 'country') + >>> country.set_labels(str.upper) + Axis(['BE', 'DE', 'FR'], 'country') + """ + # FIXME: compute max(length of new keys and old labels array) instead + # XXX: it might be easier to go via list to get the label type auto-detection + # labels = self.labels.tolist() + + # using object dtype because new labels length can be larger than the fixed str length in self.labels + labels = self.labels.astype(object) + get_indices = self.index + + def apply_changes(selection, label_change): + old_indices = get_indices(selection) + if callable(label_change): + old_labels = labels[old_indices] + if isinstance(old_labels, np.ndarray): + np_func = np_frompyfunc(label_change, 1, 1) + new_labels = np_func(old_labels) + else: + new_labels = label_change(old_labels) + else: + new_labels = _to_label_or_labels(label_change) + labels[old_indices] = new_labels + + if new is None and not isinstance(old_or_changes, dict): + apply_changes(slice(None), old_or_changes) + elif new is not None: + apply_changes(old_or_changes, new) + else: + assert new is None and isinstance(old_or_changes, dict) + for old, new in old_or_changes.items(): + apply_changes(old, new) + return Axis(labels, self.name) + apply 
= renamed_to(set_labels, 'apply') + replace = renamed_to(set_labels, 'replace') # XXX: rename to named like Group? def rename(self, name) -> 'Axis': @@ -1321,7 +1342,7 @@ def rename(self, name) -> 'Axis': Parameters ---------- - name : str + name : str, Axis the new name for the axis. Returns @@ -1374,7 +1395,7 @@ def union(self, other) -> 'Axis': Axis(['a0', 'a1', 'a2', 'a3'], 'a') """ non_string_scalar = np.isscalar(other) and not isinstance(other, str) - other = [other] if non_string_scalar else _to_ticks(other) + other = [other] if non_string_scalar else _to_labels(other) return Axis(unique_multi((self.labels, other)), self.name) def intersection(self, other) -> 'Axis': @@ -1409,7 +1430,7 @@ def intersection(self, other) -> 'Axis': Axis(['a0', 'a1', 'a0'], 'a') """ non_string_scalar = np.isscalar(other) and not isinstance(other, str) - other = [other] if non_string_scalar else _to_ticks(other) + other = [other] if non_string_scalar else _to_labels(other) to_keep = set(other) return Axis([label for label in self.labels if label in to_keep], self.name) @@ -1443,7 +1464,7 @@ def difference(self, other) -> 'Axis': Axis(['a0'], 'a') """ non_string_scalar = np.isscalar(other) and not isinstance(other, str) - other = [other] if non_string_scalar else _to_ticks(other) + other = [other] if non_string_scalar else _to_labels(other) to_drop = set(other) return Axis([label for label in self.labels if label not in to_drop], self.name) @@ -2070,7 +2091,7 @@ def isaxis(self, value) -> bool: >>> col.isaxis('c') False """ - # this is tricky. 0 and 1 can be both axes indices and axes ticks. + # this is tricky. 0 and 1 can be both axes indices and axes labels. # not sure what's worse: # 1) disallow aggregates(axis_num): users could still use arr.sum(arr.axes[0]) # we could also provide an explicit kwarg (ie this would effectively forbid having an axis named "axis"). 
@@ -2079,13 +2100,13 @@ def isaxis(self, value) -> bool: return isinstance(value, Axis) or (isinstance(value, str) and value in self) # 2) slightly inconsistent API: allow aggregate over single labels if they are string, but not int # arr.sum(0) would sum on the first axis, but arr.sum('M') would - # sum a single tick. I don't like this option. - # 3) disallow single tick aggregates. Single labels make little sense in the context of an aggregate, + # sum a single label. I don't like this option. + # 3) disallow single label aggregates. Single labels make little sense in the context of an aggregate, # but you don't always know/want to differenciate the code in that case anyway. # It would be annoying for e.g. Brussels # 4) give priority to axes, # arr.sum(0) would sum on the first axis but arr.sum(5) would - # sum a single tick (assuming there is a int axis and less than six axes). + # sum a single label (assuming there is a int axis and less than six axes). # return value in self def __len__(self) -> int: @@ -2744,25 +2765,14 @@ def set_labels(self, axis=None, labels=None, inplace=False, **kwargs) -> 'AxisCo # handle {label1: new_label1, label2: new_label2} if any(axis_ref not in self for axis_ref in changes.keys()): changes_per_axis = defaultdict(list) - for selection, new_labels in changes.items(): + for selection, label_changes in changes.items(): group = self._guess_axis(selection) axis = group.axis - changes_per_axis[axis].append((selection, new_labels)) + changes_per_axis[axis].append((group, label_changes)) changes = {axis: dict(axis_changes) for axis, axis_changes in changes_per_axis.items()} - new_axes = [] - for old_axis, axis_changes in changes.items(): - real_axis = self[old_axis] - if isinstance(axis_changes, dict): - new_axis = real_axis.replace(axis_changes) - # TODO: we should implement the non-dict behavior in Axis.replace, so that we can simplify this code to: - # new_axes = [self[old_axis].replace(axis_changes) for old_axis, axis_changes in 
changes.items()] - elif callable(axis_changes): - new_axis = real_axis.apply(axis_changes) - else: - new_axis = Axis(axis_changes, real_axis.name) - new_axes.append((real_axis, new_axis)) - return self.replace(new_axes, inplace=inplace) + return self.replace({old_axis: self[old_axis].set_labels(axis_changes) for old_axis, axis_changes in + changes.items()}, inplace=inplace) # TODO: deprecate method (should use __sub__ instead) def without(self, axes) -> 'AxisCollection': @@ -3745,6 +3755,7 @@ def align(self, *other, join='outer', axes=None) -> Tuple['AxisCollection']: See Also -------- Array.align + Axis.align Examples -------- diff --git a/larray/core/group.py b/larray/core/group.py index c8c39ed27..6c49d0661 100644 --- a/larray/core/group.py +++ b/larray/core/group.py @@ -345,9 +345,9 @@ def _seq_group_to_name(seq) -> Sequence[Any]: return seq -def _to_tick(v) -> Scalar: +def _to_label(v) -> Scalar: r""" - Convert any value to a tick (ie makes it hashable, and acceptable as an ndarray element). + Convert any value to a label (ie make it hashable, and acceptable as an ndarray element). scalar -> not modified slice -> 'start:stop' @@ -364,7 +364,7 @@ def _to_tick(v) -> Scalar: Returns ------- any scalar - scalar representing the tick + scalar representing the label """ # the fact that an "aggregated tick" is passed as a LGroup or as a string should be as irrelevant as possible. 
The thing is that we cannot (currently) use the more elegant _to_tick(e.key) that means the LGroup is not @@ -374,7 +374,7 @@ def _to_tick(v) -> Scalar: if np.isscalar(v): return v elif isinstance(v, Group): - return v.name if v.name is not None else _to_tick(v.to_label()) + return v.name if v.name is not None else _to_label(v.to_label()) elif isinstance(v, slice): return _slice_to_str(v) elif isinstance(v, (tuple, list)): @@ -387,7 +387,41 @@ def _to_tick(v) -> Scalar: return str(v) -def _to_ticks(s, parse_single_int=False) -> Iterable[Scalar]: +def _to_label_or_labels(value, parse_single_int=False): + if isinstance(value, ABCAxis): + return value.labels + elif isinstance(value, Group): + # a single LGroup used for all labels of an Axis + # XXX: unsure _to_label_or_labels() is necessary as value.eval() should return existing labels + # In fact, calling _to_label_or_labels is only necessary because Group keys are not + # checked enough, especially for groups without axis, or with + # AxisReference/string axes + return _to_label_or_labels(value.eval()) + elif isinstance(value, np.ndarray): + # we assume it has already been translated + # XXX: Is it a safe assumption? + return value + if isinstance(value, pd.Index): + return value.values + elif isinstance(value, (list, tuple)): + return [_to_label(v) for v in value] + elif isinstance(value, range): + return value + elif isinstance(value, str): + labels = _seq_str_to_seq(value, parse_single_int=parse_single_int) + if isinstance(labels, slice): + raise ValueError("using : to define axes is deprecated, please use .. instead") + return labels + elif hasattr(value, '__array__'): + return value.__array__() + else: + try: + return list(value) + except TypeError: + raise TypeError(f"ticks must be iterable ({type(value)} is not)") + + +def _to_labels(value, parse_single_int=False) -> Iterable[Scalar]: r""" Make a (list of) value(s) usable as the collection of labels for an Axis (ie hashable).
@@ -395,7 +429,7 @@ def _to_ticks(s, parse_single_int=False) -> Iterable[Scalar]: Parameters ---------- - s : iterable + value : str, list, tuple, range, pd.Index, Axis, Group, List of values usable as the collection of labels for an Axis. Returns @@ -404,48 +438,23 @@ def _to_ticks(s, parse_single_int=False) -> Iterable[Scalar]: Examples -------- - >>> list(_to_ticks('M , F')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_labels('M , F')) # doctest: +NORMALIZE_WHITESPACE ['M', 'F'] - >>> list(_to_ticks('A,C..E,F..G,Z')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_labels('A,C..E,F..G,Z')) # doctest: +NORMALIZE_WHITESPACE ['A', 'C', 'D', 'E', 'F', 'G', 'Z'] - >>> list(_to_ticks('U')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_labels('U')) # doctest: +NORMALIZE_WHITESPACE ['U'] - >>> list(_to_ticks('..3')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_labels('..3')) # doctest: +NORMALIZE_WHITESPACE [0, 1, 2, 3] - >>> list(_to_ticks('01..12')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_labels('01..12')) # doctest: +NORMALIZE_WHITESPACE ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'] - >>> list(_to_ticks('01,02,03,10,11,12')) # doctest: +NORMALIZE_WHITESPACE + >>> list(_to_labels('01,02,03,10,11,12')) # doctest: +NORMALIZE_WHITESPACE ['01', '02', '03', '10', '11', '12'] """ - if isinstance(s, ABCAxis): - return s.labels - if isinstance(s, Group): - # a single LGroup used for all ticks of an Axis - return _to_ticks(s.eval()) - elif isinstance(s, np.ndarray): - # we assume it has already been translated - # XXX: Is it a safe assumption? - return s - - if isinstance(s, pd.Index): - ticks = s.values - elif isinstance(s, (list, tuple)): - ticks = [_to_tick(e) for e in s] - elif isinstance(s, range): - ticks = s - elif isinstance(s, str): - seq = _seq_str_to_seq(s, parse_single_int=parse_single_int) - if isinstance(seq, slice): - raise ValueError("using : to define axes is deprecated, please use .. 
instead") - ticks = [seq] if isinstance(seq, (str, int)) else seq - elif hasattr(s, '__array__'): - ticks = s.__array__() - else: - try: - ticks = list(s) - except TypeError: - raise TypeError(f"ticks must be iterable ({type(s)} is not)") - return np.asarray(ticks) + labels = _to_label_or_labels(value, parse_single_int=parse_single_int) + if np.isscalar(labels): + labels = [labels] + return np.asarray(labels) _axis_name_pattern = re.compile(r'\s*(([A-Za-z0-9]\w*)(\.i)?\s*\[)?(.*)') @@ -663,7 +672,7 @@ def _to_keys(value, stack_depth=1) -> Union[Key, Tuple[Key]]: def _translate_sheet_name(sheet_name) -> str: if isinstance(sheet_name, Group): - sheet_name = str(_to_tick(sheet_name)) + sheet_name = str(_to_label(sheet_name)) if isinstance(sheet_name, str): sheet_name = _sheet_name_pattern.sub('_', sheet_name) if len(sheet_name) > 31: @@ -677,7 +686,7 @@ def _translate_sheet_name(sheet_name) -> str: def _translate_group_key_hdf(key) -> str: if isinstance(key, Group): - key = _key_hdf_pattern.sub('_', str(_to_tick(key))) + key = _key_hdf_pattern.sub('_', str(_to_label(key))) return key @@ -702,7 +711,7 @@ def union(*args) -> List[Any]: ['a', 'b', 'c', 'd', 'e', 'f', 0, 1, 2] """ if args: - return unique_list(chain(*(_to_ticks(arg) for arg in args))) + return unique_list(chain(*(_to_labels(arg) for arg in args))) else: return [] @@ -762,7 +771,7 @@ def __init__(self, key, name=None, axis=None): # we do NOT assign a name automatically when missing because that makes it impossible to know whether a name # was explicitly given or not - self.name = _to_tick(name) if name is not None else name + self.name = _to_label(name) if name is not None else name assert axis is None or isinstance(axis, (str, int, ABCAxis)), f"invalid axis '{axis}' ({type(axis).__name__})" # we could check the key is valid but this can be slow and could be useless @@ -1528,7 +1537,7 @@ def __hash__(self) -> int: # is a small price to pay if the performance impact is large. 
# the problem with using self.translate() is that we cannot compare groups without axis # return hash(_to_tick(self.translate())) - return hash(_to_tick(self.key)) + return hash(_to_label(self.key)) def remove_nested_groups(key) -> Any: @@ -1749,7 +1758,7 @@ def eval(self) -> Union[Scalar, Sequence[Scalar]]: raise ValueError("Cannot evaluate a positional group without axis") def __hash__(self): - return hash(('IGroup', _to_tick(self.key))) + return hash(('IGroup', _to_label(self.key))) PGroup = renamed_to(IGroup, 'PGroup', raise_error=True) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index 52ab48144..a9f6e08b2 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -18,7 +18,7 @@ def decode(s, encoding='utf-8', errors='strict'): def parse(s): r""" - Used to parse the "folded" axis ticks (usually periods). + Used to parse the "folded" axis labels (usually periods). """ # parameters can be strings or numbers if isinstance(s, str): diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 32bf274a9..2278201f3 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -24,7 +24,7 @@ zip_array_values, zip_array_items, nan_to_num ) from larray.core.axis import ( - _to_ticks, _to_key, _retarget_warn_msg, _group_as_aggregated_label_msg + _to_labels, _to_key, _retarget_warn_msg, _group_as_aggregated_label_msg ) from larray.util.misc import LHDFStore @@ -43,8 +43,8 @@ def test_value_string_split(): - assert_nparray_equal(_to_ticks('c0,c1'), np.asarray(['c0', 'c1'])) - assert_nparray_equal(_to_ticks('c0, c1'), np.asarray(['c0', 'c1'])) + assert_nparray_equal(_to_labels('c0,c1'), np.asarray(['c0', 'c1'])) + assert_nparray_equal(_to_labels('c0, c1'), np.asarray(['c0', 'c1'])) def test_value_string_union(): @@ -52,12 +52,12 @@ def test_value_string_union(): def test_value_string_range(): - assert_nparray_equal(_to_ticks('0..15'), np.arange(16)) - assert_nparray_equal(_to_ticks('..15'), np.arange(16)) + 
assert_nparray_equal(_to_labels('0..15'), np.arange(16)) + assert_nparray_equal(_to_labels('..15'), np.arange(16)) with must_raise(ValueError, "no stop bound provided in range: '10..'"): - _to_ticks('10..') + _to_labels('10..') with must_raise(ValueError, "no stop bound provided in range: '..'"): - _to_ticks('..') + _to_labels('..') # ================ # diff --git a/larray/tests/test_axis.py b/larray/tests/test_axis.py index f023486c4..5005cbe7f 100644 --- a/larray/tests/test_axis.py +++ b/larray/tests/test_axis.py @@ -811,5 +811,19 @@ def test_split(): assert b.equals(Axis(['b0', 'b1', 'b2'])) +def test_apply(): + sex = Axis('sex=MALE,FEMALE') + with must_warn(FutureWarning, msg="apply() is deprecated. Use set_labels() instead."): + res = sex.apply(str.capitalize) + assert res.equals(Axis(['Male', 'Female'], 'sex')) + + +def test_replace(): + sex = Axis('sex=M,F') + with must_warn(FutureWarning, msg="replace() is deprecated. Use set_labels() instead."): + res = sex.replace('M', 'Male') + assert res.equals(Axis(['Male', 'F'], 'sex')) + + if __name__ == "__main__": pytest.main() diff --git a/pyproject.toml b/pyproject.toml index 2603fb542..9a6186918 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ [project] name = "larray" -version = "0.35" +version = "0.36-dev" description = "N-D labeled arrays in Python" readme = { file = "README.rst", content-type = "text/x-rst" }