diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc index 3161f09..995c222 100644 --- a/doc/source/changes/version_0_35_1.rst.inc +++ b/doc/source/changes/version_0_35_1.rst.inc @@ -5,6 +5,24 @@ New features * added explicit support for Python 3.14. +* the editor is now associated with some file extensions on Windows, which + means that double-clicking on files with these extensions in the Windows File + Explorer will open them in the editor. The following extensions are + associated: .h5, .hdf, .feather, .parquet, .ddb and .duckdb + (closes :editor_issue:`302`). + + Known issues + ------------ + * This does not work when multiple users share a single Python environment + because the file association is done only for the user who installed the + editor. + * The reported file type is very long and weird. For example, for .h5 files, + it is ".h5 larray-editor.AssocFile.h5 file" instead of just "H5 File". This + is harmless though. + * We chose not to associate the editor with file formats we support but + which likely have a better viewer installed on the computer (.xlsx, .csv, + .dta, .sas7bdat, text files, IODE files and zip files). + Miscellaneous improvements ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -43,6 +61,11 @@ Fixes * avoid warnings when displaying data with any column entirely non-numeric (including NaN). Closes :editor_issue:`311`. +* fixed `compare()` not working for object or string arrays (closes + :editor_issue:`163`). The background color and "maximum absolute + relative difference" label were always wrong and the comparison failed + completely when the first array was an object array containing any 0 value. + * fixed the mechanism writing warning/error messages happening during the editor initialization. 
The errors are now correctly written in the user TEMP directory / larray-editor-stderr.log \ No newline at end of file diff --git a/larray_editor/arrayadapter.py b/larray_editor/arrayadapter.py index b22ce53..52cc0fc 100644 --- a/larray_editor/arrayadapter.py +++ b/larray_editor/arrayadapter.py @@ -1337,11 +1337,7 @@ def get_finite_numeric_values(array: np.ndarray) -> np.ndarray: # format (or plotting?) finite_value = np.abs(finite_value) elif dtype.type is np.object_: - # change non numeric to nan - finite_value = np.where(is_number_value_vectorized(finite_value), - finite_value, - np.nan) - finite_value = finite_value.astype(np.float64) + finite_value = non_numeric_to_nan(finite_value) elif np.issubdtype(dtype, np.bool_): finite_value = finite_value.astype(np.int8) elif not np.issubdtype(dtype, np.number): @@ -1356,6 +1352,31 @@ def get_finite_numeric_values(array: np.ndarray) -> np.ndarray: return np.where(np.isfinite(finite_value), finite_value, np.nan) +def non_numeric_to_nan(array: np.ndarray) -> np.ndarray: + dtype = array.dtype + if np.issubdtype(dtype, np.number): + return array + elif dtype.type is np.object_: + array = np.where(is_number_value_vectorized(array), + array, + np.nan) + return array.astype(np.float64) + # not numeric nor object => all non-numeric + else: + return np.full(array.shape, np.nan, dtype=np.float64) + + +def ensure_numeric_array(array: la.Array) -> la.Array: + dtype = array.dtype + if np.issubdtype(dtype, np.number): + return array + else: + return la.Array( + non_numeric_to_nan(array.data), + axes=array.axes + ) + + # only used in LArray adapter. 
it should use the same code path as the rest # though def get_color_value(array, global_vmin, global_vmax, axis=None): diff --git a/larray_editor/comparator.py b/larray_editor/comparator.py index f668f72..6ea10de 100644 --- a/larray_editor/comparator.py +++ b/larray_editor/comparator.py @@ -6,6 +6,7 @@ from qtpy.QtWidgets import (QWidget, QVBoxLayout, QListWidget, QSplitter, QHBoxLayout, QLabel, QCheckBox, QLineEdit, QComboBox, QMessageBox) +from larray_editor.arrayadapter import ensure_numeric_array from larray_editor.utils import _, print_exception, align_arrays from larray_editor.arraywidget import ArrayEditorWidget from larray_editor.editor import AbstractEditorWindow @@ -149,6 +150,9 @@ def _update_from_arrays(self): stack_axis = self.stack_axis align_method = self.get_align_method() try: + # this also converts string arrays to object arrays because + # align_arrays computes the common dtype of the arrays *and* the + # fill_value, which is nan by default aligned_arrays = align_arrays(self.arrays, join=align_method, fill_value=self.fill_value) @@ -179,66 +183,66 @@ def _update_from_combined_array(self): isclose = eq self._diff_below_tolerance = isclose - try: - with np.errstate(divide='ignore', invalid='ignore'): - diff = self._combined_array - self._array0 - reldiff = diff / self._array0 - # make reldiff 0 where the values are the same than array0 even for - # special values (0, nan, inf, -inf) - # at this point reldiff can still contain nan and infs - reldiff = la.where(eq, 0, reldiff) - - # 1) compute maxabsreldiff for the label - # this should NOT exclude nans or infs - relmin = reldiff.min(skipna=False) - relmax = reldiff.max(skipna=False) - maxabsreldiff = max(abs(relmin), abs(relmax)) - - # 2) compute bg_value - # replace -inf by min(reldiff), +inf by max(reldiff) - reldiff_for_bg = reldiff.copy() - isneginf = reldiff == -np.inf - isposinf = reldiff == np.inf - isinf = isneginf | isposinf - - # given the way reldiff is constructed, it cannot contain only 
infs - # (because inf/inf is nan) it can contain only infs and nans though, - # in which case finite_relXXX will be nan, so unless the array - # is empty, finite_relXXX should never be inf - finite_relmin = np.nanmin(reldiff, where=~isinf, initial=np.inf) - finite_relmax = np.nanmax(reldiff, where=~isinf, initial=-np.inf) - # special case when reldiff contains only 0 and infs (to avoid - # coloring the inf cells white in that case) - if finite_relmin == 0 and finite_relmax == 0 and isinf.any(): - finite_relmin = -1 - finite_relmax = 1 - reldiff_for_bg[isneginf] = finite_relmin - reldiff_for_bg[isposinf] = finite_relmax - - # make sure that "acceptable" differences show as white - reldiff_for_bg = la.where(isclose, 0, reldiff_for_bg) - - # We need a separate version for bg and the label, so that when we - # modify atol/rtol, the background color is updated but not the - # maxreldiff label - maxabsreldiff_for_bg = max(abs(np.nanmin(reldiff_for_bg)), - abs(np.nanmax(reldiff_for_bg))) - if maxabsreldiff_for_bg: - # scale reldiff to range 0-1 with 0.5 for reldiff = 0 - self._bg_value = (reldiff_for_bg / maxabsreldiff_for_bg) / 2 + 0.5 - # if the only differences are nans on either side - elif not isclose.all(): - # use white (0.5) everywhere except where reldiff is nan, so - # that nans are grey - self._bg_value = reldiff_for_bg + 0.5 - else: - # do NOT use full_like as we don't want to inherit array dtype - self._bg_value = la.full(self._combined_array.axes, 0.5) - except TypeError: - # str/object array - maxabsreldiff = la.nan + # we cannot use raw numpy arrays yet because we need the arrays to + # broadcast properly to compute diff and reldiff + combined_array = ensure_numeric_array(self._combined_array) + array0 = ensure_numeric_array(self._array0) + with np.errstate(divide='ignore', invalid='ignore'): + diff = combined_array - array0 + reldiff = diff / array0 + + # make reldiff 0 where the values are the same than array0 even for + # special values (0, nan, inf, 
-inf) + # at this point reldiff can still contain nan and infs + reldiff = la.where(eq, 0, reldiff) + + # 1) compute maxabsreldiff for the label + # this should NOT exclude nans or infs + relmin = reldiff.min(skipna=False) + relmax = reldiff.max(skipna=False) + maxabsreldiff = max(abs(relmin), abs(relmax)) + + # 2) compute bg_value + # replace -inf by min(reldiff), +inf by max(reldiff) + reldiff_for_bg = reldiff.copy() + isneginf = reldiff == -np.inf + isposinf = reldiff == np.inf + isinf = isneginf | isposinf + + # given the way reldiff is constructed, it cannot contain only infs + # (because inf/inf is nan) it can contain only infs and nans though, + # in which case finite_relXXX will be nan, so unless the array + # is empty, finite_relXXX should never be inf + finite_relmin = np.nanmin(reldiff, where=~isinf, initial=np.inf) + finite_relmax = np.nanmax(reldiff, where=~isinf, initial=-np.inf) + + # special case when reldiff contains only 0 and infs (to avoid + # coloring the inf cells white in that case) + if finite_relmin == 0 and finite_relmax == 0 and isinf.any(): + finite_relmin = -1 + finite_relmax = 1 + reldiff_for_bg[isneginf] = finite_relmin + reldiff_for_bg[isposinf] = finite_relmax + + # make sure that "acceptable" differences show as white + reldiff_for_bg = la.where(isclose, 0, reldiff_for_bg) + + # We need a separate version for bg and the label, so that when we + # modify atol/rtol, the background color is updated but not the + # maxreldiff label + maxabsreldiff_for_bg = max(abs(np.nanmin(reldiff_for_bg)), + abs(np.nanmax(reldiff_for_bg))) + if maxabsreldiff_for_bg: + # scale reldiff to range 0-1 with 0.5 for reldiff = 0 + self._bg_value = (reldiff_for_bg / maxabsreldiff_for_bg) / 2 + 0.5 + # if the only differences are nans on either side + elif not isclose.all(): + # use white (0.5) everywhere except where reldiff is nan, so + # that nans are grey + self._bg_value = reldiff_for_bg + 0.5 + else: # do NOT use full_like as we don't want to inherit 
array dtype - self._bg_value = la.full(self._combined_array.axes, 0.5) + self._bg_value = la.full(combined_array.axes, 0.5) # using percents does not look good when the numbers are very small self.maxdiff_label.setText(str(maxabsreldiff))