From 5d9c9523555c9da419664b04a12a226b03c0933c Mon Sep 17 00:00:00 2001 From: Akshat Gupta Date: Sat, 28 Mar 2026 10:25:53 +0000 Subject: [PATCH 01/23] Add wildcard/glob pattern support for exclude_paths and include_paths --- deepdiff/deephash.py | 35 +- deepdiff/diff.py | 2364 +++++++------------------------------- deepdiff/helper.py | 24 + deepdiff/path.py | 161 +++ deepdiff/search.py | 8 +- docs/deephash_doc.rst | 2 + docs/diff_doc.rst | 4 +- docs/exclude_paths.rst | 43 + docs/search_doc.rst | 426 ++++++- tests/test_glob_paths.py | 719 ++++++++++++ 10 files changed, 1773 insertions(+), 2013 deletions(-) create mode 100644 tests/test_glob_paths.py diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index d26338e2..1ecc22a4 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -14,7 +14,8 @@ convert_item_or_items_into_compiled_regexes_else_none, get_id, type_is_subclass_of_type_group, type_in_type_group, number_to_string, datetime_normalize, KEY_TO_VAL_STR, - get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel) + get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel, + separate_wildcard_and_exact_paths) from deepdiff.base import Base @@ -189,6 +190,7 @@ def __init__(self, custom_operators: Optional[List[Any]] = None, default_timezone: Union[datetime.timezone, "BaseTzInfo"] = datetime.timezone.utc, encodings: Optional[List[str]] = None, + exclude_glob_paths: Optional[List[Any]] = None, exclude_obj_callback: Optional[Callable[[Any, str], bool]] = None, exclude_paths: Optional[PathType] = None, exclude_regex_paths: Optional[RegexType] = None, @@ -205,6 +207,7 @@ def __init__(self, ignore_type_in_groups: Any = None, ignore_type_subclasses: bool = False, ignore_uuid_types: bool = False, + include_glob_paths: Optional[List[Any]] = None, include_paths: Optional[PathType] = None, number_format_notation: str = "f", number_to_string_func: Optional[NumberToStringFunc] = None, @@ -231,8 +234,14 @@ def __init__(self, exclude_types = 
set() if exclude_types is None else set(exclude_types) self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance self.ignore_repetition = ignore_repetition - self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths)) - self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths)) + _exclude_set = convert_item_or_items_into_set_else_none(exclude_paths) + _exclude_exact, _exclude_globs = separate_wildcard_and_exact_paths(_exclude_set) + self.exclude_paths = add_root_to_paths(_exclude_exact) + self.exclude_glob_paths = exclude_glob_paths or _exclude_globs + _include_set = convert_item_or_items_into_set_else_none(include_paths) + _include_exact, _include_globs = separate_wildcard_and_exact_paths(_include_set) + self.include_paths = add_root_to_paths(_include_exact) + self.include_glob_paths = include_glob_paths or _include_globs self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) self.hasher = default_hasher if hasher is None else hasher self.hashes[UNPROCESSED_KEY] = [] # type: ignore @@ -461,11 +470,21 @@ def _skip_this(self, obj: Any, parent: str) -> bool: skip = False if self.exclude_paths and parent in self.exclude_paths: skip = True - if self.include_paths and parent != 'root': - if parent not in self.include_paths: - skip = True - for prefix in self.include_paths: - if parent.startswith(prefix): + elif self.exclude_glob_paths and any(gp.match(parent) for gp in self.exclude_glob_paths): + skip = True + if (self.include_paths or self.include_glob_paths) and parent != 'root': + skip = True + if self.include_paths: + if parent in self.include_paths: + skip = False + else: + for prefix in self.include_paths: + if parent.startswith(prefix): + skip = False + break + if skip and self.include_glob_paths: + for gp in self.include_glob_paths: + if gp.match_or_is_ancestor(parent): skip = False break elif 
self.exclude_regex_paths and any( diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 2931cefd..2ac62b5e 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -1,2012 +1,484 @@ -#!/usr/bin/env python - -# In order to run the docstrings: -# python3 -m deepdiff.diff -# You might need to run it many times since dictionaries come in different orders -# every time you run the docstrings. -# However the docstring expects it in a specific order in order to pass! -import difflib +import re import logging -import types -import datetime -import uuid -from enum import Enum -from copy import deepcopy -from math import isclose as is_close -from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet, TYPE_CHECKING, Protocol, Literal -from collections.abc import Mapping, Iterable, Sequence -from collections import defaultdict -from inspect import getmembers -from itertools import zip_longest +from ast import literal_eval from functools import lru_cache -from deepdiff.helper import (strings, bytes_type, numbers, uuids, ListItemRemovedOrAdded, notpresent, - IndexedHash, unprocessed, add_to_frozen_set, basic_types, - convert_item_or_items_into_set_else_none, get_type, - convert_item_or_items_into_compiled_regexes_else_none, - type_is_subclass_of_type_group, type_in_type_group, get_doc, - number_to_string, datetime_normalize, KEY_TO_VAL_STR, booleans, - np_ndarray, np_floating, get_numpy_ndarray_rows, RepeatedTimer, - TEXT_VIEW, TREE_VIEW, DELTA_VIEW, COLORED_VIEW, COLORED_COMPACT_VIEW, - detailed__dict__, add_root_to_paths, - np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS, - PydanticBaseModel, Opcode, SetOrdered, ipranges) -from deepdiff.serialization import SerializationMixin -from deepdiff.distance import DistanceMixin, logarithmic_similarity -from deepdiff.model import ( - RemapDict, ResultDict, TextResult, TreeResult, DiffLevel, - DictRelationship, AttributeRelationship, REPORT_KEYS, - SubscriptableIterableRelationship, 
NonSubscriptableIterableRelationship, - SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD, - FORCE_DEFAULT, -) -from deepdiff.deephash import DeepHash, combine_hashes_lists -from deepdiff.base import Base -from deepdiff.lfucache import LFUCache, DummyLFU -from deepdiff.colored_view import ColoredView - -if TYPE_CHECKING: - from pytz.tzinfo import BaseTzInfo - logger = logging.getLogger(__name__) -MAX_PASSES_REACHED_MSG = ( - 'DeepDiff has reached the max number of passes of {}. ' - 'You can possibly get more accurate results by increasing the max_passes parameter.') +GETATTR = 'GETATTR' +GET = 'GET' -MAX_DIFFS_REACHED_MSG = ( - 'DeepDiff has reached the max number of diffs of {}. ' - 'You can possibly get more accurate results by increasing the max_diffs parameter.') +class _WildcardToken: + """Sentinel object for wildcard path tokens. -notpresent_indexed = IndexedHash(indexes=[0], item=notpresent) + Using a dedicated class (instead of plain strings) ensures that a literal + dict key ``'*'`` (parsed from ``root['*']``) is never confused with the + wildcard ``*`` (parsed from ``root[*]``). + """ + def __init__(self, symbol): + self._symbol = symbol -doc = get_doc('diff_doc.rst') + def __repr__(self): + return self._symbol + def __eq__(self, other): + return isinstance(other, _WildcardToken) and self._symbol == other._symbol -PROGRESS_MSG = "DeepDiff {} seconds in progress. Pass #{}, Diff #{}" + def __hash__(self): + return hash(('_WildcardToken', self._symbol)) -def _report_progress(_stats: Dict[str, Any], progress_logger: Callable[[str], None], duration: float) -> None: - """ - Report the progress every few seconds. 
- """ - progress_logger(PROGRESS_MSG.format(duration, _stats[PASSES_COUNT], _stats[DIFF_COUNT])) - - -DISTANCE_CACHE_HIT_COUNT = 'DISTANCE CACHE HIT COUNT' -DIFF_COUNT = 'DIFF COUNT' -PASSES_COUNT = 'PASSES COUNT' -MAX_PASS_LIMIT_REACHED = 'MAX PASS LIMIT REACHED' -MAX_DIFF_LIMIT_REACHED = 'MAX DIFF LIMIT REACHED' -DISTANCE_CACHE_ENABLED = 'DISTANCE CACHE ENABLED' -PREVIOUS_DIFF_COUNT = 'PREVIOUS DIFF COUNT' -PREVIOUS_DISTANCE_CACHE_HIT_COUNT = 'PREVIOUS DISTANCE CACHE HIT COUNT' -CANT_FIND_NUMPY_MSG = 'Unable to import numpy. This must be a bug in DeepDiff since a numpy array is detected.' -INVALID_VIEW_MSG = "view parameter must be one of 'text', 'tree', 'delta', 'colored' or 'colored_compact'. But {} was passed." -CUTOFF_RANGE_ERROR_MSG = 'cutoff_distance_for_pairs needs to be a positive float max 1.' -VERBOSE_LEVEL_RANGE_MSG = 'verbose_level should be 0, 1, or 2.' -PURGE_LEVEL_RANGE_MSG = 'cache_purge_level should be 0, 1, or 2.' -_ENABLE_CACHE_EVERY_X_DIFF = '_ENABLE_CACHE_EVERY_X_DIFF' - -model_fields_set = frozenset(["model_fields_set"]) - - -# What is the threshold to consider 2 items to be pairs. Only used when ignore_order = True. -CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT = 0.3 - -# What is the threshold to calculate pairs of items between 2 iterables. -# For example 2 iterables that have nothing in common, do not need their pairs to be calculated. 
-CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT = 0.7 - -DEEPHASH_PARAM_KEYS = ( - 'exclude_types', - 'exclude_paths', - 'include_paths', - 'exclude_regex_paths', - 'hasher', - 'significant_digits', - 'number_format_notation', - 'ignore_string_type_changes', - 'ignore_numeric_type_changes', - 'ignore_uuid_types', - 'use_enum_value', - 'ignore_type_in_groups', - 'ignore_type_subclasses', - 'ignore_string_case', - 'exclude_obj_callback', - 'ignore_private_variables', - 'encodings', - 'ignore_encoding_errors', - 'default_timezone', - 'custom_operators', -) +SINGLE_WILDCARD = _WildcardToken('*') +MULTI_WILDCARD = _WildcardToken('**') -class DeepDiffProtocol(Protocol): - t1: Any - t2: Any - cutoff_distance_for_pairs: float - use_log_scale: bool - log_scale_similarity_threshold: float - view: str - math_epsilon: Optional[float] - - - -class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, DeepDiffProtocol, Base): - __doc__ = doc - - CACHE_AUTO_ADJUST_THRESHOLD = 0.25 - - def __init__(self, - t1: Any, - t2: Any, - _original_type: Optional[Any]=None, - cache_purge_level: int=1, - cache_size: int=0, - cache_tuning_sample_size: int=0, - custom_operators: Optional[List[Any]] =None, - cutoff_distance_for_pairs: float=CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT, - cutoff_intersection_for_pairs: float=CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT, - default_timezone:Union[datetime.timezone, "BaseTzInfo"]=datetime.timezone.utc, - encodings: Optional[List[str]]=None, - exclude_obj_callback: Optional[Callable]=None, - exclude_obj_callback_strict: Optional[Callable]=None, - exclude_paths: Union[str, List[str], Set[str], FrozenSet[str], None]=None, - exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None]=None, - exclude_types: Optional[List[type]]=None, - get_deep_distance: bool=False, - group_by: Union[str, Tuple[str, str], Callable, None]=None, - group_by_sort_key: Union[str, Callable, None]=None, - hasher: Optional[Callable]=None, - hashes: Optional[Dict[Any, Any]]=None, - 
ignore_encoding_errors: bool=False, - ignore_nan_inequality: bool=False, - ignore_numeric_type_changes: bool=False, - ignore_order: bool=False, - ignore_order_func: Optional[Callable]=None, - ignore_private_variables: bool=True, - ignore_string_case: bool=False, - ignore_string_type_changes: bool=False, - ignore_type_in_groups: Optional[List[Tuple[Any, ...]]]=None, - ignore_type_subclasses: bool=False, - ignore_uuid_types: bool=False, - include_obj_callback: Optional[Callable]=None, - include_obj_callback_strict: Optional[Callable]=None, - include_paths: Union[str, List[str], None]=None, - iterable_compare_func: Optional[Callable]=None, - log_frequency_in_sec: int=0, - log_scale_similarity_threshold: float=0.1, - log_stacktrace: bool=False, - math_epsilon: Optional[float]=None, - max_diffs: Optional[int]=None, - max_passes: int=10000000, - number_format_notation: Literal["f", "e"]="f", - number_to_string_func: Optional[Callable]=None, - progress_logger: Callable[[str], None]=logger.info, - report_repetition: bool=False, - significant_digits: Optional[int]=None, - threshold_to_diff_deeper: float = 0.33, - truncate_datetime: Optional[str]=None, - use_enum_value: bool=False, - use_log_scale: bool=False, - verbose_level: int=1, - view: str=TEXT_VIEW, - zip_ordered_iterables: bool=False, - _parameters: Optional[Dict[str, Any]]=None, - _shared_parameters: Optional[Dict[str, Any]]=None, - **kwargs): - super().__init__() - if kwargs: - raise ValueError(( - "The following parameter(s) are not valid: %s\n" - "The valid parameters are ignore_order, report_repetition, significant_digits, " - "number_format_notation, exclude_paths, include_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, " - "ignore_string_type_changes, ignore_numeric_type_changes, ignore_type_subclasses, ignore_uuid_types, truncate_datetime, " - "ignore_private_variables, ignore_nan_inequality, number_to_string_func, verbose_level, " - "view, hasher, hashes, max_passes, max_diffs, 
zip_ordered_iterables, " - "cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, " - "cache_tuning_sample_size, get_deep_distance, group_by, group_by_sort_key, cache_purge_level, log_stacktrace," - "math_epsilon, iterable_compare_func, use_enum_value, _original_type, threshold_to_diff_deeper, default_timezone " - "ignore_order_func, custom_operators, encodings, ignore_encoding_errors, use_log_scale, log_scale_similarity_threshold " - "_parameters and _shared_parameters.") % ', '.join(kwargs.keys())) - - if _parameters: - self.__dict__.update(_parameters) - else: - self.custom_operators = custom_operators or [] - self.ignore_order = ignore_order - - self.ignore_order_func = ignore_order_func - - ignore_type_in_groups = ignore_type_in_groups or [] - if numbers == ignore_type_in_groups or numbers in ignore_type_in_groups: - ignore_numeric_type_changes = True - self.ignore_numeric_type_changes = ignore_numeric_type_changes - if strings == ignore_type_in_groups or strings in ignore_type_in_groups: - ignore_string_type_changes = True - # Handle ignore_uuid_types - check if uuid+str group is already in ignore_type_in_groups - uuid_str_group = (uuids[0], str) - if uuid_str_group == ignore_type_in_groups or uuid_str_group in ignore_type_in_groups: - ignore_uuid_types = True - self.ignore_uuid_types = ignore_uuid_types - self.use_enum_value = use_enum_value - self.log_scale_similarity_threshold = log_scale_similarity_threshold - self.use_log_scale = use_log_scale - self.default_timezone = default_timezone - self.log_stacktrace = log_stacktrace - self.threshold_to_diff_deeper = threshold_to_diff_deeper - self.ignore_string_type_changes = ignore_string_type_changes - self.ignore_type_in_groups = self.get_ignore_types_in_groups( - ignore_type_in_groups=ignore_type_in_groups, - ignore_string_type_changes=ignore_string_type_changes, - ignore_numeric_type_changes=ignore_numeric_type_changes, - ignore_type_subclasses=ignore_type_subclasses, - 
ignore_uuid_types=ignore_uuid_types) - self.report_repetition = report_repetition - self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths)) - self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths)) - self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) - self.exclude_types = set(exclude_types) if exclude_types else None - self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance - self.ignore_type_subclasses = ignore_type_subclasses - self.type_check_func = type_in_type_group if ignore_type_subclasses else type_is_subclass_of_type_group - self.ignore_string_case = ignore_string_case - self.exclude_obj_callback = exclude_obj_callback - self.exclude_obj_callback_strict = exclude_obj_callback_strict - self.include_obj_callback = include_obj_callback - self.include_obj_callback_strict = include_obj_callback_strict - self.number_to_string = number_to_string_func or number_to_string - self.iterable_compare_func = iterable_compare_func - self.zip_ordered_iterables = zip_ordered_iterables - self.ignore_private_variables = ignore_private_variables - self.ignore_nan_inequality = ignore_nan_inequality - self.hasher = hasher - self.cache_tuning_sample_size = cache_tuning_sample_size - self.group_by = group_by - if callable(group_by_sort_key): - self.group_by_sort_key = group_by_sort_key - elif group_by_sort_key: - def _group_by_sort_key(x): - return x[group_by_sort_key] - self.group_by_sort_key = _group_by_sort_key - else: - self.group_by_sort_key = None - self.encodings = encodings - self.ignore_encoding_errors = ignore_encoding_errors - - self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) - self.math_epsilon = math_epsilon - if self.math_epsilon is not None and self.ignore_order: - logger.warning("math_epsilon in conjunction with 
ignore_order=True is only used for flat object comparisons. Custom math_epsilon will not have an effect when comparing nested objects.") - self.truncate_datetime = get_truncate_datetime(truncate_datetime) - self.number_format_notation = number_format_notation - if verbose_level in {0, 1, 2}: - self.verbose_level = verbose_level - else: - raise ValueError(VERBOSE_LEVEL_RANGE_MSG) - if cache_purge_level not in {0, 1, 2}: - raise ValueError(PURGE_LEVEL_RANGE_MSG) - self.view = view - # Setting up the cache for dynamic programming. One dictionary per instance of root of DeepDiff running. - self.max_passes = max_passes - self.max_diffs = max_diffs - self.cutoff_distance_for_pairs = float(cutoff_distance_for_pairs) - self.cutoff_intersection_for_pairs = float(cutoff_intersection_for_pairs) - if self.cutoff_distance_for_pairs < 0 or self.cutoff_distance_for_pairs > 1: - raise ValueError(CUTOFF_RANGE_ERROR_MSG) - # _Parameters are the clean _parameters to initialize DeepDiff with so we avoid all the above - # cleaning functionalities when running DeepDiff recursively. - # However DeepHash has its own set of _parameters that are slightly different than DeepDIff. - # DeepDiff _parameters are transformed to DeepHash _parameters via _get_deephash_params method. - self.progress_logger = progress_logger - self.cache_size = cache_size - _parameters = self.__dict__.copy() - _parameters['group_by'] = None # overwriting since these parameters will be passed on to other passes. 
- if log_stacktrace: - self.log_err = logger.exception - else: - self.log_err = logger.error - - # Non-Root - if _shared_parameters: - self.is_root = False - self._shared_parameters = _shared_parameters - self.__dict__.update(_shared_parameters) - # We are in some pass other than root - progress_timer = None - # Root - else: - self.is_root = True - # Caching the DeepDiff results for dynamic programming - self._distance_cache = LFUCache(cache_size) if cache_size else DummyLFU() - self._stats = { - PASSES_COUNT: 0, - DIFF_COUNT: 0, - DISTANCE_CACHE_HIT_COUNT: 0, - PREVIOUS_DIFF_COUNT: 0, - PREVIOUS_DISTANCE_CACHE_HIT_COUNT: 0, - MAX_PASS_LIMIT_REACHED: False, - MAX_DIFF_LIMIT_REACHED: False, - DISTANCE_CACHE_ENABLED: bool(cache_size), - } - self.hashes = dict_() if hashes is None else hashes - self._numpy_paths = dict_() # if _numpy_paths is None else _numpy_paths - self.group_by_keys = set() # Track keys that originated from group_by operations - self._shared_parameters = { - 'hashes': self.hashes, - '_stats': self._stats, - '_distance_cache': self._distance_cache, - 'group_by_keys': self.group_by_keys, - '_numpy_paths': self._numpy_paths, - _ENABLE_CACHE_EVERY_X_DIFF: self.cache_tuning_sample_size * 10, - } - if log_frequency_in_sec: - # Creating a progress log reporter that runs in a separate thread every log_frequency_in_sec seconds. 
- progress_timer = RepeatedTimer(log_frequency_in_sec, _report_progress, self._stats, progress_logger) - else: - progress_timer = None - - self._parameters = _parameters - self.deephash_parameters = self._get_deephash_params() - self.tree = TreeResult() - self._iterable_opcodes = {} - if group_by and self.is_root: - try: - original_t1 = t1 - t1 = self._group_iterable_to_dict(t1, group_by, item_name='t1') - except (KeyError, ValueError): - pass - else: - try: - t2 = self._group_iterable_to_dict(t2, group_by, item_name='t2') - except (KeyError, ValueError): - t1 = original_t1 - - self.t1 = t1 - self.t2 = t2 - - try: - root = DiffLevel(t1, t2, verbose_level=self.verbose_level) - # _original_type is only used to pass the original type of the data. Currently only used for numpy arrays. - # The reason is that we convert the numpy array to python list and then later for distance calculations - # we convert only the the last dimension of it into numpy arrays. - self._diff(root, parents_ids=frozenset({id(t1)}), _original_type=_original_type) - - if get_deep_distance and view in {TEXT_VIEW, TREE_VIEW}: - self.tree['deep_distance'] = self._get_rough_distance() - - self.tree.remove_empty_keys() - view_results = self._get_view_results(self.view) - if isinstance(view_results, ColoredView): - self.update(view_results.tree) - self._colored_view = view_results - else: - self.update(view_results) - finally: - if self.is_root: - if cache_purge_level: - del self._distance_cache - del self.hashes - del self._shared_parameters - del self._parameters - for key in (PREVIOUS_DIFF_COUNT, PREVIOUS_DISTANCE_CACHE_HIT_COUNT, - DISTANCE_CACHE_ENABLED): - del self._stats[key] - if progress_timer: - duration = progress_timer.stop() - self._stats['DURATION SEC'] = duration - logger.info('stats {}'.format(self.get_stats())) - if cache_purge_level == 2: - self.__dict__.clear() - - def _get_deephash_params(self): - result = {key: self._parameters[key] for key in DEEPHASH_PARAM_KEYS} - 
result['ignore_repetition'] = not self.report_repetition - result['number_to_string_func'] = self.number_to_string - return result - - def _report_result(self, report_type, change_level, local_tree=None): - """ - Add a detected change to the reference-style result dictionary. - report_type will be added to level. - (We'll create the text-style report from there later.) - :param report_type: A well defined string key describing the type of change. - Examples: "set_item_added", "values_changed" - :param change_level: A DiffLevel object describing the objects in question in their - before-change and after-change object structure. - - :local_tree: None - """ +class PathExtractionError(ValueError): + pass - if not self._skip_this(change_level): - change_level.report_type = report_type - tree = self.tree if local_tree is None else local_tree - tree[report_type].add(change_level) - def custom_report_result(self, report_type, level, extra_info=None): - """ - Add a detected change to the reference-style result dictionary. - report_type will be added to level. - (We'll create the text-style report from there later.) - - :param report_type: A well defined string key describing the type of change. - Examples: "set_item_added", "values_changed" - :param parent: A DiffLevel object describing the objects in question in their - before-change and after-change object structure. 
- :param extra_info: A dict that describe this result - :rtype: None - """ +class RootCanNotBeModified(ValueError): + pass - if not self._skip_this(level): - level.report_type = report_type - level.additional[CUSTOM_FIELD] = extra_info - self.tree[report_type].add(level) - @staticmethod - def _dict_from_slots(object: Any) -> Dict[str, Any]: - def unmangle(attribute: str) -> str: - if attribute.startswith('__') and attribute != '__weakref__': - return '_{type}{attribute}'.format( - type=type(object).__name__, - attribute=attribute - ) - return attribute - - all_slots = [] - - if isinstance(object, type): - mro = object.__mro__ # pragma: no cover. I have not been able to write a test for this case. But we still check for it. +def _add_to_elements(elements, elem, inside): + # Ignore private items + if not elem: + return + if not elem.startswith('__'): + # Handle wildcard tokens (* and **) as-is. + # Unquoted root[*] arrives as bare '*' which matches the string check. + # Quoted root['*'] arrives as "'*'" which does NOT match, so it falls + # through to literal_eval and becomes the plain string '*' — which is + # distinct from the _WildcardToken sentinel and thus treated as a + # literal dict key. + if elem in ('*', '**'): + action = GETATTR if inside == '.' else GET + elements.append((SINGLE_WILDCARD if elem == '*' else MULTI_WILDCARD, action)) + return + remove_quotes = False + if '𝆺𝅥𝅯' in elem or '\\' in elem: + remove_quotes = True else: - mro = object.__class__.__mro__ + try: + elem = literal_eval(elem) + remove_quotes = False + except (ValueError, SyntaxError): + remove_quotes = True + if remove_quotes and elem[0] == elem[-1] and elem[0] in {'"', "'"}: + elem = elem[1: -1] + action = GETATTR if inside == '.' 
else GET + elements.append((elem, action)) - for type_in_mro in mro: - slots = getattr(type_in_mro, '__slots__', None) - if slots: - if isinstance(slots, strings): - all_slots.append(slots) - else: - all_slots.extend(slots) - - return {i: getattr(object, key) for i in all_slots if hasattr(object, key := unmangle(i))} - - def _diff_enum(self, level: Any, parents_ids: FrozenSet[int]=frozenset(), local_tree: Optional[Any]=None) -> None: - t1 = detailed__dict__(level.t1, include_keys=ENUM_INCLUDE_KEYS) - t2 = detailed__dict__(level.t2, include_keys=ENUM_INCLUDE_KEYS) - - self._diff_dict( - level, - parents_ids, - print_as_attribute=True, - override=True, - override_t1=t1, - override_t2=t2, - local_tree=local_tree, - ) - - def _diff_obj(self, level: Any, parents_ids: FrozenSet[int]=frozenset(), is_namedtuple: bool=False, local_tree: Optional[Any]=None, is_pydantic_object: bool=False) -> None: - """Difference of 2 objects""" - processing_error = False - t1: Optional[Dict[str, Any]] = None - t2: Optional[Dict[str, Any]] = None - try: - if is_namedtuple: - t1 = level.t1._asdict() - t2 = level.t2._asdict() - elif is_pydantic_object: - t1 = detailed__dict__(level.t1, ignore_private_variables=self.ignore_private_variables, ignore_keys=model_fields_set) - t2 = detailed__dict__(level.t2, ignore_private_variables=self.ignore_private_variables, ignore_keys=model_fields_set) - elif all('__dict__' in dir(t) for t in level): - t1 = detailed__dict__(level.t1, ignore_private_variables=self.ignore_private_variables) - t2 = detailed__dict__(level.t2, ignore_private_variables=self.ignore_private_variables) - elif all('__slots__' in dir(t) for t in level): - t1 = self._dict_from_slots(level.t1) - t2 = self._dict_from_slots(level.t2) - else: - t1 = {k: v for k, v in getmembers(level.t1) if not callable(v)} - t2 = {k: v for k, v in getmembers(level.t2) if not callable(v)} - except AttributeError: - processing_error = True - if processing_error is True or t1 is None or t2 is None: - 
self._report_result('unprocessed', level, local_tree=local_tree) - return - self._diff_dict( - level, - parents_ids, - print_as_attribute=True, - override=True, - override_t1=t1, - override_t2=t2, - local_tree=local_tree, - ) - - def _skip_this(self, level: Any) -> bool: - """ - Check whether this comparison should be skipped because one of the objects to compare meets exclusion criteria. - :rtype: bool - """ - level_path = level.path() - skip = False - if self.exclude_paths and level_path in self.exclude_paths: - skip = True - if self.include_paths and level_path != 'root': - if level_path not in self.include_paths: - skip = True - for prefix in self.include_paths: - if prefix in level_path or level_path in prefix: - skip = False - break - elif self.exclude_regex_paths and any( - [exclude_regex_path.search(level_path) for exclude_regex_path in self.exclude_regex_paths]): - skip = True - elif self.exclude_types_tuple and \ - (isinstance(level.t1, self.exclude_types_tuple) or isinstance(level.t2, self.exclude_types_tuple)): - skip = True - elif self.exclude_obj_callback and \ - (self.exclude_obj_callback(level.t1, level_path) or self.exclude_obj_callback(level.t2, level_path)): - skip = True - elif self.exclude_obj_callback_strict and \ - (self.exclude_obj_callback_strict(level.t1, level_path) and - self.exclude_obj_callback_strict(level.t2, level_path)): - skip = True - elif self.include_obj_callback and level_path != 'root': - skip = True - if (self.include_obj_callback(level.t1, level_path) or self.include_obj_callback(level.t2, level_path)): - skip = False - elif self.include_obj_callback_strict and level_path != 'root': - skip = True - if (self.include_obj_callback_strict(level.t1, level_path) and - self.include_obj_callback_strict(level.t2, level_path)): - skip = False - - return skip - - def _skip_this_key(self, level: Any, key: Any) -> bool: - # if include_paths is not set, than treet every path as included - if self.include_paths is None: - return False - 
if "{}['{}']".format(level.path(), key) in self.include_paths: - return False - if level.path() in self.include_paths: - # matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']"] - return False - for prefix in self.include_paths: - if "{}['{}']".format(level.path(), key) in prefix: - # matches as long the prefix is longer than this object key - # eg.: level+key root['foo']['bar'] matches prefix root['foo']['bar'] from include paths - # level+key root['foo'] matches prefix root['foo']['bar'] from include_paths - # level+key root['foo']['bar'] DOES NOT match root['foo'] from include_paths This needs to be handled afterwards - return False - # check if a higher level is included as a whole (=without any sublevels specified) - # matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']"] - # but does not match, if it is level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']['fruits']"] - up = level.up - while up is not None: - if up.path() in self.include_paths: - return False - up = up.up - return True +DEFAULT_FIRST_ELEMENT = ('root', GETATTR) - def _get_clean_to_keys_mapping(self, keys: Any, level: Any) -> Dict[Any, Any]: - """ - Get a dictionary of cleaned value of keys to the keys themselves. - This is mainly used to transform the keys when the type changes of keys should be ignored. - TODO: needs also some key conversion for groups of types other than the built-in strings and numbers. 
- """ - result = dict_() - for key in keys: - if self.ignore_string_type_changes and isinstance(key, bytes): - clean_key = key.decode('utf-8') - elif self.ignore_string_type_changes and isinstance(key, memoryview): - clean_key = key.tobytes().decode('utf-8') - elif self.use_enum_value and isinstance(key, Enum): - clean_key = key.value - elif isinstance(key, numbers): - # Skip type prefixing for keys that originated from group_by operations - if hasattr(self, 'group_by_keys') and key in self.group_by_keys: - if self.significant_digits is None: - clean_key = key - else: - clean_key = self.number_to_string(key, significant_digits=self.significant_digits, - number_format_notation=self.number_format_notation) # type: ignore # type: ignore +@lru_cache(maxsize=1024 * 128) +def _path_to_elements(path, root_element=DEFAULT_FIRST_ELEMENT): + """ + Given a path, it extracts the elements that form the path and their relevant most likely retrieval action. + + >>> from deepdiff import _path_to_elements + >>> path = "root[4.3].b['a3']" + >>> _path_to_elements(path, root_element=None) + [(4.3, 'GET'), ('b', 'GETATTR'), ('a3', 'GET')] + """ + if isinstance(path, (tuple, list)): + return path + elements = [] + if root_element: + elements.append(root_element) + elem = '' + inside = False + prev_char = None + path = path[4:] # removing "root from the beginning" + brackets = [] + inside_quotes = False + quote_used = '' + for char in path: + if prev_char == '𝆺𝅥𝅯': + elem += char + elif char in {'"', "'"}: + elem += char + # If we are inside and the quote is not what we expected, the quote is not closing + if not(inside_quotes and quote_used != char): + inside_quotes = not inside_quotes + if inside_quotes: + quote_used = char else: - type_ = "number" if self.ignore_numeric_type_changes else key.__class__.__name__ - if self.significant_digits is None: - clean_key = key - else: - clean_key = self.number_to_string(key, significant_digits=self.significant_digits, - 
number_format_notation=self.number_format_notation) # type: ignore # type: ignore - clean_key = KEY_TO_VAL_STR.format(type_, clean_key) + _add_to_elements(elements, elem, inside) + elem = '' + quote_used = '' + elif inside_quotes: + elem += char + elif char == '[': + if inside == '.': + _add_to_elements(elements, elem, inside) + inside = '[' + elem = '' + # we are already inside. The bracket is a part of the word. + elif inside == '[': + elem += char else: - clean_key = key - if self.ignore_string_case and isinstance(clean_key, str): - clean_key = clean_key.lower() - if clean_key in result: - logger.warning(('{} and {} in {} become the same key when ignore_numeric_type_changes' - 'or ignore_numeric_type_changes are set to be true.').format( - key, result[clean_key], level.path())) + inside = '[' + brackets.append('[') + elem = '' + elif char == '.': + if inside == '[': + elem += char + elif inside == '.': + _add_to_elements(elements, elem, inside) + elem = '' else: - result[clean_key] = key - return result - - def _diff_dict( - self, - level: Any, - parents_ids: FrozenSet[int]=frozenset([]), - print_as_attribute: bool=False, - override: bool=False, - override_t1: Optional[Any]=None, - override_t2: Optional[Any]=None, - local_tree: Optional[Any]=None, - ) -> None: - """Difference of 2 dictionaries""" - if override: - # for special stuff like custom objects and named tuples we receive preprocessed t1 and t2 - # but must not spoil the chain (=level) with it - t1 = override_t1 - t2 = override_t2 - else: - t1 = level.t1 - t2 = level.t2 - - if print_as_attribute: - item_added_key = "attribute_added" - item_removed_key = "attribute_removed" - rel_class = AttributeRelationship - else: - item_added_key = "dictionary_item_added" - item_removed_key = "dictionary_item_removed" - rel_class = DictRelationship - - if self.ignore_private_variables: - t1_keys = SetOrdered([key for key in t1 if not(isinstance(key, str) and key.startswith('__')) and not self._skip_this_key(level, 
key)]) - t2_keys = SetOrdered([key for key in t2 if not(isinstance(key, str) and key.startswith('__')) and not self._skip_this_key(level, key)]) - else: - t1_keys = SetOrdered([key for key in t1 if not self._skip_this_key(level, key)]) - t2_keys = SetOrdered([key for key in t2 if not self._skip_this_key(level, key)]) - if self.ignore_string_type_changes or self.ignore_numeric_type_changes or self.ignore_string_case: - t1_clean_to_keys = self._get_clean_to_keys_mapping(keys=t1_keys, level=level) - t2_clean_to_keys = self._get_clean_to_keys_mapping(keys=t2_keys, level=level) - t1_keys = SetOrdered(t1_clean_to_keys.keys()) - t2_keys = SetOrdered(t2_clean_to_keys.keys()) - else: - t1_clean_to_keys = t2_clean_to_keys = None - - t_keys_intersect = t2_keys & t1_keys - t_keys_added = t2_keys - t_keys_intersect - t_keys_removed = t1_keys - t_keys_intersect - - if self.threshold_to_diff_deeper: - if self.exclude_paths: - t_keys_union = {f"{level.path()}[{repr(key)}]" for key in (t2_keys | t1_keys)} - t_keys_union -= self.exclude_paths - t_keys_union_len = len(t_keys_union) + inside = '.' + elem = '' + elif char == ']': + if brackets and brackets[-1] == '[': + brackets.pop() + if brackets: + elem += char else: - t_keys_union_len = len(t2_keys | t1_keys) - if t_keys_union_len > 1 and len(t_keys_intersect) / t_keys_union_len < self.threshold_to_diff_deeper: - self._report_result('values_changed', level, local_tree=local_tree) - return - - for key in t_keys_added: - if self._count_diff() is StopIteration: - return - - key = t2_clean_to_keys[key] if t2_clean_to_keys else key - change_level = level.branch_deeper( - notpresent, - t2[key], - child_relationship_class=rel_class, - child_relationship_param=key, - child_relationship_param2=key, - ) - self._report_result(item_added_key, change_level, local_tree=local_tree) - - for key in t_keys_removed: - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition. 
- - key = t1_clean_to_keys[key] if t1_clean_to_keys else key - change_level = level.branch_deeper( - t1[key], - notpresent, - child_relationship_class=rel_class, - child_relationship_param=key, - child_relationship_param2=key, - ) - self._report_result(item_removed_key, change_level, local_tree=local_tree) - - for key in t_keys_intersect: # key present in both dicts - need to compare values - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition. - - key1 = t1_clean_to_keys[key] if t1_clean_to_keys else key - key2 = t2_clean_to_keys[key] if t2_clean_to_keys else key - item_id = id(t1[key1]) - if parents_ids and item_id in parents_ids: - continue - parents_ids_added = add_to_frozen_set(parents_ids, item_id) - - # Go one level deeper - next_level = level.branch_deeper( - t1[key1], - t2[key2], - child_relationship_class=rel_class, - child_relationship_param=key, - child_relationship_param2=key, - ) - self._diff(next_level, parents_ids_added, local_tree=local_tree) - - def _diff_set(self, level: Any, local_tree: Optional[Any]=None) -> None: - """Difference of sets""" - t1_hashtable = self._create_hashtable(level, 't1') - t2_hashtable = self._create_hashtable(level, 't2') - - t1_hashes = set(t1_hashtable.keys()) - t2_hashes = set(t2_hashtable.keys()) - - hashes_added = t2_hashes - t1_hashes - hashes_removed = t1_hashes - t2_hashes - - items_added = [t2_hashtable[i].item for i in hashes_added] - items_removed = [t1_hashtable[i].item for i in hashes_removed] - - for item in items_added: - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition. - - change_level = level.branch_deeper( - notpresent, item, child_relationship_class=SetRelationship) - self._report_result('set_item_added', change_level, local_tree=local_tree) - - for item in items_removed: - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition. 
- - change_level = level.branch_deeper( - item, notpresent, child_relationship_class=SetRelationship) - self._report_result('set_item_removed', change_level, local_tree=local_tree) - - @staticmethod - def _iterables_subscriptable(t1: Any, t2: Any) -> bool: - try: - if getattr(t1, '__getitem__') and getattr(t2, '__getitem__'): - return True - else: # pragma: no cover - return False # should never happen - except AttributeError: - return False - - def _diff_iterable(self, level: Any, parents_ids: FrozenSet[int]=frozenset(), _original_type: Optional[type]=None, local_tree: Optional[Any]=None) -> None: - """Difference of iterables""" - if (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order: - self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type, local_tree=local_tree) - else: - self._diff_iterable_in_order(level, parents_ids, _original_type=_original_type, local_tree=local_tree) - - def _compare_in_order( - self, level, - t1_from_index=None, t1_to_index=None, - t2_from_index=None, t2_to_index=None - ) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]: - """ - Default compare if `iterable_compare_func` is not provided. - This will compare in sequence order. - """ - if t1_from_index is None: - return [((i, i), (x, y)) for i, (x, y) in enumerate( - zip_longest( - level.t1, level.t2, fillvalue=ListItemRemovedOrAdded))] + _add_to_elements(elements, elem, inside) + elem = '' + inside = False else: - t1_chunk = level.t1[t1_from_index:t1_to_index] - t2_chunk = level.t2[t2_from_index:t2_to_index] - return [((i + t1_from_index, i + t2_from_index), (x, y)) for i, (x, y) in enumerate( - zip_longest( - t1_chunk, t2_chunk, fillvalue=ListItemRemovedOrAdded))] - - def _get_matching_pairs( - self, level, - t1_from_index=None, t1_to_index=None, - t2_from_index=None, t2_to_index=None - ) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]: - """ - Given a level get matching pairs. 
This returns list of two tuples in the form: - [ - (t1 index, t2 index), (t1 item, t2 item) - ] - - This will compare using the passed in `iterable_compare_func` if available. - Default it to compare in order - """ - - if self.iterable_compare_func is None: - # Match in order if there is no compare function provided - return self._compare_in_order( - level, - t1_from_index=t1_from_index, t1_to_index=t1_to_index, - t2_from_index=t2_from_index, t2_to_index=t2_to_index, - ) - try: - matches = [] - y_matched = set() - y_index_matched = set() - for i, x in enumerate(level.t1): - x_found = False - for j, y in enumerate(level.t2): - - if(j in y_index_matched): - # This ensures a one-to-one relationship of matches from t1 to t2. - # If y this index in t2 has already been matched to another x - # it cannot have another match, so just continue. - continue - - if(self.iterable_compare_func(x, y, level)): - deep_hash = DeepHash(y, - hashes=self.hashes, - apply_hash=True, - **self.deephash_parameters, - ) - y_index_matched.add(j) - y_matched.add(deep_hash[y]) - matches.append(((i, j), (x, y))) - x_found = True - break - - if(not x_found): - matches.append(((i, -1), (x, ListItemRemovedOrAdded))) - for j, y in enumerate(level.t2): - - deep_hash = DeepHash(y, - hashes=self.hashes, - apply_hash=True, - **self.deephash_parameters, - ) - if(deep_hash[y] not in y_matched): - matches.append(((-1, j), (ListItemRemovedOrAdded, y))) - return matches - except CannotCompare: - return self._compare_in_order( - level, - t1_from_index=t1_from_index, t1_to_index=t1_to_index, - t2_from_index=t2_from_index, t2_to_index=t2_to_index - ) - - def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type=None, local_tree=None): - # We're handling both subscriptable and non-subscriptable iterables. Which one is it? 
- subscriptable = self._iterables_subscriptable(level.t1, level.t2) - if subscriptable: - child_relationship_class = SubscriptableIterableRelationship - else: - child_relationship_class = NonSubscriptableIterableRelationship - - if ( - not self.zip_ordered_iterables - and isinstance(level.t1, Sequence) - and isinstance(level.t2, Sequence) - and self._all_values_basic_hashable(level.t1) - and self._all_values_basic_hashable(level.t2) - and self.iterable_compare_func is None - ): - local_tree_pass = TreeResult() - opcodes_with_values = self._diff_ordered_iterable_by_difflib( - level, - parents_ids=parents_ids, - _original_type=_original_type, - child_relationship_class=child_relationship_class, - local_tree=local_tree_pass, - ) - # Sometimes DeepDiff's old iterable diff does a better job than DeepDiff - if len(local_tree_pass) > 1: - local_tree_pass2 = TreeResult() - self._diff_by_forming_pairs_and_comparing_one_by_one( - level, - parents_ids=parents_ids, - _original_type=_original_type, - child_relationship_class=child_relationship_class, - local_tree=local_tree_pass2, - ) - if len(local_tree_pass) >= len(local_tree_pass2): - local_tree_pass = local_tree_pass2 - else: - self._iterable_opcodes[level.path(force=FORCE_DEFAULT)] = opcodes_with_values - for report_type, levels in local_tree_pass.items(): - if levels: - self.tree[report_type] |= levels - else: - self._diff_by_forming_pairs_and_comparing_one_by_one( - level, - parents_ids=parents_ids, - _original_type=_original_type, - child_relationship_class=child_relationship_class, - local_tree=local_tree, - ) - - def _all_values_basic_hashable(self, iterable: Iterable[Any]) -> bool: - """ - Are all items basic hashable types? - Or there are custom types too? 
- """ - - # We don't want to exhaust a generator - if isinstance(iterable, types.GeneratorType): - return False - for item in iterable: - if not isinstance(item, basic_types): - return False - return True - - def _diff_by_forming_pairs_and_comparing_one_by_one( - self, level, local_tree, parents_ids=frozenset(), - _original_type=None, child_relationship_class=None, - t1_from_index=None, t1_to_index=None, - t2_from_index=None, t2_to_index=None, - ): - for (i, j), (x, y) in self._get_matching_pairs( - level, - t1_from_index=t1_from_index, t1_to_index=t1_to_index, - t2_from_index=t2_from_index, t2_to_index=t2_to_index - ): - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition. - - reference_param1 = i - reference_param2 = j - if y is ListItemRemovedOrAdded: # item removed completely - change_level = level.branch_deeper( - x, - notpresent, - child_relationship_class=child_relationship_class, - child_relationship_param=reference_param1, - child_relationship_param2=reference_param2, - ) - self._report_result('iterable_item_removed', change_level, local_tree=local_tree) - - elif x is ListItemRemovedOrAdded: # new item added - change_level = level.branch_deeper( - notpresent, - y, - child_relationship_class=child_relationship_class, - child_relationship_param=reference_param1, - child_relationship_param2=reference_param2, - ) - self._report_result('iterable_item_added', change_level, local_tree=local_tree) - - else: # check if item value has changed - if (i != j and ((x == y) or self.iterable_compare_func)): - # Item moved - change_level = level.branch_deeper( - x, - y, - child_relationship_class=child_relationship_class, - child_relationship_param=reference_param1, - child_relationship_param2=reference_param2 - ) - self._report_result('iterable_item_moved', change_level, local_tree=local_tree) - - if self.iterable_compare_func: - # Mark additional context denoting that we have moved an item. 
- # This will allow for correctly setting paths relative to t2 when using an iterable_compare_func - level.additional["moved"] = True - - else: - continue - - item_id = id(x) - if parents_ids and item_id in parents_ids: - continue - parents_ids_added = add_to_frozen_set(parents_ids, item_id) - - # Go one level deeper - next_level = level.branch_deeper( - x, - y, - child_relationship_class=child_relationship_class, - child_relationship_param=reference_param1, - child_relationship_param2=reference_param2 - ) - self._diff(next_level, parents_ids_added, local_tree=local_tree) - - def _diff_ordered_iterable_by_difflib( - self, level, local_tree, parents_ids=frozenset(), _original_type=None, child_relationship_class=None, - ): - - seq = difflib.SequenceMatcher(isjunk=None, a=level.t1, b=level.t2, autojunk=False) - - opcodes = seq.get_opcodes() - opcodes_with_values = [] - - # TODO: this logic should be revisted so we detect reverse operations - # like when a replacement happens at index X and a reverse replacement happens at index Y - # in those cases we have a "iterable_item_moved" operation. 
- for tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index in opcodes: - if tag == 'equal': - opcodes_with_values.append(Opcode( - tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, - )) - continue - # print('{:7} t1[{}:{}] --> t2[{}:{}] {!r:>8} --> {!r}'.format( - # tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, level.t1[t1_from_index:t1_to_index], level.t2[t2_from_index:t2_to_index])) - - opcodes_with_values.append(Opcode( - tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, - old_values = level.t1[t1_from_index: t1_to_index], - new_values = level.t2[t2_from_index: t2_to_index], - )) - - if tag == 'replace': - self._diff_by_forming_pairs_and_comparing_one_by_one( - level, local_tree=local_tree, parents_ids=parents_ids, - _original_type=_original_type, child_relationship_class=child_relationship_class, - t1_from_index=t1_from_index, t1_to_index=t1_to_index, - t2_from_index=t2_from_index, t2_to_index=t2_to_index, - ) - elif tag == 'delete': - for index, x in enumerate(level.t1[t1_from_index:t1_to_index]): - change_level = level.branch_deeper( - x, - notpresent, - child_relationship_class=child_relationship_class, - child_relationship_param=index + t1_from_index, - child_relationship_param2=index + t1_from_index, - ) - self._report_result('iterable_item_removed', change_level, local_tree=local_tree) - elif tag == 'insert': - for index, y in enumerate(level.t2[t2_from_index:t2_to_index]): - change_level = level.branch_deeper( - notpresent, - y, - child_relationship_class=child_relationship_class, - child_relationship_param=index + t2_from_index, - child_relationship_param2=index + t2_from_index, - ) - self._report_result('iterable_item_added', change_level, local_tree=local_tree) - return opcodes_with_values - - - def _diff_str(self, level, local_tree=None): - """Compare strings""" - if self.ignore_string_case: - level.t1 = level.t1.lower() - level.t2 = level.t2.lower() - - if type(level.t1) == type(level.t2) and level.t1 == 
level.t2: # NOQA - return - - # do we add a diff for convenience? - do_diff = True - t1_str = level.t1 - t2_str = level.t2 - - if isinstance(level.t1, memoryview): - try: - t1_str = level.t1.tobytes().decode('ascii') - except UnicodeDecodeError: - do_diff = False - elif isinstance(level.t1, bytes_type): - try: - t1_str = level.t1.decode('ascii') - except UnicodeDecodeError: - do_diff = False - - if isinstance(level.t2, memoryview): - try: - t2_str = level.t2.tobytes().decode('ascii') - except UnicodeDecodeError: - do_diff = False - elif isinstance(level.t2, bytes_type): + elem += char + prev_char = char + if elem: + _add_to_elements(elements, elem, inside) + return tuple(elements) + + +def _get_nested_obj(obj, elements, next_element=None): + for (elem, action) in elements: + check_elem(elem) + if action == GET: + obj = obj[elem] + elif action == GETATTR: + obj = getattr(obj, elem) + return obj + + +def _guess_type(elements, elem, index, next_element): + # If we are not at the last elements + if index < len(elements) - 1: + # We assume it is a nested dictionary not a nested list + return {} + if isinstance(next_element, int): + return [] + return {} + + +def check_elem(elem): + if isinstance(elem, str) and elem.startswith("__") and elem.endswith("__"): + raise ValueError("traversing dunder attributes is not allowed") + + +def _get_nested_obj_and_force(obj, elements, next_element=None): + prev_elem = None + prev_action = None + prev_obj = obj + for index, (elem, action) in enumerate(elements): + check_elem(elem) + _prev_obj = obj + if action == GET: try: - t2_str = level.t2.decode('ascii') - except UnicodeDecodeError: - do_diff = False - - if isinstance(level.t1, Enum): - t1_str = level.t1.value - - if isinstance(level.t2, Enum): - t2_str = level.t2.value + obj = obj[elem] + prev_obj = _prev_obj + except KeyError: + obj[elem] = _guess_type(elements, elem, index, next_element) + obj = obj[elem] + prev_obj = _prev_obj + except IndexError: + if isinstance(obj, list) and 
isinstance(elem, int) and elem >= len(obj): + obj.extend([None] * (elem - len(obj))) + obj.append(_guess_type(elements, elem, index), next_element) + obj = obj[-1] + prev_obj = _prev_obj + elif isinstance(obj, list) and len(obj) == 0 and prev_elem: + # We ran into an empty list that should have been a dictionary + # We need to change it from an empty list to a dictionary + obj = {elem: _guess_type(elements, elem, index, next_element)} + if prev_action == GET: + prev_obj[prev_elem] = obj + else: + setattr(prev_obj, prev_elem, obj) + obj = obj[elem] + elif action == GETATTR: + obj = getattr(obj, elem) + prev_obj = _prev_obj + prev_elem = elem + prev_action = action + return obj - if t1_str == t2_str: - return - if do_diff: - if '\n' in t1_str or isinstance(t2_str, str) and '\n' in t2_str: - diff = difflib.unified_diff( - t1_str.splitlines(), t2_str.splitlines(), lineterm='') - diff = list(diff) - if diff: - level.additional['diff'] = '\n'.join(diff) - - self._report_result('values_changed', level, local_tree=local_tree) - - def _diff_tuple(self, level, parents_ids, local_tree=None): - # Checking to see if it has _fields. Which probably means it is a named - # tuple. - try: - level.t1._asdict - # It must be a normal tuple - except AttributeError: - self._diff_iterable(level, parents_ids, local_tree=local_tree) - # We assume it is a namedtuple then - else: - self._diff_obj(level, parents_ids, is_namedtuple=True, local_tree=local_tree) +def extract(obj, path): + """ + Get the item from obj based on path. + + Example: + + >>> from deepdiff import extract + >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]} + >>> path = "root[1][0]['2']" + >>> extract(obj, path) + 'b' + + Note that you can use extract in conjunction with DeepDiff results + or even with the search and :ref:`deepsearch_label` modules. 
For example: + + >>> from deepdiff import grep + >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]} + >>> result = obj | grep(5) + >>> result + {'matched_values': ['root[2][1]']} + >>> result['matched_values'][0] + 'root[2][1]' + >>> path = result['matched_values'][0] + >>> extract(obj, path) + 5 + + + .. note:: + Note that even if DeepDiff tried gives you a path to an item in a set, + there is no such thing in Python and hence you will get an error trying + to extract that item from a set. + If you want to be able to get items from sets, use the SetOrdered module + to generate the sets. + In fact Deepdiff uses SetOrdered as a dependency. + + >>> from deepdiff import grep, extract + >>> obj = {"a", "b"} + >>> obj | grep("b") + Set item detected in the path.'set' objects do NOT support indexing. But DeepSearch will still report a path. + {'matched_values': SetOrdered(['root[0]'])} + >>> extract(obj, 'root[0]') + Traceback (most recent call last): + File "", line 1, in + File "deepdiff/deepdiff/path.py", line 126, in extract + return _get_nested_obj(obj, elements) + File "deepdiff/deepdiff/path.py", line 84, in _get_nested_obj + obj = obj[elem] + TypeError: 'set' object is not subscriptable + >>> from orderly_set import SetOrdered + >>> obj = SetOrdered(["a", "b"]) + >>> extract(obj, 'root[0]') + 'a' - def _add_hash(self, hashes, item_hash, item, i): - if item_hash in hashes: - hashes[item_hash].indexes.append(i) - else: - hashes[item_hash] = IndexedHash(indexes=[i], item=item) + """ + elements = _path_to_elements(path, root_element=None) + return _get_nested_obj(obj, elements) - def _create_hashtable(self, level, t): - """Create hashtable of {item_hash: (indexes, item)}""" - obj = getattr(level, t) - local_hashes = dict_() - for (i, item) in enumerate(obj): - try: - parent = "{}[{}]".format(level.path(), i) - # Note: in the DeepDiff we only calculate the hash of items when we have to. - # So self.hashes does not include hashes of all objects in t1 and t2. 
- # It only includes the ones needed when comparing iterables. - # The self.hashes dictionary gets shared between different runs of DeepHash - # So that any object that is already calculated to have a hash is not re-calculated. - deep_hash = DeepHash( - item, - hashes=self.hashes, - parent=parent, - apply_hash=True, - **self.deephash_parameters, - ) - except UnicodeDecodeError as err: - err.reason = f"Can not produce a hash for {level.path()}: {err.reason}" - raise - except NotImplementedError: - raise - # except Exception as e: # pragma: no cover - # logger.error("Can not produce a hash for %s." - # "Not counting this object.\n %s" % - # (level.path(), e)) - else: - try: - item_hash = deep_hash[item] - except KeyError: - pass - else: - if item_hash is unprocessed: # pragma: no cover - self.log_err("Item %s was not processed while hashing " - "thus not counting this object." % - level.path()) - else: - self._add_hash(hashes=local_hashes, item_hash=item_hash, item=item, i=i) - - # Also we hash the iterables themselves too so that we can later create cache keys from those hashes. - DeepHash( - obj, - hashes=self.hashes, - parent=level.path(), - apply_hash=True, - **self.deephash_parameters, - ) - return local_hashes +def parse_path(path, root_element=DEFAULT_FIRST_ELEMENT, include_actions=False): + """ + Parse a path to a format that is machine readable - @staticmethod - @lru_cache(maxsize=2028) - def _get_distance_cache_key(added_hash, removed_hash): - key1, key2 = (added_hash, removed_hash) if added_hash > removed_hash else (removed_hash, added_hash) - if isinstance(key1, int): - # If the hash function produces integers we convert them to hex values. - # This was used when the default hash function was Murmur3 128bit which produces integers. 
- key1 = hex(key1).encode('utf-8') - key2 = hex(key2).encode('utf-8') - elif isinstance(key1, str): - key1 = key1.encode('utf-8') - key2 = key2.encode('utf-8') - return key1 + b'--' + key2 + b'dc' - - def _get_rough_distance_of_hashed_objs( - self, added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type=None): - # We need the rough distance between the 2 objects to see if they qualify to be pairs or not - _distance = cache_key = None - if self._stats[DISTANCE_CACHE_ENABLED]: - cache_key = self._get_distance_cache_key(added_hash, removed_hash) - if cache_key in self._distance_cache: - self._stats[DISTANCE_CACHE_HIT_COUNT] += 1 - _distance = self._distance_cache.get(cache_key) - if _distance is None: - # We can only cache the rough distance and not the actual diff result for reuse. - # The reason is that we have modified the parameters explicitly so they are different and can't - # be used for diff reporting - diff = DeepDiff( - removed_hash_obj.item, added_hash_obj.item, - _parameters=self._parameters, - _shared_parameters=self._shared_parameters, - view=DELTA_VIEW, - _original_type=_original_type, - iterable_compare_func=self.iterable_compare_func, - ) - _distance = diff._get_rough_distance() - if cache_key and self._stats[DISTANCE_CACHE_ENABLED]: - self._distance_cache.set(cache_key, value=_distance) - return _distance - - def _get_most_in_common_pairs_in_iterables( - self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type): - """ - Get the closest pairs between items that are removed and items that are added. + **Parameters** - returns a dictionary of hashes that are closest to each other. - The dictionary is going to be symmetrical so any key will be a value too and otherwise. + path : A string + The path string such as "root[1][2]['age']" - Note that due to the current reporting structure in DeepDiff, we don't compare an item that - was added to an item that is in both t1 and t2. 
+ root_element: string, default='root' + What the root is called in the path. - For example + include_actions: boolean, default=False + If True, we return the action required to retrieve the item at each element of the path. - [{1, 2}, {4, 5, 6}] - [{1, 2}, {1, 2, 3}] + **Examples** - is only compared between {4, 5, 6} and {1, 2, 3} even though technically {1, 2, 3} is - just one item different than {1, 2} + >>> from deepdiff import parse_path + >>> parse_path("root[1][2]['age']") + [1, 2, 'age'] + >>> parse_path("root[1][2]['age']", include_actions=True) + [{'element': 1, 'action': 'GET'}, {'element': 2, 'action': 'GET'}, {'element': 'age', 'action': 'GET'}] + >>> + >>> parse_path("root['joe'].age") + ['joe', 'age'] + >>> parse_path("root['joe'].age", include_actions=True) + [{'element': 'joe', 'action': 'GET'}, {'element': 'age', 'action': 'GETATTR'}] - Perhaps in future we can have a report key that is item duplicated and modified instead of just added. - """ - cache_key = None - if self._stats[DISTANCE_CACHE_ENABLED]: - cache_key = combine_hashes_lists(items=[hashes_added, hashes_removed], prefix='pairs_cache') - if cache_key in self._distance_cache: - return self._distance_cache.get(cache_key).copy() - - # A dictionary of hashes to distances and each distance to an ordered set of hashes. - # It tells us about the distance of each object from other objects. - # And the objects with the same distances are grouped together in an ordered set. - # It also includes a "max" key that is just the value of the biggest current distance in the - # most_in_common_pairs dictionary. 
- def defaultdict_orderedset(): - return defaultdict(SetOrdered) - most_in_common_pairs = defaultdict(defaultdict_orderedset) - pairs = dict_() - - pre_calced_distances = None - if hashes_added and hashes_removed and np and len(hashes_added) > 1 and len(hashes_removed) > 1: - # pre-calculates distances ONLY for 1D arrays whether an _original_type - # was explicitly passed or a homogeneous array is detected. - # Numpy is needed for this optimization. - pre_calced_distances = self._precalculate_numpy_arrays_distance( - hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type) - - if hashes_added and hashes_removed \ - and self.iterable_compare_func \ - and len(hashes_added) > 0 and len(hashes_removed) > 0: - pre_calced_distances = self._precalculate_distance_by_custom_compare_func( - hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type) - - for added_hash in hashes_added: - for removed_hash in hashes_removed: - added_hash_obj = t2_hashtable[added_hash] - removed_hash_obj = t1_hashtable[removed_hash] - - # Loop is detected - if id(removed_hash_obj.item) in parents_ids: - continue - - _distance = None - if pre_calced_distances: - _distance = pre_calced_distances.get("{}--{}".format(added_hash, removed_hash)) - if _distance is None: - _distance = self._get_rough_distance_of_hashed_objs( - added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type) - # Left for future debugging - # print(f'{Fore.RED}distance of {added_hash_obj.item} and {removed_hash_obj.item}: {_distance}{Style.RESET_ALL}') - # Discard potential pairs that are too far. 
- if _distance >= self.cutoff_distance_for_pairs: - continue - pairs_of_item = most_in_common_pairs[added_hash] - pairs_of_item[_distance].add(removed_hash) - used_to_hashes = set() - - distances_to_from_hashes = defaultdict(SetOrdered) - for from_hash, distances_to_to_hashes in most_in_common_pairs.items(): - # del distances_to_to_hashes['max'] - for dist in distances_to_to_hashes: - distances_to_from_hashes[dist].add(from_hash) - - for dist in sorted(distances_to_from_hashes.keys()): - from_hashes = distances_to_from_hashes[dist] - while from_hashes: - from_hash = from_hashes.pop() - if from_hash not in used_to_hashes: - to_hashes = most_in_common_pairs[from_hash][dist] - while to_hashes: - to_hash = to_hashes.pop() - if to_hash not in used_to_hashes: - used_to_hashes.add(from_hash) - used_to_hashes.add(to_hash) - # Left for future debugging: - # print(f'{bcolors.FAIL}Adding {t2_hashtable[from_hash].item} as a pairs of {t1_hashtable[to_hash].item} with distance of {dist}{bcolors.ENDC}') - pairs[from_hash] = to_hash - - inverse_pairs = {v: k for k, v in pairs.items()} - pairs.update(inverse_pairs) - if cache_key and self._stats[DISTANCE_CACHE_ENABLED]: - self._distance_cache.set(cache_key, value=pairs) - return pairs.copy() - - def _diff_iterable_with_deephash(self, level, parents_ids, _original_type=None, local_tree=None): - """Diff of hashable or unhashable iterables. Only used when ignoring the order.""" - - full_t1_hashtable = self._create_hashtable(level, 't1') - full_t2_hashtable = self._create_hashtable(level, 't2') - t1_hashes = SetOrdered(full_t1_hashtable.keys()) - t2_hashes = SetOrdered(full_t2_hashtable.keys()) - hashes_added = t2_hashes - t1_hashes - hashes_removed = t1_hashes - t2_hashes - - # Deciding whether to calculate pairs or not. 
- if (len(hashes_added) + len(hashes_removed)) / (len(full_t1_hashtable) + len(full_t2_hashtable) + 1) > self.cutoff_intersection_for_pairs: - get_pairs = False - else: - get_pairs = True + """ - # reduce the size of hashtables - if self.report_repetition: - t1_hashtable = full_t1_hashtable - t2_hashtable = full_t2_hashtable - else: - t1_hashtable = {k: v for k, v in full_t1_hashtable.items() if k in hashes_removed} - t2_hashtable = {k: v for k, v in full_t2_hashtable.items() if k in hashes_added} - if self._stats[PASSES_COUNT] < self.max_passes and get_pairs: - self._stats[PASSES_COUNT] += 1 - pairs = self._get_most_in_common_pairs_in_iterables( - hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type) - elif get_pairs: - if not self._stats[MAX_PASS_LIMIT_REACHED]: - self._stats[MAX_PASS_LIMIT_REACHED] = True - logger.warning(MAX_PASSES_REACHED_MSG.format(self.max_passes)) - pairs = dict_() - else: - pairs = dict_() - - def get_other_pair(hash_value, in_t1=True): - """ - Gets the other paired indexed hash item to the hash_value in the pairs dictionary - in_t1: are we looking for the other pair in t1 or t2? - """ - if in_t1: - hashtable = t1_hashtable - the_other_hashes = hashes_removed - else: - hashtable = t2_hashtable - the_other_hashes = hashes_added - other = pairs.pop(hash_value, notpresent) - if other is notpresent: - other = notpresent_indexed - else: - # The pairs are symmetrical. - # removing the other direction of pair - # so it does not get used. - del pairs[other] - the_other_hashes.remove(other) - other = hashtable[other] - return other - - if self.report_repetition: - for hash_value in hashes_added: - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition (when report_repetition=False). 
- other = get_other_pair(hash_value) - item_id = id(other.item) - indexes = t2_hashtable[hash_value].indexes if other.item is notpresent else other.indexes - # When we report repetitions, we want the child_relationship_param2 only if there is no repetition. - # Because when there is a repetition, we report it in a different way (iterable_items_added_at_indexes for example). - # When there is no repetition, we want child_relationship_param2 so that we report the "new_path" correctly. - if len(t2_hashtable[hash_value].indexes) == 1: - index2 = t2_hashtable[hash_value].indexes[0] - else: - index2 = None - for i in indexes: - change_level = level.branch_deeper( - other.item, - t2_hashtable[hash_value].item, - child_relationship_class=SubscriptableIterableRelationship, - child_relationship_param=i, - child_relationship_param2=index2, - ) - if other.item is notpresent: - self._report_result('iterable_item_added', change_level, local_tree=local_tree) - else: - parents_ids_added = add_to_frozen_set(parents_ids, item_id) - self._diff(change_level, parents_ids_added, local_tree=local_tree) - for hash_value in hashes_removed: - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition. - other = get_other_pair(hash_value, in_t1=False) - item_id = id(other.item) - # When we report repetitions, we want the child_relationship_param2 only if there is no repetition. - # Because when there is a repetition, we report it in a different way (iterable_items_added_at_indexes for example). - # When there is no repetition, we want child_relationship_param2 so that we report the "new_path" correctly. 
- if other.item is notpresent or len(other.indexes > 1): - index2 = None - else: - index2 = other.indexes[0] - for i in t1_hashtable[hash_value].indexes: - change_level = level.branch_deeper( - t1_hashtable[hash_value].item, - other.item, - child_relationship_class=SubscriptableIterableRelationship, - child_relationship_param=i, - child_relationship_param2=index2, - ) - if other.item is notpresent: - self._report_result('iterable_item_removed', change_level, local_tree=local_tree) - else: - # I was not able to make a test case for the following 2 lines since the cases end up - # getting resolved above in the hashes_added calcs. However I am leaving these 2 lines - # in case things change in future. - parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover. - self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover. - - items_intersect = t2_hashes.intersection(t1_hashes) - - for hash_value in items_intersect: - t1_indexes = t1_hashtable[hash_value].indexes - t2_indexes = t2_hashtable[hash_value].indexes - t1_indexes_len = len(t1_indexes) - t2_indexes_len = len(t2_indexes) - if t1_indexes_len != t2_indexes_len: # this is a repetition change! - # create "change" entry, keep current level untouched to handle further changes - repetition_change_level = level.branch_deeper( - t1_hashtable[hash_value].item, - t2_hashtable[hash_value].item, # nb: those are equal! 
- child_relationship_class=SubscriptableIterableRelationship, - child_relationship_param=t1_hashtable[hash_value] - .indexes[0]) - repetition_change_level.additional['repetition'] = RemapDict( - old_repeat=t1_indexes_len, - new_repeat=t2_indexes_len, - old_indexes=t1_indexes, - new_indexes=t2_indexes) - self._report_result('repetition_change', - repetition_change_level, local_tree=local_tree) + result = _path_to_elements(path, root_element=root_element) + result = iter(result) + if root_element: + next(result) # We don't want the root item + if include_actions is False: + return [i[0] for i in result] + return [{'element': i[0], 'action': i[1]} for i in result] + + +def stringify_element(param, quote_str=None): + has_quote = "'" in param + has_double_quote = '"' in param + if has_quote and has_double_quote and not quote_str: + new_param = [] + for char in param: + if char in {'"', "'"}: + new_param.append('𝆺𝅥𝅯') + new_param.append(char) + result = '"' + ''.join(new_param) + '"' + elif has_quote: + result = f'"{param}"' + elif has_double_quote: + result = f"'{param}'" + else: + result = param if quote_str is None else quote_str.format(param) + return result + + +def stringify_path(path, root_element=DEFAULT_FIRST_ELEMENT, quote_str="'{}'"): + """ + Gets the path as an string. + For example [1, 2, 'age'] should become + root[1][2]['age'] + """ + if not path: + return root_element[0] + result = [root_element[0]] + has_actions = False + try: + if path[0][1] in {GET, GETATTR}: + has_actions = True + except (KeyError, IndexError, TypeError): + pass + if not has_actions: + path = [(i, GET) for i in path] + path[0] = (path[0][0], root_element[1]) # The action for the first element might be a GET or GETATTR. We update the action based on the root_element. 
+ for element, action in path: + if isinstance(element, str) and action == GET: + element = stringify_element(element, quote_str) + if action == GET: + result.append(f"[{element}]") else: - for hash_value in hashes_added: - if self._count_diff() is StopIteration: - return - other = get_other_pair(hash_value) - item_id = id(other.item) - index = t2_hashtable[hash_value].indexes[0] if other.item is notpresent else other.indexes[0] - index2 = t2_hashtable[hash_value].indexes[0] - change_level = level.branch_deeper( - other.item, - t2_hashtable[hash_value].item, - child_relationship_class=SubscriptableIterableRelationship, - child_relationship_param=index, - child_relationship_param2=index2, - ) - if other.item is notpresent: - self._report_result('iterable_item_added', change_level, local_tree=local_tree) - else: - parents_ids_added = add_to_frozen_set(parents_ids, item_id) - self._diff(change_level, parents_ids_added, local_tree=local_tree) - - for hash_value in hashes_removed: - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition. - other = get_other_pair(hash_value, in_t1=False) - item_id = id(other.item) - index = t1_hashtable[hash_value].indexes[0] - index2 = t1_hashtable[hash_value].indexes[0] if other.item is notpresent else other.indexes[0] - change_level = level.branch_deeper( - t1_hashtable[hash_value].item, - other.item, - child_relationship_class=SubscriptableIterableRelationship, - child_relationship_param=index, - child_relationship_param2=index2, - ) - if other.item is notpresent: - self._report_result('iterable_item_removed', change_level, local_tree=local_tree) - else: - # Just like the case when report_repetition = True, these lines never run currently. - # However they will stay here in case things change in future. - parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover. - self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover. 
- - def _diff_booleans(self, level, local_tree=None): - if level.t1 != level.t2: - self._report_result('values_changed', level, local_tree=local_tree) - - def _diff_numbers(self, level, local_tree=None, report_type_change=True): - """Diff Numbers""" - if report_type_change: - t1_type = "number" if self.ignore_numeric_type_changes else level.t1.__class__.__name__ - t2_type = "number" if self.ignore_numeric_type_changes else level.t2.__class__.__name__ - else: - t1_type = t2_type = '' - - if self.use_log_scale: - if not logarithmic_similarity(level.t1, level.t2, threshold=self.log_scale_similarity_threshold): - self._report_result('values_changed', level, local_tree=local_tree) - elif self.math_epsilon is not None: - if not is_close(level.t1, level.t2, abs_tol=self.math_epsilon): - self._report_result('values_changed', level, local_tree=local_tree) - elif self.significant_digits is None: - if level.t1 != level.t2: - self._report_result('values_changed', level, local_tree=local_tree) - else: - # Bernhard10: I use string formatting for comparison, to be consistent with usecases where - # data is read from files that were previously written from python and - # to be consistent with on-screen representation of numbers. 
- # Other options would be abs(t1-t2)<10**-self.significant_digits - # or math.is_close (python3.5+) - # Note that abs(3.25-3.251) = 0.0009999999999998899 < 0.001 - # Note also that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 - # For Decimals, format seems to round 2.5 to 2 and 3.5 to 4 (to closest even number) - t1_s = self.number_to_string(level.t1, - significant_digits=self.significant_digits, - number_format_notation=self.number_format_notation) # type: ignore - t2_s = self.number_to_string(level.t2, - significant_digits=self.significant_digits, - number_format_notation=self.number_format_notation) # type: ignore - - t1_s = KEY_TO_VAL_STR.format(t1_type, t1_s) - t2_s = KEY_TO_VAL_STR.format(t2_type, t2_s) - if t1_s != t2_s: - self._report_result('values_changed', level, local_tree=local_tree) - - def _diff_ipranges(self, level, local_tree=None): - """Diff IP ranges""" - if str(level.t1) != str(level.t2): - self._report_result('values_changed', level, local_tree=local_tree) - - def _diff_datetime(self, level, local_tree=None): - """Diff DateTimes""" - level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone) - level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone) - - if level.t1 != level.t2: - self._report_result('values_changed', level, local_tree=local_tree) - - def _diff_time(self, level, local_tree=None): - """Diff DateTimes""" - if self.truncate_datetime: - level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone) - level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone) - - if level.t1 != level.t2: - self._report_result('values_changed', level, local_tree=local_tree) - - def _diff_uuids(self, level, local_tree=None): - """Diff UUIDs""" - if level.t1.int != level.t2.int: - self._report_result('values_changed', level, local_tree=local_tree) - - 
def _diff_numpy_array(self, level, parents_ids=frozenset(), local_tree=None): - """Diff numpy arrays""" - if level.path() not in self._numpy_paths: - self._numpy_paths[level.path()] = get_type(level.t2).__name__ - if np is None: - # This line should never be run. If it is ever called means the type check detected a numpy array - # which means numpy module needs to be available. So np can't be None. - raise ImportError(CANT_FIND_NUMPY_MSG) # pragma: no cover - - if (self.ignore_order_func and not self.ignore_order_func(level)) or not self.ignore_order: - # fast checks - if self.significant_digits is None: - if np.array_equal(level.t1, level.t2, equal_nan=self.ignore_nan_inequality): - return # all good - else: - try: - np.testing.assert_almost_equal(level.t1, level.t2, decimal=self.significant_digits) - except TypeError: - np.array_equal(level.t1, level.t2, equal_nan=self.ignore_nan_inequality) - except AssertionError: - pass # do detailed checking below - else: - return # all good - - # compare array meta-data - _original_type = level.t1.dtype - if level.t1.shape != level.t2.shape: - # arrays are converted to python lists so that certain features of DeepDiff can apply on them easier. - # They will be converted back to Numpy at their final dimension. - level.t1 = level.t1.tolist() - level.t2 = level.t2.tolist() - self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree) - else: - # metadata same -- the difference is in the content - shape = level.t1.shape - dimensions = len(shape) - if dimensions == 1: - self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree) - elif (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order: - # arrays are converted to python lists so that certain features of DeepDiff can apply on them easier. - # They will be converted back to Numpy at their final dimension. 
- level.t1 = level.t1.tolist() - level.t2 = level.t2.tolist() - self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type, local_tree=local_tree) - else: - for (t1_path, t1_row), (t2_path, t2_row) in zip( - get_numpy_ndarray_rows(level.t1, shape), - get_numpy_ndarray_rows(level.t2, shape)): - - new_level = level.branch_deeper( - t1_row, - t2_row, - child_relationship_class=NumpyArrayRelationship, - child_relationship_param=t1_path, - child_relationship_param2=t2_path, - ) - - self._diff_iterable_in_order(new_level, parents_ids, _original_type=_original_type, local_tree=local_tree) - - def _diff_types(self, level, local_tree=None): - """Diff types""" - level.report_type = 'type_changes' - self._report_result('type_changes', level, local_tree=local_tree) - - def _count_diff(self): - if (self.max_diffs is not None and self._stats[DIFF_COUNT] > self.max_diffs): - if not self._stats[MAX_DIFF_LIMIT_REACHED]: - self._stats[MAX_DIFF_LIMIT_REACHED] = True - logger.warning(MAX_DIFFS_REACHED_MSG.format(self.max_diffs)) - return StopIteration - self._stats[DIFF_COUNT] += 1 - if self.cache_size and self.cache_tuning_sample_size: - self._auto_tune_cache() - - def _auto_tune_cache(self): - take_sample = (self._stats[DIFF_COUNT] % self.cache_tuning_sample_size == 0) - if self.cache_tuning_sample_size: - if self._stats[DISTANCE_CACHE_ENABLED]: - if take_sample: - self._auto_off_cache() - # Turn on the cache once in a while - elif self._stats[DIFF_COUNT] % self._shared_parameters[_ENABLE_CACHE_EVERY_X_DIFF] == 0: - self.progress_logger('Re-enabling the distance and level caches.') - # decreasing the sampling frequency - self._shared_parameters[_ENABLE_CACHE_EVERY_X_DIFF] *= 10 - self._stats[DISTANCE_CACHE_ENABLED] = True - if take_sample: - for key in (PREVIOUS_DIFF_COUNT, PREVIOUS_DISTANCE_CACHE_HIT_COUNT): - self._stats[key] = self._stats[key[9:]] - - def _auto_off_cache(self): - """ - Auto adjust the cache based on the usage - """ - if 
self._stats[DISTANCE_CACHE_ENABLED]: - angle = (self._stats[DISTANCE_CACHE_HIT_COUNT] - self._stats['PREVIOUS {}'.format(DISTANCE_CACHE_HIT_COUNT)]) / (self._stats[DIFF_COUNT] - self._stats[PREVIOUS_DIFF_COUNT]) - if angle < self.CACHE_AUTO_ADJUST_THRESHOLD: - self._stats[DISTANCE_CACHE_ENABLED] = False - self.progress_logger('Due to minimal cache hits, {} is disabled.'.format('distance cache')) - - def _use_custom_operator(self, level): - """ - For each level we check all custom operators. - If any one of them was a match for the level, we run the diff of the operator. - If the operator returned True, the operator must have decided these objects should not - be compared anymore. It might have already reported their results. - In that case the report will appear in the final results of this diff. - Otherwise basically the 2 objects in the level are being omitted from the results. - """ + result.append(f".{element}") + return ''.join(result) - for operator in self.custom_operators: - if operator.match(level): - prevent_default = operator.give_up_diffing(level=level, diff_instance=self) - if prevent_default: - return True - return False +# Regex to detect wildcard segments in a raw path string. +# Matches [*], [**], .*, .** that are NOT inside quotes. +_WILDCARD_RE = re.compile( + r'\[\*\*?\]' # [*] or [**] + r'|\.\*\*?(?=[.\[]|$)' # .* or .** followed by . or [ or end of string +) - def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree=None): - """ - The main diff method - **parameters** +def path_has_wildcard(path): + """Check if a path string contains wildcard segments (* or **).""" + return bool(_WILDCARD_RE.search(path)) - level: the tree level or tree node - parents_ids: the ids of all the parent objects in the tree from the current node. 
- _original_type: If the objects had an original type that was different than what currently exists in the level.t1 and t2 - """ - if self._count_diff() is StopIteration: - return - if self._use_custom_operator(level): - return +class GlobPathMatcher: + """Pre-compiled matcher for a single glob pattern path. - if level.t1 is level.t2: - return + Parses a pattern like ``root['users'][*]['password']`` into segments + and matches concrete path strings against it. - if self._skip_this(level): - return + ``*`` matches exactly one path segment (any key, index, or attribute). + ``**`` matches zero or more path segments. + """ - report_type_change = True - if get_type(level.t1) != get_type(level.t2): - for type_group in self.ignore_type_in_groups: - if self.type_check_func(level.t1, type_group) and self.type_check_func(level.t2, type_group): - report_type_change = False - break - if self.use_enum_value and isinstance(level.t1, Enum): - level.t1 = level.t1.value - report_type_change = False - if self.use_enum_value and isinstance(level.t2, Enum): - level.t2 = level.t2.value - report_type_change = False - if report_type_change: - self._diff_types(level, local_tree=local_tree) - return - # This is an edge case where t1=None or t2=None and None is in the ignore type group. 
- if level.t1 is None or level.t2 is None: - self._report_result('values_changed', level, local_tree=local_tree) - return - - if self.ignore_nan_inequality and isinstance(level.t1, (float, np_floating)) and str(level.t1) == str(level.t2) == 'nan': - return + def __init__(self, pattern_path): + self.original_pattern = pattern_path + elements = _path_to_elements(pattern_path, root_element=('root', GETATTR)) + # Skip the root element for matching + self._pattern = elements[1:] + + def match(self, path_string): + """Return True if *path_string* matches this pattern exactly.""" + elements = _path_to_elements(path_string, root_element=('root', GETATTR)) + target = elements[1:] + return self._match_segments(self._pattern, target, 0, 0) + + def match_or_is_ancestor(self, path_string): + """Return True if *path_string* matches OR is an ancestor of a potential match. + + This is needed for ``include_paths``: we must not prune a path that + could lead to a matching descendant. + """ + elements = _path_to_elements(path_string, root_element=('root', GETATTR)) + target = elements[1:] + return (self._match_segments(self._pattern, target, 0, 0) or + self._could_match_descendant(self._pattern, target, 0, 0)) + + def match_or_is_descendant(self, path_string): + """Return True if *path_string* matches OR is a descendant of a matching path. + + This checks whether the pattern matches any prefix of *path_string*, + meaning the path is "inside" a matched subtree. 
+ """ + elements = _path_to_elements(path_string, root_element=('root', GETATTR)) + target = elements[1:] + # Check exact match first + if self._match_segments(self._pattern, target, 0, 0): + return True + # Check if any prefix of target matches (making this path a descendant) + for length in range(len(target)): + if self._match_segments(self._pattern, target[:length], 0, 0): + return True + return False - if isinstance(level.t1, booleans): - self._diff_booleans(level, local_tree=local_tree) - - elif isinstance(level.t1, strings): - # Special handling when comparing string with UUID and ignore_uuid_types is True - if self.ignore_uuid_types and isinstance(level.t2, uuids): - try: - # Convert string to UUID for comparison - t1_uuid = uuid.UUID(level.t1) - if t1_uuid.int != level.t2.int: - self._report_result('values_changed', level, local_tree=local_tree) - except (ValueError, AttributeError): - # If string is not a valid UUID, report as changed - self._report_result('values_changed', level, local_tree=local_tree) - else: - self._diff_str(level, local_tree=local_tree) - - elif isinstance(level.t1, datetime.datetime): - self._diff_datetime(level, local_tree=local_tree) - - elif isinstance(level.t1, ipranges): - self._diff_ipranges(level, local_tree=local_tree) - - elif isinstance(level.t1, (datetime.date, datetime.timedelta, datetime.time)): - self._diff_time(level, local_tree=local_tree) - - elif isinstance(level.t1, uuids): - # Special handling when comparing UUID with string and ignore_uuid_types is True - if self.ignore_uuid_types and isinstance(level.t2, str): - try: - # Convert string to UUID for comparison - t2_uuid = uuid.UUID(level.t2) - if level.t1.int != t2_uuid.int: - self._report_result('values_changed', level, local_tree=local_tree) - except (ValueError, AttributeError): - # If string is not a valid UUID, report as changed - self._report_result('values_changed', level, local_tree=local_tree) + @staticmethod + def _match_segments(pattern, target, pi, ti): 
+ """Recursive segment matcher with backtracking for ``**``.""" + while pi < len(pattern) and ti < len(target): + pat_elem = pattern[pi][0] + + if pat_elem == MULTI_WILDCARD: + # ** matches zero or more segments — try every suffix + for k in range(ti, len(target) + 1): + if GlobPathMatcher._match_segments(pattern, target, pi + 1, k): + return True + return False + elif pat_elem == SINGLE_WILDCARD: + # * matches exactly one segment regardless of value/action + pi += 1 + ti += 1 else: - self._diff_uuids(level, local_tree=local_tree) - - elif isinstance(level.t1, numbers): - self._diff_numbers(level, local_tree=local_tree, report_type_change=report_type_change) - - elif isinstance(level.t1, Mapping): - self._diff_dict(level, parents_ids, local_tree=local_tree) - - elif isinstance(level.t1, tuple): - self._diff_tuple(level, parents_ids, local_tree=local_tree) - - elif isinstance(level.t1, (set, frozenset, SetOrdered)): - self._diff_set(level, local_tree=local_tree) + tgt_elem = target[ti][0] + if pat_elem != tgt_elem: + return False + pi += 1 + ti += 1 - elif isinstance(level.t1, np_ndarray): - self._diff_numpy_array(level, parents_ids, local_tree=local_tree) + # Consume any trailing ** (they can match zero segments) + while pi < len(pattern) and pattern[pi][0] == MULTI_WILDCARD: + pi += 1 - elif isinstance(level.t1, PydanticBaseModel): - self._diff_obj(level, parents_ids, local_tree=local_tree, is_pydantic_object=True) + return pi == len(pattern) and ti == len(target) - elif isinstance(level.t1, Iterable): - self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree) + @staticmethod + def _could_match_descendant(pattern, target, pi, ti): + """Check if *target* is a prefix that could lead to a match deeper down.""" + if ti == len(target): + # Target exhausted — it's an ancestor if pattern has remaining segments + return pi < len(pattern) - elif isinstance(level.t1, Enum): - self._diff_enum(level, parents_ids, local_tree=local_tree) + 
if pi >= len(pattern): + return False - else: - self._diff_obj(level, parents_ids) + pat_elem = pattern[pi][0] - def _get_view_results(self, view, verbose_level=None): - """ - Get the results based on the view - """ - result = self.tree - if not self.report_repetition: # and self.is_root: - result.mutual_add_removes_to_become_value_changes() - if view == TREE_VIEW: - pass - elif view == TEXT_VIEW: - effective_verbose_level = verbose_level if verbose_level is not None else self.verbose_level - result = TextResult(tree_results=self.tree, verbose_level=effective_verbose_level) - result.remove_empty_keys() - elif view == DELTA_VIEW: - result = self._to_delta_dict(report_repetition_required=False) - elif view == COLORED_VIEW: - result = ColoredView(t2=self.t2, tree_result=self.tree, compact=False) - elif view == COLORED_COMPACT_VIEW: - result = ColoredView(t2=self.t2, tree_result=self.tree, compact=True) + if pat_elem == MULTI_WILDCARD: + return (GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti) or + GlobPathMatcher._could_match_descendant(pattern, target, pi, ti + 1)) + elif pat_elem == SINGLE_WILDCARD: + return GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti + 1) else: - raise ValueError(INVALID_VIEW_MSG.format(view)) - return result - - @staticmethod - def _get_key_for_group_by(row, group_by, item_name): - """ - Get the key value to group a row by, using the specified group_by parameter. - - Example - >>> row = {'first': 'John', 'middle': 'Joe', 'last': 'Smith'} - >>> DeepDiff._get_key_for_group_by(row, 'first', 't1') - 'John' - >>> nested_row = {'id': 123, 'demographics': {'names': {'first': 'John', 'middle': 'Joe', 'last': 'Smith'}}} - >>> group_by = lambda x: x['demographics']['names']['first'] - >>> DeepDiff._get_key_for_group_by(nested_row, group_by, 't1') - 'John' - - Args: - row (dict): The dictionary (row) to extract the group by key from. 
- group_by (str or callable): The key name or function to call to get to the key value to group by. - item_name (str): The name of the item, used for error messages. - - Returns: - str: The key value to group by. - - Raises: - KeyError: If the specified key is not found in the row. - """ - try: - if callable(group_by): - return group_by(row) - return row.pop(group_by) - except KeyError: - logger.error("Unable to group {} by {}. The key is missing in {}".format(item_name, group_by, row)) - raise - - def _group_iterable_to_dict(self, item, group_by, item_name): - """ - Convert a list of dictionaries into a dictionary of dictionaries - where the key is the value of the group_by key in each dictionary. - """ - group_by_level2 = None - if isinstance(group_by, (list, tuple)): - group_by_level1 = group_by[0] - if len(group_by) > 1: - group_by_level2 = group_by[1] - else: - group_by_level1 = group_by - if isinstance(item, Iterable) and not isinstance(item, Mapping): - result = {} - item_copy = deepcopy(item) - for row in item_copy: - if isinstance(row, Mapping): - key1 = self._get_key_for_group_by(row, group_by_level1, item_name) - # Track keys created by group_by to avoid type prefixing later - if hasattr(self, 'group_by_keys'): - self.group_by_keys.add(key1) - if group_by_level2: - key2 = self._get_key_for_group_by(row, group_by_level2, item_name) - # Track level 2 keys as well - if hasattr(self, 'group_by_keys'): - self.group_by_keys.add(key2) - if key1 not in result: - result[key1] = {} - if self.group_by_sort_key: - if key2 not in result[key1]: - result[key1][key2] = [] - result_key1_key2 = result[key1][key2] - if row not in result_key1_key2: - result_key1_key2.append(row) - else: - result[key1][key2] = row - else: - if self.group_by_sort_key: - if key1 not in result: - result[key1] = [] - if row not in result[key1]: - result[key1].append(row) - else: - result[key1] = row - else: - msg = "Unable to group {} by {} since the item {} is not a 
dictionary.".format(item_name, group_by_level1, row) - logger.error(msg) - raise ValueError(msg) - if self.group_by_sort_key: - if group_by_level2: - for key1, row1 in result.items(): - for key2, row in row1.items(): - row.sort(key=self.group_by_sort_key) - else: - for key, row in result.items(): - row.sort(key=self.group_by_sort_key) - return result - msg = "Unable to group {} by {}".format(item_name, group_by) - logger.error(msg) - raise ValueError(msg) - - def get_stats(self): - """ - Get some stats on internals of the DeepDiff run. - """ - return self._stats + tgt_elem = target[ti][0] + if pat_elem != tgt_elem: + return False + return GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti + 1) - @property - def affected_paths(self): - """ - Get the list of paths that were affected. - Whether a value was changed or they were added or removed. - - Example - >>> from pprint import pprint - >>> t1 = {1: 1, 2: 2, 3: [3], 4: 4} - >>> t2 = {1: 1, 2: 4, 3: [3, 4], 5: 5, 6: 6} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff, indent=4) - { 'dictionary_item_added': ['root[5]', 'root[6]'], - 'dictionary_item_removed': ['root[4]'], - 'iterable_item_added': {'root[3][1]': 4}, - 'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} - >>> sorted(ddiff.affected_paths) - ['root[2]', 'root[3][1]', 'root[4]', 'root[5]', 'root[6]'] - >>> sorted(ddiff.affected_root_keys) - [2, 3, 4, 5, 6] - """ - result = SetOrdered() - for key in REPORT_KEYS: - value = self.get(key) - if value: - if isinstance(value, SetOrdered): - result |= value - else: - result |= SetOrdered(value.keys()) - return result +def compile_glob_paths(paths): + """Compile a list of glob pattern strings into GlobPathMatcher objects. - @property - def affected_root_keys(self): - """ - Get the list of root keys that were affected. - Whether a value was changed or they were added or removed. 
- - Example - >>> from pprint import pprint - >>> t1 = {1: 1, 2: 2, 3: [3], 4: 4} - >>> t2 = {1: 1, 2: 4, 3: [3, 4], 5: 5, 6: 6} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff, indent=4) - { 'dictionary_item_added': ['root[5]', 'root[6]'], - 'dictionary_item_removed': ['root[4]'], - 'iterable_item_added': {'root[3][1]': 4}, - 'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} - >>> sorted(ddiff.affected_paths) - ['root[2]', 'root[3][1]', 'root[4]', 'root[5]', 'root[6]'] - >>> sorted(ddiff.affected_root_keys) - [2, 3, 4, 5, 6] - """ - result = SetOrdered() - for key in REPORT_KEYS: - value = self.tree.get(key) - if value: - if isinstance(value, SetOrdered): - values_list = value - else: - values_list = value.keys() - for item in values_list: - root_key = item.get_root_key() - if root_key is not notpresent: - result.add(root_key) - return result - - def __str__(self): - if hasattr(self, '_colored_view') and self.view in {COLORED_VIEW, COLORED_COMPACT_VIEW}: - return str(self._colored_view) - return super().__str__() - - -if __name__ == "__main__": # pragma: no cover - import doctest - doctest.testmod() + Returns a list of ``GlobPathMatcher`` or ``None`` if *paths* is empty/None. + """ + if not paths: + return None + return [GlobPathMatcher(p) for p in paths] diff --git a/deepdiff/helper.py b/deepdiff/helper.py index cb382afd..f7eeea24 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -352,6 +352,30 @@ def add_root_to_paths(paths: Optional[Iterable[str]]) -> Optional[SetOrdered]: return result +def separate_wildcard_and_exact_paths(paths): + """Separate a set of paths into exact paths and wildcard pattern paths. + + Returns ``(exact_set_or_none, wildcard_list_or_none)``. + Wildcard paths must start with ``root``; a ``ValueError`` is raised otherwise. 
+ """ + if not paths: + return None, None + from deepdiff.path import path_has_wildcard, compile_glob_paths + exact = set() + wildcards = [] + for path in paths: + if path_has_wildcard(path): + if not path.startswith('root'): + raise ValueError( + "Wildcard paths must start with 'root'. Got: {}".format(path)) + wildcards.append(path) + else: + exact.add(path) + exact_result = exact if exact else None + glob_result = compile_glob_paths(wildcards) if wildcards else None + return exact_result, glob_result + + RE_COMPILED_TYPE = type(re.compile('')) diff --git a/deepdiff/path.py b/deepdiff/path.py index e5b64c70..2ac62b5e 100644 --- a/deepdiff/path.py +++ b/deepdiff/path.py @@ -1,3 +1,4 @@ +import re import logging from ast import literal_eval from functools import lru_cache @@ -8,6 +9,30 @@ GET = 'GET' +class _WildcardToken: + """Sentinel object for wildcard path tokens. + + Using a dedicated class (instead of plain strings) ensures that a literal + dict key ``'*'`` (parsed from ``root['*']``) is never confused with the + wildcard ``*`` (parsed from ``root[*]``). + """ + def __init__(self, symbol): + self._symbol = symbol + + def __repr__(self): + return self._symbol + + def __eq__(self, other): + return isinstance(other, _WildcardToken) and self._symbol == other._symbol + + def __hash__(self): + return hash(('_WildcardToken', self._symbol)) + + +SINGLE_WILDCARD = _WildcardToken('*') +MULTI_WILDCARD = _WildcardToken('**') + + class PathExtractionError(ValueError): pass @@ -21,6 +46,16 @@ def _add_to_elements(elements, elem, inside): if not elem: return if not elem.startswith('__'): + # Handle wildcard tokens (* and **) as-is. + # Unquoted root[*] arrives as bare '*' which matches the string check. + # Quoted root['*'] arrives as "'*'" which does NOT match, so it falls + # through to literal_eval and becomes the plain string '*' — which is + # distinct from the _WildcardToken sentinel and thus treated as a + # literal dict key. 
+ if elem in ('*', '**'): + action = GETATTR if inside == '.' else GET + elements.append((SINGLE_WILDCARD if elem == '*' else MULTI_WILDCARD, action)) + return remove_quotes = False if '𝆺𝅥𝅯' in elem or '\\' in elem: remove_quotes = True @@ -321,3 +356,129 @@ def stringify_path(path, root_element=DEFAULT_FIRST_ELEMENT, quote_str="'{}'"): else: result.append(f".{element}") return ''.join(result) + + +# Regex to detect wildcard segments in a raw path string. +# Matches [*], [**], .*, .** that are NOT inside quotes. +_WILDCARD_RE = re.compile( + r'\[\*\*?\]' # [*] or [**] + r'|\.\*\*?(?=[.\[]|$)' # .* or .** followed by . or [ or end of string +) + + +def path_has_wildcard(path): + """Check if a path string contains wildcard segments (* or **).""" + return bool(_WILDCARD_RE.search(path)) + + +class GlobPathMatcher: + """Pre-compiled matcher for a single glob pattern path. + + Parses a pattern like ``root['users'][*]['password']`` into segments + and matches concrete path strings against it. + + ``*`` matches exactly one path segment (any key, index, or attribute). + ``**`` matches zero or more path segments. + """ + + def __init__(self, pattern_path): + self.original_pattern = pattern_path + elements = _path_to_elements(pattern_path, root_element=('root', GETATTR)) + # Skip the root element for matching + self._pattern = elements[1:] + + def match(self, path_string): + """Return True if *path_string* matches this pattern exactly.""" + elements = _path_to_elements(path_string, root_element=('root', GETATTR)) + target = elements[1:] + return self._match_segments(self._pattern, target, 0, 0) + + def match_or_is_ancestor(self, path_string): + """Return True if *path_string* matches OR is an ancestor of a potential match. + + This is needed for ``include_paths``: we must not prune a path that + could lead to a matching descendant. 
+ """ + elements = _path_to_elements(path_string, root_element=('root', GETATTR)) + target = elements[1:] + return (self._match_segments(self._pattern, target, 0, 0) or + self._could_match_descendant(self._pattern, target, 0, 0)) + + def match_or_is_descendant(self, path_string): + """Return True if *path_string* matches OR is a descendant of a matching path. + + This checks whether the pattern matches any prefix of *path_string*, + meaning the path is "inside" a matched subtree. + """ + elements = _path_to_elements(path_string, root_element=('root', GETATTR)) + target = elements[1:] + # Check exact match first + if self._match_segments(self._pattern, target, 0, 0): + return True + # Check if any prefix of target matches (making this path a descendant) + for length in range(len(target)): + if self._match_segments(self._pattern, target[:length], 0, 0): + return True + return False + + @staticmethod + def _match_segments(pattern, target, pi, ti): + """Recursive segment matcher with backtracking for ``**``.""" + while pi < len(pattern) and ti < len(target): + pat_elem = pattern[pi][0] + + if pat_elem == MULTI_WILDCARD: + # ** matches zero or more segments — try every suffix + for k in range(ti, len(target) + 1): + if GlobPathMatcher._match_segments(pattern, target, pi + 1, k): + return True + return False + elif pat_elem == SINGLE_WILDCARD: + # * matches exactly one segment regardless of value/action + pi += 1 + ti += 1 + else: + tgt_elem = target[ti][0] + if pat_elem != tgt_elem: + return False + pi += 1 + ti += 1 + + # Consume any trailing ** (they can match zero segments) + while pi < len(pattern) and pattern[pi][0] == MULTI_WILDCARD: + pi += 1 + + return pi == len(pattern) and ti == len(target) + + @staticmethod + def _could_match_descendant(pattern, target, pi, ti): + """Check if *target* is a prefix that could lead to a match deeper down.""" + if ti == len(target): + # Target exhausted — it's an ancestor if pattern has remaining segments + return pi < 
len(pattern) + + if pi >= len(pattern): + return False + + pat_elem = pattern[pi][0] + + if pat_elem == MULTI_WILDCARD: + return (GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti) or + GlobPathMatcher._could_match_descendant(pattern, target, pi, ti + 1)) + elif pat_elem == SINGLE_WILDCARD: + return GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti + 1) + else: + tgt_elem = target[ti][0] + if pat_elem != tgt_elem: + return False + return GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti + 1) + + +def compile_glob_paths(paths): + """Compile a list of glob pattern strings into GlobPathMatcher objects. + + Returns a list of ``GlobPathMatcher`` or ``None`` if *paths* is empty/None. + """ + if not paths: + return None + return [GlobPathMatcher(p) for p in paths] diff --git a/deepdiff/search.py b/deepdiff/search.py index fdb73d79..9b1b11a1 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -6,7 +6,8 @@ import logging from deepdiff.helper import ( - strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE, ipranges + strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE, ipranges, + separate_wildcard_and_exact_paths, ) @@ -106,7 +107,8 @@ def __init__(self, self.obj: Any = obj self.case_sensitive: bool = case_sensitive if isinstance(item, strings) else True item = item if self.case_sensitive else (item.lower() if isinstance(item, str) else item) - self.exclude_paths: SetOrdered = SetOrdered(exclude_paths) + _exclude_exact, self.exclude_glob_paths = separate_wildcard_and_exact_paths(set(exclude_paths) if exclude_paths else None) + self.exclude_paths: SetOrdered = SetOrdered(_exclude_exact) if _exclude_exact else SetOrdered() self.exclude_regex_paths: List[Pattern[str]] = [re.compile(exclude_regex_path) for exclude_regex_path in exclude_regex_paths] self.exclude_types: SetOrdered = SetOrdered(exclude_types) self.exclude_types_tuple: tuple[type, ...] 
= tuple( @@ -193,6 +195,8 @@ def __skip_this(self, item: Any, parent: str) -> bool: skip = False if parent in self.exclude_paths: skip = True + elif self.exclude_glob_paths and any(gp.match(parent) for gp in self.exclude_glob_paths): + skip = True elif self.exclude_regex_paths and any( [exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]): skip = True diff --git a/docs/deephash_doc.rst b/docs/deephash_doc.rst index da271b77..7039281f 100644 --- a/docs/deephash_doc.rst +++ b/docs/deephash_doc.rst @@ -32,10 +32,12 @@ exclude_types: list, default = None exclude_paths: list, default = None List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. include_paths: list, default = None List of the only paths to include in the report. If only one item, you can pass it as a string. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. exclude_regex_paths: list, default = None diff --git a/docs/diff_doc.rst b/docs/diff_doc.rst index e01dab29..1fc18db6 100644 --- a/docs/diff_doc.rst +++ b/docs/diff_doc.rst @@ -55,7 +55,8 @@ encodings: List, default = None exclude_paths: list, default = None :ref:`exclude_paths_label` - List of paths to exclude from the report. If only one item, you can path it as a string. + List of paths to exclude from the report. If only one item, you can pass it as a string. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. exclude_regex_paths: list, default = None :ref:`exclude_regex_paths_label` @@ -77,6 +78,7 @@ exclude_obj_callback_strict: function, default = None include_paths: list, default = None :ref:`include_paths_label` List of the only paths to include in the report. If only one item is in the list, you can pass it as a string. 
+ Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. include_obj_callback: function, default = None :ref:`include_obj_callback_label` diff --git a/docs/exclude_paths.rst b/docs/exclude_paths.rst index 2de453ba..0c9b78a6 100644 --- a/docs/exclude_paths.rst +++ b/docs/exclude_paths.rst @@ -59,6 +59,49 @@ Example {'values_changed': {"root['foo']['bar']": {'new_value': 'banana', 'old_value': 'potato'}}} +.. _wildcard_paths_label: + +Wildcard (Glob) Paths +--------------------- + +Both ``exclude_paths`` and ``include_paths`` support wildcard patterns for matching multiple paths at once: + +- ``[*]`` or ``.*`` matches exactly **one** path segment (any key, index, or attribute). +- ``[**]`` or ``.**`` matches **zero or more** path segments at any depth. + +Wildcard patterns must use the full ``root`` prefix (shorthand keys are not supported for wildcards). + +Exclude all ``password`` fields regardless of the parent key: + >>> t1 = {"users": {"alice": {"name": "Alice", "password": "s1"}, "bob": {"name": "Bob", "password": "s2"}}} + >>> t2 = {"users": {"alice": {"name": "Alice", "password": "x1"}, "bob": {"name": "Bob", "password": "x2"}}} + >>> DeepDiff(t1, t2, exclude_paths=["root['users'][*]['password']"]) + {} + +Include only ``name`` fields at any depth: + >>> t1 = {"a": {"name": "A", "secret": 1}, "b": {"name": "B", "secret": 2}} + >>> t2 = {"a": {"name": "X", "secret": 1}, "b": {"name": "Y", "secret": 2}} + >>> result = DeepDiff(t1, t2, include_paths=["root[*]['name']"]) + >>> set(result.get('values_changed', {}).keys()) == {"root['a']['name']", "root['b']['name']"} + True + +Use ``[**]`` to match at any depth: + >>> t1 = {"config": {"db": {"password": "old"}, "cache": {"password": "old"}}} + >>> t2 = {"config": {"db": {"password": "new"}, "cache": {"password": "new"}}} + >>> DeepDiff(t1, t2, exclude_paths=["root[**]['password']"]) + {} + +Literal keys named ``*`` or ``**`` are not treated as wildcards when quoted: 
+ >>> t1 = {"*": 1, "a": 2} + >>> t2 = {"*": 10, "a": 20} + >>> result = DeepDiff(t1, t2, exclude_paths=["root['*']"]) + >>> "root['a']" in result.get('values_changed', {}) + True + +When both ``exclude_paths`` and ``include_paths`` apply to the same path, exclusion takes precedence. + +Wildcards also work with ``DeepHash`` and ``DeepSearch`` exclude_paths. + + .. _exclude_regex_paths_label: Exclude Regex Paths diff --git a/docs/search_doc.rst b/docs/search_doc.rst index 0b268735..7039281f 100644 --- a/docs/search_doc.rst +++ b/docs/search_doc.rst @@ -1,74 +1,388 @@ :orphan: -grep is a more user friendly interface for DeepSearch. It takes exactly the same arguments as DeepSearch except that you pipe the object into it instead of passing it as a parameter. +**DeepHash** -It works just like grep in linux shell! +DeepHash calculates the hash of objects based on their contents in a deterministic way. +This way 2 objects with the same content should have the same hash. + +The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. +For example you can use DeepHash to calculate the hash of a set or a dictionary! + +At the core of it, DeepHash is a deterministic serialization of your object into a string so it +can be passed to a hash function. By default it uses SHA256. You have the option to pass any other hashing function to be used instead. + +**Import** + >>> from deepdiff import DeepHash **Parameters** -item : The item to search for +obj : any object, The object to be hashed based on its content. + + +apply_hash: Boolean, default = True + DeepHash at its core is doing deterministic serialization of objects into strings. + Then it hashes the string. + The only time you want the apply_hash to be False is if you want to know what + the string representation of your object is BEFORE it gets hashed. + + +exclude_types: list, default = None + List of object types to exclude from hashing. 
+ + +exclude_paths: list, default = None + List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. + + +include_paths: list, default = None + List of the only paths to include in the report. If only one item, you can pass it as a string. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. + + +exclude_regex_paths: list, default = None + List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one regex path. + + +exclude_obj_callback + function, default = None + A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. + This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. + + +encodings: List, default = None + Character encodings to iterate through when we convert bytes into strings. You may want to pass an explicit list of encodings in your objects if you start getting UnicodeDecodeError from DeepHash. Also check out ignore_encoding_errors if you can get away with ignoring these errors and don't want to bother with an explicit list of encodings but it will come at the price of slightly less accuracy of the final results. Example: encodings=["utf-8", "latin-1"] + + +hashes: dictionary, default = empty dictionary + A dictionary of {object or object id: object hash} to start with. + Any object that is encountered and it is already in the hashes dictionary or its id is in the hashes dictionary, + will re-use the hash that is provided by this dictionary instead of re-calculating + its hash. 
This is typically used when you have a series of objects to be hashed and there might be repeats of the same object. + + +hasher: function. default = DeepHash.sha256hex + hasher is the hashing function. The default is DeepHash.sha256hex. + But you can pass another hash function to it if you want. + For example a cryptographic hash function or Python's builtin hash function. + All it needs is a function that takes the input in string format and returns the hash. + + You can use it by passing: hasher=hash for Python's builtin hash. + + The following alternative is already provided: + + - hasher=DeepHash.sha1hex + + Note that prior to DeepDiff 5.2, Murmur3 was the default hash function. + But Murmur3 is removed from DeepDiff dependencies since then. + + +ignore_repetition: Boolean, default = True + If repetitions in an iterable should cause the hash of iterable to be different. + Note that the deepdiff diffing functionality lets this to be the default at all times. + But if you are using DeepHash directly, you can set this parameter. + -verbose_level : int >= 0, default = 1. - Verbose level one shows the paths of found items. - Verbose level 2 shows the path and value of the found items. +ignore_type_in_groups + Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the times the shortcuts provided to you are enough. + The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more control compared to the shortcuts. -exclude_paths: list, default = None. - List of paths to exclude from the report. + For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: -exclude_types: list, default = None. 
- List of object types to exclude from the report. + 1. Set ignore_string_type_changes=True which is the default. + 2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . -case_sensitive: Boolean, default = False + Now what if you also want typeA and typeB to be ignored when comparing against each other? -match_string: Boolean, default = False - If True, the value of the object or its children have to exactly match the item. - If False, the value of the item can be a part of the value of the object or its children + 1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] + 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] -use_regexp: Boolean, default = False +ignore_string_type_changes: Boolean, default = True + string type conversions should not affect the hash output when this is set to True. + For example "Hello" and b"Hello" should produce the same hash. -strict_checking: Boolean, default = True - If True, it will check the type of the object to match, so when searching for '1234', - it will NOT match the int 1234. Currently this only affects the numeric values searching. + By setting it to True, both the string and bytes of hello return the same hash. + + +ignore_numeric_type_changes: Boolean, default = False + numeric type conversions should not affect the hash output when this is set to True. + For example 10, 10.0 and Decimal(10) should produce the same hash. + When ignore_numeric_type_changes is set to True, all numbers are converted + to strings with the precision of significant_digits parameter and number_format_notation notation. + If no significant_digits is passed by the user, a default value of 12 is used.
+ + +ignore_type_subclasses + Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. + + +ignore_string_case + Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. + + +ignore_private_variables: Boolean, default = True + Whether to exclude the private variables in the calculations or not. It only affects variables that start with double underscores (__). + + +ignore_encoding_errors: Boolean, default = False + If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the encodings parameter. + +ignore_iterable_order: Boolean, default = True + If order of items in an iterable should not cause the hash of the iterable to be different. + +number_format_notation : string, default="f" + number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. + + +significant_digits : int >= 0, default=None + By default the significant_digits compares only that many digits AFTER the decimal point. However you can override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. + + Important: This will affect ANY number comparison when it is set. + + Note: If ignore_numeric_type_changes is set to True and you have left significant_digits to the default of None, it gets automatically set to 12. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change.
However when ignore_numeric_type_changes=True, in order to compare numbers from different types to each other, we need to convert them all into strings. The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. + + Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits when the number_format_notation is left as the default of "f" meaning fixed point. + + Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 + + For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) + + When you set the number_format_notation="e", we use "{:.Xe}".format(Your Number) where X=significant_digits. + +truncate_datetime: string, default = None + Can be one of 'second', 'minute', 'hour' or 'day'. Datetime objects are truncated to this precision before they are hashed. + + + +**Returns** + A dictionary of {item: item hash}. + If your object is nested, it will build hashes of all the objects it contains too. + + +.. note:: + DeepHash output is not like conventional hash functions. It is a dictionary of object IDs to their hashes. This happens because DeepHash calculates the hash of the object and any other objects found within the object in a recursive manner.
If you only need the hash of the object you are passing, all you need to do is to do: + + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + >>> DeepHash(obj)[obj] # doctest: +SKIP **Examples** -Importing - >>> from deepdiff import grep - >>> from pprint import pprint - -Search in list for string - >>> obj = ["long somewhere", "string", 0, "somewhere great!"] - >>> item = "somewhere" - >>> ds = obj | grep(item) - >>> print(ds) - {'matched_values': ['root[0]', 'root[3]']} - -Search in nested data for string - >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}] - >>> item = "somewhere" - >>> ds = obj | grep(item, verbose_level=2) - >>> pprint(ds, indent=2) - { 'matched_paths': {"root[1]['somewhere']": 'around'}, - 'matched_values': { 'root[0]': 'something somewhere', - "root[1]['long']": 'somewhere'}} - -You can also use regular expressions - >>> obj = ["something here", {"long": "somewhere", "someone": 2, 0: 0, "somewhere": "around"}] - >>> ds = obj | grep("some.*", use_regexp=True) - >>> pprint(ds, indent=2) - { 'matched_paths': ["root[1]['someone']", "root[1]['somewhere']"], - 'matched_values': ['root[0]', "root[1]['long']"]} - - -Change strict_checking to False to match numbers in strings and vice versa: - >>> obj = {"long": "somewhere", "num": 1123456, 0: 0, "somewhere": "around"} - >>> item = "1234" - >>> result = {"matched_values": {"root['num']"}} - >>> ds = obj | grep(item, verbose_level=1, use_regexp=True) - >>> pprint(ds) - {} +Let's say you have a dictionary object. + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + +If you try to hash it: + >>> hash(obj) + Traceback (most recent call last): + File "", line 1, in + TypeError: unhashable type: 'dict' + +But with DeepHash: + + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + >>> DeepHash(obj) # doctest: +SKIP + + So what is exactly the hash of obj in this case? 
+ DeepHash is calculating the hash of the obj and any other object that obj contains. + The output of DeepHash is a dictionary of object IDs to their hashes. + In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: + + >>> hashes = DeepHash(obj) + >>> hashes[obj] + 'bf5478de322aa033da36bf3bcf9f0599e13a520773f50c6eb9f2487377a7929b' + + Which you can write as: + + >>> hashes = DeepHash(obj)[obj] + + At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. + + If you prefer to use another hashing algorithm, you can pass it using the hasher parameter. + + If you do a deep copy of the obj, it should still give you the same hash: + + >>> from copy import deepcopy + >>> obj2 = deepcopy(obj) + >>> DeepHash(obj2)[obj2] + 'bf5478de322aa033da36bf3bcf9f0599e13a520773f50c6eb9f2487377a7929b' + + Note that by default DeepHash will include string type differences. So if your strings were bytes: + + >>> obj3 = {1: 2, b'a': b'b'} + >>> DeepHash(obj3)[obj3] + '71db3231177d49f78b52a356ca206e6179417b681604d00ed703a077049e3300' + + But if you want the same hash if string types are different, set ignore_string_type_changes to True: + + >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] + 'e60c2befb84be625037c75e1e26d0bfc85a0ffc1f3cde9500f68f6eac55e5ad6' + + ignore_numeric_type_changes is by default False too. + + >>> from decimal import Decimal + >>> obj1 = {4:10} + >>> obj2 = {4.0: Decimal(10.0)} + >>> DeepHash(obj1)[4] == DeepHash(obj2)[4.0] + False + + But by setting it to True, we can get the same hash. + + >>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0] + True + +number_format_notation: String, default = "f" + number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. 
"f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. + + +ignore_string_type_changes: Boolean, default = True + By setting it to True, both the string and bytes of hello return the same hash. + + >>> DeepHash(b'hello', ignore_string_type_changes=True)[b'hello'] + '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' + >>> DeepHash('hello', ignore_string_type_changes=True)['hello'] + '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' + + +ignore_numeric_type_changes: Boolean, default = False + For example if significant_digits=5, 1.1, Decimal(1.1) are both converted to 1.10000 + + That way they both produce the same hash. + + >>> t1 = {1: 1, 2: 2.22} + >>> DeepHash(t1)[1] + 'c1800a30c736483f13615542e7096f7973631fef8ca935ee1ed9f35fb06fd44e' + >>> DeepHash(t1, ignore_numeric_type_changes=True)[1] == DeepHash(t1, ignore_numeric_type_changes=True)[1.0] + True + + You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: + + >>> from deepdiff import DeepDiff + >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] + + +ignore_type_in_groups example with custom objects: + + >>> class Burrito: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> class Taco: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... 
+ >>> + >>> burrito = Burrito() + >>> taco = Taco() + >>> + >>> burritos = [burrito] + >>> tacos = [taco] + >>> + >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)]) + >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)]) + >>> d1[burrito] == d2[taco] + True + + +ignore_type_subclasses + Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. + + >>> from deepdiff import DeepHash + >>> + >>> class ClassB: + ... def __init__(self, x): + ... self.x = x + ... def __repr__(self): + ... return "obj b" + ... + >>> + >>> class ClassC(ClassB): + ... def __repr__(self): + ... return "obj c" + ... + >>> obj_b = ClassB(1) + >>> obj_c = ClassC(1) + >>> + >>> # By default, subclasses are considered part of the type group. + ... # ignore_type_in_groups=[(ClassB, )] matches ClassC too since it's a subclass. + ... hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )]) + >>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )]) + >>> hashes_b[obj_b] == hashes_c[obj_c] + True + >>> + >>> # With ignore_type_subclasses=True, only exact type matches count. + ... # ClassC no longer matches (ClassB, ) group, so hashes differ. + ... hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True) + >>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True) + >>> hashes_b[obj_b] != hashes_c[obj_c] + True + +ignore_string_case + Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. + + >>> from deepdiff import DeepHash + >>> DeepHash('hello')['hello'] == DeepHash('heLLO')['heLLO'] + False + >>> DeepHash('hello', ignore_string_case=True)['hello'] == DeepHash('heLLO', ignore_string_case=True)['heLLO'] + True + +exclude_obj_callback + function, default = None + A function that takes the object and its path and returns a Boolean.
If True is returned, the object is excluded from the results, otherwise it is included. + This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. + + >>> def exclude_obj_callback(obj, path): + ... return True if isinstance(obj, str) and obj in ('x', 'y') else False + ... + >>> dic1 = {"x": 1, "y": 2, "z": 3} + >>> t1 = [dic1] + >>> t1_hash = DeepHash(t1, exclude_obj_callback=exclude_obj_callback) + >>> + >>> dic2 = {"z": 3} + >>> t2 = [dic2] + >>> t2_hash = DeepHash(t2, exclude_obj_callback=exclude_obj_callback) + >>> + >>> t1_hash[t1] == t2_hash[t2] + True + +number_format_notation : string, default="f" + When numbers are converted to the string, you have the choices between "f" as fixed point and "e" as scientific notation: + + >>> t1=10002 + >>> t2=10004 + >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="f") + >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="f") + >>> + >>> t1_hash[t1] == t2_hash[t2] + False + >>> + >>> + >>> # Now we use the scientific notation + ... t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e") + >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e") >>> - >>> ds = obj | grep(item, verbose_level=1, use_regexp=True, strict_checking=False) - >>> pprint(ds) - {'matched_values': ["root['num']"]} + >>> t1_hash[t1] == t2_hash[t2] + True + +Defining your own number_to_string_func + Lets say you want the hash of numbers below 100 to be the same for some reason. + + >>> from deepdiff import DeepHash + >>> from deepdiff.helper import number_to_string + >>> def custom_number_to_string(number, *args, **kwargs): + ... number = 100 if number < 100 else number + ... return number_to_string(number, *args, **kwargs) + ... 
+ >>> t1 = [10, 12, 100000] + >>> t2 = [50, 63, 100021] + >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) + >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) + >>> t1_hash[t1] == t2_hash[t2] + True + + So both lists produced the same hash thanks to the low significant digits for 100000 vs 100021 and also the custom_number_to_string that converted all numbers below 100 to be 100! diff --git a/tests/test_glob_paths.py b/tests/test_glob_paths.py new file mode 100644 index 00000000..d97ead2b --- /dev/null +++ b/tests/test_glob_paths.py @@ -0,0 +1,719 @@ +import pytest +from deepdiff import DeepDiff, DeepHash, DeepSearch, grep +from deepdiff.path import ( + GlobPathMatcher, compile_glob_paths, path_has_wildcard, + _path_to_elements, SINGLE_WILDCARD, MULTI_WILDCARD, +) +from deepdiff.helper import separate_wildcard_and_exact_paths + + +# ── path_has_wildcard detection ────────────────────────────────────── + + +class TestPathHasWildcard: + + @pytest.mark.parametrize("path, expected", [ + ("root[*]", True), + ("root[**]", True), + ("root.*", True), + ("root.**", True), + ("root['users'][*]['name']", True), + ("root[**]['password']", True), + ("root['*']", False), # literal key named '*' + ("root['**']", False), # literal key named '**' + ("root['foo']['bar']", False), + ("root[0][1]", False), + ("root.foo.bar", False), + ("root[*][*]", True), # multiple wildcards + ("root[**][**]", True), + ("root.*.bar.*", True), # multiple dot wildcards + ]) + def test_detection(self, path, expected): + assert path_has_wildcard(path) is expected + + +# ── _path_to_elements parsing of wildcards ─────────────────────────── + + +class TestWildcardParsing: + + @pytest.mark.parametrize("path, expected", [ + ("root[*]", (('root', 'GETATTR'), (SINGLE_WILDCARD, 'GET'))), + ("root[**]", (('root', 'GETATTR'), (MULTI_WILDCARD, 'GET'))), + 
("root['users'][*]['password']", ( + ('root', 'GETATTR'), ('users', 'GET'), (SINGLE_WILDCARD, 'GET'), ('password', 'GET'), + )), + ("root[**]['secret']", ( + ('root', 'GETATTR'), (MULTI_WILDCARD, 'GET'), ('secret', 'GET'), + )), + ("root.*.name", ( + ('root', 'GETATTR'), (SINGLE_WILDCARD, 'GETATTR'), ('name', 'GETATTR'), + )), + ("root[*][*]", ( + ('root', 'GETATTR'), (SINGLE_WILDCARD, 'GET'), (SINGLE_WILDCARD, 'GET'), + )), + ]) + def test_parsing(self, path, expected): + assert _path_to_elements(path) == expected + + def test_literal_star_key_not_wildcard(self): + """root['*'] should parse as a literal string '*', not a wildcard token.""" + elems = _path_to_elements("root['*']") + # The element should be a plain string, not a _WildcardToken + assert elems[1][0] == '*' + assert elems[1][0] != SINGLE_WILDCARD + assert isinstance(elems[1][0], str) + + def test_literal_double_star_key_not_wildcard(self): + """root['**'] should parse as a literal string '**', not a wildcard token.""" + elems = _path_to_elements("root['**']") + assert elems[1][0] == '**' + assert elems[1][0] != MULTI_WILDCARD + assert isinstance(elems[1][0], str) + + def test_wildcard_token_repr(self): + """_WildcardToken repr should return the symbol string.""" + assert repr(SINGLE_WILDCARD) == '*' + assert repr(MULTI_WILDCARD) == '**' + + def test_wildcard_token_hash(self): + """_WildcardToken instances should be hashable and usable in sets/dicts.""" + s = {SINGLE_WILDCARD, MULTI_WILDCARD} + assert len(s) == 2 + assert SINGLE_WILDCARD in s + d = {SINGLE_WILDCARD: 'one', MULTI_WILDCARD: 'many'} + assert d[SINGLE_WILDCARD] == 'one' + + +# ── separate_wildcard_and_exact_paths ──────────────────────────────── + + +class TestSeparateWildcardPaths: + + def test_none_input(self): + exact, globs = separate_wildcard_and_exact_paths(None) + assert exact is None + assert globs is None + + def test_empty_input(self): + exact, globs = separate_wildcard_and_exact_paths(set()) + assert exact is None + assert globs 
is None + + def test_all_exact(self): + exact, globs = separate_wildcard_and_exact_paths({"root['foo']", "root['bar']"}) + assert exact == {"root['foo']", "root['bar']"} + assert globs is None + + def test_all_wildcards(self): + exact, globs = separate_wildcard_and_exact_paths({"root[*]", "root[**]['x']"}) + assert exact is None + assert len(globs) == 2 + + def test_mixed(self): + exact, globs = separate_wildcard_and_exact_paths( + {"root['foo']", "root[*]['bar']"} + ) + assert exact == {"root['foo']"} + assert len(globs) == 1 + assert globs[0].original_pattern == "root[*]['bar']" + + def test_wildcard_must_start_with_root(self): + with pytest.raises(ValueError, match="Wildcard paths must start with 'root'"): + separate_wildcard_and_exact_paths({"[*]['foo']"}) + + +# ── GlobPathMatcher.match ──────────────────────────────────────────── + + +class TestGlobPathMatcherMatch: + + # ── single wildcard [*] ── + + @pytest.mark.parametrize("target, expected", [ + ("root['a']", True), + ("root[0]", True), + ("root[99]", True), + ("root", False), # too short + ("root['a']['b']", False), # too long + ]) + def test_single_wildcard_basic(self, target, expected): + m = GlobPathMatcher("root[*]") + assert m.match(target) is expected + + @pytest.mark.parametrize("target, expected", [ + ("root['users']['alice']['password']", True), + ("root['users'][0]['password']", True), + ("root['users'][99]['password']", True), + ("root['users']['password']", False), # missing middle segment + ("root['users']['a']['b']['password']", False), # too many middle segments + ("root['users']['alice']['email']", False), # wrong last segment + ]) + def test_single_wildcard_in_middle(self, target, expected): + m = GlobPathMatcher("root['users'][*]['password']") + assert m.match(target) is expected + + def test_multiple_single_wildcards(self): + """root[*][*] matches exactly two segments after root.""" + m = GlobPathMatcher("root[*][*]") + assert m.match("root['a']['b']") is True + assert 
m.match("root[0][1]") is True + assert m.match("root['a']") is False + assert m.match("root['a']['b']['c']") is False + + # ── double wildcard [**] ── + + @pytest.mark.parametrize("target, expected", [ + ("root", True), # zero segments + ("root['a']", True), # one segment + ("root['a']['b']['c']", True), # many segments + ("root[0][1][2]", True), # numeric indices + ]) + def test_double_wildcard_standalone(self, target, expected): + m = GlobPathMatcher("root[**]") + assert m.match(target) is expected + + @pytest.mark.parametrize("target, expected", [ + ("root['password']", True), # ** matches zero + ("root['a']['password']", True), # ** matches one + ("root['a']['b']['c']['password']", True), # ** matches many + ("root['a']['b']", False), # doesn't end with password + ("root['password']['extra']", False), # extra after password + ]) + def test_double_wildcard_before_key(self, target, expected): + m = GlobPathMatcher("root[**]['password']") + assert m.match(target) is expected + + def test_double_wildcard_both_ends(self): + m = GlobPathMatcher("root[**]['config'][**]['value']") + assert m.match("root['config']['value']") is True + assert m.match("root['a']['config']['value']") is True + assert m.match("root['a']['config']['b']['c']['value']") is True + assert m.match("root['config']['x']") is False + assert m.match("root['value']") is False + + def test_double_wildcard_zero_match_in_middle(self): + """** between two fixed segments can match zero segments.""" + m = GlobPathMatcher("root['a'][**]['b']") + assert m.match("root['a']['b']") is True # ** matches zero + assert m.match("root['a']['x']['b']") is True # ** matches one + assert m.match("root['a']['x']['y']['b']") is True # ** matches two + + def test_adjacent_double_wildcards(self): + m = GlobPathMatcher("root[**][**]['x']") + assert m.match("root['x']") is True + assert m.match("root['a']['x']") is True + assert m.match("root['a']['b']['x']") is True + + # ── dot notation wildcards ── + + def 
test_dot_single_wildcard(self): + m = GlobPathMatcher("root.*.name") + assert m.match("root.user.name") is True + assert m.match("root.name") is False + + def test_dot_double_wildcard(self): + m = GlobPathMatcher("root.**.name") + assert m.match("root.name") is True + assert m.match("root.a.name") is True + assert m.match("root.a.b.name") is True + + # ── mixed bracket and dot ── + + def test_mixed_bracket_and_dot_wildcard(self): + m = GlobPathMatcher("root[*].name") + assert m.match("root['user'].name") is True + assert m.match("root[0].name") is True + + +# ── GlobPathMatcher.match_or_is_ancestor ───────────────────────────── + + +class TestGlobPathMatcherAncestor: + + def test_ancestor_of_double_wildcard(self): + m = GlobPathMatcher("root[**]['password']") + assert m.match_or_is_ancestor("root['users']") is True + assert m.match_or_is_ancestor("root") is True + + def test_match_also_returns_true(self): + m = GlobPathMatcher("root[**]['password']") + assert m.match_or_is_ancestor("root['password']") is True + + def test_any_path_is_ancestor_with_double_wildcard(self): + """With ** in the pattern, any intermediate path could lead to a match.""" + m = GlobPathMatcher("root[**]['password']") + assert m.match_or_is_ancestor("root['x']") is True + assert m.match_or_is_ancestor("root['x']['y']['z']") is True + + def test_single_wildcard_ancestor_positive(self): + m = GlobPathMatcher("root['users'][*]['password']") + assert m.match_or_is_ancestor("root['users']") is True + assert m.match_or_is_ancestor("root") is True + + def test_single_wildcard_ancestor_negative(self): + """A path that diverges from a single-wildcard pattern is not an ancestor.""" + m = GlobPathMatcher("root['users'][*]['password']") + assert m.match_or_is_ancestor("root['other']") is False + + +# ── GlobPathMatcher.match_or_is_descendant ─────────────────────────── + + +class TestGlobPathMatcherDescendant: + + def test_descendant_of_match(self): + m = GlobPathMatcher("root[**]['config']") + assert 
m.match_or_is_descendant("root['config']['value']") is True + assert m.match_or_is_descendant("root['config']['a']['b']") is True + + def test_exact_match(self): + m = GlobPathMatcher("root[**]['config']") + assert m.match_or_is_descendant("root['config']") is True + + def test_not_descendant_or_match(self): + m = GlobPathMatcher("root[**]['secret']") + assert m.match_or_is_descendant("root['config']['db']['host']") is False + + def test_ancestor_is_not_descendant(self): + m = GlobPathMatcher("root['users'][*]['password']") + assert m.match_or_is_descendant("root['users']") is False + + def test_descendant_of_single_wildcard_match(self): + m = GlobPathMatcher("root[*]") + assert m.match_or_is_descendant("root['a']['nested']") is True + + +# ── compile_glob_paths ─────────────────────────────────────────────── + + +class TestCompileGlobPaths: + + def test_none_returns_none(self): + assert compile_glob_paths(None) is None + + def test_empty_returns_none(self): + assert compile_glob_paths([]) is None + + def test_compiles_list(self): + result = compile_glob_paths(["root[*]", "root[**]['x']"]) + assert len(result) == 2 + assert all(isinstance(r, GlobPathMatcher) for r in result) + + +# ── DeepDiff integration: exclude_paths with wildcards ─────────────── + + +class TestDeepDiffExcludeGlob: + + def test_exclude_single_wildcard(self): + t1 = {'users': {'alice': {'name': 'Alice', 'pw': 's1'}, 'bob': {'name': 'Bob', 'pw': 's2'}}} + t2 = {'users': {'alice': {'name': 'Alice', 'pw': 'c1'}, 'bob': {'name': 'Bobby', 'pw': 'c2'}}} + diff = DeepDiff(t1, t2, exclude_paths=["root['users'][*]['pw']"]) + changed = diff.get('values_changed', {}) + assert "root['users']['bob']['name']" in changed + assert "root['users']['alice']['pw']" not in changed + assert "root['users']['bob']['pw']" not in changed + + def test_exclude_double_wildcard(self): + t1 = { + 'config': {'db': {'host': 'localhost', 'secret': 'abc'}, + 'api': {'nested': {'secret': 'xyz'}}}, + 'name': 'app' + } + t2 = { + 
'config': {'db': {'host': 'remotehost', 'secret': 'def'}, + 'api': {'nested': {'secret': 'uvw'}}}, + 'name': 'app2' + } + diff = DeepDiff(t1, t2, exclude_paths=["root[**]['secret']"]) + changed = diff.get('values_changed', {}) + assert "root['config']['db']['host']" in changed + assert "root['name']" in changed + assert "root['config']['db']['secret']" not in changed + assert "root['config']['api']['nested']['secret']" not in changed + + def test_exclude_wildcard_with_list(self): + t1 = [{'name': 'Alice', 'age': 30}, {'name': 'Bob', 'age': 25}] + t2 = [{'name': 'Alice', 'age': 31}, {'name': 'Bobby', 'age': 26}] + diff = DeepDiff(t1, t2, exclude_paths=["root[*]['age']"]) + changed = diff.get('values_changed', {}) + assert "root[1]['name']" in changed + assert "root[0]['age']" not in changed + assert "root[1]['age']" not in changed + + def test_exclude_mix_exact_and_wildcard(self): + t1 = {'a': 1, 'b': 2, 'c': {'d': 3, 'e': 4}} + t2 = {'a': 10, 'b': 20, 'c': {'d': 30, 'e': 40}} + diff = DeepDiff(t1, t2, exclude_paths=["root['a']", "root['c'][*]"]) + changed = diff.get('values_changed', {}) + assert "root['b']" in changed + assert "root['a']" not in changed + assert "root['c']['d']" not in changed + assert "root['c']['e']" not in changed + + def test_exclude_nested_list_of_dicts(self): + t1 = {'data': [{'id': 1, 'meta': {'ts': 100}}, {'id': 2, 'meta': {'ts': 200}}]} + t2 = {'data': [{'id': 1, 'meta': {'ts': 999}}, {'id': 2, 'meta': {'ts': 888}}]} + diff = DeepDiff(t1, t2, exclude_paths=["root['data'][*]['meta']"]) + assert diff == {} + + def test_exclude_with_type_changes(self): + t1 = {'a': {'x': 1, 'y': 'hello'}} + t2 = {'a': {'x': 'changed_type', 'y': 'world'}} + diff = DeepDiff(t1, t2, exclude_paths=["root[*]['x']"]) + changed = diff.get('values_changed', {}) + assert "root['a']['y']" in changed + assert 'type_changes' not in diff + + +# ── DeepDiff integration: include_paths with wildcards ─────────────── + + +class TestDeepDiffIncludeGlob: + + def 
test_include_single_wildcard(self): + t1 = {'users': {'alice': {'name': 'Alice', 'pw': 's1'}, 'bob': {'name': 'Bob', 'pw': 's2'}}} + t2 = {'users': {'alice': {'name': 'Alice2', 'pw': 'c1'}, 'bob': {'name': 'Bobby', 'pw': 'c2'}}} + diff = DeepDiff(t1, t2, include_paths=["root['users'][*]['name']"]) + changed = diff.get('values_changed', {}) + assert "root['users']['alice']['name']" in changed + assert "root['users']['bob']['name']" in changed + assert "root['users']['alice']['pw']" not in changed + assert "root['users']['bob']['pw']" not in changed + + def test_include_double_wildcard(self): + t1 = { + 'config': {'db': {'host': 'localhost', 'secret': 'abc'}, + 'api': {'url': 'http://api', 'nested': {'secret': 'xyz'}}}, + 'name': 'app' + } + t2 = { + 'config': {'db': {'host': 'remotehost', 'secret': 'def'}, + 'api': {'url': 'http://api2', 'nested': {'secret': 'uvw'}}}, + 'name': 'app2' + } + diff = DeepDiff(t1, t2, include_paths=["root[**]['secret']"]) + changed = diff.get('values_changed', {}) + assert "root['config']['db']['secret']" in changed + assert "root['config']['api']['nested']['secret']" in changed + assert "root['config']['db']['host']" not in changed + assert "root['config']['api']['url']" not in changed + assert "root['name']" not in changed + + def test_include_mix_exact_and_wildcard(self): + t1 = { + 'config': {'db': {'host': 'localhost', 'secret': 'abc'}}, + 'name': 'app' + } + t2 = { + 'config': {'db': {'host': 'remotehost', 'secret': 'def'}}, + 'name': 'app2' + } + diff = DeepDiff(t1, t2, include_paths=["root[**]['secret']", "root['name']"]) + changed = diff.get('values_changed', {}) + assert "root['config']['db']['secret']" in changed + assert "root['name']" in changed + assert "root['config']['db']['host']" not in changed + + def test_include_wildcard_no_changes(self): + t1 = {'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}} + t2 = {'a': {'x': 1, 'y': 20}, 'b': {'x': 3, 'y': 40}} + diff = DeepDiff(t1, t2, include_paths=["root[*]['x']"]) + assert 
diff == {} + + def test_include_wildcard_with_added_keys(self): + """When a new key is added, include_paths restricts reporting to matching paths only.""" + t1 = {'a': {'name': 'x'}} + t2 = {'a': {'name': 'y'}, 'b': {'name': 'z'}} + diff = DeepDiff(t1, t2, include_paths=["root[*]['name']"]) + changed = diff.get('values_changed', {}) + assert "root['a']['name']" in changed + # root['b'] addition is not reported because the add is at root['b'], + # not at root[*]['name'] + assert 'dictionary_item_added' not in diff + + def test_include_double_wildcard_with_nested_list(self): + t1 = {'data': [{'scores': [1, 2]}, {'scores': [3, 4]}]} + t2 = {'data': [{'scores': [1, 2]}, {'scores': [3, 5]}]} + diff = DeepDiff(t1, t2, include_paths=["root[**]['scores']"]) + changed = diff.get('values_changed', {}) + assert "root['data'][1]['scores'][1]" in changed + assert len(changed) == 1 + + +# ── Backward compatibility ─────────────────────────────────────────── + + +class TestBackwardCompatibility: + + def test_exact_exclude_paths_unchanged(self): + t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs"]} + t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu"]} + ddiff = DeepDiff(t1, t2, exclude_paths={"root['ingredients']"}) + assert ddiff == {} + + def test_exact_include_paths_unchanged(self): + t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs"]} + t2 = {"for life": "vegan2", "ingredients": ["veggies", "tofu"]} + ddiff = DeepDiff(t1, t2, include_paths={"root['for life']"}) + changed = ddiff.get('values_changed', {}) + assert "root['for life']" in changed + assert len(changed) == 1 + + def test_exclude_regex_paths_unchanged(self): + t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] + t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] + ddiff = DeepDiff(t1, t2, exclude_regex_paths=[r"root\[\d+\]\['b'\]"]) + assert ddiff == {} + + def test_shorthand_paths_unchanged(self): + t1 = {"for life": "vegan", "ingredients": ["no meat"]} + t2 = {"for life": "vegan", 
"ingredients": ["veggies"]} + ddiff = DeepDiff(t1, t2, exclude_paths={"ingredients"}) + assert ddiff == {} + + def test_include_paths_with_nested_prefix(self): + """Existing prefix-based include logic must still work.""" + t1 = {"foo": {"bar": {"veg": "potato", "fruit": "apple"}}} + t2 = {"foo": {"bar": {"veg": "potato", "fruit": "peach"}}} + ddiff = DeepDiff(t1, t2, include_paths="root['foo']['bar']") + changed = ddiff.get('values_changed', {}) + assert "root['foo']['bar']['fruit']" in changed + + +# ── DeepSearch integration ─────────────────────────────────────────── + + +class TestDeepSearchGlob: + + def test_exclude_glob_in_search(self): + obj = {'a': {'secret': 'find_me', 'name': 'x'}, 'b': {'secret': 'find_me', 'name': 'y'}} + result = DeepSearch(obj, 'find_me', exclude_paths=["root[*]['secret']"]) + assert result == {} + + def test_exclude_deep_glob_in_search(self): + obj = {'level1': {'level2': {'target': 'needle', 'other': 'needle'}}} + result = DeepSearch(obj, 'needle', exclude_paths=["root[**]['target']"]) + matched = result.get('matched_values', {}) + assert "root['level1']['level2']['other']" in matched + assert "root['level1']['level2']['target']" not in matched + + def test_exclude_glob_via_grep(self): + obj = [{'secret': 'findme', 'name': 'x'}, {'secret': 'findme', 'name': 'y'}] + result = obj | grep('findme', exclude_paths=["root[*]['secret']"]) + assert result == {} + + def test_exclude_deep_glob_in_list_search(self): + obj = [[1, 2, 'target'], [3, 'target', 4]] + result = DeepSearch(obj, 'target', exclude_paths=["root[*][2]"]) + matched = result.get('matched_values', {}) + assert 'root[1][1]' in matched + assert 'root[0][2]' not in matched + + def test_search_with_mixed_exact_and_glob_exclude(self): + obj = {'a': 'val', 'b': {'c': 'val'}, 'd': {'e': {'f': 'val'}}} + result = DeepSearch(obj, 'val', exclude_paths=["root['a']", "root[**]['f']"]) + matched = result.get('matched_values', {}) + assert "root['b']['c']" in matched + assert "root['a']" 
not in matched + assert "root['d']['e']['f']" not in matched + + +# ── DeepHash integration ───────────────────────────────────────────── + + +class TestDeepHashGlob: + + def test_exclude_exact_makes_hash_equal(self): + t1 = {'name': 'app', 'secret': 'abc'} + t2 = {'name': 'app', 'secret': 'def'} + h1 = DeepHash(t1, exclude_paths=["root['secret']"]) + h2 = DeepHash(t2, exclude_paths=["root['secret']"]) + assert h1[t1] == h2[t2] + + def test_exclude_glob_wildcard_makes_hash_equal(self): + t1 = {'a': {'secret': 'x', 'name': 'n1'}, 'b': {'secret': 'y', 'name': 'n2'}} + t2 = {'a': {'secret': 'X', 'name': 'n1'}, 'b': {'secret': 'Y', 'name': 'n2'}} + h1 = DeepHash(t1, exclude_paths=["root[*]['secret']"]) + h2 = DeepHash(t2, exclude_paths=["root[*]['secret']"]) + assert h1[t1] == h2[t2] + + def test_exclude_deep_glob_makes_hash_equal(self): + t1 = {'a': {'b': {'secret': 1, 'val': 2}}} + t2 = {'a': {'b': {'secret': 99, 'val': 2}}} + h1 = DeepHash(t1, exclude_paths=["root[**]['secret']"]) + h2 = DeepHash(t2, exclude_paths=["root[**]['secret']"]) + assert h1[t1] == h2[t2] + + def test_exclude_glob_hash_not_equal_when_included_part_differs(self): + t1 = {'a': {'secret': 'x', 'name': 'n1'}} + t2 = {'a': {'secret': 'x', 'name': 'DIFFERENT'}} + h1 = DeepHash(t1, exclude_paths=["root[*]['secret']"]) + h2 = DeepHash(t2, exclude_paths=["root[*]['secret']"]) + assert h1[t1] != h2[t2] + + +# ── Edge cases ─────────────────────────────────────────────────────── + + +class TestEdgeCases: + + def test_wildcard_with_ignore_order(self): + t1 = [{'name': 'a', 'pw': '1'}, {'name': 'b', 'pw': '2'}] + t2 = [{'name': 'b', 'pw': '20'}, {'name': 'a', 'pw': '10'}] + diff = DeepDiff(t1, t2, ignore_order=True, exclude_paths=["root[*]['pw']"]) + assert diff == {} + + def test_include_wildcard_with_ignore_order(self): + t1 = [{'name': 'a', 'pw': '1'}, {'name': 'b', 'pw': '2'}] + t2 = [{'name': 'b', 'pw': '20'}, {'name': 'a', 'pw': '10'}] + diff = DeepDiff(t1, t2, ignore_order=True, 
include_paths=["root[*]['name']"]) + assert diff == {} + + def test_wildcard_with_added_removed_keys(self): + t1 = {'users': {'alice': {'name': 'Alice', 'pw': 'a'}}} + t2 = {'users': {'alice': {'name': 'Alice', 'pw': 'b'}, 'bob': {'name': 'Bob', 'pw': 'c'}}} + diff = DeepDiff(t1, t2, exclude_paths=["root['users'][*]['pw']"]) + added = diff.get('dictionary_item_added', []) + assert any("bob" in str(p) for p in added) + + def test_empty_diff_with_wildcard(self): + t1 = {'a': 1} + t2 = {'a': 1} + diff = DeepDiff(t1, t2, exclude_paths=["root[*]"]) + assert diff == {} + + def test_root_double_wildcard_excludes_everything(self): + t1 = {'a': 1, 'b': {'c': 2}} + t2 = {'a': 10, 'b': {'c': 20}} + diff = DeepDiff(t1, t2, exclude_paths=["root[**]"]) + assert diff == {} + + def test_wildcard_with_custom_object(self): + class Obj: + def __init__(self, name, secret): + self.name = name + self.secret = secret + o1 = Obj('a', 's1') + o2 = Obj('b', 's2') + diff = DeepDiff(o1, o2, exclude_paths=["root.secret"]) + changed = diff.get('values_changed', {}) + assert 'root.name' in changed + assert 'root.secret' not in changed + + def test_exclude_wildcard_with_removed_items(self): + t1 = [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}, {'x': 5, 'y': 6}] + t2 = [{'x': 1, 'y': 2}] + diff = DeepDiff(t1, t2, exclude_paths=["root[*]['y']"]) + removed = diff.get('iterable_item_removed', {}) + assert len(removed) == 2 + + def test_wildcard_verbose_level_2(self): + t1 = {'a': {'x': 1}, 'b': {'x': 2}} + t2 = {'a': {'x': 10}, 'b': {'x': 20}} + diff = DeepDiff(t1, t2, exclude_paths=["root[*]['x']"], verbose_level=2) + assert diff == {} + + def test_multiple_wildcards_in_one_pattern(self): + t1 = {'a': {'b': {'c': 1}}, 'x': {'y': {'z': 2}}} + t2 = {'a': {'b': {'c': 10}}, 'x': {'y': {'z': 20}}} + diff = DeepDiff(t1, t2, exclude_paths=["root[*][*][*]"]) + assert diff == {} + + def test_wildcard_does_not_affect_identical_objects(self): + t1 = {'a': [1, 2, 3], 'b': {'c': 'd'}} + diff = DeepDiff(t1, t1, 
exclude_paths=["root[**]"]) + assert diff == {} + + def test_wildcard_as_single_exclude_path_string(self): + """exclude_paths accepts a single string, not just a list.""" + t1 = {'a': {'x': 1}, 'b': {'x': 2}} + t2 = {'a': {'x': 10}, 'b': {'x': 20}} + diff = DeepDiff(t1, t2, exclude_paths="root[*]['x']") + assert diff == {} + + def test_include_wildcard_as_single_string(self): + """include_paths accepts a single string, not just a list.""" + t1 = {'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}} + t2 = {'a': {'x': 10, 'y': 2}, 'b': {'x': 30, 'y': 4}} + diff = DeepDiff(t1, t2, include_paths="root[*]['x']") + changed = diff.get('values_changed', {}) + assert len(changed) == 2 + assert "root['a']['y']" not in changed + + def test_literal_star_key_not_treated_as_wildcard(self): + """A dict key named '*' should be treated literally, not as a wildcard.""" + t1 = {'*': 1, 'a': 2, 'b': 3} + t2 = {'*': 10, 'a': 20, 'b': 30} + # Exclude only the literal '*' key, not all keys + diff = DeepDiff(t1, t2, exclude_paths=["root['*']"]) + changed = diff.get('values_changed', {}) + # '*' key should be excluded, but 'a' and 'b' should still show changes + assert "root['*']" not in changed + assert "root['a']" in changed + assert "root['b']" in changed + + def test_glob_matcher_literal_star_vs_wildcard(self): + """GlobPathMatcher(root['*']) should only match literal '*' key.""" + matcher = GlobPathMatcher("root['*']") + # Should NOT match arbitrary keys (that's what root[*] is for) + assert not matcher.match("root['hello']") + assert not matcher.match("root['a']") + # Should match the literal '*' key + assert matcher.match("root['*']") + + def test_exclude_takes_precedence_over_include(self): + """When a path matches both include and exclude, exclude should win.""" + t1 = {'x': 1, 'y': 2} + t2 = {'x': 10, 'y': 20} + diff = DeepDiff(t1, t2, include_paths=["root['x']"], exclude_paths=["root['x']"]) + assert diff == {} + + def test_exclude_glob_takes_precedence_over_include_glob(self): + 
"""Exclude glob should take precedence over include glob for same path.""" + t1 = {'a': {'x': 1}, 'b': {'x': 2}} + t2 = {'a': {'x': 10}, 'b': {'x': 20}} + diff = DeepDiff(t1, t2, include_paths=["root[*]['x']"], exclude_paths=["root['a'][*]"]) + changed = diff.get('values_changed', {}) + assert "root['a']['x']" not in changed + assert "root['b']['x']" in changed + + def test_include_glob_with_custom_operator(self): + """include_glob_paths should filter custom operator reports to only matching paths.""" + from deepdiff.operator import BaseOperator + + class AlwaysReport(BaseOperator): + """Reports on dict-level comparisons, which are ancestors of the glob target.""" + def give_up_diffing(self, level, diff_instance): + diff_instance.custom_report_result( + 'custom_report', level, {'message': 'custom'}) + return True + + t1 = {'a': {'x': 1}, 'b': {'x': 2}} + t2 = {'a': {'x': 10}, 'b': {'x': 20}} + # Operator fires on dict type — so it reports at root['a'] and root['b'] level + op = AlwaysReport(types=[dict]) + diff = DeepDiff(t1, t2, include_paths=["root[*]['x']"], custom_operators=[op]) + custom = diff.get('custom_report', set()) + # root['a'] and root['b'] are ancestors of the glob pattern, not matches + # or descendants — _skip_report_for_include_glob should filter them out + assert "root['a']" not in custom + assert "root['b']" not in custom + + def test_mixed_exact_include_and_glob_include(self): + """When both exact include_paths and glob include_paths are used together, + exact matches should pass through without glob filtering.""" + t1 = {'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}} + t2 = {'a': {'x': 10, 'y': 20}, 'b': {'x': 30, 'y': 40}} + diff = DeepDiff( + t1, t2, + include_paths=["root['a']", "root[*]['x']"], + ) + changed = diff.get('values_changed', {}) + # root['a']['y'] is covered by exact include root['a'] + assert "root['a']['y']" in changed + # root['b']['x'] is covered by glob root[*]['x'] + assert "root['b']['x']" in changed + # root['b']['y'] is 
NOT covered by either + assert "root['b']['y']" not in changed From c82a5ef25da7f690d08cbe4b74682ce52e7b1096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 31 Mar 2026 07:37:39 +0200 Subject: [PATCH 02/23] Add missing files to sdist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the flit configuration to include documentation and test files in source distribution again. The flit defaults changed recently, and the files are missing in 9.0.0 release. Signed-off-by: Michał Górny --- pyproject.toml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 043f9d28..5c6f2d7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,5 +101,18 @@ statistics = true ignore = "E202" exclude = "./data,./src,.svn,CVS,.bzr,.hg,.git,__pycache__" +[tool.flit.sdist] +include = [ + "AUTHORS.md", + "CHANGELOG.md", + "CITATION.cff", + "conftest.py", + "docs/", + "mypy.ini", + "noxfile.py", + "tests/", + "uv.lock", +] + [tool.pytest.ini_options] addopts = "--pdbcls=IPython.terminal.debugger:Pdb" From 6adc8cf166f7e492b3554f38a38e3c96169e90a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 31 Mar 2026 07:39:04 +0200 Subject: [PATCH 03/23] Remove obsolete `MANIFEST.in` file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the `MANIFEST.in` file that was used by setuptools. 
Signed-off-by: Michał Górny --- MANIFEST.in | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 4dfbf568..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,20 +0,0 @@ -include LICENSE -include AUTHORS -include CHANGELOG -include *.rst -include deepdiff/*.rst -include *.txt -include *.sh -include pytest.ini -include *.py -exclude uv.lock -recursive-include docs/ *.rst -recursive-include docs/ *.png -recursive-include tests *.csv -recursive-include tests *.json -recursive-include tests *.pickle -recursive-include tests *.py -recursive-include tests *.toml -recursive-include tests *.yaml -global-exclude __pycache__ -global-exclude *.py[co] From 419900925f926cb7134c55e0e36d8eb53f357d6f Mon Sep 17 00:00:00 2001 From: Akshat Gupta Date: Tue, 31 Mar 2026 16:23:38 +0000 Subject: [PATCH 04/23] Add wildcard/glob pattern support for exclude_paths and include_paths --- .bumpversion.cfg | 6 +- AUTHORS.md | 1 + CHANGELOG.md | 1 + CITATION.cff | 2 +- README.md | 53 +- deepdiff/__init__.py | 2 +- deepdiff/diff.py | 2420 ++++++++++++++--- deepdiff/docstrings/authors.rst | 176 ++ deepdiff/docstrings/basics.rst | 345 +++ deepdiff/docstrings/changelog.rst | 341 +++ deepdiff/docstrings/colored_view.rst | 101 + deepdiff/docstrings/commandline.rst | 320 +++ deepdiff/docstrings/custom.rst | 440 +++ deepdiff/docstrings/deep_distance.rst | 119 + deepdiff/docstrings/deephash.rst | 14 + deepdiff/docstrings/deephash_doc.rst | 388 +++ deepdiff/docstrings/delta.rst | 539 ++++ deepdiff/docstrings/diff.rst | 30 + deepdiff/docstrings/diff_doc.rst | 233 ++ deepdiff/docstrings/dsearch.rst | 19 + deepdiff/docstrings/exclude_paths.rst | 130 + deepdiff/docstrings/extract.rst | 13 + deepdiff/docstrings/faq.rst | 185 ++ deepdiff/docstrings/ignore_order.rst | 318 +++ .../docstrings/ignore_types_or_values.rst | 442 +++ deepdiff/docstrings/index.rst | 153 ++ deepdiff/docstrings/numbers.rst | 202 ++ 
deepdiff/docstrings/optimizations.rst | 287 ++ deepdiff/docstrings/other.rst | 55 + deepdiff/docstrings/search_doc.rst | 75 + deepdiff/docstrings/serialization.rst | 325 +++ deepdiff/docstrings/stats.rst | 78 + deepdiff/docstrings/support.rst | 19 + deepdiff/docstrings/troubleshoot.rst | 29 + deepdiff/docstrings/view.rst | 365 +++ deepdiff/helper.py | 2 +- docs/authors.rst | 168 +- docs/basics.rst | 346 +-- docs/changelog.rst | 341 +-- docs/colored_view.rst | 102 +- docs/commandline.rst | 321 +-- docs/conf.py | 4 +- docs/custom.rst | 441 +-- docs/deep_distance.rst | 120 +- docs/deephash.rst | 15 +- docs/deephash_doc.rst | 389 +-- docs/delta.rst | 540 +--- docs/diff.rst | 31 +- docs/diff_doc.rst | 234 +- docs/dsearch.rst | 20 +- docs/exclude_paths.rst | 131 +- docs/extract.rst | 14 +- docs/faq.rst | 186 +- docs/ignore_order.rst | 319 +-- docs/ignore_types_or_values.rst | 443 +-- docs/index.rst | 210 +- docs/numbers.rst | 203 +- docs/optimizations.rst | 288 +- docs/other.rst | 56 +- docs/search_doc.rst | 389 +-- docs/serialization.rst | 326 +-- docs/stats.rst | 79 +- docs/support.rst | 20 +- docs/troubleshoot.rst | 30 +- docs/view.rst | 366 +-- pyproject.toml | 3 +- uv.lock | 11 + 67 files changed, 7799 insertions(+), 6575 deletions(-) create mode 100644 deepdiff/docstrings/authors.rst create mode 100644 deepdiff/docstrings/basics.rst create mode 100644 deepdiff/docstrings/changelog.rst create mode 100644 deepdiff/docstrings/colored_view.rst create mode 100644 deepdiff/docstrings/commandline.rst create mode 100644 deepdiff/docstrings/custom.rst create mode 100644 deepdiff/docstrings/deep_distance.rst create mode 100644 deepdiff/docstrings/deephash.rst create mode 100644 deepdiff/docstrings/deephash_doc.rst create mode 100644 deepdiff/docstrings/delta.rst create mode 100644 deepdiff/docstrings/diff.rst create mode 100644 deepdiff/docstrings/diff_doc.rst create mode 100644 deepdiff/docstrings/dsearch.rst create mode 100644 deepdiff/docstrings/exclude_paths.rst create 
mode 100644 deepdiff/docstrings/extract.rst create mode 100644 deepdiff/docstrings/faq.rst create mode 100644 deepdiff/docstrings/ignore_order.rst create mode 100644 deepdiff/docstrings/ignore_types_or_values.rst create mode 100644 deepdiff/docstrings/index.rst create mode 100644 deepdiff/docstrings/numbers.rst create mode 100644 deepdiff/docstrings/optimizations.rst create mode 100644 deepdiff/docstrings/other.rst create mode 100644 deepdiff/docstrings/search_doc.rst create mode 100644 deepdiff/docstrings/serialization.rst create mode 100644 deepdiff/docstrings/stats.rst create mode 100644 deepdiff/docstrings/support.rst create mode 100644 deepdiff/docstrings/troubleshoot.rst create mode 100644 deepdiff/docstrings/view.rst mode change 100644 => 120000 docs/authors.rst mode change 100644 => 120000 docs/basics.rst mode change 100644 => 120000 docs/changelog.rst mode change 100644 => 120000 docs/colored_view.rst mode change 100644 => 120000 docs/commandline.rst mode change 100644 => 120000 docs/custom.rst mode change 100644 => 120000 docs/deep_distance.rst mode change 100644 => 120000 docs/deephash.rst mode change 100644 => 120000 docs/deephash_doc.rst mode change 100644 => 120000 docs/delta.rst mode change 100644 => 120000 docs/diff.rst mode change 100644 => 120000 docs/diff_doc.rst mode change 100644 => 120000 docs/dsearch.rst mode change 100644 => 120000 docs/exclude_paths.rst mode change 100644 => 120000 docs/extract.rst mode change 100644 => 120000 docs/faq.rst mode change 100644 => 120000 docs/ignore_order.rst mode change 100644 => 120000 docs/ignore_types_or_values.rst mode change 100644 => 120000 docs/index.rst mode change 100644 => 120000 docs/numbers.rst mode change 100644 => 120000 docs/optimizations.rst mode change 100644 => 120000 docs/other.rst mode change 100644 => 120000 docs/search_doc.rst mode change 100644 => 120000 docs/serialization.rst mode change 100644 => 120000 docs/stats.rst mode change 100644 => 120000 docs/support.rst mode change 100644 => 
120000 docs/troubleshoot.rst mode change 100644 => 120000 docs/view.rst diff --git a/.bumpversion.cfg b/.bumpversion.cfg index c6206780..c781f85e 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,7 +1,7 @@ [bumpversion] -current_version = 8.7.0 -commit = True -tag = True +current_version = 9.0.0 +commit = False +tag = False tag_name = {new_version} [bumpversion:file:pyproject.toml] diff --git a/AUTHORS.md b/AUTHORS.md index fc6fd2f2..5e208b5e 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -85,3 +85,4 @@ Authors in order of the timeline of their contributions: - [ljames8](https://github.com/ljames8) for fixing logarithmic similarity type hint. - [srini047](https://github.com/srini047) for fixing README typo. - [Nagato-Yuzuru](https://github.com/Nagato-Yuzuru) for colored view tests. +- [akshat62](https://github.com/akshat62) for adding Fraction numeric support. diff --git a/CHANGELOG.md b/CHANGELOG.md index e36c0d49..272a2399 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - Fixed parameter bug in `to_flat_dicts` where `include_action_in_path` and `report_type_changes` were not being passed through - Fixed `ignore_keys` issue in `detailed__dict__` thanks to [vitalis89](https://github.com/vitalis89) - Fixed logarithmic similarity type hint thanks to [ljames8](https://github.com/ljames8) + - Added `Fraction` numeric support thanks to [akshat62](https://github.com/akshat62) - v8-6-2 - Security fix (CVE-2026-33155): Prevent denial-of-service via crafted pickle payloads that trigger massive memory allocation through the REDUCE opcode. Size-sensitive callables like `bytes()` and `bytearray()` are now wrapped to reject allocations exceeding 128 MB. 
- v8-6-1 diff --git a/CITATION.cff b/CITATION.cff index efd1029e..06dbb01e 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -5,6 +5,6 @@ authors: given-names: "Sep" orcid: "https://orcid.org/0009-0009-5828-4345" title: "DeepDiff" -version: 8.7.0 +version: 9.0.0 date-released: 2026 url: "https://github.com/seperman/deepdiff" diff --git a/README.md b/README.md index 73abe9a6..8c3fc20c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 8.7.0 +# DeepDiff v 9.0.0 ![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat) ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) @@ -21,13 +21,13 @@ Tested on Python 3.10+ and PyPy3. -- **[Documentation](https://zepworks.com/deepdiff/8.7.0/)** +- **[Documentation](https://zepworks.com/deepdiff/9.0.0/)** ## What is new? Please check the [ChangeLog](CHANGELOG.md) file for the detailed information. -DeepDiff 8-7-0 +DeepDiff 9-0-0 - migration note: - `to_dict()` and `to_json()` now accept a `verbose_level` parameter and always return a usable text-view dict. When the original view is `'tree'`, they default to `verbose_level=2` for full detail. The old `view_override` parameter is removed. To get the previous results, you will need to pass the explicit verbose_level to `to_json` and `to_dict` if you are using the tree view. - Dropping support for Python 3.9 @@ -43,50 +43,7 @@ DeepDiff 8-7-0 - Fixed parameter bug in `to_flat_dicts` where `include_action_in_path` and `report_type_changes` were not being passed through - Fixed `ignore_keys` issue in `detailed__dict__` thanks to @vitalis89 - Fixed logarithmic similarity type hint thanks to @ljames8 - -DeepDiff 8-6-2 -- **Security (CVE-2026-33155):** Fixed a memory exhaustion DoS vulnerability in `_RestrictedUnpickler` by limiting the maximum allocation size for `bytes` and `bytearray` during deserialization. 
- -DeepDiff 8-6-1 -- Patched security vulnerability in the Delta class which was vulnerable to class pollution via its constructor, and when combined with a gadget available in DeltaDiff itself, it could lead to Denial of Service and Remote Code Execution (via insecure Pickle deserialization). - -DeepDiff 8-6-0 - -- Added Colored View thanks to @mauvilsa -- Added support for applying deltas to NamedTuple thanks to @paulsc -- Fixed test_delta.py with Python 3.14 thanks to @Romain-Geissler-1A -- Added python property serialization to json -- Added ip address serialization -- Switched to UV from pip -- Added Claude.md -- Added uuid hashing thanks to @akshat62 -- Added `ignore_uuid_types` flag to DeepDiff to avoid type reports when comparing UUID and string. -- Added comprehensive type hints across the codebase (multiple commits for better type safety) -- Added support for memoryview serialization -- Added support for bytes serialization (non-UTF8 compatible) -- Fixed bug where group_by with numbers would leak type info into group path reports -- Fixed bug in `_get_clean_to_keys_mapping without` explicit significant digits -- Added support for python dict key serialization -- Enhanced support for IP address serialization with safe module imports -- Added development tooling improvements (pyright config, .envrc example) -- Updated documentation and development instructions - - -DeepDiff 8-5-0 - -- Updating deprecated pydantic calls -- Switching to pyproject.toml -- Fix for moving nested tables when using iterable_compare_func. by -- Fix recursion depth limit when hashing numpy.datetime64 -- Moving from legacy setuptools use to pyproject.toml - - -DeepDiff 8-4-2 - -- fixes the type hints for the base -- fixes summarize so if json dumps fails, we can still get a repr of the results -- adds ipaddress support - +- Added `Fraction` numeric support thanks to @akshat62 ## Installation @@ -131,7 +88,7 @@ Please take a look at the [CHANGELOG](CHANGELOG.md) file. 
- Method 1: Use [`uv`](https://github.com/astral-sh/uv) to install the dependencies: `uv sync --all-extras`. - Method 2: Use pip: `pip install -e ".[cli,coverage,dev,docs,static,test]"` -5. Build `flit build` +5. Build `uv build` # Contribute diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index 754f3c87..312b0e35 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,6 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep, Delta and DeepHash classes.""" # flake8: noqa -__version__ = '8.7.0' +__version__ = '9.0.0' import logging if __name__ == '__main__': diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 2ac62b5e..4a1314ef 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -1,484 +1,2068 @@ -import re +#!/usr/bin/env python + +# In order to run the docstrings: +# python3 -m deepdiff.diff +# You might need to run it many times since dictionaries come in different orders +# every time you run the docstrings. +# However the docstring expects it in a specific order in order to pass! 
+import difflib import logging -from ast import literal_eval +import types +import datetime +import uuid +from enum import Enum +from copy import deepcopy +from math import isclose as is_close +from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet, TYPE_CHECKING, Protocol, Literal +from collections.abc import Mapping, Iterable, Sequence +from collections import defaultdict +from inspect import getmembers +from itertools import zip_longest from functools import lru_cache +from deepdiff.helper import (strings, bytes_type, numbers, uuids, ListItemRemovedOrAdded, notpresent, + IndexedHash, unprocessed, add_to_frozen_set, basic_types, + convert_item_or_items_into_set_else_none, get_type, + convert_item_or_items_into_compiled_regexes_else_none, + type_is_subclass_of_type_group, type_in_type_group, get_doc, + number_to_string, datetime_normalize, KEY_TO_VAL_STR, booleans, + np_ndarray, np_floating, get_numpy_ndarray_rows, RepeatedTimer, + TEXT_VIEW, TREE_VIEW, DELTA_VIEW, COLORED_VIEW, COLORED_COMPACT_VIEW, + detailed__dict__, add_root_to_paths, + np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS, + PydanticBaseModel, Opcode, SetOrdered, ipranges, + separate_wildcard_and_exact_paths) +from deepdiff.serialization import SerializationMixin +from deepdiff.distance import DistanceMixin, logarithmic_similarity +from deepdiff.model import ( + RemapDict, ResultDict, TextResult, TreeResult, DiffLevel, + DictRelationship, AttributeRelationship, REPORT_KEYS, + SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, + SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD, + FORCE_DEFAULT, +) +from deepdiff.deephash import DeepHash, combine_hashes_lists +from deepdiff.base import Base +from deepdiff.lfucache import LFUCache, DummyLFU +from deepdiff.colored_view import ColoredView + +if TYPE_CHECKING: + from pytz.tzinfo import BaseTzInfo + logger = logging.getLogger(__name__) -GETATTR = 'GETATTR' -GET = 'GET' 
+MAX_PASSES_REACHED_MSG = ( + 'DeepDiff has reached the max number of passes of {}. ' + 'You can possibly get more accurate results by increasing the max_passes parameter.') +MAX_DIFFS_REACHED_MSG = ( + 'DeepDiff has reached the max number of diffs of {}. ' + 'You can possibly get more accurate results by increasing the max_diffs parameter.') -class _WildcardToken: - """Sentinel object for wildcard path tokens. - Using a dedicated class (instead of plain strings) ensures that a literal - dict key ``'*'`` (parsed from ``root['*']``) is never confused with the - wildcard ``*`` (parsed from ``root[*]``). - """ - def __init__(self, symbol): - self._symbol = symbol +notpresent_indexed = IndexedHash(indexes=[0], item=notpresent) - def __repr__(self): - return self._symbol +doc = get_doc('diff_doc.rst') - def __eq__(self, other): - return isinstance(other, _WildcardToken) and self._symbol == other._symbol - def __hash__(self): - return hash(('_WildcardToken', self._symbol)) +PROGRESS_MSG = "DeepDiff {} seconds in progress. Pass #{}, Diff #{}" -SINGLE_WILDCARD = _WildcardToken('*') -MULTI_WILDCARD = _WildcardToken('**') +def _report_progress(_stats: Dict[str, Any], progress_logger: Callable[[str], None], duration: float) -> None: + """ + Report the progress every few seconds. + """ + progress_logger(PROGRESS_MSG.format(duration, _stats[PASSES_COUNT], _stats[DIFF_COUNT])) + + +DISTANCE_CACHE_HIT_COUNT = 'DISTANCE CACHE HIT COUNT' +DIFF_COUNT = 'DIFF COUNT' +PASSES_COUNT = 'PASSES COUNT' +MAX_PASS_LIMIT_REACHED = 'MAX PASS LIMIT REACHED' +MAX_DIFF_LIMIT_REACHED = 'MAX DIFF LIMIT REACHED' +DISTANCE_CACHE_ENABLED = 'DISTANCE CACHE ENABLED' +PREVIOUS_DIFF_COUNT = 'PREVIOUS DIFF COUNT' +PREVIOUS_DISTANCE_CACHE_HIT_COUNT = 'PREVIOUS DISTANCE CACHE HIT COUNT' +CANT_FIND_NUMPY_MSG = 'Unable to import numpy. This must be a bug in DeepDiff since a numpy array is detected.' +INVALID_VIEW_MSG = "view parameter must be one of 'text', 'tree', 'delta', 'colored' or 'colored_compact'. 
But {} was passed." +CUTOFF_RANGE_ERROR_MSG = 'cutoff_distance_for_pairs needs to be a positive float max 1.' +VERBOSE_LEVEL_RANGE_MSG = 'verbose_level should be 0, 1, or 2.' +PURGE_LEVEL_RANGE_MSG = 'cache_purge_level should be 0, 1, or 2.' +_ENABLE_CACHE_EVERY_X_DIFF = '_ENABLE_CACHE_EVERY_X_DIFF' + +model_fields_set = frozenset(["model_fields_set"]) + + +# What is the threshold to consider 2 items to be pairs. Only used when ignore_order = True. +CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT = 0.3 + +# What is the threshold to calculate pairs of items between 2 iterables. +# For example 2 iterables that have nothing in common, do not need their pairs to be calculated. +CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT = 0.7 + +DEEPHASH_PARAM_KEYS = ( + 'exclude_types', + 'exclude_paths', + 'exclude_glob_paths', + 'include_paths', + 'include_glob_paths', + 'exclude_regex_paths', + 'hasher', + 'significant_digits', + 'number_format_notation', + 'ignore_string_type_changes', + 'ignore_numeric_type_changes', + 'ignore_uuid_types', + 'use_enum_value', + 'ignore_type_in_groups', + 'ignore_type_subclasses', + 'ignore_string_case', + 'exclude_obj_callback', + 'ignore_private_variables', + 'encodings', + 'ignore_encoding_errors', + 'default_timezone', + 'custom_operators', +) -class PathExtractionError(ValueError): - pass +class DeepDiffProtocol(Protocol): + t1: Any + t2: Any + cutoff_distance_for_pairs: float + use_log_scale: bool + log_scale_similarity_threshold: float + view: str + math_epsilon: Optional[float] + + + +class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, DeepDiffProtocol, Base): + __doc__ = doc + + CACHE_AUTO_ADJUST_THRESHOLD = 0.25 + + def __init__(self, + t1: Any, + t2: Any, + _original_type: Optional[Any]=None, + cache_purge_level: int=1, + cache_size: int=0, + cache_tuning_sample_size: int=0, + custom_operators: Optional[List[Any]] =None, + cutoff_distance_for_pairs: float=CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT, + cutoff_intersection_for_pairs: 
float=CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT, + default_timezone:Union[datetime.timezone, "BaseTzInfo"]=datetime.timezone.utc, + encodings: Optional[List[str]]=None, + exclude_obj_callback: Optional[Callable]=None, + exclude_obj_callback_strict: Optional[Callable]=None, + exclude_paths: Union[str, List[str], Set[str], FrozenSet[str], None]=None, + exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None]=None, + exclude_types: Optional[List[type]]=None, + get_deep_distance: bool=False, + group_by: Union[str, Tuple[str, str], Callable, None]=None, + group_by_sort_key: Union[str, Callable, None]=None, + hasher: Optional[Callable]=None, + hashes: Optional[Dict[Any, Any]]=None, + ignore_encoding_errors: bool=False, + ignore_nan_inequality: bool=False, + ignore_numeric_type_changes: bool=False, + ignore_order: bool=False, + ignore_order_func: Optional[Callable]=None, + ignore_private_variables: bool=True, + ignore_string_case: bool=False, + ignore_string_type_changes: bool=False, + ignore_type_in_groups: Optional[List[Tuple[Any, ...]]]=None, + ignore_type_subclasses: bool=False, + ignore_uuid_types: bool=False, + include_obj_callback: Optional[Callable]=None, + include_obj_callback_strict: Optional[Callable]=None, + include_paths: Union[str, List[str], None]=None, + iterable_compare_func: Optional[Callable]=None, + log_frequency_in_sec: int=0, + log_scale_similarity_threshold: float=0.1, + log_stacktrace: bool=False, + math_epsilon: Optional[float]=None, + max_diffs: Optional[int]=None, + max_passes: int=10000000, + number_format_notation: Literal["f", "e"]="f", + number_to_string_func: Optional[Callable]=None, + progress_logger: Callable[[str], None]=logger.info, + report_repetition: bool=False, + significant_digits: Optional[int]=None, + threshold_to_diff_deeper: float = 0.33, + truncate_datetime: Optional[str]=None, + use_enum_value: bool=False, + use_log_scale: bool=False, + verbose_level: int=1, + view: str=TEXT_VIEW, + 
zip_ordered_iterables: bool=False, + _parameters: Optional[Dict[str, Any]]=None, + _shared_parameters: Optional[Dict[str, Any]]=None, + **kwargs): + super().__init__() + # Defaults for glob path attributes — needed for non-root instances + # that may receive _parameters without these keys. + self.exclude_glob_paths = None + self.include_glob_paths = None + if kwargs: + raise ValueError(( + "The following parameter(s) are not valid: %s\n" + "The valid parameters are ignore_order, report_repetition, significant_digits, " + "number_format_notation, exclude_paths, include_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, " + "ignore_string_type_changes, ignore_numeric_type_changes, ignore_type_subclasses, ignore_uuid_types, truncate_datetime, " + "ignore_private_variables, ignore_nan_inequality, number_to_string_func, verbose_level, " + "view, hasher, hashes, max_passes, max_diffs, zip_ordered_iterables, " + "cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, " + "cache_tuning_sample_size, get_deep_distance, group_by, group_by_sort_key, cache_purge_level, log_stacktrace," + "math_epsilon, iterable_compare_func, use_enum_value, _original_type, threshold_to_diff_deeper, default_timezone " + "ignore_order_func, custom_operators, encodings, ignore_encoding_errors, use_log_scale, log_scale_similarity_threshold " + "_parameters and _shared_parameters.") % ', '.join(kwargs.keys())) + + if _parameters: + self.__dict__.update(_parameters) + else: + self.custom_operators = custom_operators or [] + self.ignore_order = ignore_order + + self.ignore_order_func = ignore_order_func + + ignore_type_in_groups = ignore_type_in_groups or [] + if numbers == ignore_type_in_groups or numbers in ignore_type_in_groups: + ignore_numeric_type_changes = True + self.ignore_numeric_type_changes = ignore_numeric_type_changes + if strings == ignore_type_in_groups or strings in ignore_type_in_groups: + ignore_string_type_changes = True + # 
Handle ignore_uuid_types - check if uuid+str group is already in ignore_type_in_groups + uuid_str_group = (uuids[0], str) + if uuid_str_group == ignore_type_in_groups or uuid_str_group in ignore_type_in_groups: + ignore_uuid_types = True + self.ignore_uuid_types = ignore_uuid_types + self.use_enum_value = use_enum_value + self.log_scale_similarity_threshold = log_scale_similarity_threshold + self.use_log_scale = use_log_scale + self.default_timezone = default_timezone + self.log_stacktrace = log_stacktrace + self.threshold_to_diff_deeper = threshold_to_diff_deeper + self.ignore_string_type_changes = ignore_string_type_changes + self.ignore_type_in_groups = self.get_ignore_types_in_groups( + ignore_type_in_groups=ignore_type_in_groups, + ignore_string_type_changes=ignore_string_type_changes, + ignore_numeric_type_changes=ignore_numeric_type_changes, + ignore_type_subclasses=ignore_type_subclasses, + ignore_uuid_types=ignore_uuid_types) + self.report_repetition = report_repetition + _exclude_set = convert_item_or_items_into_set_else_none(exclude_paths) + _exclude_exact, self.exclude_glob_paths = separate_wildcard_and_exact_paths(_exclude_set) + self.exclude_paths = add_root_to_paths(_exclude_exact) + _include_set = convert_item_or_items_into_set_else_none(include_paths) + _include_exact, self.include_glob_paths = separate_wildcard_and_exact_paths(_include_set) + self.include_paths = add_root_to_paths(_include_exact) + self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) + self.exclude_types = set(exclude_types) if exclude_types else None + self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance + self.ignore_type_subclasses = ignore_type_subclasses + self.type_check_func = type_in_type_group if ignore_type_subclasses else type_is_subclass_of_type_group + self.ignore_string_case = ignore_string_case + self.exclude_obj_callback = exclude_obj_callback + 
self.exclude_obj_callback_strict = exclude_obj_callback_strict + self.include_obj_callback = include_obj_callback + self.include_obj_callback_strict = include_obj_callback_strict + self.number_to_string = number_to_string_func or number_to_string + self.iterable_compare_func = iterable_compare_func + self.zip_ordered_iterables = zip_ordered_iterables + self.ignore_private_variables = ignore_private_variables + self.ignore_nan_inequality = ignore_nan_inequality + self.hasher = hasher + self.cache_tuning_sample_size = cache_tuning_sample_size + self.group_by = group_by + if callable(group_by_sort_key): + self.group_by_sort_key = group_by_sort_key + elif group_by_sort_key: + def _group_by_sort_key(x): + return x[group_by_sort_key] + self.group_by_sort_key = _group_by_sort_key + else: + self.group_by_sort_key = None + self.encodings = encodings + self.ignore_encoding_errors = ignore_encoding_errors + + self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) + self.math_epsilon = math_epsilon + if self.math_epsilon is not None and self.ignore_order: + logger.warning("math_epsilon in conjunction with ignore_order=True is only used for flat object comparisons. Custom math_epsilon will not have an effect when comparing nested objects.") + self.truncate_datetime = get_truncate_datetime(truncate_datetime) + self.number_format_notation = number_format_notation + if verbose_level in {0, 1, 2}: + self.verbose_level = verbose_level + else: + raise ValueError(VERBOSE_LEVEL_RANGE_MSG) + if cache_purge_level not in {0, 1, 2}: + raise ValueError(PURGE_LEVEL_RANGE_MSG) + self.view = view + # Setting up the cache for dynamic programming. One dictionary per instance of root of DeepDiff running. 
+ self.max_passes = max_passes + self.max_diffs = max_diffs + self.cutoff_distance_for_pairs = float(cutoff_distance_for_pairs) + self.cutoff_intersection_for_pairs = float(cutoff_intersection_for_pairs) + if self.cutoff_distance_for_pairs < 0 or self.cutoff_distance_for_pairs > 1: + raise ValueError(CUTOFF_RANGE_ERROR_MSG) + # _Parameters are the clean _parameters to initialize DeepDiff with so we avoid all the above + # cleaning functionalities when running DeepDiff recursively. + # However DeepHash has its own set of _parameters that are slightly different than DeepDIff. + # DeepDiff _parameters are transformed to DeepHash _parameters via _get_deephash_params method. + self.progress_logger = progress_logger + self.cache_size = cache_size + _parameters = self.__dict__.copy() + _parameters['group_by'] = None # overwriting since these parameters will be passed on to other passes. + if log_stacktrace: + self.log_err = logger.exception + else: + self.log_err = logger.error + + # Non-Root + if _shared_parameters: + self.is_root = False + self._shared_parameters = _shared_parameters + self.__dict__.update(_shared_parameters) + # We are in some pass other than root + progress_timer = None + # Root + else: + self.is_root = True + # Caching the DeepDiff results for dynamic programming + self._distance_cache = LFUCache(cache_size) if cache_size else DummyLFU() + self._stats = { + PASSES_COUNT: 0, + DIFF_COUNT: 0, + DISTANCE_CACHE_HIT_COUNT: 0, + PREVIOUS_DIFF_COUNT: 0, + PREVIOUS_DISTANCE_CACHE_HIT_COUNT: 0, + MAX_PASS_LIMIT_REACHED: False, + MAX_DIFF_LIMIT_REACHED: False, + DISTANCE_CACHE_ENABLED: bool(cache_size), + } + self.hashes = dict_() if hashes is None else hashes + self._numpy_paths = dict_() # if _numpy_paths is None else _numpy_paths + self.group_by_keys = set() # Track keys that originated from group_by operations + self._shared_parameters = { + 'hashes': self.hashes, + '_stats': self._stats, + '_distance_cache': self._distance_cache, + 'group_by_keys': 
self.group_by_keys, + '_numpy_paths': self._numpy_paths, + _ENABLE_CACHE_EVERY_X_DIFF: self.cache_tuning_sample_size * 10, + } + if log_frequency_in_sec: + # Creating a progress log reporter that runs in a separate thread every log_frequency_in_sec seconds. + progress_timer = RepeatedTimer(log_frequency_in_sec, _report_progress, self._stats, progress_logger) + else: + progress_timer = None + self._parameters = _parameters + self.deephash_parameters = self._get_deephash_params() + self.tree = TreeResult() + self._iterable_opcodes = {} + if group_by and self.is_root: + try: + original_t1 = t1 + t1 = self._group_iterable_to_dict(t1, group_by, item_name='t1') + except (KeyError, ValueError): + pass + else: + try: + t2 = self._group_iterable_to_dict(t2, group_by, item_name='t2') + except (KeyError, ValueError): + t1 = original_t1 + + self.t1 = t1 + self.t2 = t2 + + try: + root = DiffLevel(t1, t2, verbose_level=self.verbose_level) + # _original_type is only used to pass the original type of the data. Currently only used for numpy arrays. + # The reason is that we convert the numpy array to python list and then later for distance calculations + # we convert only the the last dimension of it into numpy arrays. 
+ self._diff(root, parents_ids=frozenset({id(t1)}), _original_type=_original_type) + + if get_deep_distance and view in {TEXT_VIEW, TREE_VIEW}: + self.tree['deep_distance'] = self._get_rough_distance() + + self.tree.remove_empty_keys() + view_results = self._get_view_results(self.view) + if isinstance(view_results, ColoredView): + self.update(view_results.tree) + self._colored_view = view_results + else: + self.update(view_results) + finally: + if self.is_root: + if cache_purge_level: + del self._distance_cache + del self.hashes + del self._shared_parameters + del self._parameters + for key in (PREVIOUS_DIFF_COUNT, PREVIOUS_DISTANCE_CACHE_HIT_COUNT, + DISTANCE_CACHE_ENABLED): + del self._stats[key] + if progress_timer: + duration = progress_timer.stop() + self._stats['DURATION SEC'] = duration + logger.info('stats {}'.format(self.get_stats())) + if cache_purge_level == 2: + self.__dict__.clear() + + def _get_deephash_params(self): + result = {key: self._parameters.get(key) for key in DEEPHASH_PARAM_KEYS} + result['ignore_repetition'] = not self.report_repetition + result['number_to_string_func'] = self.number_to_string + return result + + def _report_result(self, report_type, change_level, local_tree=None): + """ + Add a detected change to the reference-style result dictionary. + report_type will be added to level. + (We'll create the text-style report from there later.) + :param report_type: A well defined string key describing the type of change. + Examples: "set_item_added", "values_changed" + :param change_level: A DiffLevel object describing the objects in question in their + before-change and after-change object structure. 
+ + :local_tree: None + """ -class RootCanNotBeModified(ValueError): - pass + if not self._skip_this(change_level): + if self._skip_report_for_include_glob(change_level): + return + change_level.report_type = report_type + tree = self.tree if local_tree is None else local_tree + tree[report_type].add(change_level) + def custom_report_result(self, report_type, level, extra_info=None): + """ + Add a detected change to the reference-style result dictionary. + report_type will be added to level. + (We'll create the text-style report from there later.) + + :param report_type: A well defined string key describing the type of change. + Examples: "set_item_added", "values_changed" + :param parent: A DiffLevel object describing the objects in question in their + before-change and after-change object structure. + :param extra_info: A dict that describe this result + :rtype: None + """ -def _add_to_elements(elements, elem, inside): - # Ignore private items - if not elem: - return - if not elem.startswith('__'): - # Handle wildcard tokens (* and **) as-is. - # Unquoted root[*] arrives as bare '*' which matches the string check. - # Quoted root['*'] arrives as "'*'" which does NOT match, so it falls - # through to literal_eval and becomes the plain string '*' — which is - # distinct from the _WildcardToken sentinel and thus treated as a - # literal dict key. - if elem in ('*', '**'): - action = GETATTR if inside == '.' else GET - elements.append((SINGLE_WILDCARD if elem == '*' else MULTI_WILDCARD, action)) - return - remove_quotes = False - if '𝆺𝅥𝅯' in elem or '\\' in elem: - remove_quotes = True - else: - try: - elem = literal_eval(elem) - remove_quotes = False - except (ValueError, SyntaxError): - remove_quotes = True - if remove_quotes and elem[0] == elem[-1] and elem[0] in {'"', "'"}: - elem = elem[1: -1] - action = GETATTR if inside == '.' 
else GET - elements.append((elem, action)) + if not self._skip_this(level): + if self._skip_report_for_include_glob(level): + return + level.report_type = report_type + level.additional[CUSTOM_FIELD] = extra_info + self.tree[report_type].add(level) + + def _skip_report_for_include_glob(self, level): + """When include_glob_paths is set, _skip_this allows ancestors through for traversal. + This method does a stricter check at report time: only report if the path + actually matches a glob pattern or is a descendant of a matching path, + or if it already matches an exact include_path.""" + if not self.include_glob_paths: + return False + level_path = level.path() + # If exact include_paths already matched, don't skip + if self.include_paths: + if level_path in self.include_paths: + return False + for prefix in self.include_paths: + if prefix in level_path: + return False + # Check glob patterns: match or descendant + for gp in self.include_glob_paths: + if gp.match_or_is_descendant(level_path): + return False + return True + @staticmethod + def _dict_from_slots(object: Any) -> Dict[str, Any]: + def unmangle(attribute: str) -> str: + if attribute.startswith('__') and attribute != '__weakref__': + return '_{type}{attribute}'.format( + type=type(object).__name__, + attribute=attribute + ) + return attribute + + all_slots = [] + + if isinstance(object, type): + mro = object.__mro__ # pragma: no cover. I have not been able to write a test for this case. But we still check for it. 
+ else: + mro = object.__class__.__mro__ -DEFAULT_FIRST_ELEMENT = ('root', GETATTR) + for type_in_mro in mro: + slots = getattr(type_in_mro, '__slots__', None) + if slots: + if isinstance(slots, strings): + all_slots.append(slots) + else: + all_slots.extend(slots) + + return {i: getattr(object, key) for i in all_slots if hasattr(object, key := unmangle(i))} + + def _diff_enum(self, level: Any, parents_ids: FrozenSet[int]=frozenset(), local_tree: Optional[Any]=None) -> None: + t1 = detailed__dict__(level.t1, include_keys=ENUM_INCLUDE_KEYS) + t2 = detailed__dict__(level.t2, include_keys=ENUM_INCLUDE_KEYS) + + self._diff_dict( + level, + parents_ids, + print_as_attribute=True, + override=True, + override_t1=t1, + override_t2=t2, + local_tree=local_tree, + ) + + def _diff_obj(self, level: Any, parents_ids: FrozenSet[int]=frozenset(), is_namedtuple: bool=False, local_tree: Optional[Any]=None, is_pydantic_object: bool=False) -> None: + """Difference of 2 objects""" + processing_error = False + t1: Optional[Dict[str, Any]] = None + t2: Optional[Dict[str, Any]] = None + try: + if is_namedtuple: + t1 = level.t1._asdict() + t2 = level.t2._asdict() + elif is_pydantic_object: + t1 = detailed__dict__(level.t1, ignore_private_variables=self.ignore_private_variables, ignore_keys=model_fields_set) + t2 = detailed__dict__(level.t2, ignore_private_variables=self.ignore_private_variables, ignore_keys=model_fields_set) + elif all('__dict__' in dir(t) for t in level): + t1 = detailed__dict__(level.t1, ignore_private_variables=self.ignore_private_variables) + t2 = detailed__dict__(level.t2, ignore_private_variables=self.ignore_private_variables) + elif all('__slots__' in dir(t) for t in level): + t1 = self._dict_from_slots(level.t1) + t2 = self._dict_from_slots(level.t2) + else: + t1 = {k: v for k, v in getmembers(level.t1) if not callable(v)} + t2 = {k: v for k, v in getmembers(level.t2) if not callable(v)} + except AttributeError: + processing_error = True + if processing_error is 
True or t1 is None or t2 is None: + self._report_result('unprocessed', level, local_tree=local_tree) + return + self._diff_dict( + level, + parents_ids, + print_as_attribute=True, + override=True, + override_t1=t1, + override_t2=t2, + local_tree=local_tree, + ) + + def _skip_this(self, level: Any) -> bool: + """ + Check whether this comparison should be skipped because one of the objects to compare meets exclusion criteria. + :rtype: bool + """ + level_path = level.path() + skip = False + if self.exclude_paths and level_path in self.exclude_paths: + skip = True + elif self.exclude_glob_paths and any(gp.match(level_path) for gp in self.exclude_glob_paths): + skip = True + if not skip and (self.include_paths or self.include_glob_paths) and level_path != 'root': + skip = True + if self.include_paths: + if level_path in self.include_paths: + skip = False + else: + for prefix in self.include_paths: + if prefix in level_path or level_path in prefix: + skip = False + break + if skip and self.include_glob_paths: + for gp in self.include_glob_paths: + if gp.match_or_is_ancestor(level_path): + skip = False + break + elif self.exclude_regex_paths and any( + [exclude_regex_path.search(level_path) for exclude_regex_path in self.exclude_regex_paths]): + skip = True + elif self.exclude_types_tuple and \ + (isinstance(level.t1, self.exclude_types_tuple) or isinstance(level.t2, self.exclude_types_tuple)): + skip = True + elif self.exclude_obj_callback and \ + (self.exclude_obj_callback(level.t1, level_path) or self.exclude_obj_callback(level.t2, level_path)): + skip = True + elif self.exclude_obj_callback_strict and \ + (self.exclude_obj_callback_strict(level.t1, level_path) and + self.exclude_obj_callback_strict(level.t2, level_path)): + skip = True + elif self.include_obj_callback and level_path != 'root': + skip = True + if (self.include_obj_callback(level.t1, level_path) or self.include_obj_callback(level.t2, level_path)): + skip = False + elif self.include_obj_callback_strict 
and level_path != 'root': + skip = True + if (self.include_obj_callback_strict(level.t1, level_path) and + self.include_obj_callback_strict(level.t2, level_path)): + skip = False + + return skip + + def _skip_this_key(self, level: Any, key: Any) -> bool: + # if include_paths is not set, than treet every path as included + if self.include_paths is None and self.include_glob_paths is None: + return False + key_path = "{}['{}']".format(level.path(), key) + if self.include_paths: + if key_path in self.include_paths: + return False + if level.path() in self.include_paths: + # matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']"] + return False + for prefix in self.include_paths: + if key_path in prefix: + # matches as long the prefix is longer than this object key + # eg.: level+key root['foo']['bar'] matches prefix root['foo']['bar'] from include paths + # level+key root['foo'] matches prefix root['foo']['bar'] from include_paths + # level+key root['foo']['bar'] DOES NOT match root['foo'] from include_paths This needs to be handled afterwards + return False + # check if a higher level is included as a whole (=without any sublevels specified) + # matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']"] + # but does not match, if it is level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']['fruits']"] + up = level.up + while up is not None: + if up.path() in self.include_paths: + return False + up = up.up + if self.include_glob_paths: + for gp in self.include_glob_paths: + if gp.match_or_is_ancestor(key_path): + return False + return True -@lru_cache(maxsize=1024 * 128) -def _path_to_elements(path, root_element=DEFAULT_FIRST_ELEMENT): - """ - Given a path, it extracts the elements that form the path and their relevant most likely retrieval action. + def _get_clean_to_keys_mapping(self, keys: Any, level: Any) -> Dict[Any, Any]: + """ + Get a dictionary of cleaned value of keys to the keys themselves. 
+ This is mainly used to transform the keys when the type changes of keys should be ignored. - >>> from deepdiff import _path_to_elements - >>> path = "root[4.3].b['a3']" - >>> _path_to_elements(path, root_element=None) - [(4.3, 'GET'), ('b', 'GETATTR'), ('a3', 'GET')] - """ - if isinstance(path, (tuple, list)): - return path - elements = [] - if root_element: - elements.append(root_element) - elem = '' - inside = False - prev_char = None - path = path[4:] # removing "root from the beginning" - brackets = [] - inside_quotes = False - quote_used = '' - for char in path: - if prev_char == '𝆺𝅥𝅯': - elem += char - elif char in {'"', "'"}: - elem += char - # If we are inside and the quote is not what we expected, the quote is not closing - if not(inside_quotes and quote_used != char): - inside_quotes = not inside_quotes - if inside_quotes: - quote_used = char + TODO: needs also some key conversion for groups of types other than the built-in strings and numbers. + """ + result = dict_() + for key in keys: + if self.ignore_string_type_changes and isinstance(key, bytes): + clean_key = key.decode('utf-8') + elif self.ignore_string_type_changes and isinstance(key, memoryview): + clean_key = key.tobytes().decode('utf-8') + elif self.use_enum_value and isinstance(key, Enum): + clean_key = key.value + elif isinstance(key, numbers): + # Skip type prefixing for keys that originated from group_by operations + if hasattr(self, 'group_by_keys') and key in self.group_by_keys: + if self.significant_digits is None: + clean_key = key + else: + clean_key = self.number_to_string(key, significant_digits=self.significant_digits, + number_format_notation=self.number_format_notation) # type: ignore # type: ignore else: - _add_to_elements(elements, elem, inside) - elem = '' - quote_used = '' - elif inside_quotes: - elem += char - elif char == '[': - if inside == '.': - _add_to_elements(elements, elem, inside) - inside = '[' - elem = '' - # we are already inside. 
The bracket is a part of the word. - elif inside == '[': - elem += char + type_ = "number" if self.ignore_numeric_type_changes else key.__class__.__name__ + if self.significant_digits is None: + clean_key = key + else: + clean_key = self.number_to_string(key, significant_digits=self.significant_digits, + number_format_notation=self.number_format_notation) # type: ignore # type: ignore + clean_key = KEY_TO_VAL_STR.format(type_, clean_key) else: - inside = '[' - brackets.append('[') - elem = '' - elif char == '.': - if inside == '[': - elem += char - elif inside == '.': - _add_to_elements(elements, elem, inside) - elem = '' + clean_key = key + if self.ignore_string_case and isinstance(clean_key, str): + clean_key = clean_key.lower() + if clean_key in result: + logger.warning(('{} and {} in {} become the same key when ignore_numeric_type_changes' + 'or ignore_numeric_type_changes are set to be true.').format( + key, result[clean_key], level.path())) else: - inside = '.' - elem = '' - elif char == ']': - if brackets and brackets[-1] == '[': - brackets.pop() - if brackets: - elem += char + result[clean_key] = key + return result + + def _diff_dict( + self, + level: Any, + parents_ids: FrozenSet[int]=frozenset([]), + print_as_attribute: bool=False, + override: bool=False, + override_t1: Optional[Any]=None, + override_t2: Optional[Any]=None, + local_tree: Optional[Any]=None, + ) -> None: + """Difference of 2 dictionaries""" + if override: + # for special stuff like custom objects and named tuples we receive preprocessed t1 and t2 + # but must not spoil the chain (=level) with it + t1 = override_t1 + t2 = override_t2 + else: + t1 = level.t1 + t2 = level.t2 + + if print_as_attribute: + item_added_key = "attribute_added" + item_removed_key = "attribute_removed" + rel_class = AttributeRelationship + else: + item_added_key = "dictionary_item_added" + item_removed_key = "dictionary_item_removed" + rel_class = DictRelationship + + if self.ignore_private_variables: + t1_keys = 
SetOrdered([key for key in t1 if not(isinstance(key, str) and key.startswith('__')) and not self._skip_this_key(level, key)]) + t2_keys = SetOrdered([key for key in t2 if not(isinstance(key, str) and key.startswith('__')) and not self._skip_this_key(level, key)]) + else: + t1_keys = SetOrdered([key for key in t1 if not self._skip_this_key(level, key)]) + t2_keys = SetOrdered([key for key in t2 if not self._skip_this_key(level, key)]) + if self.ignore_string_type_changes or self.ignore_numeric_type_changes or self.ignore_string_case: + t1_clean_to_keys = self._get_clean_to_keys_mapping(keys=t1_keys, level=level) + t2_clean_to_keys = self._get_clean_to_keys_mapping(keys=t2_keys, level=level) + t1_keys = SetOrdered(t1_clean_to_keys.keys()) + t2_keys = SetOrdered(t2_clean_to_keys.keys()) + else: + t1_clean_to_keys = t2_clean_to_keys = None + + t_keys_intersect = t2_keys & t1_keys + t_keys_added = t2_keys - t_keys_intersect + t_keys_removed = t1_keys - t_keys_intersect + + if self.threshold_to_diff_deeper: + if self.exclude_paths or self.exclude_glob_paths: + t_keys_union = {f"{level.path()}[{repr(key)}]" for key in (t2_keys | t1_keys)} + if self.exclude_paths: + t_keys_union -= self.exclude_paths + if self.exclude_glob_paths: + t_keys_union = {k for k in t_keys_union + if not any(gp.match(k) for gp in self.exclude_glob_paths)} + t_keys_union_len = len(t_keys_union) else: - _add_to_elements(elements, elem, inside) - elem = '' - inside = False + t_keys_union_len = len(t2_keys | t1_keys) + if t_keys_union_len > 1 and len(t_keys_intersect) / t_keys_union_len < self.threshold_to_diff_deeper: + self._report_result('values_changed', level, local_tree=local_tree) + return + + for key in t_keys_added: + if self._count_diff() is StopIteration: + return + + key = t2_clean_to_keys[key] if t2_clean_to_keys else key + change_level = level.branch_deeper( + notpresent, + t2[key], + child_relationship_class=rel_class, + child_relationship_param=key, + child_relationship_param2=key, + ) 
+ self._report_result(item_added_key, change_level, local_tree=local_tree) + + for key in t_keys_removed: + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition. + + key = t1_clean_to_keys[key] if t1_clean_to_keys else key + change_level = level.branch_deeper( + t1[key], + notpresent, + child_relationship_class=rel_class, + child_relationship_param=key, + child_relationship_param2=key, + ) + self._report_result(item_removed_key, change_level, local_tree=local_tree) + + for key in t_keys_intersect: # key present in both dicts - need to compare values + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition. + + key1 = t1_clean_to_keys[key] if t1_clean_to_keys else key + key2 = t2_clean_to_keys[key] if t2_clean_to_keys else key + item_id = id(t1[key1]) + if parents_ids and item_id in parents_ids: + continue + parents_ids_added = add_to_frozen_set(parents_ids, item_id) + + # Go one level deeper + next_level = level.branch_deeper( + t1[key1], + t2[key2], + child_relationship_class=rel_class, + child_relationship_param=key, + child_relationship_param2=key, + ) + self._diff(next_level, parents_ids_added, local_tree=local_tree) + + def _diff_set(self, level: Any, local_tree: Optional[Any]=None) -> None: + """Difference of sets""" + t1_hashtable = self._create_hashtable(level, 't1') + t2_hashtable = self._create_hashtable(level, 't2') + + t1_hashes = set(t1_hashtable.keys()) + t2_hashes = set(t2_hashtable.keys()) + + hashes_added = t2_hashes - t1_hashes + hashes_removed = t1_hashes - t2_hashes + + items_added = [t2_hashtable[i].item for i in hashes_added] + items_removed = [t1_hashtable[i].item for i in hashes_removed] + + for item in items_added: + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition. 
+ + change_level = level.branch_deeper( + notpresent, item, child_relationship_class=SetRelationship) + self._report_result('set_item_added', change_level, local_tree=local_tree) + + for item in items_removed: + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition. + + change_level = level.branch_deeper( + item, notpresent, child_relationship_class=SetRelationship) + self._report_result('set_item_removed', change_level, local_tree=local_tree) + + @staticmethod + def _iterables_subscriptable(t1: Any, t2: Any) -> bool: + try: + if getattr(t1, '__getitem__') and getattr(t2, '__getitem__'): + return True + else: # pragma: no cover + return False # should never happen + except AttributeError: + return False + + def _diff_iterable(self, level: Any, parents_ids: FrozenSet[int]=frozenset(), _original_type: Optional[type]=None, local_tree: Optional[Any]=None) -> None: + """Difference of iterables""" + if (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order: + self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type, local_tree=local_tree) else: - elem += char - prev_char = char - if elem: - _add_to_elements(elements, elem, inside) - return tuple(elements) - - -def _get_nested_obj(obj, elements, next_element=None): - for (elem, action) in elements: - check_elem(elem) - if action == GET: - obj = obj[elem] - elif action == GETATTR: - obj = getattr(obj, elem) - return obj - - -def _guess_type(elements, elem, index, next_element): - # If we are not at the last elements - if index < len(elements) - 1: - # We assume it is a nested dictionary not a nested list - return {} - if isinstance(next_element, int): - return [] - return {} - - -def check_elem(elem): - if isinstance(elem, str) and elem.startswith("__") and elem.endswith("__"): - raise ValueError("traversing dunder attributes is not allowed") - - -def _get_nested_obj_and_force(obj, elements, next_element=None): - 
prev_elem = None - prev_action = None - prev_obj = obj - for index, (elem, action) in enumerate(elements): - check_elem(elem) - _prev_obj = obj - if action == GET: - try: - obj = obj[elem] - prev_obj = _prev_obj - except KeyError: - obj[elem] = _guess_type(elements, elem, index, next_element) - obj = obj[elem] - prev_obj = _prev_obj - except IndexError: - if isinstance(obj, list) and isinstance(elem, int) and elem >= len(obj): - obj.extend([None] * (elem - len(obj))) - obj.append(_guess_type(elements, elem, index), next_element) - obj = obj[-1] - prev_obj = _prev_obj - elif isinstance(obj, list) and len(obj) == 0 and prev_elem: - # We ran into an empty list that should have been a dictionary - # We need to change it from an empty list to a dictionary - obj = {elem: _guess_type(elements, elem, index, next_element)} - if prev_action == GET: - prev_obj[prev_elem] = obj + self._diff_iterable_in_order(level, parents_ids, _original_type=_original_type, local_tree=local_tree) + + def _compare_in_order( + self, level, + t1_from_index=None, t1_to_index=None, + t2_from_index=None, t2_to_index=None + ) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]: + """ + Default compare if `iterable_compare_func` is not provided. + This will compare in sequence order. + """ + if t1_from_index is None: + return [((i, i), (x, y)) for i, (x, y) in enumerate( + zip_longest( + level.t1, level.t2, fillvalue=ListItemRemovedOrAdded))] + else: + t1_chunk = level.t1[t1_from_index:t1_to_index] + t2_chunk = level.t2[t2_from_index:t2_to_index] + return [((i + t1_from_index, i + t2_from_index), (x, y)) for i, (x, y) in enumerate( + zip_longest( + t1_chunk, t2_chunk, fillvalue=ListItemRemovedOrAdded))] + + def _get_matching_pairs( + self, level, + t1_from_index=None, t1_to_index=None, + t2_from_index=None, t2_to_index=None + ) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]: + """ + Given a level get matching pairs. 
This returns list of two tuples in the form: + [ + (t1 index, t2 index), (t1 item, t2 item) + ] + + This will compare using the passed in `iterable_compare_func` if available. + Default it to compare in order + """ + + if self.iterable_compare_func is None: + # Match in order if there is no compare function provided + return self._compare_in_order( + level, + t1_from_index=t1_from_index, t1_to_index=t1_to_index, + t2_from_index=t2_from_index, t2_to_index=t2_to_index, + ) + try: + matches = [] + y_matched = set() + y_index_matched = set() + for i, x in enumerate(level.t1): + x_found = False + for j, y in enumerate(level.t2): + + if(j in y_index_matched): + # This ensures a one-to-one relationship of matches from t1 to t2. + # If y this index in t2 has already been matched to another x + # it cannot have another match, so just continue. + continue + + if(self.iterable_compare_func(x, y, level)): + deep_hash = DeepHash(y, + hashes=self.hashes, + apply_hash=True, + **self.deephash_parameters, + ) + y_index_matched.add(j) + y_matched.add(deep_hash[y]) + matches.append(((i, j), (x, y))) + x_found = True + break + + if(not x_found): + matches.append(((i, -1), (x, ListItemRemovedOrAdded))) + for j, y in enumerate(level.t2): + + deep_hash = DeepHash(y, + hashes=self.hashes, + apply_hash=True, + **self.deephash_parameters, + ) + if(deep_hash[y] not in y_matched): + matches.append(((-1, j), (ListItemRemovedOrAdded, y))) + return matches + except CannotCompare: + return self._compare_in_order( + level, + t1_from_index=t1_from_index, t1_to_index=t1_to_index, + t2_from_index=t2_from_index, t2_to_index=t2_to_index + ) + + def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type=None, local_tree=None): + # We're handling both subscriptable and non-subscriptable iterables. Which one is it? 
+ subscriptable = self._iterables_subscriptable(level.t1, level.t2) + if subscriptable: + child_relationship_class = SubscriptableIterableRelationship + else: + child_relationship_class = NonSubscriptableIterableRelationship + + if ( + not self.zip_ordered_iterables + and isinstance(level.t1, Sequence) + and isinstance(level.t2, Sequence) + and self._all_values_basic_hashable(level.t1) + and self._all_values_basic_hashable(level.t2) + and self.iterable_compare_func is None + ): + local_tree_pass = TreeResult() + opcodes_with_values = self._diff_ordered_iterable_by_difflib( + level, + parents_ids=parents_ids, + _original_type=_original_type, + child_relationship_class=child_relationship_class, + local_tree=local_tree_pass, + ) + # Sometimes DeepDiff's old iterable diff does a better job than DeepDiff + if len(local_tree_pass) > 1: + local_tree_pass2 = TreeResult() + self._diff_by_forming_pairs_and_comparing_one_by_one( + level, + parents_ids=parents_ids, + _original_type=_original_type, + child_relationship_class=child_relationship_class, + local_tree=local_tree_pass2, + ) + if len(local_tree_pass) >= len(local_tree_pass2): + local_tree_pass = local_tree_pass2 + else: + self._iterable_opcodes[level.path(force=FORCE_DEFAULT)] = opcodes_with_values + for report_type, levels in local_tree_pass.items(): + if levels: + self.tree[report_type] |= levels + else: + self._diff_by_forming_pairs_and_comparing_one_by_one( + level, + parents_ids=parents_ids, + _original_type=_original_type, + child_relationship_class=child_relationship_class, + local_tree=local_tree, + ) + + def _all_values_basic_hashable(self, iterable: Iterable[Any]) -> bool: + """ + Are all items basic hashable types? + Or there are custom types too? 
+ """ + + # We don't want to exhaust a generator + if isinstance(iterable, types.GeneratorType): + return False + for item in iterable: + if not isinstance(item, basic_types): + return False + return True + + def _diff_by_forming_pairs_and_comparing_one_by_one( + self, level, local_tree, parents_ids=frozenset(), + _original_type=None, child_relationship_class=None, + t1_from_index=None, t1_to_index=None, + t2_from_index=None, t2_to_index=None, + ): + for (i, j), (x, y) in self._get_matching_pairs( + level, + t1_from_index=t1_from_index, t1_to_index=t1_to_index, + t2_from_index=t2_from_index, t2_to_index=t2_to_index + ): + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition. + + reference_param1 = i + reference_param2 = j + if y is ListItemRemovedOrAdded: # item removed completely + change_level = level.branch_deeper( + x, + notpresent, + child_relationship_class=child_relationship_class, + child_relationship_param=reference_param1, + child_relationship_param2=reference_param2, + ) + self._report_result('iterable_item_removed', change_level, local_tree=local_tree) + + elif x is ListItemRemovedOrAdded: # new item added + change_level = level.branch_deeper( + notpresent, + y, + child_relationship_class=child_relationship_class, + child_relationship_param=reference_param1, + child_relationship_param2=reference_param2, + ) + self._report_result('iterable_item_added', change_level, local_tree=local_tree) + + else: # check if item value has changed + if (i != j and ((x == y) or self.iterable_compare_func)): + # Item moved + change_level = level.branch_deeper( + x, + y, + child_relationship_class=child_relationship_class, + child_relationship_param=reference_param1, + child_relationship_param2=reference_param2 + ) + self._report_result('iterable_item_moved', change_level, local_tree=local_tree) + + if self.iterable_compare_func: + # Mark additional context denoting that we have moved an item. 
+ # This will allow for correctly setting paths relative to t2 when using an iterable_compare_func + level.additional["moved"] = True + else: - setattr(prev_obj, prev_elem, obj) - obj = obj[elem] - elif action == GETATTR: - obj = getattr(obj, elem) - prev_obj = _prev_obj - prev_elem = elem - prev_action = action - return obj + continue + + item_id = id(x) + if parents_ids and item_id in parents_ids: + continue + parents_ids_added = add_to_frozen_set(parents_ids, item_id) + + # Go one level deeper + next_level = level.branch_deeper( + x, + y, + child_relationship_class=child_relationship_class, + child_relationship_param=reference_param1, + child_relationship_param2=reference_param2 + ) + self._diff(next_level, parents_ids_added, local_tree=local_tree) + + def _diff_ordered_iterable_by_difflib( + self, level, local_tree, parents_ids=frozenset(), _original_type=None, child_relationship_class=None, + ): + + seq = difflib.SequenceMatcher(isjunk=None, a=level.t1, b=level.t2, autojunk=False) + + opcodes = seq.get_opcodes() + opcodes_with_values = [] + + # TODO: this logic should be revisted so we detect reverse operations + # like when a replacement happens at index X and a reverse replacement happens at index Y + # in those cases we have a "iterable_item_moved" operation. 
+ for tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index in opcodes: + if tag == 'equal': + opcodes_with_values.append(Opcode( + tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, + )) + continue + # print('{:7} t1[{}:{}] --> t2[{}:{}] {!r:>8} --> {!r}'.format( + # tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, level.t1[t1_from_index:t1_to_index], level.t2[t2_from_index:t2_to_index])) + + opcodes_with_values.append(Opcode( + tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, + old_values = level.t1[t1_from_index: t1_to_index], + new_values = level.t2[t2_from_index: t2_to_index], + )) + + if tag == 'replace': + self._diff_by_forming_pairs_and_comparing_one_by_one( + level, local_tree=local_tree, parents_ids=parents_ids, + _original_type=_original_type, child_relationship_class=child_relationship_class, + t1_from_index=t1_from_index, t1_to_index=t1_to_index, + t2_from_index=t2_from_index, t2_to_index=t2_to_index, + ) + elif tag == 'delete': + for index, x in enumerate(level.t1[t1_from_index:t1_to_index]): + change_level = level.branch_deeper( + x, + notpresent, + child_relationship_class=child_relationship_class, + child_relationship_param=index + t1_from_index, + child_relationship_param2=index + t1_from_index, + ) + self._report_result('iterable_item_removed', change_level, local_tree=local_tree) + elif tag == 'insert': + for index, y in enumerate(level.t2[t2_from_index:t2_to_index]): + change_level = level.branch_deeper( + notpresent, + y, + child_relationship_class=child_relationship_class, + child_relationship_param=index + t2_from_index, + child_relationship_param2=index + t2_from_index, + ) + self._report_result('iterable_item_added', change_level, local_tree=local_tree) + return opcodes_with_values + + + def _diff_str(self, level, local_tree=None): + """Compare strings""" + if self.ignore_string_case: + level.t1 = level.t1.lower() + level.t2 = level.t2.lower() + + if type(level.t1) == type(level.t2) and level.t1 == 
level.t2: # NOQA + return + # do we add a diff for convenience? + do_diff = True + t1_str = level.t1 + t2_str = level.t2 -def extract(obj, path): - """ - Get the item from obj based on path. - - Example: - - >>> from deepdiff import extract - >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]} - >>> path = "root[1][0]['2']" - >>> extract(obj, path) - 'b' - - Note that you can use extract in conjunction with DeepDiff results - or even with the search and :ref:`deepsearch_label` modules. For example: - - >>> from deepdiff import grep - >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]} - >>> result = obj | grep(5) - >>> result - {'matched_values': ['root[2][1]']} - >>> result['matched_values'][0] - 'root[2][1]' - >>> path = result['matched_values'][0] - >>> extract(obj, path) - 5 - - - .. note:: - Note that even if DeepDiff tried gives you a path to an item in a set, - there is no such thing in Python and hence you will get an error trying - to extract that item from a set. - If you want to be able to get items from sets, use the SetOrdered module - to generate the sets. - In fact Deepdiff uses SetOrdered as a dependency. - - >>> from deepdiff import grep, extract - >>> obj = {"a", "b"} - >>> obj | grep("b") - Set item detected in the path.'set' objects do NOT support indexing. But DeepSearch will still report a path. 
- {'matched_values': SetOrdered(['root[0]'])} - >>> extract(obj, 'root[0]') - Traceback (most recent call last): - File "", line 1, in - File "deepdiff/deepdiff/path.py", line 126, in extract - return _get_nested_obj(obj, elements) - File "deepdiff/deepdiff/path.py", line 84, in _get_nested_obj - obj = obj[elem] - TypeError: 'set' object is not subscriptable - >>> from orderly_set import SetOrdered - >>> obj = SetOrdered(["a", "b"]) - >>> extract(obj, 'root[0]') - 'a' + if isinstance(level.t1, memoryview): + try: + t1_str = level.t1.tobytes().decode('ascii') + except UnicodeDecodeError: + do_diff = False + elif isinstance(level.t1, bytes_type): + try: + t1_str = level.t1.decode('ascii') + except UnicodeDecodeError: + do_diff = False - """ - elements = _path_to_elements(path, root_element=None) - return _get_nested_obj(obj, elements) + if isinstance(level.t2, memoryview): + try: + t2_str = level.t2.tobytes().decode('ascii') + except UnicodeDecodeError: + do_diff = False + elif isinstance(level.t2, bytes_type): + try: + t2_str = level.t2.decode('ascii') + except UnicodeDecodeError: + do_diff = False + if isinstance(level.t1, Enum): + t1_str = level.t1.value -def parse_path(path, root_element=DEFAULT_FIRST_ELEMENT, include_actions=False): - """ - Parse a path to a format that is machine readable + if isinstance(level.t2, Enum): + t2_str = level.t2.value - **Parameters** + if t1_str == t2_str: + return - path : A string - The path string such as "root[1][2]['age']" + if do_diff: + if '\n' in t1_str or isinstance(t2_str, str) and '\n' in t2_str: + diff = difflib.unified_diff( + t1_str.splitlines(), t2_str.splitlines(), lineterm='') + diff = list(diff) + if diff: + level.additional['diff'] = '\n'.join(diff) + + self._report_result('values_changed', level, local_tree=local_tree) + + def _diff_tuple(self, level, parents_ids, local_tree=None): + # Checking to see if it has _fields. Which probably means it is a named + # tuple. 
+ try: + level.t1._asdict + # It must be a normal tuple + except AttributeError: + self._diff_iterable(level, parents_ids, local_tree=local_tree) + # We assume it is a namedtuple then + else: + self._diff_obj(level, parents_ids, is_namedtuple=True, local_tree=local_tree) - root_element: string, default='root' - What the root is called in the path. + def _add_hash(self, hashes, item_hash, item, i): + if item_hash in hashes: + hashes[item_hash].indexes.append(i) + else: + hashes[item_hash] = IndexedHash(indexes=[i], item=item) - include_actions: boolean, default=False - If True, we return the action required to retrieve the item at each element of the path. + def _create_hashtable(self, level, t): + """Create hashtable of {item_hash: (indexes, item)}""" + obj = getattr(level, t) - **Examples** + local_hashes = dict_() + for (i, item) in enumerate(obj): + try: + parent = "{}[{}]".format(level.path(), i) + # Note: in the DeepDiff we only calculate the hash of items when we have to. + # So self.hashes does not include hashes of all objects in t1 and t2. + # It only includes the ones needed when comparing iterables. + # The self.hashes dictionary gets shared between different runs of DeepHash + # So that any object that is already calculated to have a hash is not re-calculated. + deep_hash = DeepHash( + item, + hashes=self.hashes, + parent=parent, + apply_hash=True, + **self.deephash_parameters, + ) + except UnicodeDecodeError as err: + err.reason = f"Can not produce a hash for {level.path()}: {err.reason}" + raise + except NotImplementedError: + raise + # except Exception as e: # pragma: no cover + # logger.error("Can not produce a hash for %s." + # "Not counting this object.\n %s" % + # (level.path(), e)) + else: + try: + item_hash = deep_hash[item] + except KeyError: + pass + else: + if item_hash is unprocessed: # pragma: no cover + self.log_err("Item %s was not processed while hashing " + "thus not counting this object." 
% + level.path()) + else: + self._add_hash(hashes=local_hashes, item_hash=item_hash, item=item, i=i) + + # Also we hash the iterables themselves too so that we can later create cache keys from those hashes. + DeepHash( + obj, + hashes=self.hashes, + parent=level.path(), + apply_hash=True, + **self.deephash_parameters, + ) + return local_hashes - >>> from deepdiff import parse_path - >>> parse_path("root[1][2]['age']") - [1, 2, 'age'] - >>> parse_path("root[1][2]['age']", include_actions=True) - [{'element': 1, 'action': 'GET'}, {'element': 2, 'action': 'GET'}, {'element': 'age', 'action': 'GET'}] - >>> - >>> parse_path("root['joe'].age") - ['joe', 'age'] - >>> parse_path("root['joe'].age", include_actions=True) - [{'element': 'joe', 'action': 'GET'}, {'element': 'age', 'action': 'GETATTR'}] + @staticmethod + @lru_cache(maxsize=2028) + def _get_distance_cache_key(added_hash, removed_hash): + key1, key2 = (added_hash, removed_hash) if added_hash > removed_hash else (removed_hash, added_hash) + if isinstance(key1, int): + # If the hash function produces integers we convert them to hex values. + # This was used when the default hash function was Murmur3 128bit which produces integers. 
+ key1 = hex(key1).encode('utf-8') + key2 = hex(key2).encode('utf-8') + elif isinstance(key1, str): + key1 = key1.encode('utf-8') + key2 = key2.encode('utf-8') + return key1 + b'--' + key2 + b'dc' + + def _get_rough_distance_of_hashed_objs( + self, added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type=None): + # We need the rough distance between the 2 objects to see if they qualify to be pairs or not + _distance = cache_key = None + if self._stats[DISTANCE_CACHE_ENABLED]: + cache_key = self._get_distance_cache_key(added_hash, removed_hash) + if cache_key in self._distance_cache: + self._stats[DISTANCE_CACHE_HIT_COUNT] += 1 + _distance = self._distance_cache.get(cache_key) + if _distance is None: + # We can only cache the rough distance and not the actual diff result for reuse. + # The reason is that we have modified the parameters explicitly so they are different and can't + # be used for diff reporting + diff = DeepDiff( + removed_hash_obj.item, added_hash_obj.item, + _parameters=self._parameters, + _shared_parameters=self._shared_parameters, + view=DELTA_VIEW, + _original_type=_original_type, + iterable_compare_func=self.iterable_compare_func, + ) + _distance = diff._get_rough_distance() + if cache_key and self._stats[DISTANCE_CACHE_ENABLED]: + self._distance_cache.set(cache_key, value=_distance) + return _distance + + def _get_most_in_common_pairs_in_iterables( + self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type): + """ + Get the closest pairs between items that are removed and items that are added. - """ + returns a dictionary of hashes that are closest to each other. + The dictionary is going to be symmetrical so any key will be a value too and otherwise. 
- result = _path_to_elements(path, root_element=root_element) - result = iter(result) - if root_element: - next(result) # We don't want the root item - if include_actions is False: - return [i[0] for i in result] - return [{'element': i[0], 'action': i[1]} for i in result] - - -def stringify_element(param, quote_str=None): - has_quote = "'" in param - has_double_quote = '"' in param - if has_quote and has_double_quote and not quote_str: - new_param = [] - for char in param: - if char in {'"', "'"}: - new_param.append('𝆺𝅥𝅯') - new_param.append(char) - result = '"' + ''.join(new_param) + '"' - elif has_quote: - result = f'"{param}"' - elif has_double_quote: - result = f"'{param}'" - else: - result = param if quote_str is None else quote_str.format(param) - return result - - -def stringify_path(path, root_element=DEFAULT_FIRST_ELEMENT, quote_str="'{}'"): - """ - Gets the path as an string. + Note that due to the current reporting structure in DeepDiff, we don't compare an item that + was added to an item that is in both t1 and t2. - For example [1, 2, 'age'] should become - root[1][2]['age'] - """ - if not path: - return root_element[0] - result = [root_element[0]] - has_actions = False - try: - if path[0][1] in {GET, GETATTR}: - has_actions = True - except (KeyError, IndexError, TypeError): - pass - if not has_actions: - path = [(i, GET) for i in path] - path[0] = (path[0][0], root_element[1]) # The action for the first element might be a GET or GETATTR. We update the action based on the root_element. 
- for element, action in path: - if isinstance(element, str) and action == GET: - element = stringify_element(element, quote_str) - if action == GET: - result.append(f"[{element}]") + For example + + [{1, 2}, {4, 5, 6}] + [{1, 2}, {1, 2, 3}] + + is only compared between {4, 5, 6} and {1, 2, 3} even though technically {1, 2, 3} is + just one item different than {1, 2} + + Perhaps in future we can have a report key that is item duplicated and modified instead of just added. + """ + cache_key = None + if self._stats[DISTANCE_CACHE_ENABLED]: + cache_key = combine_hashes_lists(items=[hashes_added, hashes_removed], prefix='pairs_cache') + if cache_key in self._distance_cache: + return self._distance_cache.get(cache_key).copy() + + # A dictionary of hashes to distances and each distance to an ordered set of hashes. + # It tells us about the distance of each object from other objects. + # And the objects with the same distances are grouped together in an ordered set. + # It also includes a "max" key that is just the value of the biggest current distance in the + # most_in_common_pairs dictionary. + def defaultdict_orderedset(): + return defaultdict(SetOrdered) + most_in_common_pairs = defaultdict(defaultdict_orderedset) + pairs = dict_() + + pre_calced_distances = None + if hashes_added and hashes_removed and np and len(hashes_added) > 1 and len(hashes_removed) > 1: + # pre-calculates distances ONLY for 1D arrays whether an _original_type + # was explicitly passed or a homogeneous array is detected. + # Numpy is needed for this optimization. 
+ pre_calced_distances = self._precalculate_numpy_arrays_distance( + hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type) + + if hashes_added and hashes_removed \ + and self.iterable_compare_func \ + and len(hashes_added) > 0 and len(hashes_removed) > 0: + pre_calced_distances = self._precalculate_distance_by_custom_compare_func( + hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type) + + for added_hash in hashes_added: + for removed_hash in hashes_removed: + added_hash_obj = t2_hashtable[added_hash] + removed_hash_obj = t1_hashtable[removed_hash] + + # Loop is detected + if id(removed_hash_obj.item) in parents_ids: + continue + + _distance = None + if pre_calced_distances: + _distance = pre_calced_distances.get("{}--{}".format(added_hash, removed_hash)) + if _distance is None: + _distance = self._get_rough_distance_of_hashed_objs( + added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type) + # Left for future debugging + # print(f'{Fore.RED}distance of {added_hash_obj.item} and {removed_hash_obj.item}: {_distance}{Style.RESET_ALL}') + # Discard potential pairs that are too far. 
+ if _distance >= self.cutoff_distance_for_pairs: + continue + pairs_of_item = most_in_common_pairs[added_hash] + pairs_of_item[_distance].add(removed_hash) + used_to_hashes = set() + + distances_to_from_hashes = defaultdict(SetOrdered) + for from_hash, distances_to_to_hashes in most_in_common_pairs.items(): + # del distances_to_to_hashes['max'] + for dist in distances_to_to_hashes: + distances_to_from_hashes[dist].add(from_hash) + + for dist in sorted(distances_to_from_hashes.keys()): + from_hashes = distances_to_from_hashes[dist] + while from_hashes: + from_hash = from_hashes.pop() + if from_hash not in used_to_hashes: + to_hashes = most_in_common_pairs[from_hash][dist] + while to_hashes: + to_hash = to_hashes.pop() + if to_hash not in used_to_hashes: + used_to_hashes.add(from_hash) + used_to_hashes.add(to_hash) + # Left for future debugging: + # print(f'{bcolors.FAIL}Adding {t2_hashtable[from_hash].item} as a pairs of {t1_hashtable[to_hash].item} with distance of {dist}{bcolors.ENDC}') + pairs[from_hash] = to_hash + + inverse_pairs = {v: k for k, v in pairs.items()} + pairs.update(inverse_pairs) + if cache_key and self._stats[DISTANCE_CACHE_ENABLED]: + self._distance_cache.set(cache_key, value=pairs) + return pairs.copy() + + def _diff_iterable_with_deephash(self, level, parents_ids, _original_type=None, local_tree=None): + """Diff of hashable or unhashable iterables. Only used when ignoring the order.""" + + full_t1_hashtable = self._create_hashtable(level, 't1') + full_t2_hashtable = self._create_hashtable(level, 't2') + t1_hashes = SetOrdered(full_t1_hashtable.keys()) + t2_hashes = SetOrdered(full_t2_hashtable.keys()) + hashes_added = t2_hashes - t1_hashes + hashes_removed = t1_hashes - t2_hashes + + # Deciding whether to calculate pairs or not. 
+ if (len(hashes_added) + len(hashes_removed)) / (len(full_t1_hashtable) + len(full_t2_hashtable) + 1) > self.cutoff_intersection_for_pairs: + get_pairs = False else: - result.append(f".{element}") - return ''.join(result) + get_pairs = True + # reduce the size of hashtables + if self.report_repetition: + t1_hashtable = full_t1_hashtable + t2_hashtable = full_t2_hashtable + else: + t1_hashtable = {k: v for k, v in full_t1_hashtable.items() if k in hashes_removed} + t2_hashtable = {k: v for k, v in full_t2_hashtable.items() if k in hashes_added} + if self._stats[PASSES_COUNT] < self.max_passes and get_pairs: + self._stats[PASSES_COUNT] += 1 + pairs = self._get_most_in_common_pairs_in_iterables( + hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type) + elif get_pairs: + if not self._stats[MAX_PASS_LIMIT_REACHED]: + self._stats[MAX_PASS_LIMIT_REACHED] = True + logger.warning(MAX_PASSES_REACHED_MSG.format(self.max_passes)) + pairs = dict_() + else: + pairs = dict_() + + def get_other_pair(hash_value, in_t1=True): + """ + Gets the other paired indexed hash item to the hash_value in the pairs dictionary + in_t1: are we looking for the other pair in t1 or t2? + """ + if in_t1: + hashtable = t1_hashtable + the_other_hashes = hashes_removed + else: + hashtable = t2_hashtable + the_other_hashes = hashes_added + other = pairs.pop(hash_value, notpresent) + if other is notpresent: + other = notpresent_indexed + else: + # The pairs are symmetrical. + # removing the other direction of pair + # so it does not get used. + del pairs[other] + the_other_hashes.remove(other) + other = hashtable[other] + return other + + if self.report_repetition: + for hash_value in hashes_added: + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition (when report_repetition=False). 
+ other = get_other_pair(hash_value) + item_id = id(other.item) + indexes = t2_hashtable[hash_value].indexes if other.item is notpresent else other.indexes + # When we report repetitions, we want the child_relationship_param2 only if there is no repetition. + # Because when there is a repetition, we report it in a different way (iterable_items_added_at_indexes for example). + # When there is no repetition, we want child_relationship_param2 so that we report the "new_path" correctly. + if len(t2_hashtable[hash_value].indexes) == 1: + index2 = t2_hashtable[hash_value].indexes[0] + else: + index2 = None + for i in indexes: + change_level = level.branch_deeper( + other.item, + t2_hashtable[hash_value].item, + child_relationship_class=SubscriptableIterableRelationship, + child_relationship_param=i, + child_relationship_param2=index2, + ) + if other.item is notpresent: + self._report_result('iterable_item_added', change_level, local_tree=local_tree) + else: + parents_ids_added = add_to_frozen_set(parents_ids, item_id) + self._diff(change_level, parents_ids_added, local_tree=local_tree) + for hash_value in hashes_removed: + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition. + other = get_other_pair(hash_value, in_t1=False) + item_id = id(other.item) + # When we report repetitions, we want the child_relationship_param2 only if there is no repetition. + # Because when there is a repetition, we report it in a different way (iterable_items_added_at_indexes for example). + # When there is no repetition, we want child_relationship_param2 so that we report the "new_path" correctly. 
+ if other.item is notpresent or len(other.indexes) > 1: + index2 = None + else: + index2 = other.indexes[0] + for i in t1_hashtable[hash_value].indexes: + change_level = level.branch_deeper( + t1_hashtable[hash_value].item, + other.item, + child_relationship_class=SubscriptableIterableRelationship, + child_relationship_param=i, + child_relationship_param2=index2, + ) + if other.item is notpresent: + self._report_result('iterable_item_removed', change_level, local_tree=local_tree) + else: + # I was not able to make a test case for the following 2 lines since the cases end up + # getting resolved above in the hashes_added calcs. However I am leaving these 2 lines + # in case things change in future. + parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover. + self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover. + + items_intersect = t2_hashes.intersection(t1_hashes) + + for hash_value in items_intersect: + t1_indexes = t1_hashtable[hash_value].indexes + t2_indexes = t2_hashtable[hash_value].indexes + t1_indexes_len = len(t1_indexes) + t2_indexes_len = len(t2_indexes) + if t1_indexes_len != t2_indexes_len: # this is a repetition change! + # create "change" entry, keep current level untouched to handle further changes + repetition_change_level = level.branch_deeper( + t1_hashtable[hash_value].item, + t2_hashtable[hash_value].item, # nb: those are equal! + child_relationship_class=SubscriptableIterableRelationship, + child_relationship_param=t1_hashtable[hash_value] + .indexes[0]) + repetition_change_level.additional['repetition'] = RemapDict( + old_repeat=t1_indexes_len, + new_repeat=t2_indexes_len, + old_indexes=t1_indexes, + new_indexes=t2_indexes) + self._report_result('repetition_change', + repetition_change_level, local_tree=local_tree) -# Regex to detect wildcard segments in a raw path string. -# Matches [*], [**], .*, .** that are NOT inside quotes.
-_WILDCARD_RE = re.compile( - r'\[\*\*?\]' # [*] or [**] - r'|\.\*\*?(?=[.\[]|$)' # .* or .** followed by . or [ or end of string -) + else: + for hash_value in hashes_added: + if self._count_diff() is StopIteration: + return + other = get_other_pair(hash_value) + item_id = id(other.item) + index = t2_hashtable[hash_value].indexes[0] if other.item is notpresent else other.indexes[0] + index2 = t2_hashtable[hash_value].indexes[0] + change_level = level.branch_deeper( + other.item, + t2_hashtable[hash_value].item, + child_relationship_class=SubscriptableIterableRelationship, + child_relationship_param=index, + child_relationship_param2=index2, + ) + if other.item is notpresent: + self._report_result('iterable_item_added', change_level, local_tree=local_tree) + else: + parents_ids_added = add_to_frozen_set(parents_ids, item_id) + self._diff(change_level, parents_ids_added, local_tree=local_tree) + + for hash_value in hashes_removed: + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition. + other = get_other_pair(hash_value, in_t1=False) + item_id = id(other.item) + index = t1_hashtable[hash_value].indexes[0] + index2 = t1_hashtable[hash_value].indexes[0] if other.item is notpresent else other.indexes[0] + change_level = level.branch_deeper( + t1_hashtable[hash_value].item, + other.item, + child_relationship_class=SubscriptableIterableRelationship, + child_relationship_param=index, + child_relationship_param2=index2, + ) + if other.item is notpresent: + self._report_result('iterable_item_removed', change_level, local_tree=local_tree) + else: + # Just like the case when report_repetition = True, these lines never run currently. + # However they will stay here in case things change in future. + parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover. + self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover. 
+ + def _diff_booleans(self, level, local_tree=None): + if level.t1 != level.t2: + self._report_result('values_changed', level, local_tree=local_tree) + + def _diff_numbers(self, level, local_tree=None, report_type_change=True): + """Diff Numbers""" + if report_type_change: + t1_type = "number" if self.ignore_numeric_type_changes else level.t1.__class__.__name__ + t2_type = "number" if self.ignore_numeric_type_changes else level.t2.__class__.__name__ + else: + t1_type = t2_type = '' + + if self.use_log_scale: + if not logarithmic_similarity(level.t1, level.t2, threshold=self.log_scale_similarity_threshold): + self._report_result('values_changed', level, local_tree=local_tree) + elif self.math_epsilon is not None: + if not is_close(level.t1, level.t2, abs_tol=self.math_epsilon): + self._report_result('values_changed', level, local_tree=local_tree) + elif self.significant_digits is None: + if level.t1 != level.t2: + self._report_result('values_changed', level, local_tree=local_tree) + else: + # Bernhard10: I use string formatting for comparison, to be consistent with usecases where + # data is read from files that were previously written from python and + # to be consistent with on-screen representation of numbers. 
+ # Other options would be abs(t1-t2)<10**-self.significant_digits + # or math.is_close (python3.5+) + # Note that abs(3.25-3.251) = 0.0009999999999998899 < 0.001 + # Note also that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 + # For Decimals, format seems to round 2.5 to 2 and 3.5 to 4 (to closest even number) + t1_s = self.number_to_string(level.t1, + significant_digits=self.significant_digits, + number_format_notation=self.number_format_notation) # type: ignore + t2_s = self.number_to_string(level.t2, + significant_digits=self.significant_digits, + number_format_notation=self.number_format_notation) # type: ignore + + t1_s = KEY_TO_VAL_STR.format(t1_type, t1_s) + t2_s = KEY_TO_VAL_STR.format(t2_type, t2_s) + if t1_s != t2_s: + self._report_result('values_changed', level, local_tree=local_tree) + + def _diff_ipranges(self, level, local_tree=None): + """Diff IP ranges""" + if str(level.t1) != str(level.t2): + self._report_result('values_changed', level, local_tree=local_tree) + + def _diff_datetime(self, level, local_tree=None): + """Diff DateTimes""" + level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone) + level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone) + + if level.t1 != level.t2: + self._report_result('values_changed', level, local_tree=local_tree) + + def _diff_time(self, level, local_tree=None): + """Diff DateTimes""" + if self.truncate_datetime: + level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone) + level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone) + + if level.t1 != level.t2: + self._report_result('values_changed', level, local_tree=local_tree) + + def _diff_uuids(self, level, local_tree=None): + """Diff UUIDs""" + if level.t1.int != level.t2.int: + self._report_result('values_changed', level, local_tree=local_tree) + + 
def _diff_numpy_array(self, level, parents_ids=frozenset(), local_tree=None): + """Diff numpy arrays""" + if level.path() not in self._numpy_paths: + self._numpy_paths[level.path()] = get_type(level.t2).__name__ + if np is None: + # This line should never be run. If it is ever called means the type check detected a numpy array + # which means numpy module needs to be available. So np can't be None. + raise ImportError(CANT_FIND_NUMPY_MSG) # pragma: no cover + + if (self.ignore_order_func and not self.ignore_order_func(level)) or not self.ignore_order: + # fast checks + if self.significant_digits is None: + if np.array_equal(level.t1, level.t2, equal_nan=self.ignore_nan_inequality): + return # all good + else: + try: + np.testing.assert_almost_equal(level.t1, level.t2, decimal=self.significant_digits) + except TypeError: + np.array_equal(level.t1, level.t2, equal_nan=self.ignore_nan_inequality) + except AssertionError: + pass # do detailed checking below + else: + return # all good + + # compare array meta-data + _original_type = level.t1.dtype + if level.t1.shape != level.t2.shape: + # arrays are converted to python lists so that certain features of DeepDiff can apply on them easier. + # They will be converted back to Numpy at their final dimension. + level.t1 = level.t1.tolist() + level.t2 = level.t2.tolist() + self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree) + else: + # metadata same -- the difference is in the content + shape = level.t1.shape + dimensions = len(shape) + if dimensions == 1: + self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree) + elif (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order: + # arrays are converted to python lists so that certain features of DeepDiff can apply on them easier. + # They will be converted back to Numpy at their final dimension. 
+ level.t1 = level.t1.tolist() + level.t2 = level.t2.tolist() + self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type, local_tree=local_tree) + else: + for (t1_path, t1_row), (t2_path, t2_row) in zip( + get_numpy_ndarray_rows(level.t1, shape), + get_numpy_ndarray_rows(level.t2, shape)): + + new_level = level.branch_deeper( + t1_row, + t2_row, + child_relationship_class=NumpyArrayRelationship, + child_relationship_param=t1_path, + child_relationship_param2=t2_path, + ) + + self._diff_iterable_in_order(new_level, parents_ids, _original_type=_original_type, local_tree=local_tree) + + def _diff_types(self, level, local_tree=None): + """Diff types""" + level.report_type = 'type_changes' + self._report_result('type_changes', level, local_tree=local_tree) + + def _count_diff(self): + if (self.max_diffs is not None and self._stats[DIFF_COUNT] > self.max_diffs): + if not self._stats[MAX_DIFF_LIMIT_REACHED]: + self._stats[MAX_DIFF_LIMIT_REACHED] = True + logger.warning(MAX_DIFFS_REACHED_MSG.format(self.max_diffs)) + return StopIteration + self._stats[DIFF_COUNT] += 1 + if self.cache_size and self.cache_tuning_sample_size: + self._auto_tune_cache() + + def _auto_tune_cache(self): + take_sample = (self._stats[DIFF_COUNT] % self.cache_tuning_sample_size == 0) + if self.cache_tuning_sample_size: + if self._stats[DISTANCE_CACHE_ENABLED]: + if take_sample: + self._auto_off_cache() + # Turn on the cache once in a while + elif self._stats[DIFF_COUNT] % self._shared_parameters[_ENABLE_CACHE_EVERY_X_DIFF] == 0: + self.progress_logger('Re-enabling the distance and level caches.') + # decreasing the sampling frequency + self._shared_parameters[_ENABLE_CACHE_EVERY_X_DIFF] *= 10 + self._stats[DISTANCE_CACHE_ENABLED] = True + if take_sample: + for key in (PREVIOUS_DIFF_COUNT, PREVIOUS_DISTANCE_CACHE_HIT_COUNT): + self._stats[key] = self._stats[key[9:]] + + def _auto_off_cache(self): + """ + Auto adjust the cache based on the usage + """ + if 
self._stats[DISTANCE_CACHE_ENABLED]: + angle = (self._stats[DISTANCE_CACHE_HIT_COUNT] - self._stats['PREVIOUS {}'.format(DISTANCE_CACHE_HIT_COUNT)]) / (self._stats[DIFF_COUNT] - self._stats[PREVIOUS_DIFF_COUNT]) + if angle < self.CACHE_AUTO_ADJUST_THRESHOLD: + self._stats[DISTANCE_CACHE_ENABLED] = False + self.progress_logger('Due to minimal cache hits, {} is disabled.'.format('distance cache')) + def _use_custom_operator(self, level): + """ + For each level we check all custom operators. + If any one of them was a match for the level, we run the diff of the operator. + If the operator returned True, the operator must have decided these objects should not + be compared anymore. It might have already reported their results. + In that case the report will appear in the final results of this diff. + Otherwise basically the 2 objects in the level are being omitted from the results. + """ -def path_has_wildcard(path): - """Check if a path string contains wildcard segments (* or **).""" - return bool(_WILDCARD_RE.search(path)) + for operator in self.custom_operators: + if operator.match(level): + prevent_default = operator.give_up_diffing(level=level, diff_instance=self) + if prevent_default: + return True + return False -class GlobPathMatcher: - """Pre-compiled matcher for a single glob pattern path. + def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree=None): + """ + The main diff method - Parses a pattern like ``root['users'][*]['password']`` into segments - and matches concrete path strings against it. + **parameters** - ``*`` matches exactly one path segment (any key, index, or attribute). - ``**`` matches zero or more path segments. - """ + level: the tree level or tree node + parents_ids: the ids of all the parent objects in the tree from the current node. 
+ _original_type: If the objects had an original type that was different than what currently exists in the level.t1 and t2 + """ + if self._count_diff() is StopIteration: + return - def __init__(self, pattern_path): - self.original_pattern = pattern_path - elements = _path_to_elements(pattern_path, root_element=('root', GETATTR)) - # Skip the root element for matching - self._pattern = elements[1:] - - def match(self, path_string): - """Return True if *path_string* matches this pattern exactly.""" - elements = _path_to_elements(path_string, root_element=('root', GETATTR)) - target = elements[1:] - return self._match_segments(self._pattern, target, 0, 0) - - def match_or_is_ancestor(self, path_string): - """Return True if *path_string* matches OR is an ancestor of a potential match. - - This is needed for ``include_paths``: we must not prune a path that - could lead to a matching descendant. - """ - elements = _path_to_elements(path_string, root_element=('root', GETATTR)) - target = elements[1:] - return (self._match_segments(self._pattern, target, 0, 0) or - self._could_match_descendant(self._pattern, target, 0, 0)) - - def match_or_is_descendant(self, path_string): - """Return True if *path_string* matches OR is a descendant of a matching path. - - This checks whether the pattern matches any prefix of *path_string*, - meaning the path is "inside" a matched subtree. 
- """ - elements = _path_to_elements(path_string, root_element=('root', GETATTR)) - target = elements[1:] - # Check exact match first - if self._match_segments(self._pattern, target, 0, 0): - return True - # Check if any prefix of target matches (making this path a descendant) - for length in range(len(target)): - if self._match_segments(self._pattern, target[:length], 0, 0): - return True - return False + if self._use_custom_operator(level): + return - @staticmethod - def _match_segments(pattern, target, pi, ti): - """Recursive segment matcher with backtracking for ``**``.""" - while pi < len(pattern) and ti < len(target): - pat_elem = pattern[pi][0] - - if pat_elem == MULTI_WILDCARD: - # ** matches zero or more segments — try every suffix - for k in range(ti, len(target) + 1): - if GlobPathMatcher._match_segments(pattern, target, pi + 1, k): - return True - return False - elif pat_elem == SINGLE_WILDCARD: - # * matches exactly one segment regardless of value/action - pi += 1 - ti += 1 + if level.t1 is level.t2: + return + + if self._skip_this(level): + return + + report_type_change = True + if get_type(level.t1) != get_type(level.t2): + for type_group in self.ignore_type_in_groups: + if self.type_check_func(level.t1, type_group) and self.type_check_func(level.t2, type_group): + report_type_change = False + break + if self.use_enum_value and isinstance(level.t1, Enum): + level.t1 = level.t1.value + report_type_change = False + if self.use_enum_value and isinstance(level.t2, Enum): + level.t2 = level.t2.value + report_type_change = False + if report_type_change: + self._diff_types(level, local_tree=local_tree) + return + # This is an edge case where t1=None or t2=None and None is in the ignore type group. 
+ if level.t1 is None or level.t2 is None: + self._report_result('values_changed', level, local_tree=local_tree) + return + + if self.ignore_nan_inequality and isinstance(level.t1, (float, np_floating)) and str(level.t1) == str(level.t2) == 'nan': + return + + if isinstance(level.t1, booleans): + self._diff_booleans(level, local_tree=local_tree) + + elif isinstance(level.t1, strings): + # Special handling when comparing string with UUID and ignore_uuid_types is True + if self.ignore_uuid_types and isinstance(level.t2, uuids): + try: + # Convert string to UUID for comparison + t1_uuid = uuid.UUID(level.t1) + if t1_uuid.int != level.t2.int: + self._report_result('values_changed', level, local_tree=local_tree) + except (ValueError, AttributeError): + # If string is not a valid UUID, report as changed + self._report_result('values_changed', level, local_tree=local_tree) else: - tgt_elem = target[ti][0] - if pat_elem != tgt_elem: - return False - pi += 1 - ti += 1 + self._diff_str(level, local_tree=local_tree) + + elif isinstance(level.t1, datetime.datetime): + self._diff_datetime(level, local_tree=local_tree) + + elif isinstance(level.t1, ipranges): + self._diff_ipranges(level, local_tree=local_tree) + + elif isinstance(level.t1, (datetime.date, datetime.timedelta, datetime.time)): + self._diff_time(level, local_tree=local_tree) + + elif isinstance(level.t1, uuids): + # Special handling when comparing UUID with string and ignore_uuid_types is True + if self.ignore_uuid_types and isinstance(level.t2, str): + try: + # Convert string to UUID for comparison + t2_uuid = uuid.UUID(level.t2) + if level.t1.int != t2_uuid.int: + self._report_result('values_changed', level, local_tree=local_tree) + except (ValueError, AttributeError): + # If string is not a valid UUID, report as changed + self._report_result('values_changed', level, local_tree=local_tree) + else: + self._diff_uuids(level, local_tree=local_tree) - # Consume any trailing ** (they can match zero segments) - while 
pi < len(pattern) and pattern[pi][0] == MULTI_WILDCARD: - pi += 1 + elif isinstance(level.t1, numbers): + self._diff_numbers(level, local_tree=local_tree, report_type_change=report_type_change) - return pi == len(pattern) and ti == len(target) + elif isinstance(level.t1, Mapping): + self._diff_dict(level, parents_ids, local_tree=local_tree) - @staticmethod - def _could_match_descendant(pattern, target, pi, ti): - """Check if *target* is a prefix that could lead to a match deeper down.""" - if ti == len(target): - # Target exhausted — it's an ancestor if pattern has remaining segments - return pi < len(pattern) + elif isinstance(level.t1, tuple): + self._diff_tuple(level, parents_ids, local_tree=local_tree) - if pi >= len(pattern): - return False + elif isinstance(level.t1, (set, frozenset, SetOrdered)): + self._diff_set(level, local_tree=local_tree) + + elif isinstance(level.t1, np_ndarray): + self._diff_numpy_array(level, parents_ids, local_tree=local_tree) - pat_elem = pattern[pi][0] + elif isinstance(level.t1, PydanticBaseModel): + self._diff_obj(level, parents_ids, local_tree=local_tree, is_pydantic_object=True) + + elif isinstance(level.t1, Iterable): + self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree) + + elif isinstance(level.t1, Enum): + self._diff_enum(level, parents_ids, local_tree=local_tree) - if pat_elem == MULTI_WILDCARD: - return (GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti) or - GlobPathMatcher._could_match_descendant(pattern, target, pi, ti + 1)) - elif pat_elem == SINGLE_WILDCARD: - return GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti + 1) else: - tgt_elem = target[ti][0] - if pat_elem != tgt_elem: - return False - return GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti + 1) + self._diff_obj(level, parents_ids) + + def _get_view_results(self, view, verbose_level=None): + """ + Get the results based on the view + """ + result = self.tree + 
if not self.report_repetition: # and self.is_root: + result.mutual_add_removes_to_become_value_changes() + if view == TREE_VIEW: + pass + elif view == TEXT_VIEW: + effective_verbose_level = verbose_level if verbose_level is not None else self.verbose_level + result = TextResult(tree_results=self.tree, verbose_level=effective_verbose_level) + result.remove_empty_keys() + elif view == DELTA_VIEW: + result = self._to_delta_dict(report_repetition_required=False) + elif view == COLORED_VIEW: + result = ColoredView(t2=self.t2, tree_result=self.tree, compact=False) + elif view == COLORED_COMPACT_VIEW: + result = ColoredView(t2=self.t2, tree_result=self.tree, compact=True) + else: + raise ValueError(INVALID_VIEW_MSG.format(view)) + return result + @staticmethod + def _get_key_for_group_by(row, group_by, item_name): + """ + Get the key value to group a row by, using the specified group_by parameter. + + Example + >>> row = {'first': 'John', 'middle': 'Joe', 'last': 'Smith'} + >>> DeepDiff._get_key_for_group_by(row, 'first', 't1') + 'John' + >>> nested_row = {'id': 123, 'demographics': {'names': {'first': 'John', 'middle': 'Joe', 'last': 'Smith'}}} + >>> group_by = lambda x: x['demographics']['names']['first'] + >>> DeepDiff._get_key_for_group_by(nested_row, group_by, 't1') + 'John' + + Args: + row (dict): The dictionary (row) to extract the group by key from. + group_by (str or callable): The key name or function to call to get to the key value to group by. + item_name (str): The name of the item, used for error messages. + + Returns: + str: The key value to group by. + + Raises: + KeyError: If the specified key is not found in the row. + """ + try: + if callable(group_by): + return group_by(row) + return row.pop(group_by) + except KeyError: + logger.error("Unable to group {} by {}. 
The key is missing in {}".format(item_name, group_by, row)) + raise + + def _group_iterable_to_dict(self, item, group_by, item_name): + """ + Convert a list of dictionaries into a dictionary of dictionaries + where the key is the value of the group_by key in each dictionary. + """ + group_by_level2 = None + if isinstance(group_by, (list, tuple)): + group_by_level1 = group_by[0] + if len(group_by) > 1: + group_by_level2 = group_by[1] + else: + group_by_level1 = group_by + if isinstance(item, Iterable) and not isinstance(item, Mapping): + result = {} + item_copy = deepcopy(item) + for row in item_copy: + if isinstance(row, Mapping): + key1 = self._get_key_for_group_by(row, group_by_level1, item_name) + # Track keys created by group_by to avoid type prefixing later + if hasattr(self, 'group_by_keys'): + self.group_by_keys.add(key1) + if group_by_level2: + key2 = self._get_key_for_group_by(row, group_by_level2, item_name) + # Track level 2 keys as well + if hasattr(self, 'group_by_keys'): + self.group_by_keys.add(key2) + if key1 not in result: + result[key1] = {} + if self.group_by_sort_key: + if key2 not in result[key1]: + result[key1][key2] = [] + result_key1_key2 = result[key1][key2] + if row not in result_key1_key2: + result_key1_key2.append(row) + else: + result[key1][key2] = row + else: + if self.group_by_sort_key: + if key1 not in result: + result[key1] = [] + if row not in result[key1]: + result[key1].append(row) + else: + result[key1] = row + else: + msg = "Unable to group {} by {} since the item {} is not a dictionary.".format(item_name, group_by_level1, row) + logger.error(msg) + raise ValueError(msg) + if self.group_by_sort_key: + if group_by_level2: + for key1, row1 in result.items(): + for key2, row in row1.items(): + row.sort(key=self.group_by_sort_key) + else: + for key, row in result.items(): + row.sort(key=self.group_by_sort_key) + return result + msg = "Unable to group {} by {}".format(item_name, group_by) + logger.error(msg) + raise ValueError(msg) 
+ + def get_stats(self): + """ + Get some stats on internals of the DeepDiff run. + """ + return self._stats -def compile_glob_paths(paths): - """Compile a list of glob pattern strings into GlobPathMatcher objects. + @property + def affected_paths(self): + """ + Get the list of paths that were affected. + Whether a value was changed or they were added or removed. + + Example + >>> from pprint import pprint + >>> t1 = {1: 1, 2: 2, 3: [3], 4: 4} + >>> t2 = {1: 1, 2: 4, 3: [3, 4], 5: 5, 6: 6} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=4) + { 'dictionary_item_added': ['root[5]', 'root[6]'], + 'dictionary_item_removed': ['root[4]'], + 'iterable_item_added': {'root[3][1]': 4}, + 'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} + >>> sorted(ddiff.affected_paths) + ['root[2]', 'root[3][1]', 'root[4]', 'root[5]', 'root[6]'] + >>> sorted(ddiff.affected_root_keys) + [2, 3, 4, 5, 6] - Returns a list of ``GlobPathMatcher`` or ``None`` if *paths* is empty/None. - """ - if not paths: - return None - return [GlobPathMatcher(p) for p in paths] + """ + result = SetOrdered() + for key in REPORT_KEYS: + value = self.get(key) + if value: + if isinstance(value, SetOrdered): + result |= value + else: + result |= SetOrdered(value.keys()) + return result + + @property + def affected_root_keys(self): + """ + Get the list of root keys that were affected. + Whether a value was changed or they were added or removed. 
+ + Example + >>> from pprint import pprint + >>> t1 = {1: 1, 2: 2, 3: [3], 4: 4} + >>> t2 = {1: 1, 2: 4, 3: [3, 4], 5: 5, 6: 6} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=4) + { 'dictionary_item_added': ['root[5]', 'root[6]'], + 'dictionary_item_removed': ['root[4]'], + 'iterable_item_added': {'root[3][1]': 4}, + 'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} + >>> sorted(ddiff.affected_paths) + ['root[2]', 'root[3][1]', 'root[4]', 'root[5]', 'root[6]'] + >>> sorted(ddiff.affected_root_keys) + [2, 3, 4, 5, 6] + """ + result = SetOrdered() + for key in REPORT_KEYS: + value = self.tree.get(key) + if value: + if isinstance(value, SetOrdered): + values_list = value + else: + values_list = value.keys() + for item in values_list: + root_key = item.get_root_key() + if root_key is not notpresent: + result.add(root_key) + return result + + def __str__(self): + if hasattr(self, '_colored_view') and self.view in {COLORED_VIEW, COLORED_COMPACT_VIEW}: + return str(self._colored_view) + return super().__str__() + + +if __name__ == "__main__": # pragma: no cover + import doctest + doctest.testmod() diff --git a/deepdiff/docstrings/authors.rst b/deepdiff/docstrings/authors.rst new file mode 100644 index 00000000..bb2193e0 --- /dev/null +++ b/deepdiff/docstrings/authors.rst @@ -0,0 +1,176 @@ +:doc:`/index` + +Authors +======= + +Authors in order of the timeline of their contributions: + +- `Sep Dehpour (Seperman)`_ +- `Victor Hahn Castell`_ for the tree view and major contributions: +- `nfvs`_ for Travis-CI setup script. +- `brbsix`_ for initial Py3 porting. +- `WangFenjin`_ for unicode support. +- `timoilya`_ for comparing list of sets when ignoring order. +- `Bernhard10`_ for significant digits comparison. +- `b-jazz`_ for PEP257 cleanup, Standardize on full names, fixing line + endings. +- `finnhughes`_ for fixing **slots** +- `moloney`_ for Unicode vs. 
Bytes default +- `serv-inc`_ for adding help(deepdiff) +- `movermeyer`_ for updating docs +- `maxrothman`_ for search in inherited class attributes +- `maxrothman`_ for search for types/objects +- `MartyHub`_ for exclude regex paths +- `sreecodeslayer`_ for DeepSearch match_string +- Brian Maissy `brianmaissy`_ for weakref fix, enum tests +- Bartosz Borowik `boba-2`_ for Exclude types fix when ignoring order +- Brian Maissy `brianmaissy `__ for + fixing classes which inherit from classes with slots didn’t have all + of their slots compared +- Juan Soler `Soleronline`_ for adding ignore_type_number +- `mthaddon`_ for adding timedelta diffing support +- `Necrophagos`_ for Hashing of the number 1 vs. True +- `gaal-dev`_ for adding exclude_obj_callback +- Ivan Piskunov `van-ess0`_ for deprecation warning enhancement. +- Michał Karaś `MKaras93`_ for the pretty view +- Christian Kothe `chkothe`_ for the basic support for diffing numpy + arrays +- `Timothy`_ for truncate_datetime +- `d0b3rm4n`_ for bugfix to not apply format to non numbers. +- `MyrikLD`_ for Bug Fix NoneType in ignore type groups +- Stian Jensen `stianjensen`_ for improving ignoring of NoneType in + diff +- Florian Klien `flowolf`_ for adding math_epsilon +- Tim Klein `timjklein36`_ for retaining the order of multiple + dictionary items added via Delta. +- Wilhelm Schürmann\ `wbsch`_ for fixing the typo with yml files. +- `lyz-code`_ for adding support for regular expressions in DeepSearch + and strict_checking feature in DeepSearch. +- `dtorres-sf`_ for adding the option for custom compare function +- Tony Wang `Tony-Wang`_ for bugfix: verbose_level==0 should disable + values_changes. +- Sun Ao `eggachecat`_ for adding custom operators. +- Sun Ao `eggachecat`_ for adding ignore_order_func. +- `SlavaSkvortsov`_ for fixing unprocessed key error. +- Håvard Thom `havardthom`_ for adding UUID support. 
+- Dhanvantari Tilak `Dhanvantari`_ for Bug-Fix: + ``TypeError in _get_numbers_distance() when ignore_order = True``. +- Yael Mintz `yaelmi3`_ for detailed pretty print when verbose_level=2. +- Mikhail Khviyuzov `mskhviyu`_ for Exclude obj callback strict. +- `dtorres-sf`_ for the fix for diffing using iterable_compare_func with nested objects. +- `Enric Pou `__ for bug fix of ValueError + when using Decimal 0.x +- `Uwe Fladrich `__ for fixing bug when diff'ing non-sequence iterables +- `Michal Ozery-Flato `__ for + setting equal_nan=ignore_nan_inequality in the call for + np.array_equal +- `martin-kokos `__ for using Pytest’s + tmp_path fixture instead of /tmp/ +- Håvard Thom `havardthom `__ for adding + include_obj_callback and include_obj_callback_strict. +- `Noam Gottlieb `__ for fixing a corner + case where numpy’s ``np.float32`` nans are not ignored when using + ``ignore_nan_equality``. +- `maggelus `__ for the bugfix deephash + for paths. +- `maggelus `__ for the bugfix deephash + compiled regex. +- `martin-kokos `__ for fixing the + tests dependent on toml. +- `kor4ik `__ for the bugfix for + ``include_paths`` for nested dictionaries. +- `martin-kokos `__ for using tomli + and tomli-w for dealing with tomli files. +- `Alex Sauer-Budge `__ for the bugfix for + ``datetime.date``. +- `William Jamieson `__ for `NumPy 2.0 compatibility `__ +- `Leo Sin `__ for Supporting Python 3.12 in + the build process +- `sf-tcalhoun `__ for fixing + “Instantiating a Delta with a flat_dict_list unexpectedly mutates the + flat_dict_list” +- `dtorres-sf `__ for fixing iterable + moved items when iterable_compare_func is used. +- `Florian Finkernagel `__ for pandas + and polars support. +- Mathis Chenuet `artemisart `__ for + fixing slots classes comparison and PR review. 
+- Sherjeel Shabih `sherjeelshabih `__ + for fixing the issue where the key deep_distance is not returned when + both compared items are equal #510 +- `Juergen Skrotzky `__ for adding + empty ``py.typed`` +- `Mate Valko `__ for fixing the issue so we + lower only if clean_key is instance of str via #504 +- `jlaba `__ for fixing #493 include_paths, + when only certain keys are included via #499 +- `Doron Behar `__ for fixing DeepHash + for numpy booleans via #496 +- `Aaron D. Marasco `__ for adding + print() options which allows a user-defined string (or callback + function) to prefix every output when using the pretty() call. +- `David Hotham `__ for relaxing + orderly-set dependency via #486 +- `dtorres-sf `__ for the fix for moving + nested tables when using iterable_compare_func. +- `Jim Cipar `__ for the fix recursion depth + limit when hashing numpy.datetime64 +- `Enji Cooper `__ for converting legacy + setuptools use to pyproject.toml +- `Diogo Correia `__ for reporting security vulnerability in Delta and DeepDiff that could allow remote code execution. +- `am-periphery `__ for reporting CVE-2026-33155: denial-of-service via crafted pickle payloads triggering massive memory allocation. +- `echan5 `__ for adding callable ``group_by`` support. +- `yannrouillard `__ for fixing colored view display when all list items are removed. +- `tpvasconcelos `__ for fixing ``__slots__`` handling for objects with ``__getattr__``. +- `devin13cox `__ for always using t1 path for reporting. +- `vitalis89 `__ for fixing ``ignore_keys`` issue in ``detailed__dict__``. +- `ljames8 `__ for fixing logarithmic similarity type hint. +- `srini047 `__ for fixing README typo. +- `Nagato-Yuzuru `__ for colored view tests. +- `akshat62 `__ for adding Fraction numeric support. + + +.. _Sep Dehpour (Seperman): http://www.zepworks.com +.. _Victor Hahn Castell: http://hahncastell.de +.. _nfvs: https://github.com/nfvs +.. _brbsix: https://github.com/brbsix +.. 
_WangFenjin: https://github.com/WangFenjin +.. _timoilya: https://github.com/timoilya +.. _Bernhard10: https://github.com/Bernhard10 +.. _b-jazz: https://github.com/b-jazz +.. _finnhughes: https://github.com/finnhughes +.. _moloney: https://github.com/moloney +.. _serv-inc: https://github.com/serv-inc +.. _movermeyer: https://github.com/movermeyer +.. _maxrothman: https://github.com/maxrothman +.. _MartyHub: https://github.com/MartyHub +.. _sreecodeslayer: https://github.com/sreecodeslayer +.. _brianmaissy: https://github.com/ +.. _boba-2: https://github.com/boba-2 +.. _Soleronline: https://github.com/Soleronline +.. _mthaddon: https://github.com/mthaddon +.. _Necrophagos: https://github.com/Necrophagos +.. _gaal-dev: https://github.com/gaal-dev +.. _van-ess0: https://github.com/van-ess0 +.. _MKaras93: https://github.com/MKaras93 +.. _chkothe: https://github.com/chkothe +.. _Timothy: https://github.com/timson +.. _d0b3rm4n: https://github.com/d0b3rm4n +.. _MyrikLD: https://github.com/MyrikLD +.. _stianjensen: https://github.com/stianjensen +.. _flowolf: https://github.com/flowolf +.. _timjklein36: https://github.com/timjklein36 +.. _wbsch: https://github.com/wbsch +.. _lyz-code: https://github.com/lyz-code +.. _dtorres-sf: https://github.com/dtorres-sf +.. _Tony-Wang: https://github.com/Tony-Wang +.. _eggachecat: https://github.com/eggachecat +.. _SlavaSkvortsov: https://github.com/SlavaSkvortsov +.. _havardthom: https://github.com/havardthom +.. _Dhanvantari: https://github.com/Dhanvantari +.. _yaelmi3: https://github.com/yaelmi3 +.. _mskhviyu: https://github.com/mskhviyu + +Thank you for contributing to DeepDiff! 
+ +Back to :doc:`/index` diff --git a/deepdiff/docstrings/basics.rst b/deepdiff/docstrings/basics.rst new file mode 100644 index 00000000..6eba5507 --- /dev/null +++ b/deepdiff/docstrings/basics.rst @@ -0,0 +1,345 @@ +:doc:`/index` + +Basics +====== + + +Importing + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + +Same object returns empty + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = t1 + >>> print(DeepDiff(t1, t2)) + {} + +Type of an item has changed + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:"2", 3:3} + >>> pprint(DeepDiff(t1, t2), indent=2) + { 'type_changes': { 'root[2]': { 'new_type': <class 'str'>, + 'new_value': '2', + 'old_type': <class 'int'>, + 'old_value': 2}}} + +Value of an item has changed + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:4, 3:3} + >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) + {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} + +Item added and/or removed + >>> t1 = {1:1, 3:3, 4:4} + >>> t2 = {1:1, 3:3, 5:5, 6:6} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff) + {'dictionary_item_added': [root[5], root[6]], + 'dictionary_item_removed': [root[4]]} + +Set verbose level to 2 in order to see the added or removed items with their values + >>> t1 = {1:1, 3:3, 4:4} + >>> t2 = {1:1, 3:3, 5:5, 6:6} + >>> ddiff = DeepDiff(t1, t2, verbose_level=2) + >>> pprint(ddiff, indent=2) + { 'dictionary_item_added': {'root[5]': 5, 'root[6]': 6}, + 'dictionary_item_removed': {'root[4]': 4}} + +Set verbose level to 2 includes new_path when the path has changed for a report between t1 and t2: + >>> t1 = [1, 3] + >>> t2 = [3, 2] + >>> + >>> + >>> diff = DeepDiff(t1, t2, ignore_order=True, verbose_level=2) + >>> pprint(diff) + {'values_changed': {'root[0]': {'new_path': 'root[1]', + 'new_value': 2, + 'old_value': 1}}} + +String difference + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} + >>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'values_changed': { 
'root[2]': {'new_value': 4, 'old_value': 2}, + "root[4]['b']": { 'new_value': 'world!', + 'old_value': 'world'}}} + + +String difference 2 + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'values_changed': { "root[4]['b']": { 'diff': '--- \n' + '+++ \n' + '@@ -1,5 +1,4 @@\n' + '-world!\n' + '-Goodbye!\n' + '+world\n' + ' 1\n' + ' 2\n' + ' End', + 'new_value': 'world\n1\n2\nEnd', + 'old_value': 'world!\n' + 'Goodbye!\n' + '1\n' + '2\n' + 'End'}}} + + >>> + >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) + --- + +++ + @@ -1,5 +1,4 @@ + -world! + -Goodbye! + +world + 1 + 2 + End + +List difference + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} + +List that contains dictionary: + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'dictionary_item_removed': [root[4]['b'][2][2]], + 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} + +Sets: + >>> t1 = {1, 2, 8} + >>> t2 = {1, 2, 3, 5} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff) + {'set_item_added': [root[3], root[5]], 'set_item_removed': [root[8]]} + +Named Tuples: + >>> from collections import namedtuple + >>> Point = namedtuple('Point', ['x', 'y']) + >>> t1 = Point(x=11, y=22) + >>> t2 = Point(x=11, y=23) + >>> pprint (DeepDiff(t1, t2)) + {'values_changed': {'root.y': {'new_value': 23, 'old_value': 22}}} + +Custom objects: + >>> class ClassA(object): + ... a = 1 + ... def __init__(self, b): + ... self.b = b + ... 
+ >>> t1 = ClassA(1) + >>> t2 = ClassA(2) + >>> + >>> pprint(DeepDiff(t1, t2)) + {'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} + +Object attribute added: + >>> t2.c = "new attribute" + >>> pprint(DeepDiff(t1, t2)) + {'attribute_added': [root.c], + 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} + + +Datetime + DeepDiff converts all datetimes into UTC. If a datetime is timezone naive, we assume it is in UTC too. + That is different than what Python does. Python assumes your timezone naive datetime is in your local timezone. + >>> from deepdiff import DeepDiff + >>> from datetime import datetime, timezone + >>> d1 = datetime(2020, 8, 31, 13, 14, 1) + >>> d2 = datetime(2020, 8, 31, 13, 14, 1, tzinfo=timezone.utc) + >>> d1 == d2 + False + >>> DeepDiff(d1, d2) + {} + + +.. note:: + All the examples above use the default :ref:`text_view_label`. + If you want traversing functionality in the results, use the :ref:`tree_view_label`. + You just need to set view='tree' to get it in tree form. + + +.. _group_by_label: + +Group By +-------- + +group_by can be used when dealing with the list of dictionaries. It converts them from lists to a single dictionary with the key defined by group_by. The common use case is when reading data from a flat CSV, and the primary key is one of the columns in the CSV. We want to use the primary key instead of the CSV row number to group the rows. The group_by can do 2D group_by by passing a list of 2 keys. It is also possible to have a callable group_by, which can be used to access keys in more nested data structures. + +For example: + >>> [ + ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, + ... {'id': 'BB', 'name': 'James', 'last_name': 'Blue'}, + ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, + ... ] + +Becomes: + >>> t1 = { + ... 'AA': {'name': 'Joe', 'last_name': 'Nobody'}, + ... 'BB': {'name': 'James', 'last_name': 'Blue'}, + ... 'CC': {'name': 'Mike', 'last_name': 'Apple'}, + ... 
} + + +With that in mind, let's take a look at the following: + >>> from deepdiff import DeepDiff + >>> t1 = [ + ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, + ... {'id': 'BB', 'name': 'James', 'last_name': 'Blue'}, + ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, + ... ] + >>> + >>> t2 = [ + ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, + ... {'id': 'BB', 'name': 'James', 'last_name': 'Brown'}, + ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, + ... ] + >>> + >>> DeepDiff(t1, t2) + {'values_changed': {"root[1]['last_name']": {'new_value': 'Brown', 'old_value': 'Blue'}}} + + +Now we use group_by='id': + >>> DeepDiff(t1, t2, group_by='id') + {'values_changed': {"root['BB']['last_name']": {'new_value': 'Brown', 'old_value': 'Blue'}}} + +.. note:: + group_by actually changes the structure of the t1 and t2. You can see this by using the tree view: + + >>> diff = DeepDiff(t1, t2, group_by='id', view='tree') + >>> diff + {'values_changed': []} + >>> diff['values_changed'][0] + + >>> diff['values_changed'][0].up + + >>> diff['values_changed'][0].up.up + + >>> diff['values_changed'][0].up.up.t1 + {'AA': {'name': 'Joe', 'last_name': 'Nobody'}, 'BB': {'name': 'James', 'last_name': 'Blue'}, 'CC': {'name': 'Mike', 'last_name': 'Apple'}} + +2D Example: + >>> from pprint import pprint + >>> from deepdiff import DeepDiff + >>> + >>> t1 = [ + ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, + ... {'id': 'BB', 'name': 'James', 'last_name': 'Blue'}, + ... {'id': 'BB', 'name': 'Jimmy', 'last_name': 'Red'}, + ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, + ... ] + >>> + >>> t2 = [ + ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, + ... {'id': 'BB', 'name': 'James', 'last_name': 'Brown'}, + ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, + ... 
] + >>> + >>> diff = DeepDiff(t1, t2, group_by=['id', 'name']) + >>> pprint(diff) + {'dictionary_item_removed': [root['BB']['Jimmy']], + 'values_changed': {"root['BB']['James']['last_name']": {'new_value': 'Brown', + 'old_value': 'Blue'}}} + +Callable group_by Example: + >>> from deepdiff import DeepDiff + >>> + >>> t1 = [ + ... {'id': 'AA', 'demographics': {'names': {'first': 'Joe', 'middle': 'John', 'last': 'Nobody'}}}, + ... {'id': 'BB', 'demographics': {'names': {'first': 'James', 'middle': 'Joyce', 'last': 'Blue'}}}, + ... {'id': 'CC', 'demographics': {'names': {'first': 'Mike', 'middle': 'Mark', 'last': 'Apple'}}}, + ... ] + >>> + >>> t2 = [ + ... {'id': 'AA', 'demographics': {'names': {'first': 'Joe', 'middle': 'John', 'last': 'Nobody'}}}, + ... {'id': 'BB', 'demographics': {'names': {'first': 'James', 'middle': 'Joyce', 'last': 'Brown'}}}, + ... {'id': 'CC', 'demographics': {'names': {'first': 'Mike', 'middle': 'Charles', 'last': 'Apple'}}}, + ... ] + >>> + >>> diff = DeepDiff(t1, t2, group_by=lambda x: x['demographics']['names']['first']) + >>> pprint(diff) + {'values_changed': {"root['James']['demographics']['names']['last']": {'new_value': 'Brown', + 'old_value': 'Blue'}, + "root['Mike']['demographics']['names']['middle']": {'new_value': 'Charles', + 'old_value': 'Mark'}}} + +.. _group_by_sort_key_label: + +Group By - Sort Key +------------------- + +group_by_sort_key is used to define how dictionaries are sorted if multiple ones fall under one group. When this parameter is used, group_by converts the lists of dictionaries into a dictionary of keys to lists of dictionaries. Then, group_by_sort_key is used to sort the dictionaries within each of those lists. + +For example, there are duplicate id values. If we only use group_by='id', one of the dictionaries with id of 'BB' will overwrite the other. However, if we also set group_by_sort_key='name', we keep both dictionaries with the id of 'BB'. 
+ +Example: + >>> [{'id': 'AA', 'int_id': 2, 'last_name': 'Nobody', 'name': 'Joe'}, + ... 
{'id': 'BB', 'int_id': 20, 'last_name': 'Blue', 'name': 'James'}, + ... {'id': 'BB', 'int_id': 3, 'last_name': 'Red', 'name': 'Jimmy'}, + ... {'id': 'CC', 'int_id': 4, 'last_name': 'Apple', 'name': 'Mike'}] + + +Becomes: + >>> {'AA': [{'int_id': 2, 'last_name': 'Nobody', 'name': 'Joe'}], + ... 'BB': [{'int_id': 20, 'last_name': 'Blue', 'name': 'James'}, + ... {'int_id': 3, 'last_name': 'Red', 'name': 'Jimmy'}], + ... 'CC': [{'int_id': 4, 'last_name': 'Apple', 'name': 'Mike'}]} + + +Example of using group_by_sort_key + >>> t1 = [ + ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody', 'int_id': 2}, + ... {'id': 'BB', 'name': 'James', 'last_name': 'Blue', 'int_id': 20}, + ... {'id': 'BB', 'name': 'Jimmy', 'last_name': 'Red', 'int_id': 3}, + ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple', 'int_id': 4}, + ... ] + >>> + >>> t2 = [ + ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody', 'int_id': 2}, + ... {'id': 'BB', 'name': 'James', 'last_name': 'Brown', 'int_id': 20}, + ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple', 'int_id': 4}, + ... ] + >>> + >>> diff = DeepDiff(t1, t2, group_by='id', group_by_sort_key='name') + >>> + >>> pprint(diff) + {'iterable_item_removed': {"root['BB'][1]": {'int_id': 3, + 'last_name': 'Red', + 'name': 'Jimmy'}}, + 'values_changed': {"root['BB'][0]['last_name']": {'new_value': 'Brown', + 'old_value': 'Blue'}}} + + +.. _default_timezone_label: + +Default Time Zone +----------------- + +default_timezone defines the default timezone. If a datetime is timezone naive, which means it doesn't have a timezone, we assume the datetime is in this timezone. Also any datetime that has a timezone will be converted to this timezone so the datetimes can be compared properly all in the same timezone. Note that Python's default behavior assumes the default timezone is your local timezone. DeepDiff's default is UTC, not your local time zone. 
+ + +Note that if we change the default_timezone, the output timezone changes accordingly + >>> from deepdiff import DeepDiff + >>> import pytz + >>> from datetime import date, datetime, time, timezone + >>> dt_utc = datetime(2025, 2, 3, 12, 0, 0, tzinfo=pytz.utc) # UTC timezone + >>> dt_utc2 = datetime(2025, 2, 3, 11, 0, 0, tzinfo=pytz.utc) # UTC timezone + >>> dt_ny = dt_utc.astimezone(pytz.timezone('America/New_York')) + >>> dt_ny2 = dt_utc2.astimezone(pytz.timezone('America/New_York')) + >>> diff = DeepDiff(dt_ny, dt_ny2) + >>> diff + {'values_changed': {'root': {'new_value': datetime.datetime(2025, 2, 3, 11, 0, tzinfo=datetime.timezone.utc), 'old_value': datetime.datetime(2025, 2, 3, 12, 0, tzinfo=datetime.timezone.utc)}}} + >>> diff2 = DeepDiff(dt_ny, dt_ny2, default_timezone=pytz.timezone('America/New_York')) + >>> diff2 + {'values_changed': {'root': {'new_value': datetime.datetime(2025, 2, 3, 6, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>), 'old_value': datetime.datetime(2025, 2, 3, 7, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>)}}} + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/changelog.rst b/deepdiff/docstrings/changelog.rst new file mode 100644 index 00000000..8257cd9e --- /dev/null +++ b/deepdiff/docstrings/changelog.rst @@ -0,0 +1,341 @@ +:doc:`/index` + +Changelog +========= + +DeepDiff Changelog + +- v8-7-0 + - migration note: + - `to_dict()` and `to_json()` now accept a `verbose_level` parameter and always return a usable text-view dict. When the original view is `'tree'`, they default to `verbose_level=2` for full detail. The old `view_override` parameter is removed. To get the previous results, you will need to pass the explicit verbose_level to `to_json` and `to_dict` if you are using the tree view. 
+ - Dropping support for Python 3.9 + - Support for python 3.14 + - Added support for callable ``group_by`` thanks to `echan5 `__ + - Added ``FlatDeltaDict`` TypedDict for ``to_flat_dicts`` return type + - Fixed colored view display when all list items are removed thanks to `yannrouillard `__ + - Fixed ``hasattr()`` swallowing ``AttributeError`` in ``__slots__`` handling for objects with ``__getattr__`` thanks to `tpvasconcelos `__ + - Fixed ``ignore_order=True`` missing int-vs-float type changes + - Always use t1 path for reporting thanks to `devin13cox `__ + - Fixed ``_convert_oversized_ints`` failing on NamedTuples + - Fixed orjson ``TypeError`` for integers exceeding 64-bit range + - Fixed parameter bug in ``to_flat_dicts`` where ``include_action_in_path`` and ``report_type_changes`` were not being passed through + - Fixed ``ignore_keys`` issue in ``detailed__dict__`` thanks to `vitalis89 `__ + - Fixed logarithmic similarity type hint thanks to `ljames8 `__ + - Added ``Fraction`` numeric support thanks to `akshat62 `__ +- v8-6-2 + - Security fix (CVE-2026-33155): Prevent denial-of-service via crafted pickle payloads that trigger massive memory allocation through the REDUCE opcode. Size-sensitive callables like ``bytes()`` and ``bytearray()`` are now wrapped to reject allocations exceeding 128 MB. +- v8-6-1 + - Patched security vulnerability in the Delta class which was vulnerable to class pollution via its constructor, and when combined with a gadget available in DeltaDiff itself, it could lead to Denial of Service and Remote Code Execution (via insecure Pickle deserialization). 
+ +- v8-6-0 + - Added Colored View thanks to @mauvilsa + - Added support for applying deltas to NamedTuple thanks to @paulsc + - Fixed test_delta.py with Python 3.14 thanks to @Romain-Geissler-1A + - Added python property serialization to json + - Added ip address serialization + - Switched to UV from pip + - Added Claude.md + - Added uuid hashing thanks to @akshat62 + - Added ``ignore_uuid_types`` flag to DeepDiff to avoid type reports + when comparing UUID and string. + - Added comprehensive type hints across the codebase (multiple commits + for better type safety) + - Added support for memoryview serialization + - Added support for bytes serialization (non-UTF8 compatible) + - Fixed bug where group_by with numbers would leak type info into group + path reports + - Fixed bug in ``_get_clean_to_keys_mapping`` without explicit + significant digits + - Added support for python dict key serialization + - Enhanced support for IP address serialization with safe module imports + - Added development tooling improvements (pyright config, .envrc + example) + - Updated documentation and development instructions + +- v8-5-0 + - Updating deprecated pydantic calls + - Switching to pyproject.toml + - Fix for moving nested tables when using iterable_compare_func. by + - Fix recursion depth limit when hashing numpy.datetime64 + - Moving from legacy setuptools use to pyproject.toml + +- v8-4-2 + - fixes the type hints for the base + - fixes summarize so if json dumps fails, we can still get a repr of the results + - adds ipaddress support + +- v8-4-1 + - Adding BaseOperatorPlus base class for custom operators + - default_timezone can be passed now to set your default timezone to something other than UTC. + - New summarization algorithm that produces valid json + - Better type hint support + - Breaking change in DeepHash where we raise Exception instead of logging if we can't hash a value. + - Added the log_stacktrace parameter to DeepDiff. 
When True, it will log the stacktrace along with the error. + +- v8-3-0 + - Fixed some static typing issues + - Added the summarize module for better repr of nested values + + +- v8-2-0 + - Small optimizations so we don't load functions that are not needed + - Updated the minimum version of Orderly-set + - Normalize all datetimes into UTC. Assume timezone naive datetimes are UTC. + + +- v8-1-0 + + - Removing deprecated lines from setup.py + - Added ``prefix`` option to ``pretty()`` + - Fixes hashing of numpy boolean values. + - Fixes **slots** comparison when the attribute doesn’t exist. + - Relaxing orderly-set reqs + - Added Python 3.13 support + - Only lower if clean_key is instance of str + - Fixes issue where the key deep_distance is not returned when both + compared items are equal + - Fixes exclude_paths fails to work in certain cases + - exclude_paths fails to work + - Fixes to_json() method chokes on standard json.dumps() kwargs such as + sort_keys + - to_dict() method chokes on standard json.dumps() kwargs + - Fixes accessing the affected_root_keys property on the diff object + returned by DeepDiff fails when one of the dicts is empty + - Fixes accessing the affected_root_keys property on the + diff object returned by DeepDiff fails when one of the dicts is empty + + +- v8-0-1 + + - Bugfix. Numpy should be optional. + +- v8-0-0 + + - With the introduction of `threshold_to_diff_deeper`, the values returned are different than in previous versions of DeepDiff. You can still get the older values by setting `threshold_to_diff_deeper=0`. However to signify that enough has changed in this release that the users need to update the parameters passed to DeepDiff, we will be doing a major version update. + - `use_enum_value=True` makes it so when diffing enum, we use the enum's value. It makes it so comparing an enum to a string or any other value is not reported as a type change. + - `threshold_to_diff_deeper=float` is a number between 0 and 1. 
When comparing dictionaries that have a small intersection of keys, we will report the dictionary as a `new_value` instead of reporting individual keys changed. If you set it to zero, you get the same results as DeepDiff 7.0.1 and earlier, which means this feature is disabled. The new default is 0.33 which means if fewer than one third of keys between dictionaries intersect, report it as a new object. + - Deprecated `ordered-set` and switched to `orderly-set`. The `ordered-set` package was not being maintained anymore and starting Python 3.6, there were better options for ordered sets. I forked one of the new implementations, modified it, and published it as `orderly-set`. + - Added `use_log_scale:bool` and `log_scale_similarity_threshold:float`. They can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. + - json serialization of reversed lists. + - Fix for iterable moved items when `iterable_compare_func` is used. + - Pandas and Polars support + +- v7-0-1 + + - Fixes the translation between Difflib opcodes and Delta flat rows. + +- v7-0-0 + + - When verbose=2, return ``new_path`` when the ``path`` and + ``new_path`` are different (for example when ignore_order=True and + the index of items have changed). + - Dropping support for Python 3.7 + - Introducing serialize to flat rows for delta objects. + - fixes the issue with hashing ``datetime.date`` objects where it + treated them as numbers instead of dates (fixes #445). 
+ - upgrading orjson to the latest version + - Fix for bug when diffing two lists with ignore_order and providing + compare_func + - Fixes “Wrong diff on list of strings” #438 + - Supporting Python 3.12 in the build process by `Leo + Sin `__ + - Fixes “Instantiating a Delta with a flat_dict_list unexpectedly + mutates the flat_dict_list” #457 by + `sf-tcalhoun `__ + - Fixes “Error on Delta With None Key and Removed Item from List” + #441 + - Fixes “Error when comparing two nested dicts with 2 added fields” + #450 + - Fixes “Error when subtracting Delta from a dictionary” #443 + +- v6-7-1 + + - Support for subtracting delta objects when iterable_compare_func + is used. + - Better handling of force adding a delta to an object. + - Fix for + ```Can't compare dicts with both single and double quotes in keys`` `__ + - Updated docs for Inconsistent Behavior with math_epsilon and + ignore_order = True + +- v6-7-0 + + - Delta can be subtracted from other objects now. + - verify_symmetry is deprecated. Use bidirectional instead. + - always_include_values flag in Delta can be enabled to include + values in the delta for every change. + - Fix for Delta.\__add\_\_ breaks with esoteric dict keys. + +- v6-6-1 + + - Fix for `DeepDiff raises decimal exception when using significant + digits `__ + - Introducing group_by_sort_key + - Adding group_by 2D. For example + ``group_by=['last_name', 'zip_code']`` + +- v6-6-0 + + - Numpy 2.0 support + - Adding + `Delta.to_flat_dicts `__ + +- v6-5-0 + + - Adding + ```parse_path`` `__ + +- v6-4-1 + + - Bugfix: Keep Numpy Optional + +- v6-4-0 + + - `Add Ignore List Order Option to + DeepHash `__ by + `Bobby Morck `__ + - `pyyaml to 6.0.1 to fix cython build + problems `__ by + `Robert Bo Davis `__ + - `Precompiled regex simple + diff `__ by + `cohml `__ + - New flag: ``zip_ordered_iterables`` for forcing iterable items to + be compared one by one. 
+ +- v6-3-1 + + - Bugfix deephash for paths by + `maggelus `__ + - Bugfix deephash compiled regex + `maggelus `__ + - Fix tests dependent on toml by + `martin-kokos `__ + - Bugfix for ``include_paths`` for nested dictionaries by + `kor4ik `__ + - Use tomli and tomli-w for dealing with tomli files by + `martin-kokos `__ + - Bugfix for ``datetime.date`` by `Alex + Sauer-Budge `__ + +- v6-3-0 + + - ``PrefixOrSuffixOperator``: This operator will skip strings that + are suffix or prefix of each other. + - ``include_obj_callback`` and ``include_obj_callback_strict`` are + added by `Håvard Thom `__. + - Fixed a corner case where numpy’s ``np.float32`` nans are not + ignored when using ``ignore_nan_equality`` by `Noam + Gottlieb `__ + - ``orjson`` becomes optional again. + - Fix for ``ignore_type_in_groups`` with numeric values so it does + not report number changes when the number types are different. + +- v6-2-3 + + - Switching to Orjson for serialization to improve the performance. + - Setting ``equal_nan=ignore_nan_inequality`` in the call for + ``np.array_equal`` + - Using Pytest’s tmp_path fixture instead of ``/tmp/`` + +- v6-2-2 + + - Enum test fix for python 3.11 + - Adding support for dateutils rrules + +- v6-2-1 + + - Removed the print statements. + +- v6-2-0 + + - Major improvement in the diff report for lists when items are all + hashable and the order of items is important. + +- v6-1-0 + + - DeepDiff.affected_paths can be used to get the list of all paths + where a change, addition, or deletion was reported for. + - DeepDiff.affected_root_keys can be used to get the list of all + root keys where a change, addition, or deletion was reported for. + - Bugfix: ValueError when using Decimal 0.x #339 by `Enric + Pou `__ + - Serialization of UUID + +- v6-0-0 + + - `Exclude obj callback + strict `__ + parameter is added to DeepDiff by Mikhail Khviyuzov + `mskhviyu `__. 
+ - A fix for diffing using ``iterable_compare_func`` with nested + objects by `dtorres-sf `__ who + originally contributed this feature. +- v5-7-0: + + - https://github.com/seperman/deepdiff/pull/284 Bug-Fix: TypeError + in \_get_numbers_distance() when ignore_order = True by + @Dhanvantari + - https://github.com/seperman/deepdiff/pull/280 Add support for + UUIDs by @havardthom + - Major bug in delta when it comes to iterable items added or + removed is investigated by @uwefladrich and resolved by @seperman +- v5-6-0: Adding custom operators, and ignore_order_func. Bugfix: verbose_level==0 should disable values_changes. Bugfix: unprocessed key error. +- v5-5-0: adding iterable_compare_func for DeepDiff, adding output_format of list for path() in tree view. +- v5-4-0: adding strict_checking for numbers in DeepSearch. +- v5-3-0: add support for regular expressions in DeepSearch. +- v5-2-3: Retaining the order of multiple dictionary items added via Delta. Fixed the typo with yml files in deep cli. Fixing Grep RecursionError where using non UTF-8 character. Allowing kwargs to be passed to to_json method. +- v5-2-2: Fixed Delta serialization when None type is present. +- v5-2-0: Removed Murmur3 as the preferred hashing method. Using SHA256 by default now. Added commandline for deepdiff. Added group_by. Added math_epsilon. Improved ignoring of NoneType. +- v5-0-2: Bug Fix NoneType in ignore type groups https://github.com/seperman/deepdiff/issues/207 +- v5-0-1: Bug fix to not apply format to non numbers. +- v5-0-0: Introducing the Delta object, Improving Numpy support, Fixing tuples comparison when ignore_order=True, Dramatically improving the results when ignore_order=True by running in passes, Introducing pretty print view, deep_distance, purge, progress logging, cache and truncate_datetime. 
+- v4-3-3: Adds support for datetime.time +- v4-3-2: Deprecation Warning Enhancement +- v4-3-1: Fixing the issue with exclude_path and hash calculations when dictionaries were inside iterables. https://github.com/seperman/deepdiff/issues/174 +- v4-3-0: adding exclude_obj_callback +- v4-2-0: .json property is finally removed. Fix for Py3.10. Dropping support for EOL Python 3.4. Ignoring private keys when calculating hashes. For example __init__ is not a part of hash calculation anymore. Fix for #166 Problem with comparing lists, with an boolean as element. +- v4-1-0: .json property is finally removed. +- v4-0-9: Fixing the bug for hashing custom unhashable objects +- v4-0-8: Adding ignore_nan_inequality for float('nan') +- v4-0-7: Hashing of the number 1 vs. True +- v4-0-6: found a tiny bug in Python formatting of numbers in scientific notation. Added a workaround. +- v4-0-5: Fixing number diffing. Adding number_format_notation and number_to_string_func. +- v4-0-4: Adding ignore_string_case and ignore_type_subclasses +- v4-0-3: Adding versionbump tool for release +- v4-0-2: Fixing installation issue where rst files are missing. +- v4-0-1: Fixing installation Tarball missing requirements.txt . DeepDiff v4+ should not show up as pip installable for Py2. Making Murmur3 installation optional. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. +- v3-5-0: Exclude regex path +- v3-3-0: Searching for objects and class attributes +- v3-2-2: Adding help(deepdiff) +- v3-2-1: Fixing hash of None +- v3-2-0: Adding grep for search: object | grep(item) +- v3-1-3: Unicode vs. 
Bytes default fix +- v3-1-2: NotPresent Fix when item is added or removed. +- v3-1-1: Bug fix when item value is None (#58) +- v3-1-0: Serialization to/from json +- v3-0-0: Introducing Tree View +- v2-5-3: Bug fix on logging for content hash. +- v2-5-2: Bug fixes on content hash. +- v2-5-0: Adding ContentHash module to fix ignore_order once and for all. +- v2-1-0: Adding Deep Search. Now you can search for item in an object. +- v2-0-0: Exclusion patterns better coverage. Updating docs. +- v1-8-0: Exclusion patterns. +- v1-7-0: Deep Set comparison. +- v1-6-0: Unifying key names. i.e newvalue is new_value now. For backward compatibility, newvalue still works. +- v1-5-0: Fixing ignore order containers with unordered items. Adding significant digits when comparing decimals. Changes property is deprecated. +- v1-1-0: Changing Set, Dictionary and Object Attribute Add/Removal to be reported as Set instead of List. Adding Pypy compatibility. +- v1-0-2: Checking for ImmutableMapping type instead of dict +- v1-0-1: Better ignore order support +- v1-0-0: Restructuring output to make it more useful. This is NOT backward compatible. +- v0-6-1: Fixing iterables with unhashable when order is ignored +- v0-6-0: Adding unicode support +- v0-5-9: Adding decimal support +- v0-5-8: Adding ignore order for unhashables support +- v0-5-7: Adding ignore order support +- v0-5-6: Adding slots support +- v0-5-5: Adding loop detection + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/colored_view.rst b/deepdiff/docstrings/colored_view.rst new file mode 100644 index 00000000..16f49ab7 --- /dev/null +++ b/deepdiff/docstrings/colored_view.rst @@ -0,0 +1,101 @@ +.. _colored_view_label: + +Colored View +============ + +The `ColoredView` feature in `deepdiff` provides a human-readable, color-coded JSON output of the +differences between two objects. This feature is particularly useful for visualizing changes in a +clear and intuitive manner. 
+ +- **Color-Coded Differences:** + + - **Added Elements:** Shown in green. + - **Removed Elements:** Shown in red. + - **Changed Elements:** The old value is shown in red, and the new value is shown in green. + +Usage +----- + +To use the `ColoredView`, simply pass the `COLORED_VIEW` option to the `DeepDiff` function: + +.. code-block:: python + + from deepdiff import DeepDiff + from deepdiff.helper import COLORED_VIEW + + t1 = {"name": "John", "age": 30, "scores": [1, 2, 3], "address": {"city": "New York", "zip": "10001"}} + t2 = {"name": "John", "age": 31, "scores": [1, 2, 4], "address": {"city": "Boston", "zip": "10001"}, "new": "value"} + + diff = DeepDiff(t1, t2, view=COLORED_VIEW) + print(diff) + +Or from command line: + +.. code-block:: bash + + deep diff --view colored t1.json t2.json + +The output will look something like this: + +.. raw:: html + +
+    {
+      "name": "John",
+      "age": 30 -> 31,
+      "scores": [
+        1,
+        2,
+        3 -> 4
+      ],
+      "address": {
+        "city": "New York" -> "Boston",
+        "zip": "10001"
+      },
+      "new": "value"
+    }
+    
+ +Colored Compact View +-------------------- + +For a more concise output, especially with deeply nested objects where many parts are unchanged, +the `ColoredView` with the compact option can be used. This view is similar but collapses +unchanged nested dictionaries to `{...}` and unchanged lists/tuples to `[...]`. To use the compact +option do: + +.. code-block:: python + + from deepdiff import DeepDiff + from deepdiff.helper import COLORED_COMPACT_VIEW + + t1 = {"name": "John", "age": 30, "scores": [1, 2, 3], "address": {"city": "New York", "zip": "10001"}} + t2 = {"name": "John", "age": 31, "scores": [1, 2, 4], "address": {"city": "New York", "zip": "10001"}, "new": "value"} + + diff = DeepDiff(t1, t2, view=COLORED_COMPACT_VIEW) + print(diff) + +Or from command line: + +.. code-block:: bash + + deep diff --view colored_compact t1.json t2.json + + +The output will look something like this: + +.. raw:: html + +
+    {
+      "name": "John",
+      "age": 30 -> 31,
+      "scores": [
+        1,
+        2,
+        3 -> 4
+      ],
+      "address": {...},
+      "new": "value"
+    }
+    
diff --git a/deepdiff/docstrings/commandline.rst b/deepdiff/docstrings/commandline.rst new file mode 100644 index 00000000..e7853dd6 --- /dev/null +++ b/deepdiff/docstrings/commandline.rst @@ -0,0 +1,320 @@ +:doc:`/index` + +Command Line +============ + +`New in DeepDiff 5.2.0` + +DeepDiff provides commandline interface to a subset of functionality that it provides through its Python API. + +The commands are: + +- :ref:`deep_diff_command` +- :ref:`deep_grep_command` +- :ref:`deep_extract_command` +- :ref:`deep_patch_command` + + +.. _deep_diff_command: + +deep diff command +----------------- + +Run + +.. code:: bash + + $ deep diff + +to get the options: + +.. code-block:: bash + + $ deep diff --help + Usage: deep diff [OPTIONS] T1 T2 + + Deep Diff Commandline + + Deep Difference of content in files. + It can read csv, tsv, json, yaml, and toml files. + + T1 and T2 are the path to the files to be compared with each other. + + Options: + --cutoff-distance-for-pairs FLOAT + [default: 0.3] + --cutoff-intersection-for-pairs FLOAT + [default: 0.7] + --cache-size INTEGER [default: 0] + --cache-tuning-sample-size INTEGER + [default: 0] + --cache-purge-level INTEGER RANGE + [default: 1] + --create-patch [default: False] + --exclude-paths TEXT + --exclude-regex-paths TEXT + --math-epsilon DECIMAL + --get-deep-distance [default: False] + --group-by TEXT + --ignore-order [default: False] + --ignore-string-type-changes [default: False] + --ignore-numeric-type-changes [default: False] + --ignore-type-subclasses [default: False] + --ignore-string-case [default: False] + --ignore-nan-inequality [default: False] + --include-private-variables [default: False] + --log-frequency-in-sec INTEGER [default: 0] + --max-passes INTEGER [default: 10000000] + --max_diffs INTEGER + --number-format-notation [f|e] [default: f] + --progress-logger [info|error] [default: info] + --report-repetition [default: False] + --significant-digits INTEGER + --truncate-datetime [second|minute|hour|day] + 
--verbose-level INTEGER RANGE [default: 1] + --view [-|colored|colored_compact] + [default: -] + Format for displaying differences. + --help Show this message and exit. + + +Example usage: + +Let's imagine we have t1.csv and t2.csv: + +.. csv-table:: t1.csv + :file: ../tests/fixtures/t1.csv + :header-rows: 1 + + +.. csv-table:: t2.csv + :file: ../tests/fixtures/t2.csv + :header-rows: 1 + +We can run: + +.. code-block:: bash + + $ deep diff t1.csv t2.csv --ignore-order + {'values_changed': {"root[2]['zip']": {'new_value': 90002, 'old_value': 90001}}} + +As you can see here the path to the item that is being changed is `root[2]['zip']` which is ok but +what if we assume last names are unique and group by last_name? + +.. code-block:: bash + + $ deep diff t1.csv t2.csv --ignore-order --group-by last_name + { 'values_changed': { "root['Molotov']['zip']": { 'new_value': 90002, + 'old_value': 90001}}} + +The path is perhaps more readable now: `root['Molotov']['zip']`. It is more clear that the zip code of Molotov has changed. + +.. Note:: + The parameters in the deep diff commandline are a subset of those in :ref:`deepdiff_label` 's Python API. + +To output in a specific format, for example the colored compact view (see :doc:`colored_view` for output details): + +.. code-block:: bash + + $ deep diff t1.json t2.json --view colored_compact + + +.. _deep_grep_command: + +deep grep command +----------------- + +Run + +.. code:: bash + + $ deep grep + +to get the options: + +.. code-block:: bash + + $ deep grep --help + Usage: deep grep [OPTIONS] ITEM PATH + + Deep Grep Commandline + + Grep through the contents of a file and find the path to the item. + It can read csv, tsv, json, yaml, and toml files. + + Options: + -i, --ignore-case [default: False] + --exact-match [default: False] + --exclude-paths TEXT + --exclude-regex-paths TEXT + --verbose-level INTEGER RANGE [default: 1] + --help Show this message and exit. + + +.. 
csv-table:: t1.csv + :file: ../tests/fixtures/t1.csv + :header-rows: 1 + +.. code-block:: bash + + $ deep grep --ignore-case james t1.csv + {'matched_values': ["root[2]['first_name']"]} + + +.. _deep_extract_command: + +deep extract command +-------------------- + +Run + +.. code:: bash + + $ deep extract + +to get the options: + +.. code-block:: bash + + $ deep extract --help + Usage: deep extract [OPTIONS] PATH_INSIDE PATH + + Deep Extract Commandline + + Extract an item from a file based on the path that is passed. It can read + csv, tsv, json, yaml, and toml files. + + Options: + --help Show this message and exit. + +.. csv-table:: t1.csv + :file: ../tests/fixtures/t1.csv + :header-rows: 1 + +.. code-block:: bash + + $ deep extract "root[2]['first_name']" t1.csv + 'James' + + +.. _deep_patch_command: + +deep patch command +------------------ + +Run + +.. code:: bash + + $ deep patch --help + +to get the options: + +.. code-block:: text + + $ deep patch --help + Usage: deep patch [OPTIONS] PATH DELTA_PATH + + Deep Patch Commandline + + Patches a file based on the information in a delta file. The delta file + can be created by the deep diff command and passing the --create-patch + argument. + + Deep Patch is similar to Linux's patch command. The difference is that it + is made for patching data. It can read csv, tsv, json, yaml, and toml + files. + + Options: + -b, --backup [default: False] + --raise-errors [default: False] + --help Show this message and exit. + +Imagine if we have the following files: + + +.. csv-table:: t1.csv + :file: ../tests/fixtures/t1.csv + :header-rows: 1 + +.. csv-table:: t2.csv + :file: ../tests/fixtures/t2.csv + :header-rows: 1 + + +First we need to create a "delta" file which represents the difference between the 2 files. + +.. 
code-block:: bash + + $ deep diff t1.csv t2.csv --ignore-order + {'values_changed': {"root[2]['zip']": {'new_value': 90002, 'old_value': 90001}}} + +We create the delta by using the deep diff command and passing the `--create-patch` argument. +However since we are using `--ignore-order`, `deep diff` will ask us to also use `--report-repetition`: + +.. code-block:: bash + + deep diff t1.csv t2.csv --ignore-order --report-repetition --create-patch + =}values_changed}root[2]['zip']} new_valueJ_sss.% + +Note that the delta is not human readable. It is meant for us to pass it into a file: + +.. code-block:: bash + + deep diff t1.csv t2.csv --ignore-order --report-repetition --create-patch > patch1.pickle + +Now this delta file is ready to be applied by the `deep patch` command to any json, csv, toml or yaml file! +It is expecting the structure of the file to be similar to the one in the csv file though. + +Let's look at this yaml file: + +`another.yaml` + +.. code-block:: yaml + + --- + - + first_name: Joe + last_name: Nobody + zip: 90011 + - + first_name: Jack + last_name: Doit + zip: 22222 + - + first_name: Sara + last_name: Stanley + zip: 11111 + +All that our delta knows is that `root[2]['zip']` has changed to `90002`. + +Let's apply the delta: + +.. code-block:: bash + + deep patch --backup another.yaml patch1.pickle --raise-errors + +And looking at the `another.yaml` file, the zip code is indeed updated! + +.. code-block:: yaml + + - first_name: Joe + last_name: Nobody + zip: 90011 + - first_name: Jack + last_name: Doit + zip: 22222 + - first_name: Sara + last_name: Stanley + zip: 90002 + +As you can see the formatting of the yaml file is changed. +This is due to the fact that DeepDiff loads the file into a Python dictionary, modifies it and then writes it back to disk. +During this operation, the file loses its original formatting. + +.. note:: + The deep patch command only provides a subset of what DeepDiff's :ref:`delta_label`'s Python API provides. 
+ The deep patch command is minimalistic and is designed to have a similar interface to Linux's patch command + rather than DeepDiff's :ref:`delta_label`. + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/custom.rst b/deepdiff/docstrings/custom.rst new file mode 100644 index 00000000..e2ff1d96 --- /dev/null +++ b/deepdiff/docstrings/custom.rst @@ -0,0 +1,440 @@ +:doc:`/index` + +Customized Diff +=============== + +.. _iterable_compare_func_label: + +Iterable Compare Func +--------------------- + +New in DeepDiff 5.5.0 + +There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass a `iterable_compare_func` that takes a function pointer to compare two items. The function takes three parameters (x, y, level) and should return `True` if it is a match, `False` if it is not a match or raise `CannotCompare` if it is unable to compare the two. + + +For example take the following objects: + + +Now let's define a compare_func that takes 3 parameters: x, y and level. + + >>> from deepdiff import DeepDiff + >>> from deepdiff.helper import CannotCompare + >>> + >>> t1 = [ + ... { + ... 'id': 1, + ... 'value': [1] + ... }, + ... { + ... 'id': 2, + ... 'value': [7, 8, 1] + ... }, + ... { + ... 'id': 3, + ... 'value': [7, 8], + ... }, + ... ] + >>> + >>> t2 = [ + ... { + ... 'id': 2, + ... 'value': [7, 8] + ... }, + ... { + ... 'id': 3, + ... 'value': [7, 8, 1], + ... }, + ... { + ... 'id': 1, + ... 'value': [1] + ... }, + ... ] + >>> + >>> DeepDiff(t1, t2) + {'values_changed': {"root[0]['id']": {'new_value': 2, 'old_value': 1}, "root[0]['value'][0]": {'new_value': 7, 'old_value': 1}, "root[1]['id']": {'new_value': 3, 'old_value': 2}, "root[2]['id']": {'new_value': 1, 'old_value': 3}, "root[2]['value'][0]": {'new_value': 1, 'old_value': 7}}, 'iterable_item_added': {"root[0]['value'][1]": 8}, 'iterable_item_removed': {"root[2]['value'][1]": 8}} + +As you can see the results are different. 
Now items with the same ids are compared with each other. + + >>> def compare_func(x, y, level=None): + ... try: + ... return x['id'] == y['id'] + ... except Exception: + ... raise CannotCompare() from None + ... + >>> DeepDiff(t1, t2, iterable_compare_func=compare_func) + {'iterable_item_added': {"root[2]['value'][2]": 1}, 'iterable_item_removed': {"root[1]['value'][2]": 1}} + +If we set the verbose_level=2, we can see more details. + + >>> DeepDiff(t1, t2, iterable_compare_func=compare_func, verbose_level=2) + {'iterable_item_added': {"root[2]['value'][2]": 1}, 'iterable_item_removed': {"root[1]['value'][2]": 1}, 'iterable_item_moved': {'root[0]': {'new_path': 'root[2]', 'value': {'id': 1, 'value': [1]}}, 'root[1]': {'new_path': 'root[0]', 'value': {'id': 2, 'value': [7, 8]}}, 'root[2]': {'new_path': 'root[1]', 'value': {'id': 3, 'value': [7, 8, 1]}}}} + + +We can also use the level parameter. Levels are explained in the :ref:`tree_view_label`. + +For example you could use the level object to further determine if the 2 objects should be matches or not. + + + >>> t1 = { + ... 'path1': [], + ... 'path2': [ + ... { + ... 'id': 1, + ... 'value': [1] + ... }, + ... { + ... 'id': 2, + ... 'value': [7, 8, 1] + ... }, + ... ] + ... } + >>> + >>> t2 = { + ... 'path1': [{'pizza'}], + ... 'path2': [ + ... { + ... 'id': 2, + ... 'value': [7, 8, 1] + ... }, + ... { + ... 'id': 1, + ... 'value': [1, 2] + ... }, + ... ] + ... } + >>> + >>> + >>> def compare_func2(x, y, level): + ... if (not isinstance(x, dict) or not isinstance(y, dict)): + ... raise CannotCompare + ... if(level.path() == "root['path2']"): + ... if (x["id"] == y["id"]): + ... return True + ... return False + ... + >>> + >>> DeepDiff(t1, t2, iterable_compare_func=compare_func2) + {'iterable_item_added': {"root['path1'][0]": {'pizza'}, "root['path2'][0]['value'][1]": 2}} + + +.. 
note:: + + The level parameter of the iterable_compare_func is only used when ignore_order=False which is the default value for ignore_order. + + +.. _custom_operators_label: + +Custom Operators +---------------- + +Whether two objects are different or not largely depends on the context. For example, apples and bananas are the same +if you are considering whether they are fruits or not. + +In that case, you can pass a *custom_operators* for the job. + +Custom operators give you a lot of power. In the following examples, we explore various use cases such as: + +- Making DeepDiff report the L2 Distance of items +- Only include specific paths in diffing +- Making DeepDiff stop diffing once we find the first diff. + +You can use one of the predefined custom operators that come with DeepDiff. Or you can define one yourself. + + +Built-In Custom Operators + +.. _prefix_or_suffix_operator_label: + +PrefixOrSuffixOperator +...................... + + +This operator will skip strings that are suffix or prefix of each other. + +For example when this operator is used, the two strings of "joe" and "joe's car" will not be reported as different. + + >>> from deepdiff import DeepDiff + >>> from deepdiff.operator import PrefixOrSuffixOperator + >>> t1 = { + ... "key1": ["foo", "bar's food", "jack", "joe"] + ... } + >>> t2 = { + ... "key1": ["foo", "bar", "jill", "joe'car"] + ... } + >>> + >>> DeepDiff(t1, t2) + {'values_changed': {"root['key1'][1]": {'new_value': 'bar', 'old_value': "bar's food"}, "root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}, "root['key1'][3]": {'new_value': "joe'car", 'old_value': 'joe'}}} + >>> DeepDiff(t1, t2, custom_operators=[ + ... PrefixOrSuffixOperator() + ... ]) + >>> + {'values_changed': {"root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}}} + + + + +Define A Custom Operator +------------------------ + + +To define a custom operator, you just need to inherit *BaseOperator* or *BaseOperatorPlus*. 
+ + - *BaseOperatorPlus* is our new base operator that can be subclassed and provides the structure to build any custom operator. + - *BaseOperator* is our older base class for creating custom operators. It was designed mainly for simple string based regex comparison. + + +Base Operator Plus +.................. + +*BaseOperatorPlus* is our new base operator that can be subclassed and provides the structure to build any custom operator. + +.. code-block:: python + + class BaseOperatorPlus(metaclass=ABCMeta): + + @abstractmethod + def match(self, level) -> bool: + """ + Given a level which includes t1 and t2 in the tree view, is this operator a good match to compare t1 and t2? + If yes, we will run the give_up_diffing to compare t1 and t2 for this level. + """ + pass + + @abstractmethod + def give_up_diffing(self, level, diff_instance: "DeepDiff") -> bool: + """ + Given a level which includes t1 and t2 in the tree view, and the "distance" between t1 and t2. + do we consider t1 and t2 to be equal or not. The distance is a number between zero and one and is calculated by DeepDiff to measure how similar objects are. + """ + + @abstractmethod + def normalize_value_for_hashing(self, parent: Any, obj: Any) -> Any: + """ + You can use this function to normalize values for ignore_order=True + + For example, you may want to turn all the words to be lowercase. Then you return obj.lower() + """ + pass + + +**Example 1: We don't care about the exact GUID values. As long as pairs of strings match GUID regex, we want them to be considered as equals** + + >>> import re + ... from typing import Any + ... from deepdiff import DeepDiff + ... from deepdiff.operator import BaseOperatorPlus + ... + ... + ... d1 = { + ... "Name": "SUB_OBJECT_FILES", + ... "Values": { + ... "Value": [ + ... "{f254498b-b752-4f35-bef5-6f1844b61eb7}", + ... "{7fb2a550-1849-45c0-b273-9aa5e4eb9f2b}", + ... "{a9cbecc0-21dc-49ce-8b2c-d36352dae139}" + ... ] + ... } + ... } + ... + ... d2 = { + ...
"Name": "SUB_OBJECT_FILES", + ... "Values": { + ... "Value": [ + ... "{e5d18917-1a2c-4abe-b601-8ec002629953}", + ... "{ea71ba1f-1339-4fae-bc28-a9ce9b8a8c67}", + ... "{66bb6192-9cd2-4074-8be1-f2ac52877c70}", + ... ] + ... } + ... } + ... + ... + ... class RemoveGUIDsOperator(BaseOperatorPlus): + ... _pattern = r"[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}" + ... _substitute = "guid" + ... + ... def match(self, level) -> bool: + ... return isinstance(level.t1, str) and isinstance(level.t2, str) + ... + ... @classmethod + ... def _remove_pattern(cls, t: str): + ... return re.sub(cls._pattern, cls._substitute, t) + ... + ... def give_up_diffing(self, level, diff_instance): + ... t1 = self._remove_pattern(level.t1) + ... t2 = self._remove_pattern(level.t2) + ... return t1 == t2 + ... + ... def normalize_value_for_hashing(self, parent: Any, obj: Any) -> Any: + ... """ + ... Used for ignore_order=True + ... """ + ... if isinstance(obj, str): + ... return self._remove_pattern(obj) + ... return obj + ... + ... + ... operator = RemoveGUIDsOperator() + ... + >>> diff1 = DeepDiff(d1, d2, custom_operators=[operator], log_stacktrace=True) + ... diff1 + {} + >>> diff2 = DeepDiff(d1, d2, ignore_order=True, custom_operators=[operator], log_stacktrace=True) + ... diff2 + {} + + + +Base Operator +............. + +*BaseOperator* is our older base class for creating custom operators. It was designed mainly for simple string based regex comparison. + + +.. 
code-block:: python + + class BaseOperator: + + def __init__(self, regex_paths:Optional[List[str]]=None, types:Optional[List[type]]=None): + if regex_paths: + self.regex_paths = convert_item_or_items_into_compiled_regexes_else_none(regex_paths) + else: + self.regex_paths = None + self.types = types + + def match(self, level) -> bool: + if self.regex_paths: + for pattern in self.regex_paths: + matched = re.search(pattern, level.path()) is not None + if matched: + return True + if self.types: + for type_ in self.types: + if isinstance(level.t1, type_) and isinstance(level.t2, type_): + return True + return False + + def give_up_diffing(self, level, diff_instance) -> bool: + raise NotImplementedError('Please implement the diff function.') + + + +**Example 2: An operator that uses the L2 distance as the diff criterion and reports the distance** + + >>> import math + >>> + >>> from typing import List + >>> from deepdiff import DeepDiff + >>> from deepdiff.operator import BaseOperator + >>> + >>> + >>> class L2DistanceDifferWithPreventDefault(BaseOperator): + ... def __init__(self, regex_paths: List[str], distance_threshold: float): + ... super().__init__(regex_paths) + ... self.distance_threshold = distance_threshold + ... def _l2_distance(self, c1, c2): + ... return math.sqrt( + ... (c1["x"] - c2["x"]) ** 2 + (c1["y"] - c2["y"]) ** 2 + ... ) + ... def give_up_diffing(self, level, diff_instance): + ... l2_distance = self._l2_distance(level.t1, level.t2) + ... if l2_distance > self.distance_threshold: + ... diff_instance.custom_report_result('distance_too_far', level, { + ... "l2_distance": l2_distance + ... }) + ... return True + ... + >>> + >>> t1 = { + ... "coordinates": [ + ... {"x": 5, "y": 5}, + ... {"x": 8, "y": 8} + ... ] + ... } + >>> + >>> t2 = { + ... "coordinates": [ + ... {"x": 6, "y": 6}, + ... {"x": 88, "y": 88} + ... ] + ... } + >>> DeepDiff(t1, t2, custom_operators=[L2DistanceDifferWithPreventDefault( + ... ["^root\\['coordinates'\\]\\[\\d+\\]$"], + ...
1 + ... )]) + {'distance_too_far': {"root['coordinates'][0]": {'l2_distance': 1.4142135623730951}, "root['coordinates'][1]": {'l2_distance': 113.13708498984761}}} + + +**Example 3: If the objects are subclasses of a certain type, only compare them if their list attributes are not equal sets** + + >>> class CustomClass: + ... def __init__(self, d: dict, l: list): + ... self.dict = d + ... self.dict['list'] = l + ... + >>> + >>> custom1 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3]) + >>> custom2 = CustomClass(d=dict(c=3, d=4), l=[1, 2, 3, 2]) + >>> custom3 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3, 4]) + >>> + >>> + >>> class ListMatchOperator(BaseOperator): + ... def give_up_diffing(self, level, diff_instance): + ... if set(level.t1.dict['list']) == set(level.t2.dict['list']): + ... return True + ... + >>> + >>> DeepDiff(custom1, custom2, custom_operators=[ + ... ListMatchOperator(types=[CustomClass]) + ... ]) + {} + >>> + >>> + >>> DeepDiff(custom2, custom3, custom_operators=[ + ... ListMatchOperator(types=[CustomClass]) + ... ]) + {'dictionary_item_added': [root.dict['a'], root.dict['b']], 'dictionary_item_removed': [root.dict['c'], root.dict['d']], 'values_changed': {"root.dict['list'][3]": {'new_value': 4, 'old_value': 2}}} + >>> + +**Example 4: Only diff certain paths** + + >>> from deepdiff import DeepDiff + >>> class MyOperator: + ... def __init__(self, include_paths): + ... self.include_paths = include_paths + ... def match(self, level) -> bool: + ... return True + ... def give_up_diffing(self, level, diff_instance) -> bool: + ... return level.path() not in self.include_paths + ... + >>> + >>> t1 = {'a': [10, 11], 'b': [20, 21], 'c': [30, 31]} + >>> t2 = {'a': [10, 22], 'b': [20, 33], 'c': [30, 44]} + >>> + >>> DeepDiff(t1, t2, custom_operators=[ + ... MyOperator(include_paths="root['a'][1]") + ... 
]) + {'values_changed': {"root['a'][1]": {'new_value': 22, 'old_value': 11}}} + +**Example 5: Give up further diffing once the first diff is found** + +Sometimes all you care about is that there is a difference between 2 objects and not all the details of what exactly is different. +In that case you may want to stop diffing as soon as the first diff is found. + + >>> from deepdiff import DeepDiff + >>> class MyOperator: + ... def match(self, level) -> bool: + ... return True + ... def give_up_diffing(self, level, diff_instance) -> bool: + ... return any(diff_instance.tree.values()) + ... + >>> t1 = [[1, 2], [3, 4], [5, 6]] + >>> t2 = [[1, 3], [3, 5], [5, 7]] + >>> + >>> DeepDiff(t1, t2, custom_operators=[ + ... MyOperator() + ... ]) + {'values_changed': {'root[0][1]': {'new_value': 3, 'old_value': 2}}} + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/deep_distance.rst b/deepdiff/docstrings/deep_distance.rst new file mode 100644 index 00000000..09179b7b --- /dev/null +++ b/deepdiff/docstrings/deep_distance.rst @@ -0,0 +1,119 @@ +:doc:`/index` + +.. _deep_distance_label: + +Deep Distance +============= + + +Deep Distance is the distance between 2 objects. It is a floating point number between 0 and 1. Deep Distance in concept is inspired by `Levenshtein Edit Distance `_. + +At its core, the Deep Distance is the number of operations needed to convert one object to the other divided by the sum of the sizes of the 2 objects capped at 1. Note that unlike Levenshtein Distance, the Deep Distance is based on the number of operations and NOT the “minimum” number of operations to convert one object to the other. The number is highly dependent on the granularity of the diff results. And the granularity is controlled by the parameters passed to DeepDiff. + +.. _get_deep_distance_label: + +Get Deep Distance +----------------- + +get_deep_distance: Boolean, default = False + get_deep_distance will get you the deep distance between objects. 
The distance is a number between 0 and 1 where zero means there is no diff between the 2 objects and 1 means they are very different. Note that this number should only be used to compare the similarity of 2 objects and nothing more. The algorithm for calculating this number may or may not change in the future releases of DeepDiff. + + The value of Deep Distance will show up in the result diff object's deep_distance key. + + >>> from deepdiff import DeepDiff + >>> DeepDiff(10.0, 10.1, get_deep_distance=True) + {'values_changed': {'root': {'new_value': 10.1, 'old_value': 10.0}}, 'deep_distance': 0.0014925373134328302} + >>> DeepDiff(10.0, 100.1, get_deep_distance=True) + {'values_changed': {'root': {'new_value': 100.1, 'old_value': 10.0}}, 'deep_distance': 0.24550408719346048} + >>> DeepDiff(10.0, 1000.1, get_deep_distance=True) + {'values_changed': {'root': {'new_value': 1000.1, 'old_value': 10.0}}, 'deep_distance': 0.29405999405999406} + >>> DeepDiff([1], [1], get_deep_distance=True) + {} + >>> DeepDiff([1], [1, 2], get_deep_distance=True) + {'iterable_item_added': {'root[1]': 2}, 'deep_distance': 0.2} + >>> DeepDiff([1], [1, 2, 3], get_deep_distance=True) + {'iterable_item_added': {'root[1]': 2, 'root[2]': 3}, 'deep_distance': 0.3333333333333333} + >>> DeepDiff([[2, 1]], [[1, 2, 3]], ignore_order=True, get_deep_distance=True) + {'iterable_item_added': {'root[0][2]': 3}, 'deep_distance': 0.1111111111111111} + +.. _distance_and_diff_granularity_label: + +Distance And Diff Granularity +----------------------------- + +.. note:: + Deep Distance of objects are highly dependent on the diff object that is produced. A diff object that is more granular will give more accurate Deep Distance value too. + +Let's use the following 2 deeply nested objects as an example. If you ignore the order of items, they are very similar and only differ in a few elements. + +We will run 2 diffs and ask for the deep distance. 
The only difference between the below 2 diffs is that in the first one the :ref:`cutoff_intersection_for_pairs_label` is not passed so the default value of 0.3 is used while in the other one cutoff_intersection_for_pairs=1 is used which forces extra pass calculations. + +>>> from pprint import pprint +>>> t1 = [ +... { +... "key3": [[[[[[[[[[1, 2, 4, 5]]], [[[8, 7, 3, 5]]]]]]]]]], +... "key4": [7, 8] +... }, +... { +... "key5": "val5", +... "key6": "val6" +... } +... ] +>>> +>>> t2 = [ +... { +... "key5": "CHANGE", +... "key6": "val6" +... }, +... { +... "key3": [[[[[[[[[[1, 3, 5, 4]]], [[[8, 8, 1, 5]]]]]]]]]], +... "key4": [7, 8] +... } +... ] + +We don't pass cutoff_intersection_for_pairs in the first diff. + +>>> diff1=DeepDiff(t1, t2, ignore_order=True, cache_size=5000, get_deep_distance=True) +>>> pprint(diff1) +{'deep_distance': 0.36363636363636365, + 'values_changed': {'root[0]': {'new_value': {'key5': 'CHANGE', 'key6': 'val6'}, + 'old_value': {'key3': [[[[[[[[[[1, 2, 4, 5]]], + [[[8, + 7, + 3, + 5]]]]]]]]]], + 'key4': [7, 8]}}, + 'root[1]': {'new_value': {'key3': [[[[[[[[[[1, 3, 5, 4]]], + [[[8, + 8, + 1, + 5]]]]]]]]]], + 'key4': [7, 8]}, + 'old_value': {'key5': 'val5', 'key6': 'val6'}}}} + +Note that the stats show that only 5 set of objects were compared with each other according to the DIFF COUNT: + +>>> diff1.get_stats() +{'PASSES COUNT': 0, 'DIFF COUNT': 5, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} + +Let's pass cutoff_intersection_for_pairs=1 to enforce pass calculations. As you can see the results are way more granular and the deep distance value is way more accurate now. 
+ +>>> diff2=DeepDiff(t1, t2, ignore_order=True, cache_size=5000, cutoff_intersection_for_pairs=1, get_deep_distance=True) +>>> from pprint import pprint +>>> pprint(diff2) +{'deep_distance': 0.06060606060606061, + 'iterable_item_removed': {"root[0]['key3'][0][0][0][0][0][0][1][0][0][1]": 7}, + 'values_changed': {"root[0]['key3'][0][0][0][0][0][0][0][0][0][1]": {'new_value': 3, + 'old_value': 2}, + "root[0]['key3'][0][0][0][0][0][0][1][0][0][2]": {'new_value': 1, + 'old_value': 3}, + "root[1]['key5']": {'new_value': 'CHANGE', + 'old_value': 'val5'}}} + +As you can see now way more calculations have happened behind the scene. Instead of only 5 set of items being compared with each other, we have 306 items that are compared with each other in 110 passes. + +>>> diff2.get_stats() +{'PASSES COUNT': 110, 'DIFF COUNT': 306, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/deephash.rst b/deepdiff/docstrings/deephash.rst new file mode 100644 index 00000000..fd5fbda4 --- /dev/null +++ b/deepdiff/docstrings/deephash.rst @@ -0,0 +1,14 @@ +:doc:`/index` + +DeepHash +======== + +.. toctree:: + :maxdepth: 3 + +.. automodule:: deepdiff.deephash + +.. autoclass:: DeepHash + :members: + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/deephash_doc.rst b/deepdiff/docstrings/deephash_doc.rst new file mode 100644 index 00000000..7039281f --- /dev/null +++ b/deepdiff/docstrings/deephash_doc.rst @@ -0,0 +1,388 @@ +:orphan: + +**DeepHash** + +DeepHash calculates the hash of objects based on their contents in a deterministic way. +This way 2 objects with the same content should have the same hash. + +The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. +For example you can use DeepHash to calculate the hash of a set or a dictionary! 
+ +At the core of it, DeepHash is a deterministic serialization of your object into a string so it +can be passed to a hash function. By default it uses SHA256. You have the option to pass any other hashing function to be used instead. + +**Import** + >>> from deepdiff import DeepHash + +**Parameters** + +obj : any object, The object to be hashed based on its content. + + +apply_hash: Boolean, default = True + DeepHash at its core is doing deterministic serialization of objects into strings. + Then it hashes the string. + The only time you want the apply_hash to be False is if you want to know what + the string representation of your object is BEFORE it gets hashed. + + +exclude_types: list, default = None + List of object types to exclude from hashing. + + +exclude_paths: list, default = None + List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. + + +include_paths: list, default = None + List of the only paths to include in the report. If only one item, you can pass it as a string. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. + + +exclude_regex_paths: list, default = None + List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one regex path. + + +exclude_obj_callback + function, default = None + A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. + This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. + + +encodings: List, default = None + Character encodings to iterate through when we convert bytes into strings. 
You may want to pass an explicit list of encodings in your objects if you start getting UnicodeDecodeError from DeepHash. Also check out ignore_encoding_errors if you can get away with ignoring these errors and don't want to bother with an explicit list of encodings but it will come at the price of slightly less accuracy of the final results. Example: encodings=["utf-8", "latin-1"] + + +hashes: dictionary, default = empty dictionary + A dictionary of {object or object id: object hash} to start with. + Any object that is encountered and it is already in the hashes dictionary or its id is in the hashes dictionary, + will re-use the hash that is provided by this dictionary instead of re-calculating + its hash. This is typically used when you have a series of objects to be hashed and there might be repeats of the same object. + + +hasher: function. default = DeepHash.sha256hex + hasher is the hashing function. The default is DeepHash.sha256hex. + But you can pass another hash function to it if you want. + For example a cryptographic hash function or Python's builtin hash function. + All it needs is a function that takes the input in string format and returns the hash. + + You can use it by passing: hasher=hash for Python's builtin hash. + + The following alternative is already provided: + + - hasher=DeepHash.sha1hex + + Note that prior to DeepDiff 5.2, Murmur3 was the default hash function. + But Murmur3 is removed from DeepDiff dependencies since then. + + +ignore_repetition: Boolean, default = True + If repetitions in an iterable should cause the hash of iterable to be different. + Note that the deepdiff diffing functionality lets this to be the default at all times. + But if you are using DeepHash directly, you can set this parameter. + + +ignore_type_in_groups + Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. 
Most of the time the shortcuts provided to you are enough. + The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more control compared to the shortcuts. + + For example let's say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: + + 1. Set ignore_string_type_changes=True. + 2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . + + Now what if you want also typeA and typeB to be ignored when comparing against each other? + + 1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] + 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] + +ignore_string_type_changes: Boolean, default = False + string type conversions should not affect the hash output when this is set to True. + For example "Hello" and b"Hello" should produce the same hash. + + By setting it to True, both the string and bytes of hello return the same hash. + + +ignore_numeric_type_changes: Boolean, default = False + numeric type conversions should not affect the hash output when this is set to True. + For example 10, 10.0 and Decimal(10) should produce the same hash. + When ignore_numeric_type_changes is set to True, all numbers are converted + to strings with the precision of significant_digits parameter and number_format_notation notation. + If no significant_digits is passed by the user, a default value of 12 is used. + + +ignore_type_subclasses + Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. + + +ignore_string_case + Whether to be case-sensitive or not when comparing strings. 
By setting ignore_string_case=True, strings will be compared case-insensitively. + + +ignore_private_variables: Boolean, default = True + Whether to exclude the private variables in the calculations or not. It only affects variables that start with double underscores (__). + + +ignore_encoding_errors: Boolean, default = False + If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the encodings parameter. + +ignore_iterable_order: Boolean, default = True + If order of items in an iterable should not cause the hash of the iterable to be different. + +number_format_notation : string, default="f" + number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. + + +significant_digits : int >= 0, default=None + By default the significant_digits compares only that many digits AFTER the decimal point. However you can override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. + + Important: This will affect ANY number comparison when it is set. + + Note: If ignore_numeric_type_changes is set to True and you have left significant_digits to the default of None, it gets automatically set to 12. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change. However when ignore_numeric_type_changes=True, in order to compare numbers from different types to each other, we need to convert them all into strings. 
The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. + + Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits when the number_format_notation is left as the default of "f" meaning fixed point. + + Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 + + For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) + + When you set the number_format_notation="e", we use "{:.Xe}".format(Your Number) where X=significant_digits. + +truncate_datetime: string, default = None + Can take value one of 'second', 'minute', 'hour', 'day' and truncate with this value datetime objects before hashing it + + + +**Returns** + A dictionary of {item: item hash}. + If your object is nested, it will build hashes of all the objects it contains too. + + +.. note:: + DeepHash output is not like conventional hash functions. It is a dictionary of object IDs to their hashes. This happens because DeepHash calculates the hash of the object and any other objects found within the object in a recursive manner. If you only need the hash of the object you are passing, all you need to do is to do: + + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + >>> DeepHash(obj)[obj] # doctest: +SKIP + + +**Examples** + +Let's say you have a dictionary object. + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + +If you try to hash it: + >>> hash(obj) + Traceback (most recent call last): + File "", line 1, in + TypeError: unhashable type: 'dict' + +But with DeepHash: + + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + >>> DeepHash(obj) # doctest: +SKIP + + So what is exactly the hash of obj in this case? + DeepHash is calculating the hash of the obj and any other object that obj contains. + The output of DeepHash is a dictionary of object IDs to their hashes. 
+ In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: + + >>> hashes = DeepHash(obj) + >>> hashes[obj] + 'bf5478de322aa033da36bf3bcf9f0599e13a520773f50c6eb9f2487377a7929b' + + Which you can write as: + + >>> hashes = DeepHash(obj)[obj] + + At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. + + If you prefer to use another hashing algorithm, you can pass it using the hasher parameter. + + If you do a deep copy of the obj, it should still give you the same hash: + + >>> from copy import deepcopy + >>> obj2 = deepcopy(obj) + >>> DeepHash(obj2)[obj2] + 'bf5478de322aa033da36bf3bcf9f0599e13a520773f50c6eb9f2487377a7929b' + + Note that by default DeepHash will include string type differences. So if your strings were bytes: + + >>> obj3 = {1: 2, b'a': b'b'} + >>> DeepHash(obj3)[obj3] + '71db3231177d49f78b52a356ca206e6179417b681604d00ed703a077049e3300' + + But if you want the same hash if string types are different, set ignore_string_type_changes to True: + + >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] + 'e60c2befb84be625037c75e1e26d0bfc85a0ffc1f3cde9500f68f6eac55e5ad6' + + ignore_numeric_type_changes is by default False too. + + >>> from decimal import Decimal + >>> obj1 = {4:10} + >>> obj2 = {4.0: Decimal(10.0)} + >>> DeepHash(obj1)[4] == DeepHash(obj2)[4.0] + False + + But by setting it to True, we can get the same hash. + + >>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0] + True + +number_format_notation: String, default = "f" + number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. 
+ + +ignore_string_type_changes: Boolean, default = True + By setting it to True, both the string and bytes of hello return the same hash. + + >>> DeepHash(b'hello', ignore_string_type_changes=True)[b'hello'] + '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' + >>> DeepHash('hello', ignore_string_type_changes=True)['hello'] + '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' + + +ignore_numeric_type_changes: Boolean, default = False + For example if significant_digits=5, 1.1, Decimal(1.1) are both converted to 1.10000 + + That way they both produce the same hash. + + >>> t1 = {1: 1, 2: 2.22} + >>> DeepHash(t1)[1] + 'c1800a30c736483f13615542e7096f7973631fef8ca935ee1ed9f35fb06fd44e' + >>> DeepHash(t1, ignore_numeric_type_changes=True)[1] == DeepHash(t1, ignore_numeric_type_changes=True)[1.0] + True + + You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: + + >>> from deepdiff import DeepDiff + >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] + + +ignore_type_in_groups example with custom objects: + + >>> class Burrito: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> class Taco: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> burrito = Burrito() + >>> taco = Taco() + >>> + >>> burritos = [burrito] + >>> tacos = [taco] + >>> + >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)]) + >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)]) + >>> d1[burrito] == d2[taco] + True + + +ignore_type_subclasses + Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. + + >>> from deepdiff import DeepHash + >>> + >>> class ClassB: + ... 
def __init__(self, x): + ... self.x = x + ... def __repr__(self): + ... return "obj b" + ... + >>> + >>> class ClassC(ClassB): + ... def __repr__(self): + ... return "obj c" + ... + >>> obj_b = ClassB(1) + >>> obj_c = ClassC(1) + >>> + >>> # By default, subclasses are considered part of the type group. + ... # ignore_type_in_groups=[(ClassB, )] matches ClassC too since it's a subclass. + ... hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )]) + >>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )]) + >>> hashes_b[obj_b] == hashes_c[obj_c] + True + >>> + >>> # With ignore_type_subclasses=True, only exact type matches count. + ... # ClassC no longer matches (ClassB, ) group, so hashes differ. + ... hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True) + >>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True) + >>> hashes_b[obj_b] != hashes_c[obj_c] + True + +ignore_string_case + Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. + + >>> from deepdiff import DeepHash + >>> DeepHash('hello')['hello'] == DeepHash('heLLO')['heLLO'] + False + >>> DeepHash('hello', ignore_string_case=True)['hello'] == DeepHash('heLLO', ignore_string_case=True)['heLLO'] + True + +exclude_obj_callback + function, default = None + A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. + This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. + + >>> def exclude_obj_callback(obj, path): + ... return True if isinstance(obj, str) and obj in ('x', 'y') else False + ... 
+ >>> dic1 = {"x": 1, "y": 2, "z": 3} + >>> t1 = [dic1] + >>> t1_hash = DeepHash(t1, exclude_obj_callback=exclude_obj_callback) + >>> + >>> dic2 = {"z": 3} + >>> t2 = [dic2] + >>> t2_hash = DeepHash(t2, exclude_obj_callback=exclude_obj_callback) + >>> + >>> t1_hash[t1] == t2_hash[t2] + True + +number_format_notation : string, default="f" + When numbers are converted to the string, you have the choices between "f" as fixed point and "e" as scientific notation: + + >>> t1=10002 + >>> t2=10004 + >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="f") + >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="f") + >>> + >>> t1_hash[t1] == t2_hash[t2] + False + >>> + >>> + >>> # Now we use the scientific notation + ... t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e") + >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e") + >>> + >>> t1_hash[t1] == t2_hash[t2] + True + +Defining your own number_to_string_func + Lets say you want the hash of numbers below 100 to be the same for some reason. + + >>> from deepdiff import DeepHash + >>> from deepdiff.helper import number_to_string + >>> def custom_number_to_string(number, *args, **kwargs): + ... number = 100 if number < 100 else number + ... return number_to_string(number, *args, **kwargs) + ... + >>> t1 = [10, 12, 100000] + >>> t2 = [50, 63, 100021] + >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) + >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) + >>> t1_hash[t1] == t2_hash[t2] + True + + So both lists produced the same hash thanks to the low significant digits for 100000 vs 100021 and also the custom_number_to_string that converted all numbers below 100 to be 100! 
diff --git a/deepdiff/docstrings/delta.rst b/deepdiff/docstrings/delta.rst new file mode 100644 index 00000000..a8ab0d3f --- /dev/null +++ b/deepdiff/docstrings/delta.rst @@ -0,0 +1,539 @@ +.. _delta_label: + +Delta +===== + +DeepDiff Delta is a directed delta that when applied to t1 can yield t2 where delta is the difference between t1 and t2. +Delta objects are like git commits but for structured data. +You can convert the diff results into Delta objects, store the deltas, and later apply to other objects. + +.. note:: + If you plan to generate Delta objects from the DeepDiff result, and ignore_order=True, you need to also set the report_repetition=True. + +**Parameters** + +diff : Delta dictionary, Delta dump payload or a DeepDiff object, default=None. + :ref:`delta_diff_label` is the content to be loaded. + +delta_path : String, default=None. + :ref:`delta_path_label` is the local path to the delta dump file to be loaded + +delta_file : File Object, default=None. + :ref:`delta_file_label` is the file object containing the delta data. + +delta_diff : Delta diff, default=None. + This is a slightly different diff than the output of DeepDiff. When Delta object is initiated from the DeepDiff output, it transforms the diff into a slightly different structure that is more suitable for delta. You can find that object via delta.diff. + It is the same object that is serialized when you create a delta dump. If you already have the delta_diff object, you can pass it to Delta via the delta_diff parameter. + +flat_dict_list : List of flat dictionaries, default=None, + :ref:`flat_dict_list_label` can be used to load the delta object from a list of flat dictionaries. + +.. note:: + You need to pass only one of the diff, delta_path, or delta_file parameters. + +deserializer : Deserializer function, default=pickle_load + :ref:`delta_deserializer_label` is the function to deserialize the delta content. The default is the pickle_load function that comes with DeepDiff. 
+ +serializer : Serializer function, default=pickle_dump + :ref:`delta_serializer_label` is the function to serialize the delta content into a format that can be stored. The default is the pickle_dump function that comes with DeepDiff. + +log_errors : Boolean, default=True + Whether to log the errors or not when applying the delta object. + +raise_errors : Boolean, default=False + :ref:`raise_errors_label` + Whether to raise errors or not when applying a delta object. + +mutate : Boolean, default=False. + :ref:`delta_mutate_label` defines whether to mutate the original object when adding the delta to it or not. + Note that this parameter is not always successful in mutating. For example if your original object + is an immutable type such as a frozenset or a tuple, mutation will not succeed. + Hence it is recommended to keep this parameter as the default value of False unless you are sure + that you do not have immutable objects. There is a small overhead of doing deepcopy on the original + object when mutate=False. If performance is a concern and modifying the original object is not a big deal, + set the mutate=True but always reassign the output back to the original object. + +safe_to_import : Set, default=None. + :ref:`delta_safe_to_import_label` is a set of modules that needs to be explicitly white listed to be loaded + Example: {'mymodule.MyClass', 'decimal.Decimal'} + Note that this set will be added to the basic set of modules that are already white listed. + The set of what is already white listed can be found in deepdiff.serialization.SAFE_TO_IMPORT + +bidirectional : Boolean, default=False + :ref:`delta_verify_symmetry_label` is used to verify that the original value of items are the same as when the delta was created. Note that in order for this option to work, the delta object will need to store more data and thus the size of the object will increase. Let's say that the diff object says root[0] changed value from X to Y. 
If you create the delta with the default value of bidirectional=False, then what delta will store is root[0] = Y. And if this delta was applied to an object that has any root[0] value, it will still set the root[0] to Y. However if bidirectional=True, then the delta object will store also that the original value of root[0] was X and if you try to apply the delta to an object that has root[0] of any value other than X, it will notify you. + +force : Boolean, default=False + :ref:`delta_force_label` is used to force apply a delta to objects that have a different structure than what the delta was originally created from. + +always_include_values : Boolean, default=False + :ref:`always_include_values_label` is used to make sure the delta object includes the values that were changed. Sometimes Delta tries to be efficient and does not include the values when it can get away with it. By setting this parameter to True, you ensure that the Delta object will include the values. + +.. _delta_fill: + +fill : Any, default=No Fill + This is only relevant if `force` is set. This parameter only applies when force is set and trying to fill an existing array. If the index of the array being applied is larger than the length of the array this value will be used to fill empty spaces of the array to extend it in order to add the new value. If this parameter is not set, the items will get dropped and the array not extended. If this parameter is set with a callable function, it will get called each time a fill item is needed. It will be provided with three arguments: first argument is the array being filled, second argument is the value that is being added to the array, the third argument is the path that is being added. + Example function: `def fill(obj, value, path): return "Camry" if "car" in path else None` + + +**Returns** + + A delta object that can be added to t1 to recreate t2. 
+ + Delta objects can contain the following vocabulary: + + iterable_item_added + iterable_item_moved + iterable_item_removed + set_item_added + set_item_removed + dictionary_item_added + dictionary_item_removed + attribute_added + attribute_removed + type_changes + values_changed + iterable_items_added_at_indexes + iterable_items_removed_at_indexes + + +.. _delta_diff_label: + +Diff to load in Delta +--------------------- + +diff : Delta dictionary, Delta dump payload or a DeepDiff object, default=None. + diff is the content to be loaded. + +>>> from deepdiff import DeepDiff, Delta +>>> from pprint import pprint +>>> +>>> t1 = [1, 2, 3] +>>> t2 = ['a', 2, 3, 4] +>>> diff = DeepDiff(t1, t2) +>>> diff +{'type_changes': {'root[0]': {'old_type': <class 'int'>, 'new_type': <class 'str'>, 'old_value': 1, 'new_value': 'a'}}, 'iterable_item_added': {'root[3]': 4}} +>>> delta = Delta(diff) +>>> delta # doctest: +SKIP + + +Applying the delta object to t1 will yield t2: + +>>> t1 + delta +['a', 2, 3, 4] +>>> t1 + delta == t2 +True + +If we want to subtract a delta, we need to create a bidirectional delta: + +>>> delta = Delta(diff, bidirectional=True) +>>> t2 - delta +[1, 2, 3] +>>> t2 - delta == t1 +True + +Now let's dump the delta object so we can store it. + +>>> dump = delta.dumps() +>>> +>>> dump # doctest: +SKIP + +The dumps() function gives us the serialized content of the delta in the form of bytes. We could store it however we want. Or we could use the dump(file_object) to write the dump to the file_object instead. But before we try the dump(file_object) method, let's create a new Delta object and reapply it to t1 and see if we still get t2: + +>>> delta2 = Delta(dump) +>>> t1 + delta2 == t2 +True +>>> + +.. _delta_path_label: + +Delta Path parameter +-------------------- + +Ok now we can try the dump(file_object). It does what you expect: + +>>> with open('/tmp/delta1', 'wb') as dump_file: +... delta.dump(dump_file) +... 
+ +And we use the delta_path parameter to load the delta + +>>> delta3 = Delta(delta_path='/tmp/delta1') + +It still gives us the same result when applied. + +>>> t1 + delta3 == t2 +True + + +.. _delta_file_label: + +Delta File parameter +-------------------- + +You can also pass a file object containing the delta dump: + +>>> with open('/tmp/delta1', 'rb') as dump_file: +... delta4 = Delta(delta_file=dump_file) +... +>>> t1 + delta4 == t2 +True + + +.. _flat_dict_list_label: + +Flat Dict List +-------------- + +You can create a delta object from the list of flat dictionaries that are produced via :ref:`to_flat_dicts_label`. Read more on :ref:`delta_from_flat_dicts_label`. + +.. _flat_rows_list_label: + +Flat Rows List +-------------- + +You can create a delta object from the list of flat dictionaries that are produced via :ref:`delta_to_flat_rows_label`. Read more on :ref:`delta_to_flat_rows_label`. + + +.. _delta_deserializer_label: + +Delta Deserializer +------------------ + +DeepDiff by default uses a restricted Python pickle function to deserialize the Delta dumps. Read more about :ref:`delta_dump_safety_label`. + +The user of Delta can decide to switch the serializer and deserializer to their custom ones. The serializer and deserializer parameters can be used exactly for that reason. The best way to come up with your own serializer and deserializer is to take a look at the `pickle_dump and pickle_load functions in the serializer module `_ + +.. _delta_json_deserializer_label: + +Json Deserializer for Delta +``````````````````````````` + +If all you deal with are Json serializable objects, you can use json for serialization. 
+ +>>> from deepdiff import DeepDiff, Delta +>>> from deepdiff.serialization import json_dumps, json_loads +>>> t1 = {"a": 1} +>>> t2 = {"a": 2} +>>> +>>> diff = DeepDiff(t1, t2) +>>> delta = Delta(diff, serializer=json_dumps) +>>> dump = delta.dumps() +>>> dump +'{"values_changed":{"root[\'a\']":{"new_value":2}}}' +>>> delta_reloaded = Delta(dump, deserializer=json_loads) +>>> t2 == delta_reloaded + t1 +True + + +.. note:: + + Json is very limited and easily you can get to deltas that are not json serializable. You will probably want to extend the Python's Json serializer to support your needs. + + >>> import json + >>> t1 = {"a": 1} + >>> t2 = {"a": None} + >>> diff = DeepDiff(t1, t2) + >>> diff + {'type_changes': {"root['a']": {'old_type': , 'new_type': , 'old_value': 1, 'new_value': None}}} + >>> Delta(diff, serializer=json.dumps) # doctest: +SKIP + + >>> delta = Delta(diff, serializer=json.dumps) + >>> dump = delta.dumps() # doctest: +ELLIPSIS + Traceback (most recent call last): + ... + TypeError: Object of type type is not JSON serializable... + +.. _delta_serializer_label: + +Delta Serializer +---------------- + +DeepDiff uses pickle to serialize delta objects by default. Please take a look at the :ref:`delta_deserializer_label` for more information. + + +.. _to_flat_dicts_label: + +Delta Serialize To Flat Dictionaries +------------------------------------ + +Read about :ref:`delta_to_flat_dicts_label` + +.. _delta_dump_safety_label: + +Delta Dump Safety +----------------- + +Delta by default uses Python's pickle to serialize and deserialize. While the unrestricted use of pickle is not safe as noted in the `pickle's documentation `_ , DeepDiff's Delta is written with extra care to `restrict the globals `_ and hence mitigate this security risk. + +In fact only a few Python object types are allowed by default. The user of DeepDiff can pass additional types using the :ref:`delta_safe_to_import_label` to allow further object types that need to be allowed. 
+ + +.. _delta_mutate_label: + +Delta Mutate parameter +---------------------- + +mutate : Boolean, default=False. + delta_mutate defines whether to mutate the original object when adding the delta to it or not. + Note that this parameter is not always successful in mutating. For example if your original object + is an immutable type such as a frozenset or a tuple, mutation will not succeed. + Hence it is recommended to keep this parameter as the default value of False unless you are sure + that you do not have immutable objects. There is a small overhead of doing deepcopy on the original + object when mutate=False. If performance is a concern and modifying the original object is not a big deal, + set the mutate=True but always reassign the output back to the original object. + +For example: + +>>> t1 = [1, 2, [3, 5, 6]] +>>> t2 = [2, 3, [3, 6, 8]] + +>>> diff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) +>>> diff +{'values_changed': {'root[0]': {'new_value': 3, 'old_value': 1}, 'root[2][1]': {'new_value': 8, 'old_value': 5}}} +>>> delta = Delta(diff) +>>> delta # doctest: +SKIP + + +Note that we can apply delta to objects different than the original objects they were made from: + +>>> t3 = ["a", 2, [3, "b", "c"]] +>>> t3 + delta +[3, 2, [3, 8, 'c']] + +If we check t3, it is still the same as the original value of t3: + +>>> t3 +['a', 2, [3, 'b', 'c']] + +Now let's make the delta with mutate=True + +>>> delta2 = Delta(diff, mutate=True) +>>> t3 + delta2 +[3, 2, [3, 8, 'c']] +>>> t3 +[3, 2, [3, 8, 'c']] + +Applying the delta to t3 mutated the t3 itself in this case! + + +.. 
_delta_and_numpy_label: + +Delta and Numpy +--------------- + +>>> from deepdiff import DeepDiff, Delta +>>> import numpy as np +>>> t1 = np.array([1, 2, 3, 5]) +>>> t2 = np.array([2, 2, 7, 5]) +>>> diff = DeepDiff(t1, t2) +>>> diff +{'values_changed': {'root[0]': {'new_value': np.int64(2), 'old_value': np.int64(1)}, 'root[2]': {'new_value': np.int64(7), 'old_value': np.int64(3)}}} +>>> delta = Delta(diff) + +.. note:: + When applying delta to Numpy arrays, make sure to put the delta object first and the numpy array second. This is because Numpy array overrides the + operator and thus DeepDiff's Delta won't be able to be applied. + + >>> t1 + delta + Traceback (most recent call last): + File "", line 1, in + raise DeltaNumpyOperatorOverrideError(DELTA_NUMPY_OPERATOR_OVERRIDE_MSG) + deepdiff.delta.DeltaNumpyOperatorOverrideError: A numpy ndarray is most likely being added to a delta. Due to Numpy override the + operator, you can only do: delta + ndarray and NOT ndarray + delta + +Let's put the delta first then: + +>>> delta + t1 +array([2, 2, 7, 5]) +>>> delta + t2 == t2 +array([ True, True, True, True]) + + +.. note:: + You can apply a delta that was created from normal Python objects to Numpy arrays. But it is not recommended. + +.. _raise_errors_label: + +Delta Raise Errors parameter +---------------------------- + +raise_errors : Boolean, default=False + Whether to raise errors or not when applying a delta object. + +>>> from deepdiff import DeepDiff, Delta +>>> t1 = [1, 2, [3, 5, 6]] +>>> t2 = [2, 3, [3, 6, 8]] +>>> diff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) +>>> delta = Delta(diff, raise_errors=False) + +Now let's apply the delta to a very different object: + +>>> t3 = [1, 2, 3, 5] +>>> t4 = t3 + delta # doctest: +SKIP + +We get a log message that it was unable to get the item at root[2][1]. 
We get the message since by default log_errors=True + +Let's see what t4 is now: + +>>> t4 # doctest: +SKIP +[3, 2, 3, 5] + +So the delta was partially applied on t3. + +Now let's set the raise_errors=True + +>>> delta2 = Delta(diff, raise_errors=True) +>>> +>>> t3 + delta2 # doctest: +ELLIPSIS +Traceback (most recent call last): + ... +deepdiff.delta.DeltaError: Unable to get the item at root[2][1] + + +.. _delta_safe_to_import_label: + +Delta Safe To Import parameter +------------------------------ + +safe_to_import : Set, default=None. + safe_to_import is a set of modules that needs to be explicitly white listed to be loaded + Example: {'mymodule.MyClass', 'decimal.Decimal'} + Note that this set will be added to the basic set of modules that are already white listed. + + +As noted in :ref:`delta_dump_safety_label` and :ref:`delta_deserializer_label`, DeepDiff's Delta takes safety very seriously and thus limits the globals that can be deserialized when importing. However on occasions that you need a specific type (class) that needs to be used in delta objects, you need to pass it to the Delta via safe_to_import parameter. + +The set of what is already white listed can be found in deepdiff.serialization.SAFE_TO_IMPORT +At the time of writing this document, this list consists of: + +>>> from deepdiff.serialization import SAFE_TO_IMPORT +>>> from pprint import pprint +>>> pprint(SAFE_TO_IMPORT) # doctest: +SKIP +frozenset({'builtins.None', + 'builtins.bin', + 'builtins.bool', + ...}) + +If you want to pass any other argument to safe_to_import, you will need to put the full path to the type as it appears in the sys.modules + +For example let's say you have a package called mypackage that has a module called mymodule. If you check the sys.modules, the address to this module must be mypackage.mymodule. In order for Delta to be able to serialize this object via pickle, first of all it has to be `picklable `_. 
+ +>>> diff = DeepDiff(t1, t2) +>>> delta = Delta(diff) +>>> dump = delta.dumps() + +The dump at this point is serialized via Pickle and can be written to disc if needed. + +Later when you want to load this dump, by default Delta will block you from importing anything that is NOT in deepdiff.serialization.SAFE_TO_IMPORT . In fact it will show you this error message when trying to load this dump: + + deepdiff.serialization.ForbiddenModule: Module 'builtins.type' is forbidden. You need to explicitly pass it by passing a safe_to_import parameter + +In order to let Delta know that this specific module is safe to import, you will need to pass it to Delta during loading of this dump: + +>>> delta = Delta(dump, safe_to_import={'mypackage.mymodule'}) + +.. note :: + + If you pass a custom deserializer to Delta, DeepDiff will pass safe_to_import parameter to the custom deserializer if that deserializer takes safe_to_import as a parameter in its definition. + For example if you just use json.loads as deserializer, the safe_to_import items won't be passed to it since json.loads does not have such a parameter. + + +.. _delta_verify_symmetry_label: + +Delta Verify Symmetry parameter +------------------------------- + +bidirectional : Boolean, default=False + bidirectional is used to include all the required information so that we can use the delta object both for addition and subtraction. It will also check that the object you are adding the delta to, has the same values as the original object that the delta was created from. + + It complains if the object is not what it expected to be. + + +>>> from deepdiff import DeepDiff, Delta +>>> t1 = [1] +>>> t2 = [2] +>>> t3 = [3] +>>> +>>> diff = DeepDiff(t1, t2) +>>> +>>> delta2 = Delta(diff, raise_errors=False, bidirectional=True) +>>> t4 = delta2 + t3 # doctest: +SKIP +>>> t4 # doctest: +SKIP +[2] + +And if you had set raise_errors=True, then it would have raised the error in addition to logging it. + + +.. 
_delta_force_label: + +Delta Force +----------- + +force : Boolean, default=False + force is used to force apply a delta to objects that have a different structure than what the delta was originally created from. + + +>>> from deepdiff import DeepDiff, Delta +>>> t1 = { +... 'x': { +... 'y': [1, 2, 3] +... }, +... 'q': { +... 'r': 'abc', +... } +... } +>>> +>>> t2 = { +... 'x': { +... 'y': [1, 2, 3, 4] +... }, +... 'q': { +... 'r': 'abc', +... 't': 0.5, +... } +... } +>>> +>>> diff = DeepDiff(t1, t2) +>>> diff +{'dictionary_item_added': ["root['q']['t']"], 'iterable_item_added': {"root['x']['y'][3]": 4}} +>>> delta = Delta(diff) +>>> {} + delta # doctest: +SKIP +{} + +Once we set the force to be True + +>>> delta = Delta(diff, force=True) +>>> {} + delta +{'x': {'y': {3: 4}}, 'q': {'t': 0.5}} + +Notice that the force attribute does not know the original object at ['x']['y'] was supposed to be a list, so it assumes it was a dictionary. + + +.. _always_include_values_label: + +Always Include Values +--------------------- + +always_include_values is used to make sure the delta object includes the values that were changed. Sometimes Delta tries to be efficient and does not include the values when it can get away with it. By setting this parameter to True, you ensure that the Delta object will include the values. + +For example, when the type of an object changes, if we can easily convert from one type to the other, the Delta object does not include the values: + + +>>> from deepdiff import DeepDiff, Delta +>>> diff = DeepDiff(t1=[1, 2], t2=[1, '2']) +>>> diff +{'type_changes': {'root[1]': {'old_type': , 'new_type': , 'old_value': 2, 'new_value': '2'}}} +>>> delta=Delta(diff) +>>> delta # doctest: +SKIP + + +As you can see the delta object does not include the values that were changed. 
Now let's pass always_include_values=True: + +>>> delta=Delta(diff, always_include_values=True) +>>> delta.diff +{'type_changes': {'root[1]': {'old_type': , 'new_type': , 'new_value': '2'}}} + +If we want to make sure the old values stay with delta, we pass bidirectional=True. By doing so we can also use the delta object to subtract from other objects. + +>>> delta=Delta(diff, always_include_values=True, bidirectional=True) +>>> delta.diff +{'type_changes': {'root[1]': {'old_type': , 'new_type': , 'old_value': 2, 'new_value': '2'}}} + diff --git a/deepdiff/docstrings/diff.rst b/deepdiff/docstrings/diff.rst new file mode 100644 index 00000000..23d67c9c --- /dev/null +++ b/deepdiff/docstrings/diff.rst @@ -0,0 +1,30 @@ +:doc:`/index` + +.. _deepdiff_label: + +DeepDiff +======== + +.. automodule:: deepdiff.diff + +.. autoclass:: DeepDiff + :members: + +.. toctree:: + :maxdepth: 3 + + basics + custom + deep_distance + exclude_paths + ignore_order + ignore_types_or_values + numbers + optimizations + other + serialization + stats + troubleshoot + view + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/diff_doc.rst b/deepdiff/docstrings/diff_doc.rst new file mode 100644 index 00000000..0f13bd9b --- /dev/null +++ b/deepdiff/docstrings/diff_doc.rst @@ -0,0 +1,233 @@ +:orphan: + +DeepDiff Module +=============== + +.. |qluster_link| raw:: html + + Qluster + +DeepDiff is now part of |qluster_link|. + +*If you're building workflows around data validation and correction,* `Qluster `__ *gives your team a structured way to manage rules, review failures, approve fixes, and reuse decisions—without building the entire system from scratch.* + +Deep Difference of dictionaries, iterables, strings and almost any other object. +It will recursively look for all the changes. + +.. Note:: + |:mega:| **Please fill out our** `fast 5-question survey `__ so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! 
|:dancers:| + +**Parameters** + +t1 : A dictionary, list, string or any python object that has __dict__ or __slots__ + This is the first item to be compared to the second item + +t2 : dictionary, list, string or almost any python object that has __dict__ or __slots__ + The second item is to be compared to the first one + +cutoff_distance_for_pairs : 1 >= float >= 0, default=0.3 + :ref:`cutoff_distance_for_pairs_label` What is the threshold to consider 2 items as pairs. + Note that it is only used when ignore_order = True. + +cutoff_intersection_for_pairs : 1 >= float >= 0, default=0.7 + :ref:`cutoff_intersection_for_pairs_label` What is the threshold to calculate pairs of items between 2 iterables. + For example 2 iterables that have nothing in common, do not need their pairs to be calculated. + Note that it is only used when ignore_order = True. + +cache_size : int >= 0, default=0 + :ref:`cache_size_label` Cache size to be used to improve the performance. A cache size of zero means it is disabled. + Using the cache_size can dramatically improve the diff performance especially for the nested objects at the cost of more memory usage. + +cache_purge_level: int, 0, 1, or 2. default=1 + :ref:`cache_purge_level` defines what objects in DeepDiff should be deleted to free the memory once the diff object is calculated. If this value is set to zero, most of the functionality of the diff object is removed and the most memory is released. A value of 1 preserves all the functionalities of the diff object. A value of 2 also preserves the cache and hashes that were calculated during the diff calculations. In most cases the user does not need to have those objects remain in the diff unless for investigation purposes. + +cache_tuning_sample_size : int >= 0, default = 0 + :ref:`cache_tuning_sample_size_label` This is an experimental feature. It works hand in hand with the :ref:`cache_size_label`. 
When cache_tuning_sample_size is set to anything above zero, it will sample the cache usage with the passed sample size and decide whether to use the cache or not. And will turn it back on occasionally during the diffing process. This option can be useful if you are not sure if you need any cache or not. However you will gain much better performance with keeping this parameter zero and running your diff with different cache sizes and benchmarking to find the optimal cache size. + +custom_operators : BaseOperator subclasses, default = None + :ref:`custom_operators_label` Use this when the built-in comparison is not enough, for example if you only want to compare whether items are fruits or not. In that case, you can pass *custom_operators* for the job. + +default_timezone : datetime.timezone subclasses or pytz datetimes, default = datetime.timezone.utc + :ref:`default_timezone_label` defines the default timezone. If a datetime is timezone naive, which means it doesn't have a timezone, we assume the datetime is in this timezone. Also any datetime that has a timezone will be converted to this timezone so the datetimes can be compared properly all in the same timezone. Note that Python's default behavior assumes the default timezone is your local timezone. DeepDiff's default is UTC, not your local time zone. + +encodings: List, default = None + :ref:`encodings_label` Character encodings to iterate through when we convert bytes into strings. You may want to pass an explicit list of encodings in your objects if you start getting UnicodeDecodeError from DeepHash. Also check out :ref:`ignore_encoding_errors_label` if you can get away with ignoring these errors and don't want to bother with an explicit list of encodings but it will come at the price of slightly less accuracy of the final results. Example: encodings=["utf-8", "latin-1"] + +exclude_paths: list, default = None + :ref:`exclude_paths_label` + List of paths to exclude from the report. If only one item, you can pass it as a string. 
+ Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. + +exclude_regex_paths: list, default = None + :ref:`exclude_regex_paths_label` + List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string or regex compiled object. + +exclude_types: list, default = None + :ref:`exclude_types_label` + List of object types to exclude from the report. + +exclude_obj_callback: function, default = None + :ref:`exclude_obj_callback_label` + A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. + This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. + +exclude_obj_callback_strict: function, default = None + :ref:`exclude_obj_callback_strict_label` + A function that works the same way as exclude_obj_callback, but excludes elements from the result only if the function returns True for both elements. + +include_paths: list, default = None + :ref:`include_paths_label` + List of the only paths to include in the report. If only one item is in the list, you can pass it as a string. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. + +include_obj_callback: function, default = None + :ref:`include_obj_callback_label` + A function that takes the object and its path and returns a Boolean. If True is returned, the object is included in the results, otherwise it is excluded. + This is to give the user a higher level of control than one can achieve via include_paths. + +include_obj_callback_strict: function, default = None + :ref:`include_obj_callback_strict_label` + A function that works the same way as include_obj_callback, but includes elements in the result only if the function returns True for both elements. 
+ +get_deep_distance: Boolean, default = False + :ref:`get_deep_distance_label` will get you the deep distance between objects. The distance is a number between 0 and 1 where zero means there is no diff between the 2 objects and 1 means they are very different. Note that this number should only be used to compare the similarity of 2 objects and nothing more. The algorithm for calculating this number may or may not change in the future releases of DeepDiff. + +group_by: String or a list of size 2, default=None + :ref:`group_by_label` can be used when dealing with the list of dictionaries. It converts them from lists to a single dictionary with the key defined by group_by. The common use case is when reading data from a flat CSV, and the primary key is one of the columns in the CSV. We want to use the primary key instead of the CSV row number to group the rows. The group_by can do 2D group_by by passing a list of 2 keys. + +group_by_sort_key: String or a function + :ref:`group_by_sort_key_label` is used to define how dictionaries are sorted if multiple ones fall under one group. When this parameter is used, group_by converts the lists of dictionaries into a dictionary of keys to lists of dictionaries. Then, :ref:`group_by_sort_key_label` is used to sort between the list. + +hasher: default = DeepHash.sha256hex + Hash function to be used. If you don't want SHA256, you can use your own hash function + by passing hasher=hash. This is for advanced usage and normally you don't need to modify it. + +ignore_order : Boolean, default=False + :ref:`ignore_order_label` ignores order of elements when comparing iterables (lists) + Normally ignore_order does not report duplicates and repetition changes. 
+ In order to report repetitions, set report_repetition=True in addition to ignore_order=True + +ignore_order_func : Function, default=None + :ref:`ignore_order_func_label` Sometimes single *ignore_order* parameter is not enough to do a diff job, + you can use *ignore_order_func* to determine whether the order of certain paths should be ignored + +ignore_string_type_changes: Boolean, default = False + :ref:`ignore_string_type_changes_label` + Whether to ignore string type changes or not. For example b"Hello" vs. "Hello" are considered the same if ignore_string_type_changes is set to True. + +ignore_numeric_type_changes: Boolean, default = False + :ref:`ignore_numeric_type_changes_label` + Whether to ignore numeric type changes or not. For example 10 vs. 10.0 are considered the same if ignore_numeric_type_changes is set to True. + +ignore_type_in_groups: Tuple or List of Tuples, default = None + :ref:`ignore_type_in_groups_label` + ignores types when t1 and t2 are both within the same type group. + +ignore_type_subclasses: Boolean, default = False + :ref:`ignore_type_subclasses_label` + ignore type (class) changes when dealing with the subclasses of classes that were marked to be ignored. + +.. Note:: + ignore_type_subclasses was incorrectly doing the reverse of its job up until DeepDiff 6.7.1 + Please make sure to flip it in your use cases, when upgrading from older versions to 7.0.0 or above. + +ignore_uuid_types: Boolean, default = False + :ref:`ignore_uuid_types_label` + Whether to ignore UUID vs string type differences when comparing. When set to True, comparing a UUID object with its string representation will not report as a type change. + +ignore_string_case: Boolean, default = False + :ref:`ignore_string_case_label` + Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. 
+ +ignore_nan_inequality: Boolean, default = False + :ref:`ignore_nan_inequality_label` + Whether to ignore float('nan') inequality in Python. + + +ignore_private_variables: Boolean, default = True + :ref:`ignore_private_variables_label` + Whether to exclude the private variables in the calculations or not. It only affects variables that start with double underscores (__). + + +ignore_encoding_errors: Boolean, default = False + :ref:`ignore_encoding_errors_label` If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the :ref:`encodings_label` parameter. + + +zip_ordered_iterables: Boolean, default = False + :ref:`zip_ordered_iterables_label`: + When comparing ordered iterables such as lists, DeepDiff tries to find the smallest difference between the two iterables to report. That means that items in the two lists are not paired individually in the order of appearance in the iterables. Sometimes, that is not the desired behavior. Set this flag to True to make DeepDiff pair and compare the items in the iterables in the order they appear. + +iterable_compare_func: + :ref:`iterable_compare_func_label`: + There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass a iterable_compare_func that takes a function pointer to compare two items. The function takes three parameters (x, y, level) and should return True if it is a match, False if it is not a match or raise CannotCompare if it is unable to compare the two. + + +log_frequency_in_sec: Integer, default = 0 + :ref:`log_frequency_in_sec_label` + How often to log the progress. The default of 0 means logging progress is disabled. + If you set it to 20, it will log every 20 seconds. 
This is useful only when running DeepDiff + on massive objects that will take a while to run. If you are only dealing with small objects, keep it at 0 to disable progress logging. + +log_scale_similarity_threshold: float, default = 0.1 + :ref:`use_log_scale_label` along with :ref:`log_scale_similarity_threshold_label` can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. + +log_stacktrace: Boolean, default = False + If True, we log the stacktrace when logging errors. Otherwise we only log the error message. + +max_passes: Integer, default = 10000000 + :ref:`max_passes_label` defines the maximum number of passes to run on objects to pinpoint what exactly is different. This is only used when ignore_order=True. A new pass is started each time 2 iterables are compared in a way that every single item that is different in the first iterable is compared to every single item that is different in the second iterable. + +max_diffs: Integer, default = None + :ref:`max_diffs_label` defines the maximum number of diffs to run on objects to pinpoint what exactly is different. This is only used when ignore_order=True + +math_epsilon: Decimal, default = None + :ref:`math_epsilon_label` uses Python's built-in math.isclose. It defines a tolerance value which is passed to math.isclose(). Any numbers that are within the tolerance will not report as being different. Any numbers outside of that tolerance will show up as different. + +number_format_notation : string, default="f" + :ref:`number_format_notation_label` is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. 
+ +number_to_string_func : function, default=None + :ref:`number_to_string_func_label` is an advanced feature to give the user full control over how numbers are converted to strings for comparison. The default function is defined in https://github.com/seperman/deepdiff/blob/master/deepdiff/helper.py and is called number_to_string. You can define your own function to do that. + +progress_logger: log function, default = logger.info + :ref:`progress_logger_label` defines what logging function to use specifically for progress reporting. This function is only used when progress logging is enabled which happens by setting log_frequency_in_sec to anything above zero. + +report_repetition : Boolean, default=False + :ref:`report_repetition_label` reports repetitions when set True + It only works when ignore_order is set to True too. + +significant_digits : int >= 0, default=None + :ref:`significant_digits_label` defines the number of digits AFTER the decimal point to be used in the comparison. However you can override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. + +truncate_datetime: string, default = None + :ref:`truncate_datetime_label` can take one of the values 'second', 'minute', 'hour', 'day' and truncates datetime objects to that value before hashing them + +threshold_to_diff_deeper: float, default = 0.33 + :ref:`threshold_to_diff_deeper_label` is a number between 0 and 1. When comparing dictionaries that have a small intersection of keys, we will report the dictionary as a new_value instead of reporting individual keys changed. If you set it to zero, you get the same results as DeepDiff 7.0.1 and earlier, which means this feature is disabled. The new default is 0.33 which means if less than one third of keys between dictionaries intersect, report it as a new object. + +use_enum_value: Boolean, default=False + :ref:`use_enum_value_label` makes it so when diffing enum, we use the enum's value. 
It makes it so comparing an enum to a string or any other value is not reported as a type change. + +use_log_scale: Boolean, default=False + :ref:`use_log_scale_label` along with :ref:`log_scale_similarity_threshold_label` can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. + +verbose_level: 2 >= int >= 0, default = 1 + Higher verbose level shows you more details. + For example verbose level 1 shows what dictionary item are added or removed. + And verbose level 2 shows the value of the items that are added or removed too. + +view: string, default = text + :ref:`view_label` + Views are different "formats" of results. Each view comes with its own features. + The choices are text (the default) and tree. + The text view is the original format of the results. + The tree view allows you to traverse through the tree of results. So you can traverse through the tree and see what items were compared to what. + + +**Returns** + + A DeepDiff object that has already calculated the difference of the 2 items. The format of the object is chosen by the view parameter. + +**Supported data types** + +int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple, Numpy, custom objects and more! diff --git a/deepdiff/docstrings/dsearch.rst b/deepdiff/docstrings/dsearch.rst new file mode 100644 index 00000000..4466c7a6 --- /dev/null +++ b/deepdiff/docstrings/dsearch.rst @@ -0,0 +1,19 @@ +:doc:`/index` + +.. _deepsearch_label: + +DeepSearch +========== + +.. toctree:: + :maxdepth: 3 + +.. automodule:: deepdiff.search + +.. autoclass:: grep + :members: + +.. 
autoclass:: DeepSearch + :members: + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/exclude_paths.rst b/deepdiff/docstrings/exclude_paths.rst new file mode 100644 index 00000000..0c9b78a6 --- /dev/null +++ b/deepdiff/docstrings/exclude_paths.rst @@ -0,0 +1,130 @@ +:doc:`/index` + +.. _exclude_paths_label: + +Exclude Paths +============= + +Exclude part of your object tree from comparison. +use exclude_paths and pass a set or list of paths to exclude, if only one item is being passed, then just put it there as a string. No need to pass it as a list then. + +Example + >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} + >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} + >>> print (DeepDiff(t1, t2, exclude_paths="root['ingredients']")) # one item pass it as a string + {} + >>> print (DeepDiff(t1, t2, exclude_paths=["root['ingredients']", "root['ingredients2']"])) # multiple items pass as a list or a set. + {} + +Also for root keys you don't have to pass as "root['key']". You can instead just pass the key: + +Example + >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} + >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} + >>> print (DeepDiff(t1, t2, exclude_paths="ingredients")) # one item pass it as a string + {} + >>> print (DeepDiff(t1, t2, exclude_paths=["ingredients", "ingredients2"])) # multiple items pass as a list or a set. + {} + + +.. _include_paths_label: + +Include Paths +============= + +Only include this part of your object tree in the comparison. +Use include_paths and pass a set or list of paths to limit diffing to only those paths. If only one item is being passed, just put it there as a string—no need to pass it as a list then. 
+ +Example + >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} + >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} + >>> print (DeepDiff(t1, t2, include_paths="root['for life']")) # one item pass it as a string + {} + >>> print (DeepDiff(t1, t2, include_paths=["for life", "ingredients2"])) # multiple items pass as a list or a set and you don't need to pass the full path when dealing with root keys. So instead of "root['for life']" you can pass "for life" + {} + + +When passing include_paths, all the children of that path will be included too. + +Example + >>> t1 = { + ... "foo": {"bar": "potato"}, + ... "ingredients": ["no meat", "no eggs", "no dairy"] + ... } + >>> t2 = { + ... "foo": {"bar": "banana"}, + ... "ingredients": ["bread", "cheese"] + ... } + >>> DeepDiff(t1, t2, include_paths="foo") + {'values_changed': {"root['foo']['bar']": {'new_value': 'banana', 'old_value': 'potato'}}} + + +.. _wildcard_paths_label: + +Wildcard (Glob) Paths +--------------------- + +Both ``exclude_paths`` and ``include_paths`` support wildcard patterns for matching multiple paths at once: + +- ``[*]`` or ``.*`` matches exactly **one** path segment (any key, index, or attribute). +- ``[**]`` or ``.**`` matches **zero or more** path segments at any depth. + +Wildcard patterns must use the full ``root`` prefix (shorthand keys are not supported for wildcards). 
+ +Exclude all ``password`` fields regardless of the parent key: + >>> t1 = {"users": {"alice": {"name": "Alice", "password": "s1"}, "bob": {"name": "Bob", "password": "s2"}}} + >>> t2 = {"users": {"alice": {"name": "Alice", "password": "x1"}, "bob": {"name": "Bob", "password": "x2"}}} + >>> DeepDiff(t1, t2, exclude_paths=["root['users'][*]['password']"]) + {} + +Include only ``name`` fields exactly one level below the root (``[*]`` matches a single segment): + >>> t1 = {"a": {"name": "A", "secret": 1}, "b": {"name": "B", "secret": 2}} + >>> t2 = {"a": {"name": "X", "secret": 1}, "b": {"name": "Y", "secret": 2}} + >>> result = DeepDiff(t1, t2, include_paths=["root[*]['name']"]) + >>> set(result.get('values_changed', {}).keys()) == {"root['a']['name']", "root['b']['name']"} + True + +Use ``[**]`` to match at any depth: + >>> t1 = {"config": {"db": {"password": "old"}, "cache": {"password": "old"}}} + >>> t2 = {"config": {"db": {"password": "new"}, "cache": {"password": "new"}}} + >>> DeepDiff(t1, t2, exclude_paths=["root[**]['password']"]) + {} + +Literal keys named ``*`` or ``**`` are not treated as wildcards when quoted: + >>> t1 = {"*": 1, "a": 2} + >>> t2 = {"*": 10, "a": 20} + >>> result = DeepDiff(t1, t2, exclude_paths=["root['*']"]) + >>> "root['a']" in result.get('values_changed', {}) + True + +When both ``exclude_paths`` and ``include_paths`` apply to the same path, exclusion takes precedence. + +Wildcards also work with ``DeepHash`` and ``DeepSearch`` exclude_paths. + + +.. _exclude_regex_paths_label: + +Exclude Regex Paths +------------------- + +You can also exclude using regular expressions by using ``exclude_regex_paths`` and passing a set or list of path regexes to exclude. The items in the list could be raw regex strings or compiled regex objects. 
+ >>> import re + >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] + >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] + >>> print(DeepDiff(t1, t2, exclude_regex_paths=r"root\[\d+\]\['b'\]")) + {} + >>> exclude_path = re.compile(r"root\[\d+\]\['b'\]") + >>> print(DeepDiff(t1, t2, exclude_regex_paths=[exclude_path])) + {} + +example 2: + >>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} + >>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} + >>> DeepDiff(t1, t2, exclude_regex_paths="\['foo.'\]") # since it is one item in exclude_regex_paths, you don't have to put it in a list or a set. + {} + +Tip: DeepDiff is using re.search on the path. So if you want to force it to match from the beginning of the path, add `^` to the beginning of regex. + + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/extract.rst b/deepdiff/docstrings/extract.rst new file mode 100644 index 00000000..1c4ed730 --- /dev/null +++ b/deepdiff/docstrings/extract.rst @@ -0,0 +1,13 @@ +:doc:`/index` + +.. _extract_label: + + +Extract +======= + +.. automodule:: deepdiff.path + +.. autofunction:: extract + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/faq.rst b/deepdiff/docstrings/faq.rst new file mode 100644 index 00000000..e6f30044 --- /dev/null +++ b/deepdiff/docstrings/faq.rst @@ -0,0 +1,185 @@ +:doc:`/index` + +F.A.Q +===== + +.. |qluster_link| raw:: html + + Qluster + +DeepDiff is now part of |qluster_link|. + +*If you're building workflows around data validation and correction,* `Qluster `__ *gives your team a structured way to manage rules, review failures, approve fixes, and reuse decisions—without building the entire system from scratch.* + +.. Note:: + |:mega:| **Please fill out our** `fast 5-question survey `__ so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! 
|:dancers:| + + +Q: DeepDiff report is not precise when ignore_order=True +-------------------------------------------------------- + + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [ + ... { + ... "key": "some/pathto/customers/foo/", + ... "flags": 0, + ... "value": "" + ... }, + ... { + ... "key": "some/pathto/customers/foo/account_number", + ... "flags": 0, + ... "value": "somevalue1" + ... } + ... ] + >>> + >>> t2 = [ + ... { + ... "key": "some/pathto/customers/foo/account_number", + ... "flags": 0, + ... "value": "somevalue2" + ... }, + ... { + ... "key": "some/pathto/customers/foo/", + ... "flags": 0, + ... "value": "new" + ... } + ... ] + >>> + >>> pprint(DeepDiff(t1, t2)) + {'values_changed': {"root[0]['key']": {'new_value': 'some/pathto/customers/foo/account_number', + 'old_value': 'some/pathto/customers/foo/'}, + "root[0]['value']": {'new_value': 'somevalue2', + 'old_value': ''}, + "root[1]['key']": {'new_value': 'some/pathto/customers/foo/', + 'old_value': 'some/pathto/customers/foo/account_number'}, + "root[1]['value']": {'new_value': 'new', + 'old_value': 'somevalue1'}}} + +**Answer** + +This is explained in :ref:`cutoff_distance_for_pairs_label` and :ref:`cutoff_intersection_for_pairs_label` + +Bump up these 2 parameters to 1 and you get what you want: + + >>> pprint(DeepDiff(t1, t2, ignore_order=True, cutoff_distance_for_pairs=1, cutoff_intersection_for_pairs=1)) + {'values_changed': {"root[0]['value']": {'new_value': 'new', 'old_value': ''}, + "root[1]['value']": {'new_value': 'somevalue2', + 'old_value': 'somevalue1'}}} + + +Q: The report of changes in a nested dictionary is too granular +--------------------------------------------------------------- + +**Answer** + +Use :ref:`threshold_to_diff_deeper_label` + + >>> from deepdiff import DeepDiff + >>> t1 = {"veggie": "carrots"} + >>> t2 = {"meat": "carrots"} + >>> + >>> DeepDiff(t1, t2, threshold_to_diff_deeper=0) + {'dictionary_item_added': ["root['meat']"], 
'dictionary_item_removed': ["root['veggie']"]} + >>> DeepDiff(t1, t2, threshold_to_diff_deeper=0.33) + {'values_changed': {'root': {'new_value': {'meat': 'carrots'}, 'old_value': {'veggie': 'carrots'}}}} + + + +Q: TypeError: Object of type type is not JSON serializable +---------------------------------------------------------- + +I'm trying to serialize the DeepDiff results into json and I'm getting the TypeError. + + >>> diff=DeepDiff(1, "a") + >>> diff + {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': 1, 'new_value': 'a'}}} + >>> json.dumps(diff) + Traceback (most recent call last): + File "", line 1, in + File ".../json/__init__.py", line 231, in dumps + return _default_encoder.encode(obj) + File ".../json/encoder.py", line 199, in encode + chunks = self.iterencode(o, _one_shot=True) + File ".../json/encoder.py", line 257, in iterencode + return _iterencode(o, 0) + File ".../json/encoder.py", line 179, in default + raise TypeError(f'Object of type {o.__class__.__name__} ' + TypeError: Object of type type is not JSON serializable + +**Answer** + +In order to serialize DeepDiff results into json, use to_json() + + >>> diff.to_json() + '{"type_changes": {"root": {"old_type": "int", "new_type": "str", "old_value": 1, "new_value": "a"}}}' + + +Q: How do I parse DeepDiff result paths? 
+---------------------------------------- + +**Answer** + +Use parse_path: + + >>> from deepdiff import parse_path + >>> parse_path("root[1][2]['age']") + [1, 2, 'age'] + >>> parse_path("root[1][2]['age']", include_actions=True) + [{'element': 1, 'action': 'GET'}, {'element': 2, 'action': 'GET'}, {'element': 'age', 'action': 'GET'}] + >>> + >>> parse_path("root['joe'].age") + ['joe', 'age'] + >>> parse_path("root['joe'].age", include_actions=True) + [{'element': 'joe', 'action': 'GET'}, {'element': 'age', 'action': 'GETATTR'}] + +Or use the tree view so you can use path(output_format='list'): + + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff + {'iterable_item_removed': [, ]} + >>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. + >>> # One way to get one item from the set is to convert it to a list + >>> # And then get the first item of the list: + >>> removed = list(ddiff['iterable_item_removed'])[0] + >>> removed + + >>> + >>> parent = removed.up + >>> parent + + >>> parent.path() # gives you the string representation of the path + "root[4]['b']" + >>> parent.path(output_format='list') # gives you the list of keys and attributes that make up the path + [4, 'b'] + + +Q: Why my datetimes are reported in UTC? +---------------------------------------- + +**Answer** + +DeepDiff converts all datetimes into UTC. If a datetime is timezone naive, we assume it is in UTC too. +That is different than what Python does. Python assumes your timezone naive datetime is in your local timezone. However, you can override it to any other time zone such as your :ref:`default_timezone_label`. 
+ + >>> from deepdiff import DeepDiff + >>> from datetime import datetime, timezone + >>> d1 = datetime(2020, 8, 31, 13, 14, 1) + >>> d2 = datetime(2020, 8, 31, 13, 14, 1, tzinfo=timezone.utc) + >>> d1 == d2 + False + >>> DeepDiff(d1, d2) + {} + + >>> d3 = d2.astimezone(pytz.timezone('America/New_York')) + >>> DeepDiff(d1, d3) + {} + >>> d1 == d3 + False + + +--------- + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/ignore_order.rst b/deepdiff/docstrings/ignore_order.rst new file mode 100644 index 00000000..5ca84318 --- /dev/null +++ b/deepdiff/docstrings/ignore_order.rst @@ -0,0 +1,318 @@ +:doc:`/index` + +.. _ignore_order_label: + +Ignore Order +============ + +DeepDiff by default compares objects in the order that it iterates through them in iterables. +In other words if you have 2 lists, then the first item of the lists are compared to each other, then the 2nd items and so on. +That makes DeepDiff be able to run in linear time. + +However, there are often times when you don't care about the order in which the items have appeared. +In such cases DeepDiff needs to do way more work in order to find the differences. + +There are a couple of parameters provided to you to have full control over. + + +List difference with ignore_order=False which is the default: + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'iterable_item_added': {"root[4]['b'][3]": 3}, + 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 'old_value': 2}, + "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} + +Ignore Order +------------ + +List difference ignoring order or duplicates: (with the same dictionaries as above) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} + >>> ddiff = DeepDiff(t1, t2, ignore_order=True) + >>> print (ddiff) + {} + +.. 
_ignore_order_func_label: + +Dynamic Ignore Order +-------------------- + +Sometimes single *ignore_order* parameter is not enough to do a diff job, +you can use *ignore_order_func* to determine whether the order of certain paths should be ignored + +List difference ignoring order with *ignore_order_func* + >>> t1 = {"set": [1,2,3], "list": [1,2,3]} + >>> t2 = {"set": [3,2,1], "list": [3,2,1]} + >>> ddiff = DeepDiff(t1, t2, ignore_order_func=lambda level: "set" in level.path()) + >>> print (ddiff) + { 'values_changed': { "root['list'][0]": {'new_value': 3, 'old_value': 1}, + "root['list'][2]": {'new_value': 1, 'old_value': 3}}} + + +Ignoring order when certain word in the path + >>> from deepdiff import DeepDiff + >>> t1 = {'a': [1, 2], 'b': [3, 4]} + >>> t2 = {'a': [2, 1], 'b': [4, 3]} + >>> DeepDiff(t1, t2, ignore_order=True) + {} + >>> def ignore_order_func(level): + ... return 'a' in level.path() + ... + >>> DeepDiff(t1, t2, ignore_order=True, ignore_order_func=ignore_order_func) + {'values_changed': {"root['b'][0]": {'new_value': 4, 'old_value': 3}, "root['b'][1]": {'new_value': 3, 'old_value': 4}}} + + +.. _report_repetition_label: + +Reporting Repetitions +--------------------- + +List difference ignoring order and reporting repetitions: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [1, 3, 1, 4] + >>> t2 = [4, 4, 1] + >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) + >>> pprint(ddiff, indent=2) + { 'iterable_item_removed': {'root[1]': 3}, + 'repetition_change': { 'root[0]': { 'new_indexes': [2], + 'new_repeat': 1, + 'old_indexes': [0, 2], + 'old_repeat': 2, + 'value': 1}, + 'root[3]': { 'new_indexes': [0, 1], + 'new_repeat': 2, + 'old_indexes': [3], + 'old_repeat': 1, + 'value': 4}}} + +.. _max_passes_label: + +Max Passes +---------- + +max_passes: Integer, default = 10000000 + Maximum number of passes to run on objects to pin point what exactly is different. 
This is only used when ignore_order=True + +If you have deeply nested objects, DeepDiff needs to run multiple passes in order to pin point the difference. +That can dramatically increase the time spent to find the difference. +You can control the maximum number of passes that can be run via the max_passes parameter. + +.. note:: + The definition of pass is whenever 2 iterable objects are being compared with each other and deepdiff decides to compare every single element of one iterable with every single element of the other iterable. + Refer to :ref:`cutoff_distance_for_pairs_label` and :ref:`cutoff_intersection_for_pairs_label` for more info on how DeepDiff decides to start a new pass. + +Max Passes Example + >>> from pprint import pprint + >>> from deepdiff import DeepDiff + >>> + >>> t1 = [ + ... { + ... 'key3': [[[[[1, 2, 4, 5]]]]], + ... 'key4': [7, 8], + ... }, + ... { + ... 'key5': 'val5', + ... 'key6': 'val6', + ... }, + ... ] + >>> + >>> t2 = [ + ... { + ... 'key5': 'CHANGE', + ... 'key6': 'val6', + ... }, + ... { + ... 'key3': [[[[[1, 3, 5, 4]]]]], + ... 'key4': [7, 8], + ... }, + ... ] + >>> + >>> for max_passes in (1, 2, 62, 65): + ... diff = DeepDiff(t1, t2, ignore_order=True, max_passes=max_passes, verbose_level=2) + ... print('-\n----- Max Passes = {} -----'.format(max_passes)) + ... pprint(diff) + ... + DeepDiff has reached the max number of passes of 1. You can possibly get more accurate results by increasing the max_passes parameter. + - + ----- Max Passes = 1 ----- + {'values_changed': {'root[0]': {'new_value': {'key5': 'CHANGE', 'key6': 'val6'}, + 'old_value': {'key3': [[[[[1, 2, 4, 5]]]]], + 'key4': [7, 8]}}, + 'root[1]': {'new_value': {'key3': [[[[[1, 3, 5, 4]]]]], + 'key4': [7, 8]}, + 'old_value': {'key5': 'val5', 'key6': 'val6'}}}} + DeepDiff has reached the max number of passes of 2. You can possibly get more accurate results by increasing the max_passes parameter. 
+ - + ----- Max Passes = 2 ----- + {'values_changed': {"root[0]['key3'][0]": {'new_value': [[[[1, 3, 5, 4]]]], + 'old_value': [[[[1, 2, 4, 5]]]]}, + "root[1]['key5']": {'new_value': 'CHANGE', + 'old_value': 'val5'}}} + DeepDiff has reached the max number of passes of 62. You can possibly get more accurate results by increasing the max_passes parameter. + - + ----- Max Passes = 62 ----- + {'values_changed': {"root[0]['key3'][0][0][0][0]": {'new_value': [1, 3, 5, 4], + 'old_value': [1, 2, 4, 5]}, + "root[1]['key5']": {'new_value': 'CHANGE', + 'old_value': 'val5'}}} + DeepDiff has reached the max number of passes of 65. You can possibly get more accurate results by increasing the max_passes parameter. + - + ----- Max Passes = 65 ----- + {'values_changed': {"root[0]['key3'][0][0][0][0][1]": {'new_value': 3, + 'old_value': 2}, + "root[1]['key5']": {'new_value': 'CHANGE', + 'old_value': 'val5'}}} + + +.. note:: + If there are potential passes left to be run and the max_passes value is reached, DeepDiff will issue a warning. + However the most accurate result might have already been found when there are still potential passes left to be run. + + For example in the above example at max_passes=64, DeepDiff finds the optimal result however it has one more pass + to go before it has run all the potential passes. Hence just for the sake of example we are using max_passes=65 + as an example of a number that doesn't issue warnings. + +.. note:: + Also take a look at :ref:`max_passes_label` + +.. _cutoff_distance_for_pairs_label: + +Cutoff Distance For Pairs +------------------------- + +cutoff_distance_for_pairs : 1 >= float >= 0, default=0.3 + What is the threshold to consider 2 items as potential pairs. + Note that it is only used when ignore_order = True. + +cutoff_distance_for_pairs in combination with :ref:`cutoff_intersection_for_pairs_label` are the parameters that decide whether 2 objects to be paired with each other during ignore_order=True algorithm or not. 
Note that these parameters are mainly used for nested iterables. + +For example by going from the default of cutoff_distance_for_pairs=0.3 to 0.1, we have essentially disallowed the 1.0 and 20.0 to be paired with each other. As you can see, DeepDiff has decided that the :ref:`deep_distance_label` of 1.0 and 20.0 to be around 0.27. Since that is way above cutoff_distance_for_pairs of 0.1, the 2 items are not paired. As a result the lists containing the 2 numbers are directly compared with each other: + + >>> from deepdiff import DeepDiff + >>> t1 = [[1.0]] + >>> t2 = [[20.0]] + >>> DeepDiff(t1, t2, ignore_order=True, cutoff_distance_for_pairs=0.3) + {'values_changed': {'root[0][0]': {'new_value': 20.0, 'old_value': 1.0}}} + >>> DeepDiff(t1, t2, ignore_order=True, cutoff_distance_for_pairs=0.1) + {'values_changed': {'root[0]': {'new_value': [20.0], 'old_value': [1.0]}}} + >>> DeepDiff(1.0, 20.0, get_deep_distance=True) + {'values_changed': {'root': {'new_value': 20.0, 'old_value': 1.0}}, 'deep_distance': 0.2714285714285714} + + +.. _cutoff_intersection_for_pairs_label: + +Cutoff Intersection For Pairs +----------------------------- + +cutoff_intersection_for_pairs : 1 >= float >= 0, default=0.7 + What is the threshold to calculate pairs of items between 2 iterables. + For example 2 iterables that have nothing in common, do not need their pairs to be calculated. + Note that it is only used when ignore_order = True. + +Behind the scene DeepDiff takes the :ref:`deep_distance_label` of objects when running ignore_order=True. +The distance is between zero and 1. +A distance of zero means the items are equal. A distance of 1 means they are 100% different. +When comparing iterables, the cutoff_intersection_for_pairs is used to decide whether to compare every single item in each iterable +with every single item in the other iterable or not. 
If the distance between the 2 iterables is equal or bigger than the +cutoff_intersection_for_pairs, then the 2 iterables items are only compared as added or removed items and NOT modified items. +However, if the distance between 2 iterables is below the cutoff, every single item from each iterable will be compared to every +single item from the other iterable to find the closest "pair" of each item. + +.. note:: + The process of comparing every item to the other is very expensive so :ref:`cutoff_intersection_for_pairs_label` in combination with :ref:`cutoff_distance_for_pairs_label` is used to give acceptable results with much higher speed. + +With a low cutoff_intersection_for_pairs, the 2 iterables above will be considered too +far off from each other to get the individual pairs of items. +So numbers that are not only related to each other via their positions in the lists +and not their values are paired together in the results. + + >>> t1 = [1.0, 2.0, 3.0, 4.0, 5.0] + >>> t2 = [5.0, 3.01, 1.2, 2.01, 4.0] + >>> + >>> DeepDiff(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=0.1) + {'values_changed': {'root[1]': {'new_value': 3.01, 'old_value': 2.0}, 'root[2]': {'new_value': 1.2, 'old_value': 3.0}}, 'iterable_item_added': {'root[3]': 2.01}, 'iterable_item_removed': {'root[0]': 1.0}} + +With the cutoff_intersection_for_pairs of 0.7 (which is the default value), +the 2 iterables will be considered close enough to get pairs of items between the 2. +So 2.0 and 2.01 are paired together for example. 
+ + >>> t1 = [1.0, 2.0, 3.0, 4.0, 5.0] + >>> t2 = [5.0, 3.01, 1.2, 2.01, 4.0] + >>> + >>> DeepDiff(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=0.7) + {'values_changed': {'root[2]': {'new_value': 3.01, 'old_value': 3.0}, 'root[0]': {'new_value': 1.2, 'old_value': 1.0}, 'root[1]': {'new_value': 2.01, 'old_value': 2.0}}} + + +As an example of how much this parameter can affect the results in deeply nested objects, please take a look at :ref:`distance_and_diff_granularity_label`. + + +.. _iterable_compare_func_label2: + +Iterable Compare Func +--------------------- + +New in DeepDiff 5.5.0 + +There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass a `iterable_compare_func` that takes a function pointer to compare two items. The function takes three parameters (x, y, level) and should return `True` if it is a match, `False` if it is not a match or raise `CannotCompare` if it is unable to compare the two. + + +For example take the following objects: + + >>> from deepdiff import DeepDiff + >>> from deepdiff.helper import CannotCompare + >>> + >>> t1 = [ + ... { + ... 'id': 1, + ... 'value': [1] + ... }, + ... { + ... 'id': 2, + ... 'value': [7, 8, 1] + ... }, + ... { + ... 'id': 3, + ... 'value': [7, 8], + ... }, + ... ] + >>> + >>> t2 = [ + ... { + ... 'id': 2, + ... 'value': [7, 8] + ... }, + ... { + ... 'id': 3, + ... 'value': [7, 8, 1], + ... }, + ... { + ... 'id': 1, + ... 'value': [1] + ... }, + ... ] + >>> + >>> DeepDiff(t1, t2, ignore_order=True) + {'values_changed': {"root[2]['id']": {'new_value': 2, 'old_value': 3}, "root[1]['id']": {'new_value': 3, 'old_value': 2}}} + + +Now let's define a compare_func that takes 3 parameters: x, y and level. + + >>> def compare_func(x, y, level=None): + ... try: + ... return x['id'] == y['id'] + ... except Exception: + ... raise CannotCompare() from None + ... 
+ >>> DeepDiff(t1, t2, ignore_order=True, iterable_compare_func=compare_func) + {'iterable_item_added': {"root[2]['value'][2]": 1}, 'iterable_item_removed': {"root[1]['value'][2]": 1}} + +As you can see the results are different. Now items with the same ids are compared with each other. + +.. note:: + + The level parameter of the iterable_compare_func is only used when ignore_order=False. + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/ignore_types_or_values.rst b/deepdiff/docstrings/ignore_types_or_values.rst new file mode 100644 index 00000000..da0d140f --- /dev/null +++ b/deepdiff/docstrings/ignore_types_or_values.rst @@ -0,0 +1,442 @@ +:doc:`/index` + +Ignore Types Or Values +====================== + +DeepDiff provides numerous functionalities for the user to be able to define what paths, item types etc. to be included or ignored during the diffing process. + +As an example, you may have a type change in your objects: + +Type change + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'type_changes': { "root[4]['b']": { 'new_type': , + 'new_value': 'world\n\n\nEnd', + 'old_type': , + 'old_value': [1, 2, 3]}}} + +And if you don't care about the value of items that have changed type, you can set verbose level to 0 + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:"2", 3:3} + >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) + { 'type_changes': { 'root[2]': { 'new_type': , + 'old_type': }}} + + +But what if you did not care about the integer becoming a string with the same value? What if you didn't want 2 -> "2" to be considered a type or value change? Throughout this page you will find different examples of functionalities that can help you achieve what you want. + + +.. _exclude_types_label: + +Exclude Types +------------- + +exclude_types: list, default = None + List of object types to exclude from the report. 
+ +Exclude certain types from comparison: + >>> l1 = logging.getLogger("test") + >>> l2 = logging.getLogger("test2") + >>> t1 = {"log": l1, 2: 1337} + >>> t2 = {"log": l2, 2: 1337} + >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) + {} + +.. _ignore_string_type_changes_label: + +Ignore String Type Changes +-------------------------- + +ignore_string_type_changes: Boolean, default = False + Whether to ignore string type changes or not. For example b"Hello" vs. "Hello" are considered the same if ignore_string_type_changes is set to True. + + >>> DeepDiff(b'hello', 'hello', ignore_string_type_changes=True) + {} + >>> DeepDiff(b'hello', 'hello') + {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': b'hello', 'new_value': 'hello'}}} + +.. _ignore_numeric_type_changes_label: + +Ignore Numeric Type Changes +--------------------------- + +ignore_numeric_type_changes: Boolean, default = False + Whether to ignore numeric type changes or not. For example 10 vs. 10.0 are considered the same if ignore_numeric_type_changes is set to True. + +Example with Decimal + >>> from decimal import Decimal + >>> from deepdiff import DeepDiff + >>> + >>> t1 = Decimal('10.01') + >>> t2 = 10.01 + >>> + >>> DeepDiff(t1, t2) + {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': Decimal('10.01'), 'new_value': 10.01}}} + >>> DeepDiff(t1, t2, ignore_numeric_type_changes=True) + {} + +Note that this parameter only works for comparing numbers with numbers. 
If you compare a number to a string value of the number, this parameter does not solve your problem: + +Example with Fraction + >>> from fractions import Fraction + >>> from deepdiff import DeepDiff + >>> + >>> t1 = Fraction(1, 2) + >>> t2 = 0.5 + >>> + >>> DeepDiff(t1, t2) + {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': Fraction(1, 2), 'new_value': 0.5}}} + >>> DeepDiff(t1, t2, ignore_numeric_type_changes=True) + {} + +Example: + >>> t1 = Decimal('10.01') + >>> t2 = "10.01" + >>> + >>> DeepDiff(t1, t2, ignore_numeric_type_changes=True) + {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': Decimal('10.01'), 'new_value': '10.01'}}} + +If you face repeated patterns of comparing numbers to string values of numbers, you will want to preprocess your input to convert the strings into numbers before feeding it into DeepDiff. + + +.. _ignore_type_in_groups_label: + +Ignore Type In Groups +--------------------- + +ignore_type_in_groups: Tuple or List of Tuples, default = None + Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. While this feature is production ready for strings and numbers, it is still experimental with other custom lists of types, Hence it is recommended to use the shortcuts provided to you which are :ref:`ignore_string_type_changes_label` and :ref:`ignore_numeric_type_changes_label` unless you have a specific need beyond those 2 cases and you need do define your own ignore_type_in_groups. + + For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: + + 1. Set ignore_string_type_changes=True. + 2. Or set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. 
It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . + + Now what if you want also typeA and typeB to be ignored when comparing against each other? + + 1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] + 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] + + +Note: The example below shows you have to use this feature. For enum types, however, you can just use :ref:`use_enum_value_label` + +Example: Ignore Enum to string comparison + >>> from deepdiff import DeepDiff + >>> from enum import Enum + >>> class MyEnum1(Enum): + ... book = "book" + ... cake = "cake" + ... + >>> DeepDiff("book", MyEnum1.book) + {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': 'book', 'new_value': }}} + >>> DeepDiff("book", MyEnum1.book, ignore_type_in_groups=[(Enum, str)]) + {} + + +Example: Ignore Type Number - Dictionary that contains float and integer. Note that this is exactly the same as passing ignore_numeric_type_changes=True. + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = {1: 1, 2: 2.22} + >>> t2 = {1: 1.0, 2: 2.22} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[1]': { 'new_type': , + 'new_value': 1.0, + 'old_type': , + 'old_value': 1}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + >>> pprint(ddiff, indent=2) + {} + +Example: Ignore Type Number - List that contains float and integer. Note that this is exactly the same as passing ignore_numeric_type_changes=True. 
+ >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [1, 2, 3] + >>> t2 = [1.0, 2.0, 3.0] + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[0]': { 'new_type': , + 'new_value': 1.0, + 'old_type': , + 'old_value': 1}, + 'root[1]': { 'new_type': , + 'new_value': 2.0, + 'old_type': , + 'old_value': 2}, + 'root[2]': { 'new_type': , + 'new_value': 3.0, + 'old_type': , + 'old_value': 3}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + >>> pprint(ddiff, indent=2) + {} + +You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: + >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] + + +ignore_type_in_groups example with custom objects: + >>> class Burrito: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> class Taco: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> burrito = Burrito() + >>> taco = Taco() + >>> + >>> burritos = [burrito] + >>> tacos = [taco] + >>> + >>> DeepDiff(burritos, tacos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) + {} + +.. note:: + You can pass list of tuples of types to ignore_type_in_groups or you can put actual values in the tuples and ignore_type_in_groups will extract the type from them. 
The example below has used (1, 1.0) instead of (int, float).
_ignore_uuid_types_label: + +Ignore UUID Types +------------------ + +ignore_uuid_types: Boolean, default = False + Whether to ignore UUID vs string type differences when comparing. When set to True, comparing a UUID object with its string representation will not report as a type change. + +Without ignore_uuid_types: + >>> import uuid + >>> from deepdiff import DeepDiff + >>> test_uuid = uuid.UUID('12345678-1234-5678-1234-567812345678') + >>> uuid_str = '12345678-1234-5678-1234-567812345678' + >>> DeepDiff(test_uuid, uuid_str) + {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': UUID('12345678-1234-5678-1234-567812345678'), 'new_value': '12345678-1234-5678-1234-567812345678'}}} + +With ignore_uuid_types=True: + >>> DeepDiff(test_uuid, uuid_str, ignore_uuid_types=True) + {} + +This works in both directions: + >>> DeepDiff(uuid_str, test_uuid, ignore_uuid_types=True) + {} + +The parameter works with nested structures like dictionaries and lists: + >>> dict1 = {'id': test_uuid, 'name': 'test'} + >>> dict2 = {'id': uuid_str, 'name': 'test'} + >>> DeepDiff(dict1, dict2, ignore_uuid_types=True) + {} + +Note that if the UUID and string represent different values, it will still report as a value change: + >>> different_uuid = uuid.UUID('87654321-4321-8765-4321-876543218765') + >>> DeepDiff(different_uuid, uuid_str, ignore_uuid_types=True) + {'values_changed': {'root': {'old_value': UUID('87654321-4321-8765-4321-876543218765'), 'new_value': '12345678-1234-5678-1234-567812345678'}}} + +This parameter can be combined with other ignore flags: + >>> data1 = {'id': test_uuid, 'name': 'TEST', 'count': 42} + >>> data2 = {'id': uuid_str, 'name': 'test', 'count': 42.0} + >>> DeepDiff(data1, data2, ignore_uuid_types=True, ignore_string_case=True, ignore_numeric_type_changes=True) + {} + + +.. 
_ignore_string_case_label: + +Ignore String Case +------------------ + +ignore_string_case: Boolean, default = False + Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. + + >>> DeepDiff(t1='Hello', t2='heLLO') + {'values_changed': {'root': {'new_value': 'heLLO', 'old_value': 'Hello'}}} + >>> DeepDiff(t1='Hello', t2='heLLO', ignore_string_case=True) + {} + +Ignore Nan Inequality +--------------------- + +ignore_nan_inequality: Boolean, default = False + Read more at :ref:`ignore_nan_inequality_label` + Whether to ignore float('nan') inequality in Python. + + +.. _ignore_private_variables_label: + +Ignore Private Variables +------------------------ + +ignore_private_variables: Boolean, default = True + Whether to exclude the private variables in the calculations or not. It only affects variables that start with double underscores (__). + + +.. _exclude_obj_callback_label: + +Exclude Obj Callback +-------------------- + +exclude_obj_callback: function, default = None + A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. + This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. + + >>> def exclude_obj_callback(obj, path): + ... return True if "skip" in path or isinstance(obj, int) else False + ... + >>> t1 = {"x": 10, "y": "b", "z": "c", "skip_1": 0} + >>> t2 = {"x": 12, "y": "b", "z": "c", "skip_2": 0} + >>> DeepDiff(t1, t2, exclude_obj_callback=exclude_obj_callback) + {} + + +.. 
_exclude_obj_callback_strict_label: + +Exclude Obj Callback Strict +--------------------------- + +exclude_obj_callback_strict: function, default = None + A function that works the same way as exclude_obj_callback, but excludes elements from the result only if the function returns True for both elements + + >>> def exclude_obj_callback_strict(obj, path): + ... return True if isinstance(obj, int) and obj > 10 else False + ... + >>> t1 = {"x": 10, "y": "b", "z": "c"} + >>> t2 = {"x": 12, "y": "b", "z": "c"} + >>> DeepDiff(t1, t2, exclude_obj_callback=exclude_obj_callback_strict) + {} + >>> DeepDiff(t1, t2, exclude_obj_callback_strict=exclude_obj_callback_strict) + {'values_changed': {"root['x']": {'new_value': 12, 'old_value': 10}}} + + +.. _include_obj_callback_label: + +Include Obj Callback +-------------------- + +include_obj_callback: function, default = None + A function that takes the object and its path and returns a Boolean. If True is returned, the object is included in the results, otherwise it is excluded. + This is to give the user a higher level of control than one can achieve via include_paths. + + >>> def include_obj_callback(obj, path): + ... return True if "include" in path or isinstance(obj, int) else False + ... + >>> t1 = {"x": 10, "y": "b", "z": "c", "include_me": "a"} + >>> t2 = {"x": 10, "y": "b", "z": "c", "include_me": "b"} + >>> DeepDiff(t1, t2, include_obj_callback=include_obj_callback) + {'values_changed': {"root['include_me']": {'new_value': "b", 'old_value': "a"}}} + + +.. _include_obj_callback_strict_label: + +Include Obj Callback Strict +--------------------------- + +include_obj_callback_strict: function, default = None + A function that works the same way as include_obj_callback, but includes elements in the result only if the function returns True for both elements. + + >>> def include_obj_callback_strict(obj, path): + ... return True if isinstance(obj, int) and obj > 10 else False + ... 
+ >>> t1 = {"x": 10, "y": "b", "z": "c"} + >>> t2 = {"x": 12, "y": "b", "z": "c"} + >>> DeepDiff(t1, t2, include_obj_callback=include_obj_callback_strict) + {'values_changed': {"root['x']": {'new_value': 12, 'old_value': 10}}} + >>> DeepDiff(t1, t2, include_obj_callback_strict=include_obj_callback_strict) + {} + + +.. _truncate_datetime_label: + +Truncate Datetime +----------------- + +truncate_datetime: string, default = None + truncate_datetime can take one of the values 'second', 'minute', 'hour' or 'day'. Datetime objects are truncated to that precision before they are hashed. + + >>> import datetime + >>> from deepdiff import DeepDiff + >>> d1 = {'a': datetime.datetime(2020, 5, 17, 22, 15, 34, 913070)} + >>> d2 = {'a': datetime.datetime(2020, 5, 17, 22, 15, 39, 296583)} + >>> DeepDiff(d1, d2, truncate_datetime='minute') + {} + + +.. _use_enum_value_label: + +Use Enum Value +-------------- + +use_enum_value: Boolean, default=False + Makes it so when diffing enum, we use the enum's value. It makes it so comparing an enum to a string or any other value is not reported as a type change. + + >>> from enum import Enum + >>> from deepdiff import DeepDiff + + >>> + >>> class MyEnum2(str, Enum): + ... book = "book" + ... cake = "cake" + ... + >>> DeepDiff("book", MyEnum2.book) + {'type_changes': {'root': {'old_type': <class 'str'>, 'new_type': <enum 'MyEnum2'>, 'old_value': 'book', 'new_value': <MyEnum2.book: 'book'>}}} + >>> DeepDiff("book", MyEnum2.book, use_enum_value=True) + {} + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/index.rst b/deepdiff/docstrings/index.rst new file mode 100644 index 00000000..f23408fe --- /dev/null +++ b/deepdiff/docstrings/index.rst @@ -0,0 +1,153 @@ +.. DeepDiff documentation master file, created by + sphinx-quickstart on Mon Jul 20 06:06:44 2015. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + + +DeepDiff 9.0.0 documentation! 
+============================= + +******* +Modules +******* + +The DeepDiff library includes the following modules: + +- **DeepDiff** For Deep Difference of 2 objects. :doc:`/diff` + + It returns the deep difference of python objects. It can also be used to take the distance between objects. :doc:`/deep_distance` + +- **DeepSearch** Search for objects within other objects. :doc:`/dsearch` + +- **DeepHash** Hash any object based on their content even if they are not "hashable" in Python's eyes. :doc:`/deephash` + +- **Delta** Delta of objects that can be applied to other objects. Imagine git commits but for structured data. :doc:`/delta` + +- **Extract** For extracting a path from an object :doc:`/extract` + +- **Commandline** Most of the above functionality is also available via the commandline module :doc:`/commandline` + +*********** +What Is New +*********** + +DeepDiff 9-0-0 +-------------- + + - migration note: + - `to_dict()` and `to_json()` now accept a `verbose_level` parameter and always return a usable text-view dict. When the original view is `'tree'`, they default to `verbose_level=2` for full detail. The old `view_override` parameter is removed. To get the previous results, you will need to pass the explicit verbose_level to `to_json` and `to_dict` if you are using the tree view. 
+ - Dropping support for Python 3.9 + - Support for python 3.14 + - Added support for callable ``group_by`` thanks to `echan5 `__ + - Added ``FlatDeltaDict`` TypedDict for ``to_flat_dicts`` return type + - Fixed colored view display when all list items are removed thanks to `yannrouillard `__ + - Fixed ``hasattr()`` swallowing ``AttributeError`` in ``__slots__`` handling for objects with ``__getattr__`` thanks to `tpvasconcelos `__ + - Fixed ``ignore_order=True`` missing int-vs-float type changes + - Always use t1 path for reporting thanks to `devin13cox `__ + - Fixed ``_convert_oversized_ints`` failing on NamedTuples + - Fixed orjson ``TypeError`` for integers exceeding 64-bit range + - Fixed parameter bug in ``to_flat_dicts`` where ``include_action_in_path`` and ``report_type_changes`` were not being passed through + - Fixed ``ignore_keys`` issue in ``detailed__dict__`` thanks to `vitalis89 `__ + - Fixed logarithmic similarity type hint thanks to `ljames8 `__ + - Added ``Fraction`` numeric support thanks to `akshat62 `__ + +********* +Tutorials +********* + +Tutorials can be found on `Zepworks blog `_ + + +************ +Installation +************ + +Install from PyPi:: + + pip install deepdiff + +If you want to use DeepDiff from commandline:: + + pip install "deepdiff[cli]" + +If you want to improve the performance of DeepDiff with certain processes such as json serialization:: + + pip install "deepdiff[optimize]" + +Read about DeepDiff optimizations at :ref:`optimizations_label` + +Importing +--------- + +.. code:: python + + >>> from deepdiff import DeepDiff # For Deep Difference of 2 objects + >>> from deepdiff import grep, DeepSearch # For finding if item exists in an object + >>> from deepdiff import DeepHash # For hashing objects based on their contents + >>> from deepdiff import Delta # For creating delta of objects that can be applied later to other objects. + >>> from deepdiff import extract # For extracting a path from an object + + +.. 
note:: + if you want to use DeepDiff via commandline, make sure to run:: + + pip install "deepdiff[cli]" + +Then you can access the commands via: + +- DeepDiff + +.. code:: bash + + $ deep diff --help + +- Delta + +.. code:: bash + + $ deep patch --help + +- grep + +.. code:: bash + + $ deep grep --help + +- extract + +.. code:: bash + + $ deep extract --help + + +Supported data types +-------------------- + +int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple, Numpy, custom objects and more! + + +References +========== + +.. toctree:: + :maxdepth: 4 + + diff + dsearch + deephash + delta + extract + colored_view + commandline + changelog + authors + faq + support + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/deepdiff/docstrings/numbers.rst b/deepdiff/docstrings/numbers.rst new file mode 100644 index 00000000..c14fe5ef --- /dev/null +++ b/deepdiff/docstrings/numbers.rst @@ -0,0 +1,202 @@ +:doc:`/index` + +Numbers +======= + +When dealing with numbers, DeepDiff provides the following functionalities: + +.. _significant_digits_label: + +Significant Digits +------------------ + +significant_digits : int >= 0, default=None + +significant_digits defines the number of digits AFTER the decimal point to be used in the comparison. However you can override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. + +.. note:: + Setting significant_digits will affect ANY number comparison. + +If ignore_numeric_type_changes is set to True and you have left significant_digits at the default of None, it gets automatically set to 55. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change. However when ignore_numeric_type_changes=True, in order to compare numbers from different types to each other, we need to convert them all into strings. 
The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. + +.. note:: + significant_digits by default uses "{:.Xf}".format(Your Number) behind the scene to compare numbers where X=significant_digits when the number_format_notation is left as the default of "f" meaning fixed point. + + As a side note, please pay attention that adding digits to your floating point can result in small differences in the results. For example: + "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 + + For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) + +.. note:: + To override what significant digits mean and switch it to scientific notation, use number_format_notation="e" + Behind the scene that switches DeepDiff to use "{:.Xe}".format(Your Number) where X=significant_digits. + +**Examples:** + +Approximate decimals comparison (Significant digits after the point): + >>> t1 = Decimal('1.52') + >>> t2 = Decimal('1.57') + >>> DeepDiff(t1, t2, significant_digits=0) + {} + >>> DeepDiff(t1, t2, significant_digits=1) + {'values_changed': {'root': {'new_value': Decimal('1.57'), 'old_value': Decimal('1.52')}}} + +Approximate fractions comparison (Significant digits after the point): + >>> from fractions import Fraction + >>> t1 = Fraction(22, 7) # 3.142857... + >>> t2 = Fraction(355, 113) # 3.141592... 
+ >>> DeepDiff(t1, t2, significant_digits=2) + {} + >>> DeepDiff(t1, t2, significant_digits=3) + {'values_changed': {'root': {'new_value': Fraction(355, 113), 'old_value': Fraction(22, 7)}}} + +Approximate float comparison (Significant digits after the point): + >>> t1 = [ 1.1129, 1.3359 ] + >>> t2 = [ 1.113, 1.3362 ] + >>> pprint(DeepDiff(t1, t2, significant_digits=3)) + {} + >>> pprint(DeepDiff(t1, t2)) + {'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, + 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} + >>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) + {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} + + +.. _number_format_notation_label: + +Number Format Notation +---------------------- + +number_format_notation : string, default="f" + number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. + +**Examples:** + +Approximate number comparison (significant_digits after the decimal point in scientific notation) + >>> DeepDiff(1024, 1020, significant_digits=2, number_format_notation="f") # default is "f" + {'values_changed': {'root': {'new_value': 1020, 'old_value': 1024}}} + >>> DeepDiff(1024, 1020, significant_digits=2, number_format_notation="e") + {} + +.. _number_to_string_func_label: + +Number To String Function +------------------------- + +number_to_string_func : function, default=None + In many cases DeepDiff converts numbers to strings in order to compare them. For example when ignore_order=True, when significant digits parameter is defined or when the ignore_numeric_type_changes=True. + In its simplest form, the number_to_string_func is "{:.Xf}".format(Your Number) where X is the significant digits and the number_format_notation is left as the default of "f" meaning fixed point. 
+ The number_to_string_func parameter gives the user full control over how numbers are converted to strings for comparison. The default function is defined in https://github.com/seperman/deepdiff/blob/master/deepdiff/helper.py and is called number_to_string. You can define your own custom function instead of the default one in the helper module. + +Defining your own number_to_string_func + Let's say you want the number comparison to happen only for numbers above 100 for some reason. + + >>> from deepdiff import DeepDiff + >>> from deepdiff.helper import number_to_string + >>> def custom_number_to_string(number, *args, **kwargs): + ... number = 100 if number < 100 else number + ... return number_to_string(number, *args, **kwargs) + ... + >>> t1 = [10, 12, 100000] + >>> t2 = [50, 63, 100021] + >>> DeepDiff(t1, t2, significant_digits=3, number_format_notation="e") + {'values_changed': {'root[0]': {'new_value': 50, 'old_value': 10}, 'root[1]': {'new_value': 63, 'old_value': 12}}} + >>> + >>> DeepDiff(t1, t2, significant_digits=3, number_format_notation="e", + ... number_to_string_func=custom_number_to_string) + {} + + +Ignore Numeric Type Changes +--------------------------- + +ignore_numeric_type_changes: Boolean, default = False +Read more at :ref:`ignore_numeric_type_changes_label` + +.. _ignore_nan_inequality_label: + +Ignore Nan Inequality +--------------------- + +ignore_nan_inequality: Boolean, default = False + Whether to ignore float('nan') inequality in Python. Note that this is a cPython "feature". Some versions of Pypy3 have nan==nan where in cPython nan!=nan + + >>> float('nan') == float('nan') + False + >>> DeepDiff(float('nan'), float('nan')) + {'values_changed': {'root': {'new_value': nan, 'old_value': nan}}} + >>> DeepDiff(float('nan'), float('nan'), ignore_nan_inequality=True) + {} + +.. _math_epsilon_label: + +Math Epsilon +------------ + +math_epsilon: Decimal, default = None + math_epsilon uses Python's built-in math.isclose.
It defines a tolerance value which is passed to math.isclose(). Any numbers that are within the tolerance will not report as being different. Any numbers outside of that tolerance will show up as different. + + For example for some sensor data derived and computed values must lie in a certain range. It does not matter that they are off by e.g. 1e-5. + + To check against that the math core module provides the valuable isclose() function. It evaluates the being close of two numbers to each other, with reference to an epsilon (abs_tol). This is superior to the format function, as it evaluates the mathematical representation and not the string representation. + +Example with Decimal: + >>> from decimal import Decimal + >>> d1 = {"a": Decimal("7.175")} + >>> d2 = {"a": Decimal("7.174")} + >>> DeepDiff(d1, d2, math_epsilon=0.01) + {} + +Example with Fraction: + >>> from fractions import Fraction + >>> d1 = {"a": Fraction(7175, 1000)} + >>> d2 = {"a": Fraction(7174, 1000)} + >>> DeepDiff(d1, d2, math_epsilon=0.01) + {} + +.. note:: + math_epsilon cannot currently handle the hashing of values, which is done when :ref:`ignore_order_label` is True. + + +.. _use_log_scale_label: + +Use Log Scale +------------- + +use_log_scale: Boolean, default=False + use_log_scale along with :ref:`log_scale_similarity_threshold_label` can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. 
+ + + >>> from deepdiff import DeepDiff + + >>> t1 = {'foo': 110, 'bar': 306} + >>> t2 = {'foo': 140, 'bar': 298} + >>> + >>> DeepDiff(t1, t2) + {'values_changed': {"root['foo']": {'new_value': 140, 'old_value': 110}, "root['bar']": {'new_value': 298, 'old_value': 306}}} + + >>> DeepDiff(t1, t2, use_log_scale=True, log_scale_similarity_threshold=0.01) + {'values_changed': {"root['foo']": {'new_value': 140, 'old_value': 110}, "root['bar']": {'new_value': 298, 'old_value': 306}}} + + >>> DeepDiff(t1, t2, use_log_scale=True, log_scale_similarity_threshold=0.1) + {'values_changed': {"root['foo']": {'new_value': 140, 'old_value': 110}}} + + >>> DeepDiff(t1, t2, use_log_scale=True, log_scale_similarity_threshold=0.3) + {} + + +.. _log_scale_similarity_threshold_label: + +Log Scale Similarity Threshold +------------------------------ + +log_scale_similarity_threshold: float, default = 0.1 + :ref:`use_log_scale_label` along with log_scale_similarity_threshold can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. See the example above. + + +Performance Improvement of Numbers diffing +------------------------------------------ + +Take a look at :ref:`diffing_numbers_optimizations_label` + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/optimizations.rst b/deepdiff/docstrings/optimizations.rst new file mode 100644 index 00000000..5bc77682 --- /dev/null +++ b/deepdiff/docstrings/optimizations.rst @@ -0,0 +1,287 @@ +:doc:`/index` + +.. _optimizations_label: + +Optimizations +============= + +If you are dealing with large nested objects and ignore_order=True, chances are DeepDiff takes a while to calculate the diff. Here are some tips that may help you with optimizations and progress report. + + +Optimized Libraries +------------------- + +If you dump DeepDiff or Delta objects as json, you can improve the performance by installing orjson. 
+DeepDiff will automatically use orjson instead of Python's built-in json library to do json serialization. + + pip install "deepdiff[optimize]" + + +Max Passes +---------- + +:ref:`max_passes_label` comes with the default of 10000000. +If you don't need to exactly pinpoint the difference and you can get away with getting a less granular report, you can reduce the number of passes. It is recommended to get a diff of your objects with the default max_passes and take a look at the stats by running :ref:`get_stats_label` before deciding to reduce this number. In many cases reducing this number does not yield faster results. + +A new pass is started each time 2 iterables are compared in a way that every single item that is different from the first one is compared to every single item that is different in the second iterable. + +.. _max_diffs_label: + +Max Diffs +--------- + +max_diffs: Integer, default = None + max_diffs defines the maximum number of diffs to run on objects to pinpoint what exactly is different. This is only used when ignore_order=True. Every time 2 individual items are compared a diff is counted. The default value of None means there is no limit on the number of diffs that will take place. Any positive integer can make DeepDiff stop doing the calculations upon reaching that max_diffs count. + +You can run diffs and then :ref:`get_stats_label` to see how many diffs and passes have happened. 
+ + >>> from deepdiff import DeepDiff + >>> diff=DeepDiff(1, 2) + >>> diff + {'values_changed': {'root': {'new_value': 2, 'old_value': 1}}} + >>> diff.get_stats() + {'PASSES COUNT': 0, 'DIFF COUNT': 1, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} + >>> diff=DeepDiff([[1,2]], [[2,3,1]]) + >>> diff.get_stats() + {'PASSES COUNT': 0, 'DIFF COUNT': 8, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} + >>> diff=DeepDiff([[1,2]], [[2,3,1]], ignore_order=True) + >>> diff.get_stats() + {'PASSES COUNT': 3, 'DIFF COUNT': 6, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} + +.. note:: + Compare :ref:`max_diffs_label` with :ref:`max_passes_label` + + +.. _cache_size_label: + +Cache Size +---------- + +cache_size : int >= 0, default=0 + Cache size to be used to improve the performance. A cache size of zero means it is disabled. + Using the cache_size can dramatically improve the diff performance especially for the nested objects at the cost of more memory usage. However if cache hits rate is very low, having a cache actually reduces the performance. + +Cache Examples +-------------- + +For example lets take a look at the performance of the benchmark_deeply_nested_a in the `DeepDiff-Benchmark repo `_ . + +No Cache +^^^^^^^^ + +With the no cache option we have the following stats: + + {'PASSES COUNT': 11234, 'DIFF COUNT': 107060, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 10} + +Yes it has taken 10 seconds to do the diff! + +.. figure:: _static/benchmark_deeply_nested_a__3.8__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png + :alt: cache_size=0 + + cache_size=0 + +Cache Size 500 +^^^^^^^^^^^^^^ + +With a cache size of 500, we are doing the same diff in 2.5 seconds! And the memory usage has not changed. 
It is still hovering around 100Mb. + + {'PASSES COUNT': 3960, 'DIFF COUNT': 19469, 'DISTANCE CACHE HIT COUNT': 11847, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 2} + +As you can see the number of passes and diff counts have gone down and instead the distance cache hit count has gone up. + +.. figure:: _static/benchmark_deeply_nested_a__3.8__ignore_order=True__cache_size=500__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png + :alt: cache_size=500 + + cache_size=500 + + +Cache Size 500 and Cache Tuning Sample Size 500 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +With a cache size of 500, we set the :ref:`cache_tuning_sample_size_label` to be 500 too. And we have a slight improvement. we are doing the same diff in 2 seconds now. And the memory usage has not changed. It is still hovering around 100Mb. + + {'PASSES COUNT': 3960, 'DIFF COUNT': 19469, 'DISTANCE CACHE HIT COUNT': 11847, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 2} + +As you can see in this case none of the stats have changed compared to the previous stats. + +.. figure:: _static/benchmark_deeply_nested_a__3.8__ignore_order=True__cache_size=500__cache_tuning_sample_size=500__cutoff_intersection_for_pairs=1.png + :alt: cache_size=500 cache_tuning_sample_size=500 + + cache_size=500 cache_tuning_sample_size=500 + + +Cache Size of 5000 +^^^^^^^^^^^^^^^^^^ + +Let's pay a little attention to our stats. Particularly to 'DISTANCE CACHE HIT COUNT': 11847 and the fact that the memory usage has not changed so far. What if we bump the cache_size to 5000 and disable cache_tuning_sample_size? + + {'PASSES COUNT': 1486, 'DIFF COUNT': 6637, 'DISTANCE CACHE HIT COUNT': 3440, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 0} + +We get the result calculated below 1 second! And the memory usage is only slightly above 100Mb. + +.. 
figure:: _static/benchmark_deeply_nested_a__3.8__ignore_order=True__cache_size=5000__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png + :alt: cache_size=5000 + + cache_size=5000 + + + +.. _cache_tuning_sample_size_label: + +Cache Tuning Sample Size +------------------------ + +cache_tuning_sample_size : int >= 0, default = 0 + cache_tuning_sample_size is an experimental feature. It works hand in hand with the :ref:`cache_size_label`. When cache_tuning_sample_size is set to anything above zero, it will sample the cache usage with the passed sample size and decide whether to use the cache or not, and it will turn the cache back on occasionally during the diffing process. This option can be useful if you are not sure if you need any cache or not. However you will gain much better performance with keeping this parameter zero and running your diff with different cache sizes and benchmarking to find the optimal cache size. + +.. note:: + A good start with cache_tuning_sample_size is to set it to the size of your cache. + + +.. _diffing_numbers_optimizations_label: + +Optimizations for Diffing Numbers +--------------------------------- + +If you are diffing lists of python numbers, you could get performance improvement just by installing numpy. DeepDiff will use Numpy to improve the performance behind the scenes. + +For example, let's take a look at the performance of the benchmark_array_no_numpy vs. benchmark_numpy_array in the `DeepDiff-Benchmark repo `_. + +In this specific test, we have 2 lists of numbers that have nothing in common: `mat1 `_ and `mat2 `_ . + +No Cache and No Numpy +^^^^^^^^^^^^^^^^^^^^^ + +With the no cache option and no Numpy installed we have the following stats: + + {'PASSES COUNT': 1, 'DIFF COUNT': 439944, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 30} + +Yes it has taken 30 seconds to do the diff! + +.. 
figure:: _static/benchmark_array_no_numpy__3.8__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png + :alt: cache_size=0 and no Numpy + + cache_size=0 and no Numpy + +Cache Size 10000 and No Numpy +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +What if we increase the cache size to 10000? + + {'PASSES COUNT': 1, 'DIFF COUNT': 439944, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 35} + +Not only it didn't help, it increased the diff time by 15%!! + +Worse, if you look at the stats you see that the cache hit count is zero. This has happened since the 2 lists of items have nothing in common and hence caching the results does not improve the performance. + + +.. figure:: _static/benchmark_array_no_numpy__3.8__ignore_order=True__cache_size=10000__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png + :alt: cache_size=10000 and no Numpy + + cache_size=10000 and no Numpy + +No Cache and Numpy +^^^^^^^^^^^^^^^^^^ + +Let's install Numpy now. Set the cache_size=0 and run the diff again. + +Yay, the same diff is done in 5 seconds! + + {'PASSES COUNT': 1, 'DIFF COUNT': 1348, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 5} + +As you can see the memory usage has gone up from around 500Mb to around 630Mb. + +.. figure:: _static/benchmark_numpy_array__3.8__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png + :alt: Numpy but no cache + + Numpy but no cache + + +Pypy +---- + +If you are diffing big blobs of data that do not mainly include numbers, you may gain some performance improvement by running DeepDiff on Pypy3 instead of cPython. + +For example lets take a look at the performance of the benchmark_big_jsons in the `DeepDiff-Benchmark repo `_. + +First we will run it on cPython 3.8: + +It takes around 17.5 seconds and 40Mb of memory: + +.. 
figure:: _static/benchmark_big_jsons__3.8__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__max_diffs=300000__max_passes=40000__cutoff_intersection_for_pairs=1.png + :alt: Nested blob of text diffed in Python3.8 + + Nested blob of text diffed in Python3.8 + +And then we run it in Pypy3.6-7.3.0. It takes 12 seconds now but around 110Mb of memory. + +.. figure:: _static/benchmark_big_jsons__pypy3.6__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__max_diffs=300000__max_passes=40000__cutoff_intersection_for_pairs=1.png + :alt: Nested blob of text diffed in Pypy3.6-7.3.0 + + Nested blob of text diffed in Pypy3.6-7.3.0 + +.. note:: + Note that if you are diffing numbers, and have Numpy installed as recommended, cPython will have a better performance than Pypy. But if you are diffing blobs of mixed strings and some numbers, Pypy will have a better CPU performance and worse memory usage. + + +Cutoff Intersection For Pairs +----------------------------- + +:ref:`cutoff_intersection_for_pairs_label`, which is only used when ignore_order=True, can have a huge effect on the granularity of the results and the performance. A value of zero essentially stops DeepDiff from doing passes while a value of 1 forces DeepDiff to do passes on iterables even when they are very different. Running passes is an expensive operation. + +As an example of how much this parameter can affect the results in deeply nested objects, please take a look at :ref:`distance_and_diff_granularity_label`. + +.. _cache_purge_level: + +Cache Purge Level +----------------- + +cache_purge_level: int, 0, 1, or 2. default=1 + cache_purge_level defines what objects in DeepDiff should be deleted to free the memory once the diff object is calculated. If this value is set to zero, most of the functionality of the diff object is removed and the most memory is released. A value of 1 preserves all the functionalities of the diff object. 
A value of 2 also preserves the cache and hashes that were calculated during the diff calculations. In most cases the user does not need those objects to remain in the diff, except for investigation purposes. + + +.. _zip_ordered_iterables_label: + +Zip Ordered Iterables +--------------------- + +zip_ordered_iterables: Boolean, default = False + When comparing ordered iterables such as lists, DeepDiff tries to find the smallest difference between the two iterables to report. That means that items in the two lists are not paired individually in the order of appearance in the iterables. Sometimes, that is not the desired behavior. Set this flag to True to make DeepDiff pair and compare the items in the iterables in the order they appear. + + + >>> from pprint import pprint + >>> from deepdiff import DeepDiff + >>> t1 = ["a", "b", "d", "e"] + >>> t2 = ["a", "b", "c", "d", "e"] + >>> DeepDiff(t1, t2) + {'iterable_item_added': {'root[2]': 'c'}} + + When this flag is set to True and ignore_order=False, diffing will be faster. + + >>> diff=DeepDiff(t1, t2, zip_ordered_iterables=True) + >>> pprint(diff) + {'iterable_item_added': {'root[4]': 'e'}, + 'values_changed': {'root[2]': {'new_value': 'c', 'old_value': 'd'}, + 'root[3]': {'new_value': 'd', 'old_value': 'e'}}} + + +.. _threshold_to_diff_deeper_label: + +Threshold To Diff Deeper +------------------------ + +threshold_to_diff_deeper: float, default = 0.33 + threshold_to_diff_deeper is a number between 0 and 1. When comparing dictionaries that have a small intersection of keys, we will report the dictionary as a new_value instead of reporting individual keys changed. If you set it to zero, you get the same results as DeepDiff 7.0.1 and earlier, which means this feature is disabled. The new default is 0.33 which means if fewer than one third of the keys between dictionaries intersect, report it as a new object. 
+ + + >>> from deepdiff import DeepDiff + >>> t1 = {"veggie": "carrots"} + >>> t2 = {"meat": "carrots"} + >>> + >>> DeepDiff(t1, t2, threshold_to_diff_deeper=0) + {'dictionary_item_added': ["root['meat']"], 'dictionary_item_removed': ["root['veggie']"]} + >>> DeepDiff(t1, t2, threshold_to_diff_deeper=0.33) + {'values_changed': {'root': {'new_value': {'meat': 'carrots'}, 'old_value': {'veggie': 'carrots'}}}} + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/other.rst b/deepdiff/docstrings/other.rst new file mode 100644 index 00000000..c380e356 --- /dev/null +++ b/deepdiff/docstrings/other.rst @@ -0,0 +1,55 @@ +:doc:`/index` + +Other Parameters +================ + + +.. _encodings_label: + +Encodings +--------- + +significant_digits : int >= 0, default=None + +Character encodings to iterate through when we convert bytes into strings. You may want to pass an explicit list of encodings in your objects if you start getting UnicodeDecodeError from DeepHash. Also check out :ref:`ignore_encoding_errors_label` if you can get away with ignoring these errors and don't want to bother with an explicit list of encodings but it will come at the price of slightly less accuracy of the final results. Example: encodings=["utf-8", "latin-1"] + +The reason the decoding of bytes to string is needed is that when `ignore_order = True` we calculate the hash of the objects in order to facilitate in diffing them. In order to calculate the hash, we serialize all objects into strings. During the serialization we may encounter issues with character encodings. + +**Examples:** + +Comparing bytes that have non UTF-8 encoding: + >>> from deepdiff import DeepDiff + >>> item = b"\xbc cup of flour" + >>> DeepDiff([b'foo'], [item], ignore_order=True) + Traceback (most recent call last): + raise UnicodeDecodeError( + UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbc in position 0: Can not produce a hash for root: invalid start byte in 'p of flo...'. 
Please either pass ignore_encoding_errors=True or pass the encoding via encodings=['utf-8', '...']. + +Let's try to pass both 'utf-8' and 'latin-1' as encodings to be tried: + >>> DeepDiff([b'foo'], [item], encodings=['utf-8', 'latin-1'], ignore_order=True) + {'values_changed': {'root[0]': {'new_value': b'\xbc cup of flour', 'old_value': b'foo'}}} + + +.. _ignore_encoding_errors_label: + +Ignore Encoding Errors +---------------------- + +ignore_encoding_errors: Boolean, default = False + +If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the encodings parameter. + +We can generally get the same results as above example if we just pass `ignore_encoding_errors=True`. However it comes at the cost of less accuracy of the results. + >>> DeepDiff([b'foo'], [b"\xbc cup of flour"], ignore_encoding_errors=True, ignore_order=True) + {'values_changed': {'root[0]': {'new_value': b'\xbc cup of flour', 'old_value': b'foo'}}} + +For example if we replace `foo` with ` cup of flour`, we have bytes that are only different in the problematic character. Ignoring that character means DeepDiff will consider these 2 strings to be equal since their hash becomes the same. Note that we only hash items when `ignore_order=True`. 
+ >>> DeepDiff([b" cup of flour"], [b"\xbc cup of flour"], ignore_encoding_errors=True, ignore_order=True) + {} + +But if we had passed the proper encoding, it would have detected that these 2 bytes are different: + >>> DeepDiff([b" cup of flour"], [b"\xbc cup of flour"], encodings=['latin-1'], ignore_order=True) + {'values_changed': {'root[0]': {'new_value': b'\xbc cup of flour', 'old_value': b' cup of flour'}}} + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/search_doc.rst b/deepdiff/docstrings/search_doc.rst new file mode 100644 index 00000000..89ef333a --- /dev/null +++ b/deepdiff/docstrings/search_doc.rst @@ -0,0 +1,75 @@ +:orphan: + +grep is a more user friendly interface for DeepSearch. It takes exactly the same arguments as DeepSearch except that you pipe the object into it instead of passing it as a parameter. + +It works just like grep in linux shell! + +**Parameters** + +item : The item to search for + +verbose_level : int >= 0, default = 1. + Verbose level one shows the paths of found items. + Verbose level 2 shows the path and value of the found items. + +exclude_paths: list, default = None. + List of paths to exclude from the report. + Supports wildcard patterns: use ``[*]`` to match one segment or ``[**]`` to match any depth. + +exclude_types: list, default = None. + List of object types to exclude from the report. + +case_sensitive: Boolean, default = False + +match_string: Boolean, default = False + If True, the value of the object or its children have to exactly match the item. + If False, the value of the item can be a part of the value of the object or its children + +use_regexp: Boolean, default = False + +strict_checking: Boolean, default = True + If True, it will check the type of the object to match, so when searching for '1234', + it will NOT match the int 1234. Currently this only affects the numeric values searching. 
+ + +**Examples** + +Importing + >>> from deepdiff import grep + >>> from pprint import pprint + +Search in list for string + >>> obj = ["long somewhere", "string", 0, "somewhere great!"] + >>> item = "somewhere" + >>> ds = obj | grep(item) + >>> print(ds) + {'matched_values': ['root[0]', 'root[3]']} + +Search in nested data for string + >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}] + >>> item = "somewhere" + >>> ds = obj | grep(item, verbose_level=2) + >>> pprint(ds, indent=2) + { 'matched_paths': {"root[1]['somewhere']": 'around'}, + 'matched_values': { 'root[0]': 'something somewhere', + "root[1]['long']": 'somewhere'}} + +You can also use regular expressions + >>> obj = ["something here", {"long": "somewhere", "someone": 2, 0: 0, "somewhere": "around"}] + >>> ds = obj | grep("some.*", use_regexp=True) + >>> pprint(ds, indent=2) + { 'matched_paths': ["root[1]['someone']", "root[1]['somewhere']"], + 'matched_values': ['root[0]', "root[1]['long']"]} + + +Change strict_checking to False to match numbers in strings and vice versa: + >>> obj = {"long": "somewhere", "num": 1123456, 0: 0, "somewhere": "around"} + >>> item = "1234" + >>> result = {"matched_values": {"root['num']"}} + >>> ds = obj | grep(item, verbose_level=1, use_regexp=True) + >>> pprint(ds) + {} + >>> + >>> ds = obj | grep(item, verbose_level=1, use_regexp=True, strict_checking=False) + >>> pprint(ds) + {'matched_values': ["root['num']"]} diff --git a/deepdiff/docstrings/serialization.rst b/deepdiff/docstrings/serialization.rst new file mode 100644 index 00000000..3daba73b --- /dev/null +++ b/deepdiff/docstrings/serialization.rst @@ -0,0 +1,325 @@ +:doc:`/index` + +.. _serialization_label: + +Serialization +============= + +.. _to_dict_label: + +To Dict +------- + +In order to convert the DeepDiff object into a normal Python dictionary, use the to_dict() method. 
+The result is always a text-view dictionary regardless of the original view used to create the DeepDiff object. + +**Parameters** + +verbose_level: int, default=None + Override the verbose_level for the serialized output. + When None, the behavior depends on the original view: + + - If the original view is 'text', the verbose_level from DeepDiff initialization is used. + - If the original view is 'tree', verbose_level=2 is used to provide the most detailed output. + + Valid values are 0, 1, or 2. + +Example: + >>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} + >>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2) + >>> ddiff.to_dict() + {'type_changes': {"root[4]['b']": {'old_type': , 'new_type': , 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} + + +When the original view is 'tree', to_dict() defaults to verbose_level=2 for the most detailed output: + +Example: + >>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} + >>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff.to_dict() + {'type_changes': {"root[4]['b']": {'old_type': , 'new_type': , 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} + +You can also override the verbose_level: + +Example: + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff.to_dict(verbose_level=0) + {'type_changes': {"root[4]['b']": {'old_type': , 'new_type': }}} + +.. _to_json_label: + +To Json +------- + +Dump json of the text view. + +In order to do safe json serialization, use the to_json() method. + +**Parameters** + +default_mapping : dictionary(optional), a dictionary of mapping of different types to json types. + +by default DeepDiff converts certain data types. For example Decimals into floats so they can be exported into json. 
+If you have a certain object type that the json serializer cannot serialize, please pass the appropriate type
+ +Serialize and then deserialize back to deepdiff + >>> t1 = {1: 1, 2: 2, 3: 3} + >>> t2 = {1: 1, 2: "2", 3: 3} + >>> ddiff = DeepDiff(t1, t2) + >>> jsoned = ddiff.to_json_pickle() + >>> jsoned + '{"type_changes": {"root[2]": {"new_type": {"py/type": "builtins.str"}, "new_value": "2", "old_type": {"py/type": "builtins.int"}, "old_value": 2}}}' + >>> ddiff_new = DeepDiff.from_json_pickle(jsoned) + >>> ddiff == ddiff_new + True + + +.. _from_json_pickle_label: + +From Json Pickle +---------------- + +Load the diff object from the json pickle dump. +Take a look at the above :ref:`to_json_pickle_label` for an example. + + +.. _delta_to_flat_rows_label: + +Delta Serialize To Flat Rows +---------------------------- + +Sometimes, it is desired to serialize a :ref:`delta_label` object to a list of flat rows. For example, to store them in relation databases. In that case, you can use the Delta.to_flat_rows to achieve the desired outcome. The rows are named tuples and can be converted to dictionaries using `._asdict()` + + >>> from pprint import pprint + >>> from deepdiff import DeepDiff, Delta + >>> t1 = {"key1": "value1"} + >>> t2 = {"field2": {"key2": "value2"}} + >>> diff = DeepDiff(t1, t2, verbose_level=2) + >>> pprint(diff, indent=2) + { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}}, + 'dictionary_item_removed': {"root['key1']": 'value1'}} + >>> delta = Delta(diff, bidirectional=True) + >>> flat_rows = delta.to_flat_rows() + >>> pprint(flat_rows, indent=2) + [ FlatDeltaRow(path=['field2', 'key2'], action='dictionary_item_added', value='value2'), + FlatDeltaRow(path=['key1'], action='dictionary_item_removed', value='value1')] + +.. note:: + When converting a delta to flat rows, nested dictionaries that have single keys in them are flattened too. 
+ Notice that the diff object says + + { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}} + + but the flat row is: + + FlatDeltaRow(path=['field2', 'key2'], action='dictionary_item_added', value='value2') + + That means, when you recreate the delta from the flat rows, you need to set force=True to apply the delta: + + >>> t1 + delta == t2 + True + >>> t2 - delta == t1 + True + >>> delta2 = Delta(flat_rows_list=flat_rows, bidirectional=True) + >>> t1 + delta2 == t2 + Expected the old value for root['field2']['key2'] to be None but it is not found. Error found on: 'field2' + False. You may want to set force=True, especially if this delta is created by passing flat_rows_list or flat_dict_list + >>> t1 + delta + {'field2': {'key2': 'value2'}} + >>> t1 + delta2 + {} + >>> delta2 = Delta(flat_rows_list=flat_rows, bidirectional=True, force=True) # We need to set force=True + >>> t1 + delta2 + {'field2': {'key2': 'value2'}} + >>> + + + +Flat Row Specs: + + + class FlatDataAction(str, enum.Enum): + values_changed = 'values_changed' + type_changes = 'type_changes' + set_item_added = 'set_item_added' + set_item_removed = 'set_item_removed' + dictionary_item_added = 'dictionary_item_added' + dictionary_item_removed = 'dictionary_item_removed' + iterable_item_added = 'iterable_item_added' + iterable_item_removed = 'iterable_item_removed' + iterable_item_moved = 'iterable_item_moved' + iterable_items_inserted = 'iterable_items_inserted' # opcode + iterable_items_deleted = 'iterable_items_deleted' # opcode + iterable_items_replaced = 'iterable_items_replaced' # opcode + iterable_items_equal = 'iterable_items_equal' # opcode + attribute_removed = 'attribute_removed' + attribute_added = 'attribute_added' + unordered_iterable_item_added = 'unordered_iterable_item_added' + unordered_iterable_item_removed = 'unordered_iterable_item_removed' + + + UnkownValueCode = 'unknown___' + + + class FlatDeltaRow(NamedTuple): + path: List + action: FlatDataAction + value: 
Optional[Any] = UnkownValueCode + old_value: Optional[Any] = UnkownValueCode + type: Optional[Any] = UnkownValueCode + old_type: Optional[Any] = UnkownValueCode + new_path: Optional[List] = None + t1_from_index: Optional[int] = None + t1_to_index: Optional[int] = None + t2_from_index: Optional[int] = None + t2_to_index: Optional[int] = None + + +.. _delta_to_flat_dicts_label: + +Delta Serialize To Flat Dictionaries +------------------------------------ + +Sometimes, it is desired to serialize a :ref:`delta_label` object to a list of flat dictionaries. For example, to store them in relation databases. In that case, you can use the Delta.to_flat_dicts to achieve the desired outcome. + +Since None is a valid value, we use a special hard-coded string to signify "unknown": 'unknown___' + +.. note:: + Many new keys are added to the flat dicts in DeepDiff 7.0.0 + You may want to use :ref:`delta_to_flat_rows_label` instead of flat dicts. + +For example: + + >>> from pprint import pprint + >>> from deepdiff import DeepDiff, Delta + >>> t1 = {"key1": "value1"} + >>> t2 = {"field2": {"key2": "value2"}} + >>> diff = DeepDiff(t1, t2, verbose_level=2) + >>> pprint(diff, indent=2) + { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}}, + 'dictionary_item_removed': {"root['key1']": 'value1'}} + >>> delta = Delta(diff, bidirectional=True) + >>> flat_dicts = delta.to_flat_dicts() + >>> pprint(flat_dicts, indent=2) + [ { 'action': 'dictionary_item_added', + 'new_path': None, + 'old_type': 'unknown___', + 'old_value': 'unknown___', + 'path': ['field2', 'key2'], + 't1_from_index': None, + 't1_to_index': None, + 't2_from_index': None, + 't2_to_index': None, + 'type': 'unknown___', + 'value': 'value2'}, + { 'action': 'dictionary_item_removed', + 'new_path': None, + 'old_type': 'unknown___', + 'old_value': 'unknown___', + 'path': ['key1'], + 't1_from_index': None, + 't1_to_index': None, + 't2_from_index': None, + 't2_to_index': None, + 'type': 'unknown___', + 'value': 
'value1'}] + + +Example 2: + + >>> t3 = ["A", "B"] + >>> t4 = ["A", "B", "C", "D"] + >>> diff = DeepDiff(t3, t4, verbose_level=2) + >>> pprint(diff, indent=2) + {'iterable_item_added': {'root[2]': 'C', 'root[3]': 'D'}} + >>> + >>> delta = Delta(diff, bidirectional=True) + >>> flat_dicts = delta.to_flat_dicts() + >>> pprint(flat_dicts, indent=2) + [ { 'action': 'iterable_item_added', + 'new_path': None, + 'old_type': 'unknown___', + 'old_value': 'unknown___', + 'path': [2], + 't1_from_index': None, + 't1_to_index': None, + 't2_from_index': None, + 't2_to_index': None, + 'type': 'unknown___', + 'value': 'C'}, + { 'action': 'iterable_item_added', + 'new_path': None, + 'old_type': 'unknown___', + 'old_value': 'unknown___', + 'path': [3], + 't1_from_index': None, + 't1_to_index': None, + 't2_from_index': None, + 't2_to_index': None, + 'type': 'unknown___', + 'value': 'D'}] + + +.. _delta_from_flat_dicts_label: + +Delta Load From Flat Dictionaries +------------------------------------ + + >>> from deepdiff import DeepDiff, Delta + >>> t3 = ["A", "B"] + >>> t4 = ["A", "B", "C", "D"] + >>> diff = DeepDiff(t3, t4, verbose_level=2) + >>> delta = Delta(diff, bidirectional=True) + >>> flat_dicts = delta.to_flat_dicts() + >>> + >>> delta2 = Delta(flat_dict_list=flat_dicts) + >>> t3 + delta == t4 + True + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/stats.rst b/deepdiff/docstrings/stats.rst new file mode 100644 index 00000000..a3292aec --- /dev/null +++ b/deepdiff/docstrings/stats.rst @@ -0,0 +1,78 @@ +:doc:`/index` + +.. _stats_n_logging_label: + +Stats and Logging +================= + +.. _log_frequency_in_sec_label: + +Log Frequency In Sec +-------------------- + +log_frequency_in_sec: Integer, default = 0 + How often to log the progress. The default of 0 means logging progress is disabled. + If you set it to 20, it will log every 20 seconds. This is useful only when running DeepDiff + on massive objects that will take a while to run. 
If you are only dealing with small objects, keep it at 0 to disable progress logging. + +For example we have run a diff on 2 nested objects that took 2 seconds to get the results. By passing the log_frequency_in_sec=1, we get the following in the logs: + + >>> DeepDiff(t1, t2, log_frequency_in_sec=1) + INFO:deepdiff.diff:DeepDiff 1 seconds in progress. Pass #1634, Diff #8005 + INFO:deepdiff.diff:DeepDiff 2 seconds in progress. Pass #3319, Diff #16148 + INFO:deepdiff.diff:stats {'PASSES COUNT': 3960, 'DIFF COUNT': 19469, 'DISTANCE CACHE HIT COUNT': 11847, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 2} + +.. note:: + The default python logger will omit the info logs. You can either set the logging filter to include info logs or pass a different logger via :ref:`progress_logger_label` + + >>> import logging + >>> logging.basicConfig(level=logging.INFO) + + +.. _progress_logger_label: + +Progress Logger +--------------- + +progress_logger: log function, default = logger.info + What logging function to use specifically for progress reporting. This function is only used when progress logging is enabled + by setting log_frequency_in_sec to anything above zero. The function that is passed as the progress_logger needs to be thread safe. + + +For example you can pass progress_logger=logger.warning to the example above and everything is logged as warning level: + + >>> DeepDiff(t1, t2, log_frequency_in_sec=1, progress_logger=logger.warning) + WARNING:deepdiff.diff:DeepDiff 1 seconds in progress. Pass #1634, Diff #8005 + WARNING:deepdiff.diff:DeepDiff 2 seconds in progress. Pass #3319, Diff #16148 + WARNING:deepdiff.diff:stats {'PASSES COUNT': 3960, 'DIFF COUNT': 19469, 'DISTANCE CACHE HIT COUNT': 11847, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 2} + + +.. _get_stats_label: + +Get Stats +--------- + +You can run the get_stats() method on a diff object to get some stats on the object. 
+For example: + + >>> from pprint import pprint + >>> from deepdiff import DeepDiff + >>> + >>> t1 = [ + ... [1, 2, 3, 9], [9, 8, 5, 9] + ... ] + >>> + >>> t2 = [ + ... [1, 2, 4, 10], [4, 2, 5] + ... ] + >>> + >>> diff = DeepDiff(t1, t2, ignore_order=True, cache_size=5000, cutoff_intersection_for_pairs=1) + >>> pprint(diff.get_stats()) + {'DIFF COUNT': 37, + 'DISTANCE CACHE HIT COUNT': 0, + 'MAX DIFF LIMIT REACHED': False, + 'MAX PASS LIMIT REACHED': False, + 'PASSES COUNT': 7} + + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/support.rst b/deepdiff/docstrings/support.rst new file mode 100644 index 00000000..7878f86a --- /dev/null +++ b/deepdiff/docstrings/support.rst @@ -0,0 +1,19 @@ +:doc:`/index` + +Support +======= + +.. |qluster_link| raw:: html + + Qluster + +DeepDiff is now part of |qluster_link|. + +*If you're building workflows around data validation and correction,* `Qluster `__ *gives your team a structured way to manage rules, review failures, approve fixes, and reuse decisions—without building the entire system from scratch.* + +Thank you for using DeepDiff! +If you find a bug, please create a ticket on our `GitHub repo `__ + +We are **available for consulting** if you need immediate help or custom implementations of DeepDiff. You can reach us via filling up `this form `__ + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/troubleshoot.rst b/deepdiff/docstrings/troubleshoot.rst new file mode 100644 index 00000000..84e18b6f --- /dev/null +++ b/deepdiff/docstrings/troubleshoot.rst @@ -0,0 +1,29 @@ +:doc:`/index` + +.. _troubleshoot_label: + +Troubleshoot +============ + +Murmur3 Installation +~~~~~~~~~~~~~~~~~~~~ + +NOTE: Murmur3 was removed from DeepDiff 5.2.0 + +If you are running into this issue, you are using an older version of DeepDiff. + +`Failed to build mmh3 when installing DeepDiff` + +DeepDiff prefers to use Murmur3 for hashing. 
However you have to manually install murmur3 by running: `pip install mmh3` + +On MacOS Mojave, some users experience difficulty when installing Murmur3. + +The problem can be solved by running: + + `xcode-select --install` + +And then running + + `pip install mmh3` + +Back to :doc:`/index` diff --git a/deepdiff/docstrings/view.rst b/deepdiff/docstrings/view.rst new file mode 100644 index 00000000..3cdec070 --- /dev/null +++ b/deepdiff/docstrings/view.rst @@ -0,0 +1,365 @@ +:doc:`/index` + +.. _view_label: + +View +==== + +You have the options of text view and tree view. +The main difference is that the tree view has the capabilities to traverse the objects to see what objects were compared to what other objects. + +While the view options decide the format of the output that is mostly machine readable, regardless of the view you choose, you can get a more human readable output by using the pretty() method. +DeepDiff also offers other specialized views such as the :doc:`colored_view` (which includes a compact variant) and :doc:`delta` view for specific use cases. + +.. _text_view_label: + +Text View +--------- + +Text view is the default view of DeepDiff. It is simpler than tree view. + +Example of using the text view. + >>> from decimal import Decimal + >>> from deepdiff import DeepDiff + >>> t1 = {1:1, 3:3, 4:4} + >>> t2 = {1:1, 3:3, 5:5, 6:6} + >>> ddiff = DeepDiff(t1, t2) + >>> print(ddiff) + {'dictionary_item_added': [root[5], root[6]], 'dictionary_item_removed': [root[4]]} + +So for example ddiff['dictionary_item_added'] is a set of string results. That's why this view is called the text view. +You can get this view by default or by passing `view='text'`. + +.. _tree_view_label: + +Tree View +--------- + +The tree view provides you with tree objects that you can traverse through to find +the parents of the objects that are diffed and the actual objects that are being diffed. +This view is very useful when dealing with nested objects. 
+Note that tree view always returns results in the form of Python sets. + +You can traverse through the tree elements! + +.. note:: + The Tree view is just a different representation of the diffed data. + Behind the scene, DeepDiff creates the tree view first and then converts it to textual + representation for the text view. + +**Tree View Interface** + +.. code:: text + + +---------------------------------------------------------------+ + | | + | parent(t1) parent node parent(t2) |----level + | + ^ + | + +------|--------------------------|---------------------|-------+ + | | | up | + | Child | | | ChildRelationship + | Relationship | | | + | down | | | + +------|----------------------|-------------------------|-------+ + | v v v | + | child(t1) child node child(t2) |----level + | | + +---------------------------------------------------------------+ + + +:up: Move up to the parent node aka parent level +:down: Move down to the child node aka child level +:path(): Get the path to the current node in string representation, path(output_format='list') gives you the path in list representation. path(use_t2=True) gives you the path to t2. +:t1: The first item in the current node that is being diffed +:t2: The second item in the current node that is being diffed +:additional: Additional information about the node i.e. repetition +:repetition: Shortcut to get the repetition report + + +The tree view allows you to have more than mere textual representaion of the diffed objects. +It gives you the actual objects (t1, t2) throughout the tree of parents and children. + +**Examples for Tree View** + +.. note:: + Set view='tree' in order to get the results in tree view. 
+ +Value of an item has changed (Tree View) + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:4, 3:3} + >>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') + >>> ddiff_verbose0 + {'values_changed': []} + >>> + >>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') + >>> ddiff_verbose1 + {'values_changed': []} + >>> set_of_values_changed = ddiff_verbose1['values_changed'] + >>> # since set_of_values_changed includes only one item in a set + >>> # in order to get that one item we can: + >>> (changed,) = set_of_values_changed + >>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] + + >>> changed.t1 + 2 + >>> changed.t2 + 4 + >>> # You can traverse through the tree, get to the parents! + >>> changed.up + + +List difference (Tree View) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff + {'iterable_item_removed': [, ]} + >>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. 
+ >>> # One way to get one item from the set is to convert it to a list + >>> # And then get the first item of the list: + >>> removed = list(ddiff['iterable_item_removed'])[0] + >>> removed + + >>> + >>> parent = removed.up + >>> parent + + >>> parent.path() # gives you the string representation of the path + "root[4]['b']" + >>> parent.path(output_format='list') # gives you the list of keys and attributes that make up the path + [4, 'b'] + >>> parent.t1 + [1, 2, 3, 4] + >>> parent.t2 + [1, 2] + >>> parent.up + + >>> parent.up.up + + >>> parent.up.up.t1 + {1: 1, 2: 2, 3: 3, 4: {'a': 'hello', 'b': [1, 2, 3, 4]}} + >>> parent.up.up.t1 == t1 # It is holding the original t1 that we passed to DeepDiff + True + +List difference 2 (Tree View) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> pprint(ddiff, indent = 2) + { 'iterable_item_added': [], + 'values_changed': [, ]} + >>> + >>> # Note that iterable_item_added is a set with one item. + >>> # So in order to get that one item from it, we can do: + >>> + >>> (added,) = ddiff['iterable_item_added'] + >>> added + + >>> added.up.up + + >>> added.up.up.path() + 'root[4]' + >>> added.up.up.path(output_format='list') # gives you the list of keys and attributes that make up the path + [4] + >>> added.up.up.down + + >>> + >>> # going up twice and then down twice gives you the same node in the tree: + >>> added.up.up.down.down == added + True + +List difference ignoring order but reporting repetitions (Tree View) + >>> t1 = [1, 3, 1, 4] + >>> t2 = [4, 4, 1] + >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree') + >>> pprint(ddiff, indent=2) + { 'iterable_item_removed': [], + 'repetition_change': [, ]} + >>> + >>> # repetition_change is a set with 2 items. + >>> # in order to get those 2 items, we can do the following. 
+ >>> # or we can convert the set to list and get the list items. + >>> # or we can iterate through the set items + >>> + >>> (repeat1, repeat2) = ddiff['repetition_change'] + >>> repeat1 # the default verbosity is set to 1. + + >>> # The actual data regarding the repetitions can be found in the repetition attribute: + >>> repeat1.repetition + {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} + >>> + >>> # If you change the verbosity, you will see less: + >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree', verbose_level=0) + >>> ddiff + {'repetition_change': [, ], 'iterable_item_removed': []} + >>> (repeat1, repeat2) = ddiff['repetition_change'] + >>> repeat1 + + >>> + >>> # But the verbosity level does not change the actual report object. + >>> # It only changes the textual representaion of the object. We get the actual object here: + >>> repeat1.repetition + {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} + >>> repeat1.t1 + 4 + >>> repeat1.t2 + 4 + >>> repeat1.up + + +List that contains dictionary (Tree View) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> pprint (ddiff, indent = 2) + { 'dictionary_item_removed': [], + 'values_changed': []} + +Sets (Tree View): + >>> t1 = {1, 2, 8} + >>> t2 = {1, 2, 3, 5} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> print(ddiff) + {'set_item_removed': [], 'set_item_added': [, ]} + >>> # grabbing one item from set_item_removed set which has one item only + >>> (item,) = ddiff['set_item_removed'] + >>> item.up + + >>> item.up.t1 == t1 + True + +Named Tuples (Tree View): + >>> from collections import namedtuple + >>> Point = namedtuple('Point', ['x', 'y']) + >>> t1 = Point(x=11, y=22) + >>> t2 = Point(x=11, y=23) + >>> print(DeepDiff(t1, t2, view='tree')) + {'values_changed': []} + +Custom objects (Tree 
View): + >>> class ClassA(object): + ... a = 1 + ... def __init__(self, b): + ... self.b = b + ... + >>> t1 = ClassA(1) + >>> t2 = ClassA(2) + >>> + >>> print(DeepDiff(t1, t2, view='tree')) + {'values_changed': []} + +Object attribute added (Tree View): + >>> t2.c = "new attribute" + >>> pprint(DeepDiff(t1, t2, view='tree')) + {'attribute_added': [], + 'values_changed': []} + +Approximate decimals comparison (Significant digits after the point) (Tree View): + >>> t1 = Decimal('1.52') + >>> t2 = Decimal('1.57') + >>> DeepDiff(t1, t2, significant_digits=0, view='tree') + {} + >>> ddiff = DeepDiff(t1, t2, significant_digits=1, view='tree') + >>> ddiff + {'values_changed': []} + >>> (change1,) = ddiff['values_changed'] + >>> change1 + + >>> change1.t1 + Decimal('1.52') + >>> change1.t2 + Decimal('1.57') + >>> change1.path() + 'root' + +Approximate float comparison (Significant digits after the point) (Tree View): + >>> t1 = [ 1.1129, 1.3359 ] + >>> t2 = [ 1.113, 1.3362 ] + >>> ddiff = DeepDiff(t1, t2, significant_digits=3, view='tree') + >>> ddiff + {} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> pprint(ddiff, indent=2) + { 'values_changed': [, ]} + >>> ddiff = DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1, view='tree') + >>> ddiff + {'values_changed': []} + + +pretty() method +--------------- + +Use the pretty method for human readable output. This is regardless of what view you have used to generate the results. + >>> from deepdiff import DeepDiff + >>> t1={1,2,4} + >>> t2={2,3} + >>> print(DeepDiff(t1, t2).pretty()) + Item root[3] added to set. + Item root[4] removed from set. + Item root[1] removed from set. + +The pretty method has an optional parameter ``prefix`` that allows a prefix string before every output line (*e.g.* for logging): + >>> from deepdiff import DeepDiff + >>> t1={1,2,4} + >>> t2={2,3} + >>> print(DeepDiff(t1, t2).pretty(prefix='Diff: ')) + Diff: Item root[3] added to set. + Diff: Item root[4] removed from set. 
+ Diff: Item root[1] removed from set. + +The ``prefix`` may also be a callable function. This function must accept ``**kwargs``; as of this version, the only parameter is ``diff`` but the signature allows for future expansion. +The ``diff`` given will be the ``DeepDiff`` that ``pretty`` was called on; this allows interesting capabilities such as: + +.. code:: python + + >>> from deepdiff import DeepDiff + >>> t1={1,2,4} + >>> t2={2,3} + >>> def callback(**kwargs): + ... """Helper function using a hidden variable on the diff that tracks which count prints next""" + ... kwargs['diff']._diff_count = 1 + getattr(kwargs['diff'], '_diff_count', 0) + ... return f"Diff #{kwargs['diff']._diff_count}: " + ... + >>> print(DeepDiff(t1, t2).pretty(prefix=callback)) + Diff #1: Item root[3] added to set. + Diff #2: Item root[4] removed from set. + Diff #3: Item root[1] removed from set. + + +Text view vs. Tree view vs. pretty() method +----------------------------------------------- + +Views are just different formats of the results. Each comes with its own set of features. At the end of the day the user can choose the right format based on the use case. + +- The text view is the default format of the results. It is the format that is the most suitable if you don't need to know the traversal history of the objects being compared. +- The tree view allows you to traverse back and forth through the tree and see what objects were compared to what other objects. +- The pretty() method is not a view. All the views are dictionaries. The pretty() method spits out a string output of what has changed and is designed to be human readable. 
+ +For example + >>> from deepdiff import DeepDiff + >>> t1={1,2,4} + >>> t2={2,3} + +Text view (default) + >>> DeepDiff(t1, t2) # same as view='text' + {'set_item_removed': [root[4], root[1]], 'set_item_added': [root[3]]} + +Tree view + >>> tree = DeepDiff(t1, t2, view='tree') + >>> tree + {'set_item_removed': [, ], 'set_item_added': []} + >>> tree['set_item_added'][0] + + >>> tree['set_item_added'][0].t2 + 3 + +Pretty method. Regardless of what view was used, you can use the "pretty()" method to get a human readable output. + >>> print(DeepDiff(t1, t2).pretty()) + Item root[3] added to set. + Item root[4] removed from set. + Item root[1] removed from set. + + +Back to :doc:`/index` diff --git a/deepdiff/helper.py b/deepdiff/helper.py index f7eeea24..a2ea5620 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -422,7 +422,7 @@ def type_is_subclass_of_type_group(item: Any, type_group: Tuple[Type[Any], ...]) def get_doc(doc_filename: str) -> str: try: - with open(os.path.join(current_dir, '../docs/', doc_filename), 'r') as doc_file: + with open(os.path.join(current_dir, 'docstrings', doc_filename), 'r') as doc_file: doc = doc_file.read() except Exception: # pragma: no cover doc = 'Failed to load the docstrings. Please visit: https://zepworks.com/deepdiff/current/' # pragma: no cover diff --git a/docs/authors.rst b/docs/authors.rst deleted file mode 100644 index 27fc8de3..00000000 --- a/docs/authors.rst +++ /dev/null @@ -1,167 +0,0 @@ -:doc:`/index` - -Authors -======= - -Authors in order of the timeline of their contributions: - -- `Sep Dehpour (Seperman)`_ -- `Victor Hahn Castell`_ for the tree view and major contributions: -- `nfvs`_ for Travis-CI setup script. -- `brbsix`_ for initial Py3 porting. -- `WangFenjin`_ for unicode support. -- `timoilya`_ for comparing list of sets when ignoring order. -- `Bernhard10`_ for significant digits comparison. -- `b-jazz`_ for PEP257 cleanup, Standardize on full names, fixing line - endings. 
-- `finnhughes`_ for fixing **slots** -- `moloney`_ for Unicode vs. Bytes default -- `serv-inc`_ for adding help(deepdiff) -- `movermeyer`_ for updating docs -- `maxrothman`_ for search in inherited class attributes -- `maxrothman`_ for search for types/objects -- `MartyHub`_ for exclude regex paths -- `sreecodeslayer`_ for DeepSearch match_string -- Brian Maissy `brianmaissy`_ for weakref fix, enum tests -- Bartosz Borowik `boba-2`_ for Exclude types fix when ignoring order -- Brian Maissy `brianmaissy `__ for - fixing classes which inherit from classes with slots didn’t have all - of their slots compared -- Juan Soler `Soleronline`_ for adding ignore_type_number -- `mthaddon`_ for adding timedelta diffing support -- `Necrophagos`_ for Hashing of the number 1 vs. True -- `gaal-dev`_ for adding exclude_obj_callback -- Ivan Piskunov `van-ess0`_ for deprecation warning enhancement. -- Michał Karaś `MKaras93`_ for the pretty view -- Christian Kothe `chkothe`_ for the basic support for diffing numpy - arrays -- `Timothy`_ for truncate_datetime -- `d0b3rm4n`_ for bugfix to not apply format to non numbers. -- `MyrikLD`_ for Bug Fix NoneType in ignore type groups -- Stian Jensen `stianjensen`_ for improving ignoring of NoneType in - diff -- Florian Klien `flowolf`_ for adding math_epsilon -- Tim Klein `timjklein36`_ for retaining the order of multiple - dictionary items added via Delta. -- Wilhelm Schürmann\ `wbsch`_ for fixing the typo with yml files. -- `lyz-code`_ for adding support for regular expressions in DeepSearch - and strict_checking feature in DeepSearch. -- `dtorres-sf`_ for adding the option for custom compare function -- Tony Wang `Tony-Wang`_ for bugfix: verbose_level==0 should disable - values_changes. -- Sun Ao `eggachecat`_ for adding custom operators. -- Sun Ao `eggachecat`_ for adding ignore_order_func. -- `SlavaSkvortsov`_ for fixing unprocessed key error. -- Håvard Thom `havardthom`_ for adding UUID support. 
-- Dhanvantari Tilak `Dhanvantari`_ for Bug-Fix: - ``TypeError in _get_numbers_distance() when ignore_order = True``. -- Yael Mintz `yaelmi3`_ for detailed pretty print when verbose_level=2. -- Mikhail Khviyuzov `mskhviyu`_ for Exclude obj callback strict. -- `dtorres-sf`_ for the fix for diffing using iterable_compare_func with nested objects. -- `Enric Pou `__ for bug fix of ValueError - when using Decimal 0.x -- `Uwe Fladrich `__ for fixing bug when diff'ing non-sequence iterables -- `Michal Ozery-Flato `__ for - setting equal_nan=ignore_nan_inequality in the call for - np.array_equal -- `martin-kokos `__ for using Pytest’s - tmp_path fixture instead of /tmp/ -- Håvard Thom `havardthom `__ for adding - include_obj_callback and include_obj_callback_strict. -- `Noam Gottlieb `__ for fixing a corner - case where numpy’s ``np.float32`` nans are not ignored when using - ``ignore_nan_equality``. -- `maggelus `__ for the bugfix deephash - for paths. -- `maggelus `__ for the bugfix deephash - compiled regex. -- `martin-kokos `__ for fixing the - tests dependent on toml. -- `kor4ik `__ for the bugfix for - ``include_paths`` for nested dictionaries. -- `martin-kokos `__ for using tomli - and tomli-w for dealing with tomli files. -- `Alex Sauer-Budge `__ for the bugfix for - ``datetime.date``. -- `William Jamieson `__ for `NumPy 2.0 compatibility `__ -- `Leo Sin `__ for Supporting Python 3.12 in - the build process -- `sf-tcalhoun `__ for fixing - “Instantiating a Delta with a flat_dict_list unexpectedly mutates the - flat_dict_list” -- `dtorres-sf `__ for fixing iterable - moved items when iterable_compare_func is used. -- `Florian Finkernagel `__ for pandas - and polars support. -- Mathis Chenuet `artemisart `__ for - fixing slots classes comparison and PR review. 
-- Sherjeel Shabih `sherjeelshabih `__ - for fixing the issue where the key deep_distance is not returned when - both compared items are equal #510 -- `Juergen Skrotzky `__ for adding - empty ``py.typed`` -- `Mate Valko `__ for fixing the issue so we - lower only if clean_key is instance of str via #504 -- `jlaba `__ for fixing #493 include_paths, - when only certain keys are included via #499 -- `Doron Behar `__ for fixing DeepHash - for numpy booleans via #496 -- `Aaron D. Marasco `__ for adding - print() options which allows a user-defined string (or callback - function) to prefix every output when using the pretty() call. -- `David Hotham `__ for relaxing - orderly-set dependency via #486 -- `dtorres-sf `__ for the fix for moving - nested tables when using iterable_compare_func. -- `Jim Cipar `__ for the fix recursion depth - limit when hashing numpy.datetime64 -- `Enji Cooper `__ for converting legacy - setuptools use to pyproject.toml -- `Diogo Correia `__ for reporting security vulnerability in Delta and DeepDiff that could allow remote code execution. -- `am-periphery `__ for reporting CVE-2026-33155: denial-of-service via crafted pickle payloads triggering massive memory allocation. - - -.. _Sep Dehpour (Seperman): http://www.zepworks.com -.. _Victor Hahn Castell: http://hahncastell.de -.. _nfvs: https://github.com/nfvs -.. _brbsix: https://github.com/brbsix -.. _WangFenjin: https://github.com/WangFenjin -.. _timoilya: https://github.com/timoilya -.. _Bernhard10: https://github.com/Bernhard10 -.. _b-jazz: https://github.com/b-jazz -.. _finnhughes: https://github.com/finnhughes -.. _moloney: https://github.com/moloney -.. _serv-inc: https://github.com/serv-inc -.. _movermeyer: https://github.com/movermeyer -.. _maxrothman: https://github.com/maxrothman -.. _MartyHub: https://github.com/MartyHub -.. _sreecodeslayer: https://github.com/sreecodeslayer -.. _brianmaissy: https://github.com/ -.. _boba-2: https://github.com/boba-2 -.. 
_Soleronline: https://github.com/Soleronline -.. _mthaddon: https://github.com/mthaddon -.. _Necrophagos: https://github.com/Necrophagos -.. _gaal-dev: https://github.com/gaal-dev -.. _van-ess0: https://github.com/van-ess0 -.. _MKaras93: https://github.com/MKaras93 -.. _chkothe: https://github.com/chkothe -.. _Timothy: https://github.com/timson -.. _d0b3rm4n: https://github.com/d0b3rm4n -.. _MyrikLD: https://github.com/MyrikLD -.. _stianjensen: https://github.com/stianjensen -.. _flowolf: https://github.com/flowolf -.. _timjklein36: https://github.com/timjklein36 -.. _wbsch: https://github.com/wbsch -.. _lyz-code: https://github.com/lyz-code -.. _dtorres-sf: https://github.com/dtorres-sf -.. _Tony-Wang: https://github.com/Tony-Wang -.. _eggachecat: https://github.com/eggachecat -.. _SlavaSkvortsov: https://github.com/SlavaSkvortsov -.. _havardthom: https://github.com/havardthom -.. _Dhanvantari: https://github.com/Dhanvantari -.. _yaelmi3: https://github.com/yaelmi3 -.. _mskhviyu: https://github.com/mskhviyu - -Thank you for contributing to DeepDiff! 
- -Back to :doc:`/index` diff --git a/docs/authors.rst b/docs/authors.rst new file mode 120000 index 00000000..9e94aacf --- /dev/null +++ b/docs/authors.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/authors.rst \ No newline at end of file diff --git a/docs/basics.rst b/docs/basics.rst deleted file mode 100644 index 6eba5507..00000000 --- a/docs/basics.rst +++ /dev/null @@ -1,345 +0,0 @@ -:doc:`/index` - -Basics -====== - - -Importing - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - -Same object returns empty - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = t1 - >>> print(DeepDiff(t1, t2)) - {} - -Type of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:"2", 3:3} - >>> pprint(DeepDiff(t1, t2), indent=2) - { 'type_changes': { 'root[2]': { 'new_type': , - 'new_value': '2', - 'old_type': , - 'old_value': 2}}} - -Value of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) - {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} - -Item added and/or removed - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff) - {'dictionary_item_added': [root[5], root[6]], - 'dictionary_item_removed': [root[4]]} - -Set verbose level to 2 in order to see the added or removed items with their values - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2, verbose_level=2) - >>> pprint(ddiff, indent=2) - { 'dictionary_item_added': {'root[5]': 5, 'root[6]': 6}, - 'dictionary_item_removed': {'root[4]': 4}} - -Set verbose level to 2 includes new_path when the path has changed for a report between t1 and t2: - >>> t1 = [1, 3] - >>> t2 = [3, 2] - >>> - >>> - >>> diff = DeepDiff(t1, t2, ignore_order=True, verbose_level=2) - >>> pprint(diff) - {'values_changed': {'root[0]': {'new_path': 'root[1]', - 'new_value': 2, - 'old_value': 1}}} - -String difference - >>> t1 = {1:1, 2:2, 3:3, 
4:{"a":"hello", "b":"world"}} - >>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { 'root[2]': {'new_value': 4, 'old_value': 2}, - "root[4]['b']": { 'new_value': 'world!', - 'old_value': 'world'}}} - - -String difference 2 - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { "root[4]['b']": { 'diff': '--- \n' - '+++ \n' - '@@ -1,5 +1,4 @@\n' - '-world!\n' - '-Goodbye!\n' - '+world\n' - ' 1\n' - ' 2\n' - ' End', - 'new_value': 'world\n1\n2\nEnd', - 'old_value': 'world!\n' - 'Goodbye!\n' - '1\n' - '2\n' - 'End'}}} - - >>> - >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) - --- - +++ - @@ -1,5 +1,4 @@ - -world! - -Goodbye! - +world - 1 - 2 - End - -List difference - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} - -List that contains dictionary: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': [root[4]['b'][2][2]], - 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} - -Sets: - >>> t1 = {1, 2, 8} - >>> t2 = {1, 2, 3, 5} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff) - {'set_item_added': [root[3], root[5]], 'set_item_removed': [root[8]]} - -Named Tuples: - >>> from collections import namedtuple - >>> Point = namedtuple('Point', ['x', 'y']) - >>> t1 = Point(x=11, y=22) - >>> t2 = Point(x=11, y=23) - >>> pprint (DeepDiff(t1, t2)) - {'values_changed': {'root.y': {'new_value': 23, 
'old_value': 22}}} - -Custom objects: - >>> class ClassA(object): - ... a = 1 - ... def __init__(self, b): - ... self.b = b - ... - >>> t1 = ClassA(1) - >>> t2 = ClassA(2) - >>> - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - -Object attribute added: - >>> t2.c = "new attribute" - >>> pprint(DeepDiff(t1, t2)) - {'attribute_added': [root.c], - 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - - -Datetime - DeepDiff converts all datetimes into UTC. If a datetime is timezone naive, we assume it is in UTC too. - That is different than what Python does. Python assumes your timezone naive datetime is in your local timezone. - >>> from deepdiff import DeepDiff - >>> from datetime import datetime, timezone - >>> d1 = datetime(2020, 8, 31, 13, 14, 1) - >>> d2 = datetime(2020, 8, 31, 13, 14, 1, tzinfo=timezone.utc) - >>> d1 == d2 - False - >>> DeepDiff(d1, d2) - {} - - -.. note:: - All the examples above use the default :ref:`text_view_label`. - If you want traversing functionality in the results, use the :ref:`tree_view_label`. - You just need to set view='tree' to get it in tree form. - - -.. _group_by_label: - -Group By --------- - -group_by can be used when dealing with the list of dictionaries. It converts them from lists to a single dictionary with the key defined by group_by. The common use case is when reading data from a flat CSV, and the primary key is one of the columns in the CSV. We want to use the primary key instead of the CSV row number to group the rows. The group_by can do 2D group_by by passing a list of 2 keys. It is also possible to have a callable group_by, which can be used to access keys in more nested data structures. - -For example: - >>> [ - ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, - ... {'id': 'BB', 'name': 'James', 'last_name': 'Blue'}, - ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, - ... ] - -Becomes: - >>> t1 = { - ... 
'AA': {'name': 'Joe', 'last_name': 'Nobody'}, - ... 'BB': {'name': 'James', 'last_name': 'Blue'}, - ... 'CC': {'name': 'Mike', 'last_name': 'Apple'}, - ... } - - -With that in mind, let's take a look at the following: - >>> from deepdiff import DeepDiff - >>> t1 = [ - ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, - ... {'id': 'BB', 'name': 'James', 'last_name': 'Blue'}, - ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, - ... ] - >>> - >>> t2 = [ - ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, - ... {'id': 'BB', 'name': 'James', 'last_name': 'Brown'}, - ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, - ... ] - >>> - >>> DeepDiff(t1, t2) - {'values_changed': {"root[1]['last_name']": {'new_value': 'Brown', 'old_value': 'Blue'}}} - - -Now we use group_by='id': - >>> DeepDiff(t1, t2, group_by='id') - {'values_changed': {"root['BB']['last_name']": {'new_value': 'Brown', 'old_value': 'Blue'}}} - -.. note:: - group_by actually changes the structure of the t1 and t2. You can see this by using the tree view: - - >>> diff = DeepDiff(t1, t2, group_by='id', view='tree') - >>> diff - {'values_changed': []} - >>> diff['values_changed'][0] - - >>> diff['values_changed'][0].up - - >>> diff['values_changed'][0].up.up - - >>> diff['values_changed'][0].up.up.t1 - {'AA': {'name': 'Joe', 'last_name': 'Nobody'}, 'BB': {'name': 'James', 'last_name': 'Blue'}, 'CC': {'name': 'Mike', 'last_name': 'Apple'}} - -2D Example: - >>> from pprint import pprint - >>> from deepdiff import DeepDiff - >>> - >>> t1 = [ - ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, - ... {'id': 'BB', 'name': 'James', 'last_name': 'Blue'}, - ... {'id': 'BB', 'name': 'Jimmy', 'last_name': 'Red'}, - ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, - ... ] - >>> - >>> t2 = [ - ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'}, - ... {'id': 'BB', 'name': 'James', 'last_name': 'Brown'}, - ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple'}, - ... 
] - >>> - >>> diff = DeepDiff(t1, t2, group_by=['id', 'name']) - >>> pprint(diff) - {'dictionary_item_removed': [root['BB']['Jimmy']], - 'values_changed': {"root['BB']['James']['last_name']": {'new_value': 'Brown', - 'old_value': 'Blue'}}} - -Callable group_by Example: - >>> from deepdiff import DeepDiff - >>> - >>> t1 = [ - ... {'id': 'AA', 'demographics': {'names': {'first': 'Joe', 'middle': 'John', 'last': 'Nobody'}}}, - ... {'id': 'BB', 'demographics': {'names': {'first': 'James', 'middle': 'Joyce', 'last': 'Blue'}}}, - ... {'id': 'CC', 'demographics': {'names': {'first': 'Mike', 'middle': 'Mark', 'last': 'Apple'}}}, - ... ] - >>> - >>> t2 = [ - ... {'id': 'AA', 'demographics': {'names': {'first': 'Joe', 'middle': 'John', 'last': 'Nobody'}}}, - ... {'id': 'BB', 'demographics': {'names': {'first': 'James', 'middle': 'Joyce', 'last': 'Brown'}}}, - ... {'id': 'CC', 'demographics': {'names': {'first': 'Mike', 'middle': 'Charles', 'last': 'Apple'}}}, - ... ] - >>> - >>> diff = DeepDiff(t1, t2, group_by=lambda x: x['demographics']['names']['first']) - >>> pprint(diff) - {'values_changed': {"root['James']['demographics']['names']['last']": {'new_value': 'Brown', - 'old_value': 'Blue'}, - "root['Mike']['demographics']['names']['middle']": {'new_value': 'Charles', - 'old_value': 'Mark'}}} - -.. _group_by_sort_key_label: - -Group By - Sort Key -------------------- - -group_by_sort_key is used to define how dictionaries are sorted if multiple ones fall under one group. When this parameter is used, group_by converts the lists of dictionaries into a dictionary of keys to lists of dictionaries. Then, group_by_sort_key is used to sort between the list. - -For example, there are duplicate id values. If we only use group_by='id', one of the dictionaries with id of 'BB' will overwrite the other. However, if we also set group_by_sort_key='name', we keep both dictionaries with the id of 'BB'. - -Example: - >>> [{'id': 'AA', 'int_id': 2, 'last_name': 'Nobody', 'name': 'Joe'}, - ... 
{'id': 'BB', 'int_id': 20, 'last_name': 'Blue', 'name': 'James'}, - ... {'id': 'BB', 'int_id': 3, 'last_name': 'Red', 'name': 'Jimmy'}, - ... {'id': 'CC', 'int_id': 4, 'last_name': 'Apple', 'name': 'Mike'}] - - -Becomes: - >>> {'AA': [{'int_id': 2, 'last_name': 'Nobody', 'name': 'Joe'}], - ... 'BB': [{'int_id': 20, 'last_name': 'Blue', 'name': 'James'}, - ... {'int_id': 3, 'last_name': 'Red', 'name': 'Jimmy'}], - ... 'CC': [{'int_id': 4, 'last_name': 'Apple', 'name': 'Mike'}]} - - -Example of using group_by_sort_key - >>> t1 = [ - ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody', 'int_id': 2}, - ... {'id': 'BB', 'name': 'James', 'last_name': 'Blue', 'int_id': 20}, - ... {'id': 'BB', 'name': 'Jimmy', 'last_name': 'Red', 'int_id': 3}, - ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple', 'int_id': 4}, - ... ] - >>> - >>> t2 = [ - ... {'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody', 'int_id': 2}, - ... {'id': 'BB', 'name': 'James', 'last_name': 'Brown', 'int_id': 20}, - ... {'id': 'CC', 'name': 'Mike', 'last_name': 'Apple', 'int_id': 4}, - ... ] - >>> - >>> diff = DeepDiff(t1, t2, group_by='id', group_by_sort_key='name') - >>> - >>> pprint(diff) - {'iterable_item_removed': {"root['BB'][1]": {'int_id': 3, - 'last_name': 'Red', - 'name': 'Jimmy'}}, - 'values_changed': {"root['BB'][0]['last_name']": {'new_value': 'Brown', - 'old_value': 'Blue'}}} - - -.. _default_timezone_label: - -Default Time Zone ------------------ - -default_timezone defines the default timezone. If a datetime is timezone naive, which means it doesn't have a timezone, we assume the datetime is in this timezone. Also any datetime that has a timezone will be converted to this timezone so the datetimes can be compared properly all in the same timezone. Note that Python's default behavior assumes the default timezone is your local timezone. DeepDiff's default is UTC, not your local time zone. 
- - -Note that if we change the default_timezone, the output timezone changes accordingly - >>> from deepdiff import DeepDiff - >>> import pytz - >>> from datetime import date, datetime, time, timezone - >>> dt_utc = datetime(2025, 2, 3, 12, 0, 0, tzinfo=pytz.utc) # UTC timezone - >>> dt_utc2 = datetime(2025, 2, 3, 11, 0, 0, tzinfo=pytz.utc) # UTC timezone - >>> dt_ny = dt_utc.astimezone(pytz.timezone('America/New_York')) - >>> dt_ny2 = dt_utc2.astimezone(pytz.timezone('America/New_York')) - >>> diff = DeepDiff(dt_ny, dt_ny2) - >>> diff - {'values_changed': {'root': {'new_value': datetime.datetime(2025, 2, 3, 11, 0, tzinfo=datetime.timezone.utc), 'old_value': datetime.datetime(2025, 2, 3, 12, 0, tzinfo=datetime.timezone.utc)}}} - >>> diff2 = DeepDiff(dt_ny, dt_ny2, default_timezone=pytz.timezone('America/New_York')) - >>> diff2 - {'values_changed': {'root': {'new_value': datetime.datetime(2025, 2, 3, 6, 0, tzinfo=), 'old_value': datetime.datetime(2025, 2, 3, 7, 0, tzinfo=)}}} - - -Back to :doc:`/index` diff --git a/docs/basics.rst b/docs/basics.rst new file mode 120000 index 00000000..2ee9cb9c --- /dev/null +++ b/docs/basics.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/basics.rst \ No newline at end of file diff --git a/docs/changelog.rst b/docs/changelog.rst deleted file mode 100644 index e890436f..00000000 --- a/docs/changelog.rst +++ /dev/null @@ -1,340 +0,0 @@ -:doc:`/index` - -Changelog -========= - -DeepDiff Changelog - -- v8-7-0 - - migration note: - - `to_dict()` and `to_json()` now accept a `verbose_level` parameter and always return a usable text-view dict. When the original view is `'tree'`, they default to `verbose_level=2` for full detail. The old `view_override` parameter is removed. To get the previous results, you will need to pass the explicit verbose_level to `to_json` and `to_dict` if you are using the tree view. 
- - Dropping support for Python 3.9 - - Support for python 3.14 - - Added support for callable ``group_by`` thanks to `echan5 `__ - - Added ``FlatDeltaDict`` TypedDict for ``to_flat_dicts`` return type - - Fixed colored view display when all list items are removed thanks to `yannrouillard `__ - - Fixed ``hasattr()`` swallowing ``AttributeError`` in ``__slots__`` handling for objects with ``__getattr__`` thanks to `tpvasconcelos `__ - - Fixed ``ignore_order=True`` missing int-vs-float type changes - - Always use t1 path for reporting thanks to `devin13cox `__ - - Fixed ``_convert_oversized_ints`` failing on NamedTuples - - Fixed orjson ``TypeError`` for integers exceeding 64-bit range - - Fixed parameter bug in ``to_flat_dicts`` where ``include_action_in_path`` and ``report_type_changes`` were not being passed through - - Fixed ``ignore_keys`` issue in ``detailed__dict__`` thanks to `vitalis89 `__ - - Fixed logarithmic similarity type hint thanks to `ljames8 `__ -- v8-6-2 - - Security fix (CVE-2026-33155): Prevent denial-of-service via crafted pickle payloads that trigger massive memory allocation through the REDUCE opcode. Size-sensitive callables like ``bytes()`` and ``bytearray()`` are now wrapped to reject allocations exceeding 128 MB. -- v8-6-1 - - Patched security vulnerability in the Delta class which was vulnerable to class pollution via its constructor, and when combined with a gadget available in DeltaDiff itself, it could lead to Denial of Service and Remote Code Execution (via insecure Pickle deserialization). 
- -- v8-6-0 - - Added Colored View thanks to @mauvilsa - - Added support for applying deltas to NamedTuple thanks to @paulsc - - Fixed test_delta.py with Python 3.14 thanks to @Romain-Geissler-1A - - Added python property serialization to json - - Added ip address serialization - - Switched to UV from pip - - Added Claude.md - - Added uuid hashing thanks to @akshat62 - - Added ``ignore_uuid_types`` flag to DeepDiff to avoid type reports - when comparing UUID and string. - - Added comprehensive type hints across the codebase (multiple commits - for better type safety) - - Added support for memoryview serialization - - Added support for bytes serialization (non-UTF8 compatible) - - Fixed bug where group_by with numbers would leak type info into group - path reports - - Fixed bug in ``_get_clean_to_keys_mapping`` without explicit - significant digits - - Added support for python dict key serialization - - Enhanced support for IP address serialization with safe module imports - - Added development tooling improvements (pyright config, .envrc - example) - - Updated documentation and development instructions - -- v8-5-0 - - Updating deprecated pydantic calls - - Switching to pyproject.toml - - Fix for moving nested tables when using iterable_compare_func. by - - Fix recursion depth limit when hashing numpy.datetime64 - - Moving from legacy setuptools use to pyproject.toml - -- v8-4-2 - - fixes the type hints for the base - - fixes summarize so if json dumps fails, we can still get a repr of the results - - adds ipaddress support - -- v8-4-1 - - Adding BaseOperatorPlus base class for custom operators - - default_timezone can be passed now to set your default timezone to something other than UTC. - - New summarization algorithm that produces valid json - - Better type hint support - - Breaking change in DeepHash where we raise Exception instead of logging if we can't hash a value. - - Added the log_stacktrace parameter to DeepDiff. 
When True, it will log the stacktrace along with the error. - -- v8-3-0 - - Fixed some static typing issues - - Added the summarize module for better repr of nested values - - -- v8-2-0 - - Small optimizations so we don't load functions that are not needed - - Updated the minimum version of Orderly-set - - Normalize all datetimes into UTC. Assume timezone naive datetimes are UTC. - - -- v8-1-0 - - - Removing deprecated lines from setup.py - - Added ``prefix`` option to ``pretty()`` - - Fixes hashing of numpy boolean values. - - Fixes **slots** comparison when the attribute doesn’t exist. - - Relaxing orderly-set reqs - - Added Python 3.13 support - - Only lower if clean_key is instance of str - - Fixes issue where the key deep_distance is not returned when both - compared items are equal - - Fixes exclude_paths fails to work in certain cases - - exclude_paths fails to work - - Fixes to_json() method chokes on standard json.dumps() kwargs such as - sort_keys - - to_dict() method chokes on standard json.dumps() kwargs - - Fixes accessing the affected_root_keys property on the diff object - returned by DeepDiff fails when one of the dicts is empty - - Fixes accessing the affected_root_keys property on the - diff object returned by DeepDiff fails when one of the dicts is empty - - -- v8-0-1 - - - Bugfix. Numpy should be optional. - -- v8-0-0 - - - With the introduction of `threshold_to_diff_deeper`, the values returned are different than in previous versions of DeepDiff. You can still get the older values by setting `threshold_to_diff_deeper=0`. However to signify that enough has changed in this release that the users need to update the parameters passed to DeepDiff, we will be doing a major version update. - - `use_enum_value=True` makes it so when diffing enum, we use the enum's value. It makes it so comparing an enum to a string or any other value is not reported as a type change. - - `threshold_to_diff_deeper=float` is a number between 0 and 1. 
When comparing dictionaries that have a small intersection of keys, we will report the dictionary as a `new_value` instead of reporting individual keys changed. If you set it to zero, you get the same results as DeepDiff 7.0.1 and earlier, which means this feature is disabled. The new default is 0.33 which means if less that one third of keys between dictionaries intersect, report it as a new object. - - Deprecated `ordered-set` and switched to `orderly-set`. The `ordered-set` package was not being maintained anymore and starting Python 3.6, there were better options for sets that ordered. I forked one of the new implementations, modified it, and published it as `orderly-set`. - - Added `use_log_scale:bool` and `log_scale_similarity_threshold:float`. They can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. - - json serialization of reversed lists. - - Fix for iterable moved items when `iterable_compare_func` is used. - - Pandas and Polars support - -- v7-0-1 - - - Fixes the translation between Difflib opcodes and Delta flat rows. - -- v7-0-0 - - - When verbose=2, return ``new_path`` when the ``path`` and - ``new_path`` are different (for example when ignore_order=True and - the index of items have changed). - - Dropping support for Python 3.7 - - Introducing serialize to flat rows for delta objects. - - fixes the issue with hashing ``datetime.date`` objects where it - treated them as numbers instead of dates (fixes #445). 
- - upgrading orjson to the latest version - - Fix for bug when diffing two lists with ignore_order and providing - compare_func - - Fixes “Wrong diff on list of strings” #438 - - Supporting Python 3.12 in the build process by `Leo - Sin `__ - - Fixes “Instantiating a Delta with a flat_dict_list unexpectedly - mutates the flat_dict_list” #457 by - `sf-tcalhoun `__ - - Fixes “Error on Delta With None Key and Removed Item from List” - #441 - - Fixes “Error when comparing two nested dicts with 2 added fields” - #450 - - Fixes “Error when subtracting Delta from a dictionary” #443 - -- v6-7-1 - - - Support for subtracting delta objects when iterable_compare_func - is used. - - Better handling of force adding a delta to an object. - - Fix for - ```Can't compare dicts with both single and double quotes in keys`` `__ - - Updated docs for Inconsistent Behavior with math_epsilon and - ignore_order = True - -- v6-7-0 - - - Delta can be subtracted from other objects now. - - verify_symmetry is deprecated. Use bidirectional instead. - - always_include_values flag in Delta can be enabled to include - values in the delta for every change. - - Fix for Delta.\__add\_\_ breaks with esoteric dict keys. - -- v6-6-1 - - - Fix for `DeepDiff raises decimal exception when using significant - digits `__ - - Introducing group_by_sort_key - - Adding group_by 2D. For example - ``group_by=['last_name', 'zip_code']`` - -- v6-6-0 - - - Numpy 2.0 support - - Adding - `Delta.to_flat_dicts `__ - -- v6-5-0 - - - Adding - ```parse_path`` `__ - -- v6-4-1 - - - Bugfix: Keep Numpy Optional - -- v6-4-0 - - - `Add Ignore List Order Option to - DeepHash `__ by - `Bobby Morck `__ - - `pyyaml to 6.0.1 to fix cython build - problems `__ by - `Robert Bo Davis `__ - - `Precompiled regex simple - diff `__ by - `cohml `__ - - New flag: ``zip_ordered_iterables`` for forcing iterable items to - be compared one by one. 
- -- v6-3-1 - - - Bugfix deephash for paths by - `maggelus `__ - - Bugfix deephash compiled regex - `maggelus `__ - - Fix tests dependent on toml by - `martin-kokos `__ - - Bugfix for ``include_paths`` for nested dictionaries by - `kor4ik `__ - - Use tomli and tomli-w for dealing with tomli files by - `martin-kokos `__ - - Bugfix for ``datetime.date`` by `Alex - Sauer-Budge `__ - -- v6-3-0 - - - ``PrefixOrSuffixOperator``: This operator will skip strings that - are suffix or prefix of each other. - - ``include_obj_callback`` and ``include_obj_callback_strict`` are - added by `Håvard Thom `__. - - Fixed a corner case where numpy’s ``np.float32`` nans are not - ignored when using ``ignore_nan_equality`` by `Noam - Gottlieb `__ - - ``orjson`` becomes optional again. - - Fix for ``ignore_type_in_groups`` with numeric values so it does - not report number changes when the number types are different. - -- v6-2-3 - - - Switching to Orjson for serialization to improve the performance. - - Setting ``equal_nan=ignore_nan_inequality`` in the call for - ``np.array_equal`` - - Using Pytest’s tmp_path fixture instead of ``/tmp/`` - -- v6-2-2 - - - Enum test fix for python 3.11 - - Adding support for dateutils rrules - -- v6-2-1 - - - Removed the print statements. - -- v6-2-0 - - - Major improvement in the diff report for lists when items are all - hashable and the order of items is important. - -- v6-1-0 - - - DeepDiff.affected_paths can be used to get the list of all paths - where a change, addition, or deletion was reported for. - - DeepDiff.affected_root_keys can be used to get the list of all - paths where a change, addition, or deletion was reported for. - - Bugfix: ValueError when using Decimal 0.x #339 by `Enric - Pou `__ - - Serialization of UUID - -- v6-0-0 - - - `Exclude obj callback - strict `__ - parameter is added to DeepDiff by Mikhail Khviyuzov - `mskhviyu `__. 
- - A fix for diffing using ``iterable_compare_func`` with nested - objects by `dtorres-sf `__ who - originally contributed this feature. -- v5-7-0: - - - https://github.com/seperman/deepdiff/pull/284 Bug-Fix: TypeError - in \_get_numbers_distance() when ignore_order = True by - @Dhanvantari - - https://github.com/seperman/deepdiff/pull/280 Add support for - UUIDs by @havardthom - - Major bug in delta when it comes to iterable items added or - removed is investigated by @uwefladrich and resolved by @seperman -- v5-6-0: Adding custom operators, and ignore_order_func. Bugfix: verbose_level==0 should disable values_changes. Bugfix: unprocessed key error. -- v5-5-0: adding iterable_compare_func for DeepDiff, adding output_format of list for path() in tree view. -- v5-4-0: adding strict_checking for numbers in DeepSearch. -- v5-3-0: add support for regular expressions in DeepSearch. -- v5-2-3: Retaining the order of multiple dictionary items added via Delta. Fixed the typo with yml files in deep cli. Fixing Grep RecursionError where using non UTF-8 character. Allowing kwargs to be passed to to_json method. -- v5-2-2: Fixed Delta serialization when None type is present. -- v5-2-0: Removed Murmur3 as the preferred hashing method. Using SHA256 by default now. Added commandline for deepdiff. Added group_by. Added math_epsilon. Improved ignoring of NoneType. -- v5-0-2: Bug Fix NoneType in ignore type groups https://github.com/seperman/deepdiff/issues/207 -- v5-0-1: Bug fix to not apply format to non numbers. -- v5-0-0: Introducing the Delta object, Improving Numpy support, Fixing tuples comparison when ignore_order=True, Dramatically improving the results when ignore_order=True by running in passes, Introducing pretty print view, deep_distance, purge, progress logging, cache and truncate_datetime. 
-- v4-3-3: Adds support for datetime.time -- v4-3-2: Deprecation Warning Enhancement -- v4-3-1: Fixing the issue with exclude_path and hash calculations when dictionaries were inside iterables. https://github.com/seperman/deepdiff/issues/174 -- v4-3-0: adding exclude_obj_callback -- v4-2-0: .json property is finally removed. Fix for Py3.10. Dropping support for EOL Python 3.4. Ignoring private keys when calculating hashes. For example __init__ is not a part of hash calculation anymore. Fix for #166 Problem with comparing lists, with an boolean as element. -- v4-1-0: .json property is finally removed. -- v4-0-9: Fixing the bug for hashing custom unhashable objects -- v4-0-8: Adding ignore_nan_inequality for float('nan') -- v4-0-7: Hashing of the number 1 vs. True -- v4-0-6: found a tiny bug in Python formatting of numbers in scientific notation. Added a workaround. -- v4-0-5: Fixing number diffing. Adding number_format_notation and number_to_string_func. -- v4-0-4: Adding ignore_string_case and ignore_type_subclasses -- v4-0-3: Adding versionbump tool for release -- v4-0-2: Fixing installation issue where rst files are missing. -- v4-0-1: Fixing installation Tarball missing requirements.txt . DeepDiff v4+ should not show up as pip installable for Py2. Making Murmur3 installation optional. -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. -- v3-5-0: Exclude regex path -- v3-3-0: Searching for objects and class attributes -- v3-2-2: Adding help(deepdiff) -- v3-2-1: Fixing hash of None -- v3-2-0: Adding grep for search: object | grep(item) -- v3-1-3: Unicode vs. 
Bytes default fix -- v3-1-2: NotPresent Fix when item is added or removed. -- v3-1-1: Bug fix when item value is None (#58) -- v3-1-0: Serialization to/from json -- v3-0-0: Introducing Tree View -- v2-5-3: Bug fix on logging for content hash. -- v2-5-2: Bug fixes on content hash. -- v2-5-0: Adding ContentHash module to fix ignore_order once and for all. -- v2-1-0: Adding Deep Search. Now you can search for item in an object. -- v2-0-0: Exclusion patterns better coverage. Updating docs. -- v1-8-0: Exclusion patterns. -- v1-7-0: Deep Set comparison. -- v1-6-0: Unifying key names. i.e newvalue is new_value now. For backward compatibility, newvalue still works. -- v1-5-0: Fixing ignore order containers with unordered items. Adding significant digits when comparing decimals. Changes property is deprecated. -- v1-1-0: Changing Set, Dictionary and Object Attribute Add/Removal to be reported as Set instead of List. Adding Pypy compatibility. -- v1-0-2: Checking for ImmutableMapping type instead of dict -- v1-0-1: Better ignore order support -- v1-0-0: Restructuring output to make it more useful. This is NOT backward compatible. -- v0-6-1: Fixing iterables with unhashable when order is ignored -- v0-6-0: Adding unicode support -- v0-5-9: Adding decimal support -- v0-5-8: Adding ignore order for unhashables support -- v0-5-7: Adding ignore order support -- v0-5-6: Adding slots support -- v0-5-5: Adding loop detection - - -Back to :doc:`/index` diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 120000 index 00000000..830dd033 --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/changelog.rst \ No newline at end of file diff --git a/docs/colored_view.rst b/docs/colored_view.rst deleted file mode 100644 index 16f49ab7..00000000 --- a/docs/colored_view.rst +++ /dev/null @@ -1,101 +0,0 @@ -.. 
_colored_view_label: - -Colored View -============ - -The `ColoredView` feature in `deepdiff` provides a human-readable, color-coded JSON output of the -differences between two objects. This feature is particularly useful for visualizing changes in a -clear and intuitive manner. - -- **Color-Coded Differences:** - - - **Added Elements:** Shown in green. - - **Removed Elements:** Shown in red. - - **Changed Elements:** The old value is shown in red, and the new value is shown in green. - -Usage ------ - -To use the `ColoredView`, simply pass the `COLORED_VIEW` option to the `DeepDiff` function: - -.. code-block:: python - - from deepdiff import DeepDiff - from deepdiff.helper import COLORED_VIEW - - t1 = {"name": "John", "age": 30, "scores": [1, 2, 3], "address": {"city": "New York", "zip": "10001"}} - t2 = {"name": "John", "age": 31, "scores": [1, 2, 4], "address": {"city": "Boston", "zip": "10001"}, "new": "value"} - - diff = DeepDiff(t1, t2, view=COLORED_VIEW) - print(diff) - -Or from command line: - -.. code-block:: bash - - deep diff --view colored t1.json t2.json - -The output will look something like this: - -.. raw:: html - -
-    {
-      "name": "John",
-      "age": 30 -> 31,
-      "scores": [
-        1,
-        2,
-        3 -> 4
-      ],
-      "address": {
-        "city": "New York" -> "Boston",
-        "zip": "10001"
-      },
-      "new": "value"
-    }
-    
- -Colored Compact View --------------------- - -For a more concise output, especially with deeply nested objects where many parts are unchanged, -the `ColoredView` with the compact option can be used. This view is similar but collapses -unchanged nested dictionaries to `{...}` and unchanged lists/tuples to `[...]`. To use the compact -option do: - -.. code-block:: python - - from deepdiff import DeepDiff - from deepdiff.helper import COLORED_COMPACT_VIEW - - t1 = {"name": "John", "age": 30, "scores": [1, 2, 3], "address": {"city": "New York", "zip": "10001"}} - t2 = {"name": "John", "age": 31, "scores": [1, 2, 4], "address": {"city": "New York", "zip": "10001"}, "new": "value"} - - diff = DeepDiff(t1, t2, view=COLORED_COMPACT_VIEW) - print(diff) - -Or from command line: - -.. code-block:: bash - - deep diff --view colored_compact t1.json t2.json - - -The output will look something like this: - -.. raw:: html - -
-    {
-      "name": "John",
-      "age": 30 -> 31,
-      "scores": [
-        1,
-        2,
-        3 -> 4
-      ],
-      "address": {...},
-      "new": "value"
-    }
-    
diff --git a/docs/colored_view.rst b/docs/colored_view.rst new file mode 120000 index 00000000..39523826 --- /dev/null +++ b/docs/colored_view.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/colored_view.rst \ No newline at end of file diff --git a/docs/commandline.rst b/docs/commandline.rst deleted file mode 100644 index e7853dd6..00000000 --- a/docs/commandline.rst +++ /dev/null @@ -1,320 +0,0 @@ -:doc:`/index` - -Command Line -============ - -`New in DeepDiff 5.2.0` - -DeepDiff provides commandline interface to a subset of functionality that it provides through its Python API. - -The commands are: - -- :ref:`deep_diff_command` -- :ref:`deep_grep_command` -- :ref:`deep_extract_command` -- :ref:`deep_patch_command` - - -.. _deep_diff_command: - -deep diff command ------------------ - -Run - -.. code:: bash - - $ deep diff - -to get the options: - -.. code-block:: bash - - $ deep diff --help - Usage: deep diff [OPTIONS] T1 T2 - - Deep Diff Commandline - - Deep Difference of content in files. - It can read csv, tsv, json, yaml, and toml files. - - T1 and T2 are the path to the files to be compared with each other. 
- - Options: - --cutoff-distance-for-pairs FLOAT - [default: 0.3] - --cutoff-intersection-for-pairs FLOAT - [default: 0.7] - --cache-size INTEGER [default: 0] - --cache-tuning-sample-size INTEGER - [default: 0] - --cache-purge-level INTEGER RANGE - [default: 1] - --create-patch [default: False] - --exclude-paths TEXT - --exclude-regex-paths TEXT - --math-epsilon DECIMAL - --get-deep-distance [default: False] - --group-by TEXT - --ignore-order [default: False] - --ignore-string-type-changes [default: False] - --ignore-numeric-type-changes [default: False] - --ignore-type-subclasses [default: False] - --ignore-string-case [default: False] - --ignore-nan-inequality [default: False] - --include-private-variables [default: False] - --log-frequency-in-sec INTEGER [default: 0] - --max-passes INTEGER [default: 10000000] - --max_diffs INTEGER - --number-format-notation [f|e] [default: f] - --progress-logger [info|error] [default: info] - --report-repetition [default: False] - --significant-digits INTEGER - --truncate-datetime [second|minute|hour|day] - --verbose-level INTEGER RANGE [default: 1] - --view [-|colored|colored_compact] - [default: -] - Format for displaying differences. - --help Show this message and exit. - - -Example usage: - -Let's imagine we have t1.csv and t2.csv: - -.. csv-table:: t1.csv - :file: ../tests/fixtures/t1.csv - :header-rows: 1 - - -.. csv-table:: t2.csv - :file: ../tests/fixtures/t2.csv - :header-rows: 1 - -We can run: - -.. code-block:: bash - - $ deep diff t1.csv t2.csv --ignore-order - {'values_changed': {"root[2]['zip']": {'new_value': 90002, 'old_value': 90001}}} - -As you can see here the path to the item that is being changed is `root[2]['zip']` which is ok but -what if we assume last names are unique and group by last_name? - -.. 
code-block:: bash - - $ deep diff t1.csv t2.csv --ignore-order --group-by last_name - { 'values_changed': { "root['Molotov']['zip']": { 'new_value': 90002, - 'old_value': 90001}}} - -The path is perhaps more readable now: `root['Molotov']['zip']`. It is more clear that the zip code of Molotov has changed. - -.. Note:: - The parameters in the deep diff commandline are a subset of those in :ref:`deepdiff_label` 's Python API. - -To output in a specific format, for example the colored compact view (see :doc:`colored_view` for output details): - -.. code-block:: bash - - $ deep diff t1.json t2.json --view colored_compact - - -.. _deep_grep_command: - -deep grep command ------------------ - -Run - -.. code:: bash - - $ deep grep - -to get the options: - -.. code-block:: bash - - $ deep grep --help - Usage: deep grep [OPTIONS] ITEM PATH - - Deep Grep Commandline - - Grep through the contents of a file and find the path to the item. - It can read csv, tsv, json, yaml, and toml files. - - Options: - -i, --ignore-case [default: False] - --exact-match [default: False] - --exclude-paths TEXT - --exclude-regex-paths TEXT - --verbose-level INTEGER RANGE [default: 1] - --help Show this message and exit. - - -.. csv-table:: t1.csv - :file: ../tests/fixtures/t1.csv - :header-rows: 1 - -.. code-block:: bash - - $ deep grep --ignore-case james t1.csv - {'matched_values': ["root[2]['first_name']"]} - - -.. _deep_extract_command: - -deep extract command --------------------- - -Run - -.. code:: bash - - $ deep extract - -to get the options: - -.. code-block:: bash - - $ deep extract --help - Usage: deep extract [OPTIONS] PATH_INSIDE PATH - - Deep Extract Commandline - - Extract an item from a file based on the path that is passed. It can read - csv, tsv, json, yaml, and toml files. - - Options: - --help Show this message and exit. - -.. csv-table:: t1.csv - :file: ../tests/fixtures/t1.csv - :header-rows: 1 - -.. 
code-block:: bash - - $ deep extract "root[2]['first_name']" t1.csv - 'James' - - -.. _deep_patch_command: - -deep patch command ------------------- - -Run - -.. code:: bash - - $ deep patch --help - -to get the options: - -.. code-block:: text - - $ deep patch --help - Usage: deep patch [OPTIONS] PATH DELTA_PATH - - Deep Patch Commandline - - Patches a file based on the information in a delta file. The delta file - can be created by the deep diff command and passing the --create-patch - argument. - - Deep Patch is similar to Linux's patch command. The difference is that it - is made for patching data. It can read csv, tsv, json, yaml, and toml - files. - - Options: - -b, --backup [default: False] - --raise-errors [default: False] - --help Show this message and exit. - -Imagine if we have the following files: - - -.. csv-table:: t1.csv - :file: ../tests/fixtures/t1.csv - :header-rows: 1 - -.. csv-table:: t2.csv - :file: ../tests/fixtures/t2.csv - :header-rows: 1 - - -First we need to create a "delta" file which represents the difference between the 2 files. - -.. code-block:: bash - - $ deep diff t1.csv t2.csv --ignore-order - {'values_changed': {"root[2]['zip']": {'new_value': 90002, 'old_value': 90001}}} - -We create the delta by using the deep diff command and passing the `--create-patch` argument. -However since we are using `--ignore-order`, `deep diff` will ask us to also use `--report-repetition`: - -.. code-block:: bash - - deep diff t1.csv t2.csv --ignore-order --report-repetition --create-patch - =}values_changed}root[2]['zip']} new_valueJ_sss.% - -Note that the delta is not human readable. It is meant for us to pass it into a file: - -.. code-block:: bash - - deep diff t1.csv t2.csv --ignore-order --report-repetition --create-patch > patch1.pickle - -Now this delta file is ready to be applied by the `deep patch` command to any json, csv, toml or yaml file! -It is expecting the structure of the file to be similar to the one in the csv file though. 
- -Let's look at this yaml file: - -`another.yaml` - -.. code-block:: yaml - - --- - - - first_name: Joe - last_name: Nobody - zip: 90011 - - - first_name: Jack - last_name: Doit - zip: 22222 - - - first_name: Sara - last_name: Stanley - zip: 11111 - -All that our delta knows is that `root[2]['zip']` has changed to `90002`. - -Let's apply the delta: - -.. code-block:: bash - - deep patch --backup another.yaml patch1.pickle --raise-errors - -And looking at the `another.yaml` file, the zip code is indeed updated! - -.. code-block:: yaml - - - first_name: Joe - last_name: Nobody - zip: 90011 - - first_name: Jack - last_name: Doit - zip: 22222 - - first_name: Sara - last_name: Stanley - zip: 90002 - -As you can see the formatting of the yaml file is changed. -This is due to the fact that DeepDiff loads the file into a Python dictionary, modifies it and then writes it back to disk. -During this operation, the file loses its original formatting. - -.. note:: - The deep patch command only provides a subset of what DeepDiff's :ref:`delta_label`'s Python API provides. - The deep patch command is minimalistic and is designed to have a similar interface to Linux's patch command - rather than DeepDiff's :ref:`delta_label`. - -Back to :doc:`/index` diff --git a/docs/commandline.rst b/docs/commandline.rst new file mode 120000 index 00000000..6d31ec8e --- /dev/null +++ b/docs/commandline.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/commandline.rst \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index cf3f7be5..deb7dd41 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -64,9 +64,9 @@ # built documents. # # The short X.Y version. -version = '8.7.0' +version = '9.0.0' # The full version, including alpha/beta/rc tags. 
-release = '8.7.0' +release = '9.0.0' load_dotenv(override=True) DOC_VERSION = os.environ.get('DOC_VERSION', version) diff --git a/docs/custom.rst b/docs/custom.rst deleted file mode 100644 index e2ff1d96..00000000 --- a/docs/custom.rst +++ /dev/null @@ -1,440 +0,0 @@ -:doc:`/index` - -Customized Diff -=============== - -.. _iterable_compare_func_label: - -Iterable Compare Func ---------------------- - -New in DeepDiff 5.5.0 - -There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass a `iterable_compare_func` that takes a function pointer to compare two items. The function takes three parameters (x, y, level) and should return `True` if it is a match, `False` if it is not a match or raise `CannotCompare` if it is unable to compare the two. - - -For example take the following objects: - - -Now let's define a compare_func that takes 3 parameters: x, y and level. - - >>> from deepdiff import DeepDiff - >>> from deepdiff.helper import CannotCompare - >>> - >>> t1 = [ - ... { - ... 'id': 1, - ... 'value': [1] - ... }, - ... { - ... 'id': 2, - ... 'value': [7, 8, 1] - ... }, - ... { - ... 'id': 3, - ... 'value': [7, 8], - ... }, - ... ] - >>> - >>> t2 = [ - ... { - ... 'id': 2, - ... 'value': [7, 8] - ... }, - ... { - ... 'id': 3, - ... 'value': [7, 8, 1], - ... }, - ... { - ... 'id': 1, - ... 'value': [1] - ... }, - ... ] - >>> - >>> DeepDiff(t1, t2) - {'values_changed': {"root[0]['id']": {'new_value': 2, 'old_value': 1}, "root[0]['value'][0]": {'new_value': 7, 'old_value': 1}, "root[1]['id']": {'new_value': 3, 'old_value': 2}, "root[2]['id']": {'new_value': 1, 'old_value': 3}, "root[2]['value'][0]": {'new_value': 1, 'old_value': 7}}, 'iterable_item_added': {"root[0]['value'][1]": 8}, 'iterable_item_removed': {"root[2]['value'][1]": 8}} - -As you can see the results are different. Now items with the same ids are compared with each other. - - >>> def compare_func(x, y, level=None): - ... try: - ... 
return x['id'] == y['id'] - ... except Exception: - ... raise CannotCompare() from None - ... - >>> DeepDiff(t1, t2, iterable_compare_func=compare_func) - {'iterable_item_added': {"root[2]['value'][2]": 1}, 'iterable_item_removed': {"root[1]['value'][2]": 1}} - -If we set the verbose_level=2, we can see more details. - - >>> DeepDiff(t1, t2, iterable_compare_func=compare_func, verbose_level=2) - {'iterable_item_added': {"root[2]['value'][2]": 1}, 'iterable_item_removed': {"root[1]['value'][2]": 1}, 'iterable_item_moved': {'root[0]': {'new_path': 'root[2]', 'value': {'id': 1, 'value': [1]}}, 'root[1]': {'new_path': 'root[0]', 'value': {'id': 2, 'value': [7, 8]}}, 'root[2]': {'new_path': 'root[1]', 'value': {'id': 3, 'value': [7, 8, 1]}}}} - - -We can also use the level parameter. Levels are explained in the :ref:`tree_view_label`. - -For example you could use the level object to further determine if the 2 objects should be matches or not. - - - >>> t1 = { - ... 'path1': [], - ... 'path2': [ - ... { - ... 'id': 1, - ... 'value': [1] - ... }, - ... { - ... 'id': 2, - ... 'value': [7, 8, 1] - ... }, - ... ] - ... } - >>> - >>> t2 = { - ... 'path1': [{'pizza'}], - ... 'path2': [ - ... { - ... 'id': 2, - ... 'value': [7, 8, 1] - ... }, - ... { - ... 'id': 1, - ... 'value': [1, 2] - ... }, - ... ] - ... } - >>> - >>> - >>> def compare_func2(x, y, level): - ... if (not isinstance(x, dict) or not isinstance(y, dict)): - ... raise CannotCompare - ... if(level.path() == "root['path2']"): - ... if (x["id"] == y["id"]): - ... return True - ... return False - ... - >>> - >>> DeepDiff(t1, t2, iterable_compare_func=compare_func2) - {'iterable_item_added': {"root['path1'][0]": {'pizza'}, "root['path2'][0]['value'][1]": 2}} - - -.. note:: - - The level parameter of the iterable_compare_func is only used when ignore_order=False which is the default value for ignore_order. - - -.. 
_custom_operators_label: - -Custom Operators ----------------- - -Whether two objects are different or not largely depends on the context. For example, apples and bananas are the same -if you are considering whether they are fruits or not. - -In that case, you can pass a *custom_operators* for the job. - -Custom operators give you a lot of power. In the following examples, we explore various use cases such as: - -- Making DeepDiff report the L2 Distance of items -- Only include specific paths in diffing -- Making DeepDiff stop diffing once we find the first diff. - -You can use one of the predefined custom operators that come with DeepDiff. Or you can define one yourself. - - -Built-In Custom Operators - -.. _prefix_or_suffix_operator_label: - -PrefixOrSuffixOperator -...................... - - -This operator will skip strings that are suffix or prefix of each other. - -For example when this operator is used, the two strings of "joe" and "joe's car" will not be reported as different. - - >>> from deepdiff import DeepDiff - >>> from deepdiff.operator import PrefixOrSuffixOperator - >>> t1 = { - ... "key1": ["foo", "bar's food", "jack", "joe"] - ... } - >>> t2 = { - ... "key1": ["foo", "bar", "jill", "joe'car"] - ... } - >>> - >>> DeepDiff(t1, t2) - {'values_changed': {"root['key1'][1]": {'new_value': 'bar', 'old_value': "bar's food"}, "root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}, "root['key1'][3]": {'new_value': "joe'car", 'old_value': 'joe'}}} - >>> DeepDiff(t1, t2, custom_operators=[ - ... PrefixOrSuffixOperator() - ... ]) - >>> - {'values_changed': {"root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}}} - - - - -Define A Custom Operator ------------------------- - - -To define a custom operator, you just need to inherit *BaseOperator* or *BaseOperatorPlus*. - - - *BaseOperatorPlus* is our new base operator that can be subclassed and provides the structure to build any custom operator. 
- - *BaseOperator* is our older base class for creating custom operators. It was designed mainly for simple string based regex comparison. - - -Base Operator Plus -.................. - -*BaseOperatorPlus* is our new base operator that can be subclassed and provides the structure to build any custom operator. - -.. code-block:: python - - class BaseOperatorPlus(metaclass=ABCMeta): - - @abstractmethod - def match(self, level) -> bool: - """ - Given a level which includes t1 and t2 in the tree view, is this operator a good match to compare t1 and t2? - If yes, we will run the give_up_diffing to compare t1 and t2 for this level. - """ - pass - - @abstractmethod - def give_up_diffing(self, level, diff_instance: "DeepDiff") -> bool: - """ - Given a level which includes t1 and t2 in the tree view, and the "distance" between l1 and l2. - do we consider t1 and t2 to be equal or not. The distance is a number between zero to one and is calculated by DeepDiff to measure how similar objects are. - """ - - @abstractmethod - def normalize_value_for_hashing(self, parent: Any, obj: Any) -> Any: - """ - You can use this function to normalize values for ignore_order=True - - For example, you may want to turn all the words to be lowercase. Then you return obj.lower() - """ - pass - - -**Example 1: We don't care about the exact GUID values. As long as pairs of strings match GUID regex, we want them to be considered as equals** - - >>> import re - ... from typing import Any - ... from deepdiff import DeepDiff - ... from deepdiff.operator import BaseOperatorPlus - ... - ... - ... d1 = { - ... "Name": "SUB_OBJECT_FILES", - ... "Values": { - ... "Value": [ - ... "{f254498b-b752-4f35-bef5-6f1844b61eb7}", - ... "{7fb2a550-1849-45c0-b273-9aa5e4eb9f2b}", - ... "{a9cbecc0-21dc-49ce-8b2c-d36352dae139}" - ... ] - ... } - ... } - ... - ... d2 = { - ... "Name": "SUB_OBJECT_FILES", - ... "Values": { - ... "Value": [ - ... "{e5d18917-1a2c-4abe-b601-8ec002629953}", - ... 
"{ea71ba1f-1339-4fae-bc28-a9ce9b8a8c67}", - ... "{66bb6192-9cd2-4074-8be1-f2ac52877c70}", - ... ] - ... } - ... } - ... - ... - ... class RemoveGUIDsOperator(BaseOperatorPlus): - ... _pattern = r"[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}" - ... _substitute = "guid" - ... - ... def match(self, level) -> bool: - ... return isinstance(level.t1, str) and isinstance(level.t2, str) - ... - ... @classmethod - ... def _remove_pattern(cls, t: str): - ... return re.sub(cls._pattern, cls._substitute, t) - ... - ... def give_up_diffing(self, level, diff_instance): - ... t1 = self._remove_pattern(level.t1) - ... t2 = self._remove_pattern(level.t2) - ... return t1 == t2 - ... - ... def normalize_value_for_hashing(self, parent: Any, obj: Any) -> Any: - ... """ - ... Used for ignore_order=True - ... """ - ... if isinstance(obj, str): - ... return self._remove_pattern(obj) - ... return obj - ... - ... - ... operator = RemoveGUIDsOperator() - ... - >>> diff1 = DeepDiff(d1, d2, custom_operators=[operator], log_stacktrace=True) - ... diff1 - {} - >>> diff2 = DeepDiff(d1, d2, ignore_order=True, custom_operators=[operator], log_stacktrace=True) - ... diff2 - {} - - - -Base Operator -............. - -*BaseOperator* is our older base class for creating custom operators. It was designed mainly for simple string based regex comparison. - - -.. 
code-block:: python - - class BaseOperator: - - def __init__(self, regex_paths:Optional[List[str]]=None, types:Optional[List[type]]=None): - if regex_paths: - self.regex_paths = convert_item_or_items_into_compiled_regexes_else_none(regex_paths) - else: - self.regex_paths = None - self.types = types - - def match(self, level) -> bool: - if self.regex_paths: - for pattern in self.regex_paths: - matched = re.search(pattern, level.path()) is not None - if matched: - return True - if self.types: - for type_ in self.types: - if isinstance(level.t1, type_) and isinstance(level.t2, type_): - return True - return False - - def give_up_diffing(self, level, diff_instance) -> bool: - raise NotImplementedError('Please implement the diff function.') - - - -**Example 2: An operator that mapping L2:distance as diff criteria and reports the distance** - - >>> import math - >>> - >>> from typing import List - >>> from deepdiff import DeepDiff - >>> from deepdiff.operator import BaseOperator - >>> - >>> - >>> class L2DistanceDifferWithPreventDefault(BaseOperator): - ... def __init__(self, regex_paths: List[str], distance_threshold: float): - ... super().__init__(regex_paths) - ... self.distance_threshold = distance_threshold - ... def _l2_distance(self, c1, c2): - ... return math.sqrt( - ... (c1["x"] - c2["x"]) ** 2 + (c1["y"] - c2["y"]) ** 2 - ... ) - ... def give_up_diffing(self, level, diff_instance): - ... l2_distance = self._l2_distance(level.t1, level.t2) - ... if l2_distance > self.distance_threshold: - ... diff_instance.custom_report_result('distance_too_far', level, { - ... "l2_distance": l2_distance - ... }) - ... return True - ... - >>> - >>> t1 = { - ... "coordinates": [ - ... {"x": 5, "y": 5}, - ... {"x": 8, "y": 8} - ... ] - ... } - >>> - >>> t2 = { - ... "coordinates": [ - ... {"x": 6, "y": 6}, - ... {"x": 88, "y": 88} - ... ] - ... } - >>> DeepDiff(t1, t2, custom_operators=[L2DistanceDifferWithPreventDefault( - ... ["^root\\['coordinates'\\]\\[\\d+\\]$"], - ... 
1 - ... )]) - {'distance_too_far': {"root['coordinates'][0]": {'l2_distance': 1.4142135623730951}, "root['coordinates'][1]": {'l2_distance': 113.13708498984761}}} - - -**Example 3: If the objects are subclasses of a certain type, only compare them if their list attributes are not equal sets** - - >>> class CustomClass: - ... def __init__(self, d: dict, l: list): - ... self.dict = d - ... self.dict['list'] = l - ... - >>> - >>> custom1 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3]) - >>> custom2 = CustomClass(d=dict(c=3, d=4), l=[1, 2, 3, 2]) - >>> custom3 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3, 4]) - >>> - >>> - >>> class ListMatchOperator(BaseOperator): - ... def give_up_diffing(self, level, diff_instance): - ... if set(level.t1.dict['list']) == set(level.t2.dict['list']): - ... return True - ... - >>> - >>> DeepDiff(custom1, custom2, custom_operators=[ - ... ListMatchOperator(types=[CustomClass]) - ... ]) - {} - >>> - >>> - >>> DeepDiff(custom2, custom3, custom_operators=[ - ... ListMatchOperator(types=[CustomClass]) - ... ]) - {'dictionary_item_added': [root.dict['a'], root.dict['b']], 'dictionary_item_removed': [root.dict['c'], root.dict['d']], 'values_changed': {"root.dict['list'][3]": {'new_value': 4, 'old_value': 2}}} - >>> - -**Example 4: Only diff certain paths** - - >>> from deepdiff import DeepDiff - >>> class MyOperator: - ... def __init__(self, include_paths): - ... self.include_paths = include_paths - ... def match(self, level) -> bool: - ... return True - ... def give_up_diffing(self, level, diff_instance) -> bool: - ... return level.path() not in self.include_paths - ... - >>> - >>> t1 = {'a': [10, 11], 'b': [20, 21], 'c': [30, 31]} - >>> t2 = {'a': [10, 22], 'b': [20, 33], 'c': [30, 44]} - >>> - >>> DeepDiff(t1, t2, custom_operators=[ - ... MyOperator(include_paths="root['a'][1]") - ... 
]) - {'values_changed': {"root['a'][1]": {'new_value': 22, 'old_value': 11}}} - -**Example 5: Give up further diffing once the first diff is found** - -Sometimes all you care about is that there is a difference between 2 objects and not all the details of what exactly is different. -In that case you may want to stop diffing as soon as the first diff is found. - - >>> from deepdiff import DeepDiff - >>> class MyOperator: - ... def match(self, level) -> bool: - ... return True - ... def give_up_diffing(self, level, diff_instance) -> bool: - ... return any(diff_instance.tree.values()) - ... - >>> t1 = [[1, 2], [3, 4], [5, 6]] - >>> t2 = [[1, 3], [3, 5], [5, 7]] - >>> - >>> DeepDiff(t1, t2, custom_operators=[ - ... MyOperator() - ... ]) - {'values_changed': {'root[0][1]': {'new_value': 3, 'old_value': 2}}} - - -Back to :doc:`/index` diff --git a/docs/custom.rst b/docs/custom.rst new file mode 120000 index 00000000..f9df393d --- /dev/null +++ b/docs/custom.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/custom.rst \ No newline at end of file diff --git a/docs/deep_distance.rst b/docs/deep_distance.rst deleted file mode 100644 index 09179b7b..00000000 --- a/docs/deep_distance.rst +++ /dev/null @@ -1,119 +0,0 @@ -:doc:`/index` - -.. _deep_distance_label: - -Deep Distance -============= - - -Deep Distance is the distance between 2 objects. It is a floating point number between 0 and 1. Deep Distance in concept is inspired by `Levenshtein Edit Distance `_. - -At its core, the Deep Distance is the number of operations needed to convert one object to the other divided by the sum of the sizes of the 2 objects capped at 1. Note that unlike Levenshtein Distance, the Deep Distance is based on the number of operations and NOT the “minimum” number of operations to convert one object to the other. The number is highly dependent on the granularity of the diff results. And the granularity is controlled by the parameters passed to DeepDiff. - -.. 
_get_deep_distance_label: - -Get Deep Distance ------------------ - -get_deep_distance: Boolean, default = False - get_deep_distance will get you the deep distance between objects. The distance is a number between 0 and 1 where zero means there is no diff between the 2 objects and 1 means they are very different. Note that this number should only be used to compare the similarity of 2 objects and nothing more. The algorithm for calculating this number may or may not change in the future releases of DeepDiff. - - The value of Deep Distance will show up in the result diff object's deep_distance key. - - >>> from deepdiff import DeepDiff - >>> DeepDiff(10.0, 10.1, get_deep_distance=True) - {'values_changed': {'root': {'new_value': 10.1, 'old_value': 10.0}}, 'deep_distance': 0.0014925373134328302} - >>> DeepDiff(10.0, 100.1, get_deep_distance=True) - {'values_changed': {'root': {'new_value': 100.1, 'old_value': 10.0}}, 'deep_distance': 0.24550408719346048} - >>> DeepDiff(10.0, 1000.1, get_deep_distance=True) - {'values_changed': {'root': {'new_value': 1000.1, 'old_value': 10.0}}, 'deep_distance': 0.29405999405999406} - >>> DeepDiff([1], [1], get_deep_distance=True) - {} - >>> DeepDiff([1], [1, 2], get_deep_distance=True) - {'iterable_item_added': {'root[1]': 2}, 'deep_distance': 0.2} - >>> DeepDiff([1], [1, 2, 3], get_deep_distance=True) - {'iterable_item_added': {'root[1]': 2, 'root[2]': 3}, 'deep_distance': 0.3333333333333333} - >>> DeepDiff([[2, 1]], [[1, 2, 3]], ignore_order=True, get_deep_distance=True) - {'iterable_item_added': {'root[0][2]': 3}, 'deep_distance': 0.1111111111111111} - -.. _distance_and_diff_granularity_label: - -Distance And Diff Granularity ------------------------------ - -.. note:: - The Deep Distance of objects is highly dependent on the diff object that is produced. A diff object that is more granular will give a more accurate Deep Distance value too. - -Let's use the following 2 deeply nested objects as an example.
If you ignore the order of items, they are very similar and only differ in a few elements. - -We will run 2 diffs and ask for the deep distance. The only difference between the below 2 diffs is that in the first one the :ref:`cutoff_intersection_for_pairs_label` is not passed so the default value of 0.3 is used while in the other one cutoff_intersection_for_pairs=1 is used which forces extra pass calculations. - ->>> from pprint import pprint ->>> t1 = [ -... { -... "key3": [[[[[[[[[[1, 2, 4, 5]]], [[[8, 7, 3, 5]]]]]]]]]], -... "key4": [7, 8] -... }, -... { -... "key5": "val5", -... "key6": "val6" -... } -... ] ->>> ->>> t2 = [ -... { -... "key5": "CHANGE", -... "key6": "val6" -... }, -... { -... "key3": [[[[[[[[[[1, 3, 5, 4]]], [[[8, 8, 1, 5]]]]]]]]]], -... "key4": [7, 8] -... } -... ] - -We don't pass cutoff_intersection_for_pairs in the first diff. - ->>> diff1=DeepDiff(t1, t2, ignore_order=True, cache_size=5000, get_deep_distance=True) ->>> pprint(diff1) -{'deep_distance': 0.36363636363636365, - 'values_changed': {'root[0]': {'new_value': {'key5': 'CHANGE', 'key6': 'val6'}, - 'old_value': {'key3': [[[[[[[[[[1, 2, 4, 5]]], - [[[8, - 7, - 3, - 5]]]]]]]]]], - 'key4': [7, 8]}}, - 'root[1]': {'new_value': {'key3': [[[[[[[[[[1, 3, 5, 4]]], - [[[8, - 8, - 1, - 5]]]]]]]]]], - 'key4': [7, 8]}, - 'old_value': {'key5': 'val5', 'key6': 'val6'}}}} - -Note that the stats show that only 5 set of objects were compared with each other according to the DIFF COUNT: - ->>> diff1.get_stats() -{'PASSES COUNT': 0, 'DIFF COUNT': 5, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} - -Let's pass cutoff_intersection_for_pairs=1 to enforce pass calculations. As you can see the results are way more granular and the deep distance value is way more accurate now. 
- ->>> diff2=DeepDiff(t1, t2, ignore_order=True, cache_size=5000, cutoff_intersection_for_pairs=1, get_deep_distance=True) ->>> from pprint import pprint ->>> pprint(diff2) -{'deep_distance': 0.06060606060606061, - 'iterable_item_removed': {"root[0]['key3'][0][0][0][0][0][0][1][0][0][1]": 7}, - 'values_changed': {"root[0]['key3'][0][0][0][0][0][0][0][0][0][1]": {'new_value': 3, - 'old_value': 2}, - "root[0]['key3'][0][0][0][0][0][0][1][0][0][2]": {'new_value': 1, - 'old_value': 3}, - "root[1]['key5']": {'new_value': 'CHANGE', - 'old_value': 'val5'}}} - -As you can see now way more calculations have happened behind the scene. Instead of only 5 set of items being compared with each other, we have 306 items that are compared with each other in 110 passes. - ->>> diff2.get_stats() -{'PASSES COUNT': 110, 'DIFF COUNT': 306, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} - - -Back to :doc:`/index` diff --git a/docs/deep_distance.rst b/docs/deep_distance.rst new file mode 120000 index 00000000..f233f137 --- /dev/null +++ b/docs/deep_distance.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/deep_distance.rst \ No newline at end of file diff --git a/docs/deephash.rst b/docs/deephash.rst deleted file mode 100644 index fd5fbda4..00000000 --- a/docs/deephash.rst +++ /dev/null @@ -1,14 +0,0 @@ -:doc:`/index` - -DeepHash -======== - -.. toctree:: - :maxdepth: 3 - -.. automodule:: deepdiff.deephash - -.. 
autoclass:: DeepHash - :members: - -Back to :doc:`/index` diff --git a/docs/deephash.rst b/docs/deephash.rst new file mode 120000 index 00000000..cdbb1c71 --- /dev/null +++ b/docs/deephash.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/deephash.rst \ No newline at end of file diff --git a/docs/deephash_doc.rst b/docs/deephash_doc.rst deleted file mode 100644 index 7039281f..00000000 --- a/docs/deephash_doc.rst +++ /dev/null @@ -1,388 +0,0 @@ -:orphan: - -**DeepHash** - -DeepHash calculates the hash of objects based on their contents in a deterministic way. -This way 2 objects with the same content should have the same hash. - -The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. -For example you can use DeepHash to calculate the hash of a set or a dictionary! - -At the core of it, DeepHash is a deterministic serialization of your object into a string so it -can be passed to a hash function. By default it uses SHA256. You have the option to pass any other hashing function to be used instead. - -**Import** - >>> from deepdiff import DeepHash - -**Parameters** - -obj : any object, The object to be hashed based on its content. - - -apply_hash: Boolean, default = True - DeepHash at its core is doing deterministic serialization of objects into strings. - Then it hashes the string. - The only time you want the apply_hash to be False is if you want to know what - the string representation of your object is BEFORE it gets hashed. - - -exclude_types: list, default = None - List of object types to exclude from hashing. - - -exclude_paths: list, default = None - List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path. - Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. - - -include_paths: list, default = None - List of the only paths to include in the report. If only one item, you can pass it as a string. 
- Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. - - -exclude_regex_paths: list, default = None - List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one regex path. - - -exclude_obj_callback - function, default = None - A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. - This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. - - -encodings: List, default = None - Character encodings to iterate through when we convert bytes into strings. You may want to pass an explicit list of encodings in your objects if you start getting UnicodeDecodeError from DeepHash. Also check out ignore_encoding_errors if you can get away with ignoring these errors and don't want to bother with an explicit list of encodings but it will come at the price of slightly less accuracy of the final results. Example: encodings=["utf-8", "latin-1"] - - -hashes: dictionary, default = empty dictionary - A dictionary of {object or object id: object hash} to start with. - Any object that is encountered and it is already in the hashes dictionary or its id is in the hashes dictionary, - will re-use the hash that is provided by this dictionary instead of re-calculating - its hash. This is typically used when you have a series of objects to be hashed and there might be repeats of the same object. - - -hasher: function. default = DeepHash.sha256hex - hasher is the hashing function. The default is DeepHash.sha256hex. - But you can pass another hash function to it if you want. - For example a cryptographic hash function or Python's builtin hash function. - All it needs is a function that takes the input in string format and returns the hash. 
- - You can use it by passing: hasher=hash for Python's builtin hash. - - The following alternative is already provided: - - - hasher=DeepHash.sha1hex - - Note that prior to DeepDiff 5.2, Murmur3 was the default hash function. - But Murmur3 is removed from DeepDiff dependencies since then. - - -ignore_repetition: Boolean, default = True - If repetitions in an iterable should cause the hash of iterable to be different. - Note that the deepdiff diffing functionality lets this to be the default at all times. - But if you are using DeepHash directly, you can set this parameter. - - -ignore_type_in_groups - Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the times the shortcuts provided to you are enough. - The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more control compared to the shortcuts. - - For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: - - 1. Set ignore_string_type_changes=True which is the default. - 2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . - - Now what if you want also typeA and typeB to be ignored when comparing agains each other? - - 1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] - 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] - -ignore_string_type_changes: Boolean, default = True - string type conversions should not affect the hash output when this is set to True. 
- For example "Hello" and b"Hello" should produce the same hash. - - By setting it to True, both the string and bytes of hello return the same hash. - - -ignore_numeric_type_changes: Boolean, default = False - numeric type conversions should not affect the hash output when this is set to True. - For example 10, 10.0 and Decimal(10) should produce the same hash. - When ignore_numeric_type_changes is set to True, all numbers are converted - to strings with the precision of significant_digits parameter and number_format_notation notation. - If no significant_digits is passed by the user, a default value of 12 is used. - - -ignore_type_subclasses - Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. - - -ignore_string_case - Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. - - -ignore_private_variables: Boolean, default = True - Whether to exclude the private variables in the calculations or not. It only affects variables that start with double underscores (__). - - -ignore_encoding_errors: Boolean, default = False - If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the encodings parameter. - -ignore_iterable_order: Boolean, default = True - If order of items in an iterable should not cause the hash of the iterable to be different. - -number_format_notation : string, default="f" - number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation.
- - -significant_digits : int >= 0, default=None - By default the significant_digits compares only that many digits AFTER the decimal point. However you can override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. - - Important: This will affect ANY number comparison when it is set. - - Note: If ignore_numeric_type_changes is set to True and you have left significant_digits to the default of None, it gets automatically set to 12. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change. However when ignore_numeric_type_changes=True, in order to compare numbers from different types to each other, we need to convert them all into strings. The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. - - Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits when the number_format_notation is left as the default of "f" meaning fixed point. - - Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 - - For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) - - When you set the number_format_notation="e", we use "{:.Xe}".format(Your Number) where X=significant_digits. - -truncate_datetime: string, default = None - Can take one of the values 'second', 'minute', 'hour', 'day'. Datetime objects are truncated with this value before they are hashed. - - - -**Returns** - A dictionary of {item: item hash}. - If your object is nested, it will build hashes of all the objects it contains too. - - -.. note:: - DeepHash output is not like conventional hash functions. It is a dictionary of object IDs to their hashes. This happens because DeepHash calculates the hash of the object and any other objects found within the object in a recursive manner.
If you only need the hash of the object you are passing, all you need to do is to do: - - >>> from deepdiff import DeepHash - >>> obj = {1: 2, 'a': 'b'} - >>> DeepHash(obj)[obj] # doctest: +SKIP - - -**Examples** - -Let's say you have a dictionary object. - >>> from deepdiff import DeepHash - >>> obj = {1: 2, 'a': 'b'} - -If you try to hash it: - >>> hash(obj) - Traceback (most recent call last): - File "", line 1, in - TypeError: unhashable type: 'dict' - -But with DeepHash: - - >>> from deepdiff import DeepHash - >>> obj = {1: 2, 'a': 'b'} - >>> DeepHash(obj) # doctest: +SKIP - - So what is exactly the hash of obj in this case? - DeepHash is calculating the hash of the obj and any other object that obj contains. - The output of DeepHash is a dictionary of object IDs to their hashes. - In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: - - >>> hashes = DeepHash(obj) - >>> hashes[obj] - 'bf5478de322aa033da36bf3bcf9f0599e13a520773f50c6eb9f2487377a7929b' - - Which you can write as: - - >>> hashes = DeepHash(obj)[obj] - - At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. - - If you prefer to use another hashing algorithm, you can pass it using the hasher parameter. - - If you do a deep copy of the obj, it should still give you the same hash: - - >>> from copy import deepcopy - >>> obj2 = deepcopy(obj) - >>> DeepHash(obj2)[obj2] - 'bf5478de322aa033da36bf3bcf9f0599e13a520773f50c6eb9f2487377a7929b' - - Note that by default DeepHash will include string type differences. 
So if your strings were bytes: - - >>> obj3 = {1: 2, b'a': b'b'} - >>> DeepHash(obj3)[obj3] - '71db3231177d49f78b52a356ca206e6179417b681604d00ed703a077049e3300' - - But if you want the same hash if string types are different, set ignore_string_type_changes to True: - - >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] - 'e60c2befb84be625037c75e1e26d0bfc85a0ffc1f3cde9500f68f6eac55e5ad6' - - ignore_numeric_type_changes is by default False too. - - >>> from decimal import Decimal - >>> obj1 = {4:10} - >>> obj2 = {4.0: Decimal(10.0)} - >>> DeepHash(obj1)[4] == DeepHash(obj2)[4.0] - False - - But by setting it to True, we can get the same hash. - - >>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0] - True - -number_format_notation: String, default = "f" - number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. - - -ignore_string_type_changes: Boolean, default = True - By setting it to True, both the string and bytes of hello return the same hash. - - >>> DeepHash(b'hello', ignore_string_type_changes=True)[b'hello'] - '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' - >>> DeepHash('hello', ignore_string_type_changes=True)['hello'] - '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' - - -ignore_numeric_type_changes: Boolean, default = False - For example if significant_digits=5, 1.1, Decimal(1.1) are both converted to 1.10000 - - That way they both produce the same hash. 
- - >>> t1 = {1: 1, 2: 2.22} - >>> DeepHash(t1)[1] - 'c1800a30c736483f13615542e7096f7973631fef8ca935ee1ed9f35fb06fd44e' - >>> DeepHash(t1, ignore_numeric_type_changes=True)[1] == DeepHash(t1, ignore_numeric_type_changes=True)[1.0] - True - - You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: - - >>> from deepdiff import DeepDiff - >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] - - -ignore_type_in_groups example with custom objects: - - >>> class Burrito: - ... bread = 'flour' - ... def __init__(self): - ... self.spicy = True - ... - >>> - >>> class Taco: - ... bread = 'flour' - ... def __init__(self): - ... self.spicy = True - ... - >>> - >>> burrito = Burrito() - >>> taco = Taco() - >>> - >>> burritos = [burrito] - >>> tacos = [taco] - >>> - >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)]) - >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)]) - >>> d1[burrito] == d2[taco] - True - - -ignore_type_subclasses - Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. - - >>> from deepdiff import DeepHash - >>> - >>> class ClassB: - ... def __init__(self, x): - ... self.x = x - ... def __repr__(self): - ... return "obj b" - ... - >>> - >>> class ClassC(ClassB): - ... def __repr__(self): - ... return "obj c" - ... - >>> obj_b = ClassB(1) - >>> obj_c = ClassC(1) - >>> - >>> # By default, subclasses are considered part of the type group. - ... # ignore_type_in_groups=[(ClassB, )] matches ClassC too since it's a subclass. - ... 
hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )]) - >>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )]) - >>> hashes_b[obj_b] == hashes_c[obj_c] - True - >>> - >>> # With ignore_type_subclasses=True, only exact type matches count. - ... # ClassC no longer matches (ClassB, ) group, so hashes differ. - ... hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True) - >>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True) - >>> hashes_b[obj_b] != hashes_c[obj_c] - True - -ignore_string_case - Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. - - >>> from deepdiff import DeepHash - >>> DeepHash('hello')['hello'] == DeepHash('heLLO')['heLLO'] - False - >>> DeepHash('hello', ignore_string_case=True)['hello'] == DeepHash('heLLO', ignore_string_case=True)['heLLO'] - True - -exclude_obj_callback - function, default = None - A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. - This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. - - >>> def exclude_obj_callback(obj, path): - ... return True if isinstance(obj, str) and obj in ('x', 'y') else False - ...
- >>> dic1 = {"x": 1, "y": 2, "z": 3} - >>> t1 = [dic1] - >>> t1_hash = DeepHash(t1, exclude_obj_callback=exclude_obj_callback) - >>> - >>> dic2 = {"z": 3} - >>> t2 = [dic2] - >>> t2_hash = DeepHash(t2, exclude_obj_callback=exclude_obj_callback) - >>> - >>> t1_hash[t1] == t2_hash[t2] - True - -number_format_notation : string, default="f" - When numbers are converted to the string, you have the choices between "f" as fixed point and "e" as scientific notation: - - >>> t1=10002 - >>> t2=10004 - >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="f") - >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="f") - >>> - >>> t1_hash[t1] == t2_hash[t2] - False - >>> - >>> - >>> # Now we use the scientific notation - ... t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e") - >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e") - >>> - >>> t1_hash[t1] == t2_hash[t2] - True - -Defining your own number_to_string_func - Lets say you want the hash of numbers below 100 to be the same for some reason. - - >>> from deepdiff import DeepHash - >>> from deepdiff.helper import number_to_string - >>> def custom_number_to_string(number, *args, **kwargs): - ... number = 100 if number < 100 else number - ... return number_to_string(number, *args, **kwargs) - ... - >>> t1 = [10, 12, 100000] - >>> t2 = [50, 63, 100021] - >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) - >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) - >>> t1_hash[t1] == t2_hash[t2] - True - - So both lists produced the same hash thanks to the low significant digits for 100000 vs 100021 and also the custom_number_to_string that converted all numbers below 100 to be 100! 
diff --git a/docs/deephash_doc.rst b/docs/deephash_doc.rst new file mode 120000 index 00000000..6466f6e1 --- /dev/null +++ b/docs/deephash_doc.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/deephash_doc.rst \ No newline at end of file diff --git a/docs/delta.rst b/docs/delta.rst deleted file mode 100644 index a8ab0d3f..00000000 --- a/docs/delta.rst +++ /dev/null @@ -1,539 +0,0 @@ -.. _delta_label: - -Delta -===== - -DeepDiff Delta is a directed delta that when applied to t1 can yield t2 where delta is the difference between t1 and t2. -Delta objects are like git commits but for structured data. -You can convert the diff results into Delta objects, store the deltas, and later apply to other objects. - -.. note:: - If you plan to generate Delta objects from the DeepDiff result, and ignore_order=True, you need to also set the report_repetition=True. - -**Parameters** - -diff : Delta dictionary, Delta dump payload or a DeepDiff object, default=None. - :ref:`delta_diff_label` is the content to be loaded. - -delta_path : String, default=None. - :ref:`delta_path_label` is the local path to the delta dump file to be loaded - -delta_file : File Object, default=None. - :ref:`delta_file_label` is the file object containing the delta data. - -delta_diff : Delta diff, default=None. - This is a slightly different diff than the output of DeepDiff. When Delta object is initiated from the DeepDiff output, it transforms the diff into a slightly different structure that is more suitable for delta. You can find that object via delta.diff. - It is the same object that is serialized when you create a delta dump. If you already have the delta_diff object, you can pass it to Delta via the delta_diff parameter. - -flat_dict_list : List of flat dictionaries, default=None, - :ref:`flat_dict_list_label` can be used to load the delta object from a list of flat dictionaries. - -.. note:: - You need to pass only one of the diff, delta_path, or delta_file parameters. 
- -deserializer : Deserializer function, default=pickle_load - :ref:`delta_deserializer_label` is the function to deserialize the delta content. The default is the pickle_load function that comes with DeepDiff. - -serializer : Serializer function, default=pickle_dump - :ref:`delta_serializer_label` is the function to serialize the delta content into a format that can be stored. The default is the pickle_dump function that comes with DeepDiff. - -log_errors : Boolean, default=True - Whether to log the errors or not when applying the delta object. - -raise_errors : Boolean, default=False - :ref:`raise_errors_label` - Whether to raise errors or not when applying a delta object. - -mutate : Boolean, default=False. - :ref:`delta_mutate_label` defines whether to mutate the original object when adding the delta to it or not. - Note that this parameter is not always successful in mutating. For example if your original object - is an immutable type such as a frozenset or a tuple, mutation will not succeed. - Hence it is recommended to keep this parameter as the default value of False unless you are sure - that you do not have immutable objects. There is a small overhead of doing deepcopy on the original - object when mutate=False. If performance is a concern and modifying the original object is not a big deal, - set the mutate=True but always reassign the output back to the original object. - -safe_to_import : Set, default=None. - :ref:`delta_safe_to_import_label` is a set of modules that needs to be explicitly white listed to be loaded - Example: {'mymodule.MyClass', 'decimal.Decimal'} - Note that this set will be added to the basic set of modules that are already white listed. - The set of what is already white listed can be found in deepdiff.serialization.SAFE_TO_IMPORT - -bidirectional : Boolean, default=False - :ref:`delta_verify_symmetry_label` is used to verify that the original value of items are the same as when the delta was created. 
Note that in order for this option to work, the delta object will need to store more data and thus the size of the object will increase. Let's say that the diff object says root[0] changed value from X to Y. If you create the delta with the default value of bidirectional=False, then what delta will store is root[0] = Y. And if this delta was applied to an object that has any root[0] value, it will still set the root[0] to Y. However if bidirectional=True, then the delta object will store also that the original value of root[0] was X and if you try to apply the delta to an object that has root[0] of any value other than X, it will notify you. - -force : Boolean, default=False - :ref:`delta_force_label` is used to force apply a delta to objects that have a different structure than what the delta was originally created from. - -always_include_values : Boolean, default=False - :ref:`always_include_values_label` is used to make sure the delta objects includes the values that were changed. Sometime Delta tries to be efficient not include the values when it can get away with it. By setting this parameter to True, you ensure that the Delta object will include the values. - -.. _delta_fill: - -fill : Any, default=No Fill - This is only relevant if `force` is set. This parameter only applies when force is set and trying to fill an existing array. If the index of the array being applied is larger than the length of the array this value will be used to fill empty spaces of the array to extend it in order to add the new value. If this parameter is not set, the items will get dropped and the array not extended. If this parameter is set with a callable function, it will get called each time a fill item is needed. It will be provided with three arguments: first argument is the array being filled, second argument is the value that is being added to the array, the third argument is the path that is being added. 
- Example function: `def fill(obj, value, path): return "Camry" if "car" in path else None` - - -**Returns** - - A delta object that can be added to t1 to recreate t2. - - Delta objects can contain the following vocabulary: - - iterable_item_added - iterable_item_moved - iterable_item_removed - set_item_added - set_item_removed - dictionary_item_added - dictionary_item_removed - attribute_added - attribute_removed - type_changes - values_changed - iterable_items_added_at_indexes - iterable_items_removed_at_indexes - - -.. _delta_diff_label: - -Diff to load in Delta ---------------------- - -diff : Delta dictionary, Delta dump payload or a DeepDiff object, default=None. - diff is the content to be loaded. - ->>> from deepdiff import DeepDiff, Delta ->>> from pprint import pprint ->>> ->>> t1 = [1, 2, 3] ->>> t2 = ['a', 2, 3, 4] ->>> diff = DeepDiff(t1, t2) ->>> diff -{'type_changes': {'root[0]': {'old_type': <class 'int'>, 'new_type': <class 'str'>, 'old_value': 1, 'new_value': 'a'}}, 'iterable_item_added': {'root[3]': 4}} ->>> delta = Delta(diff) ->>> delta # doctest: +SKIP - - -Applying the delta object to t1 will yield t2: - ->>> t1 + delta -['a', 2, 3, 4] ->>> t1 + delta == t2 -True - -If we want to subtract a delta, we need to create a bidirectional delta: - ->>> delta = Delta(diff, bidirectional=True) ->>> t2 - delta -[1, 2, 3] ->>> t2 - delta == t1 -True - -Now let's dump the delta object so we can store it. - ->>> dump = delta.dumps() ->>> ->>> dump # doctest: +SKIP - -The dumps() function gives us the serialized content of the delta in the form of bytes. We could store it however we want. Or we could use the dump(file_object) to write the dump to the file_object instead. But before we try the dump(file_object) method, let's create a new Delta object and reapply it to t1 and see if we still get t2: - ->>> delta2 = Delta(dump) ->>> t1 + delta2 == t2 -True ->>> - -.. _delta_path_label: - -Delta Path parameter --------------------- - -Ok now we can try the dump(file_object).
It does what you expect: - ->>> with open('/tmp/delta1', 'wb') as dump_file: -... delta.dump(dump_file) -... - -And we use the delta_path parameter to load the delta - ->>> delta3 = Delta(delta_path='/tmp/delta1') - -It still gives us the same result when applied. - ->>> t1 + delta3 == t2 -True - - -.. _delta_file_label: - -Delta File parameter --------------------- - -You can also pass a file object containing the delta dump: - ->>> with open('/tmp/delta1', 'rb') as dump_file: -... delta4 = Delta(delta_file=dump_file) -... ->>> t1 + delta4 == t2 -True - - -.. _flat_dict_list_label: - -Flat Dict List --------------- - -You can create a delta object from the list of flat dictionaries that are produced via :ref:`to_flat_dicts_label`. Read more on :ref:`delta_from_flat_dicts_label`. - -.. _flat_rows_list_label: - -Flat Rows List --------------- - -You can create a delta object from the list of flat dictionaries that are produced via :ref:`delta_to_flat_rows_label`. Read more on :ref:`delta_to_flat_rows_label`. - - -.. _delta_deserializer_label: - -Delta Deserializer ------------------- - -DeepDiff by default uses a restricted Python pickle function to deserialize the Delta dumps. Read more about :ref:`delta_dump_safety_label`. - -The user of Delta can decide to switch the serializer and deserializer to their custom ones. The serializer and deserializer parameters can be used exactly for that reason. The best way to come up with your own serializer and deserializer is to take a look at the `pickle_dump and pickle_load functions in the serializer module `_ - -.. _delta_json_deserializer_label: - -Json Deserializer for Delta -``````````````````````````` - -If all you deal with are Json serializable objects, you can use json for serialization. 
- ->>> from deepdiff import DeepDiff, Delta ->>> from deepdiff.serialization import json_dumps, json_loads ->>> t1 = {"a": 1} ->>> t2 = {"a": 2} ->>> ->>> diff = DeepDiff(t1, t2) ->>> delta = Delta(diff, serializer=json_dumps) ->>> dump = delta.dumps() ->>> dump -'{"values_changed":{"root[\'a\']":{"new_value":2}}}' ->>> delta_reloaded = Delta(dump, deserializer=json_loads) ->>> t2 == delta_reloaded + t1 -True - - -.. note:: - - Json is very limited and easily you can get to deltas that are not json serializable. You will probably want to extend the Python's Json serializer to support your needs. - - >>> import json - >>> t1 = {"a": 1} - >>> t2 = {"a": None} - >>> diff = DeepDiff(t1, t2) - >>> diff - {'type_changes': {"root['a']": {'old_type': , 'new_type': , 'old_value': 1, 'new_value': None}}} - >>> Delta(diff, serializer=json.dumps) # doctest: +SKIP - - >>> delta = Delta(diff, serializer=json.dumps) - >>> dump = delta.dumps() # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - TypeError: Object of type type is not JSON serializable... - -.. _delta_serializer_label: - -Delta Serializer ----------------- - -DeepDiff uses pickle to serialize delta objects by default. Please take a look at the :ref:`delta_deserializer_label` for more information. - - -.. _to_flat_dicts_label: - -Delta Serialize To Flat Dictionaries ------------------------------------- - -Read about :ref:`delta_to_flat_dicts_label` - -.. _delta_dump_safety_label: - -Delta Dump Safety ------------------ - -Delta by default uses Python's pickle to serialize and deserialize. While the unrestricted use of pickle is not safe as noted in the `pickle's documentation `_ , DeepDiff's Delta is written with extra care to `restrict the globals `_ and hence mitigate this security risk. - -In fact only a few Python object types are allowed by default. The user of DeepDiff can pass additional types using the :ref:`delta_safe_to_import_label` to allow further object types that need to be allowed. 
- - -.. _delta_mutate_label: - -Delta Mutate parameter ----------------------- - -mutate : Boolean, default=False. - delta_mutate defines whether to mutate the original object when adding the delta to it or not. - Note that this parameter is not always successful in mutating. For example if your original object - is an immutable type such as a frozenset or a tuple, mutation will not succeed. - Hence it is recommended to keep this parameter as the default value of False unless you are sure - that you do not have immutable objects. There is a small overhead of doing deepcopy on the original - object when mutate=False. If performance is a concern and modifying the original object is not a big deal, - set the mutate=True but always reassign the output back to the original object. - -For example: - ->>> t1 = [1, 2, [3, 5, 6]] ->>> t2 = [2, 3, [3, 6, 8]] - ->>> diff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) ->>> diff -{'values_changed': {'root[0]': {'new_value': 3, 'old_value': 1}, 'root[2][1]': {'new_value': 8, 'old_value': 5}}} ->>> delta = Delta(diff) ->>> delta # doctest: +SKIP - - -Note that we can apply delta to objects different than the original objects they were made from: - ->>> t3 = ["a", 2, [3, "b", "c"]] ->>> t3 + delta -[3, 2, [3, 8, 'c']] - -If we check t3, it is still the same as the original value of t3: - ->>> t3 -['a', 2, [3, 'b', 'c']] - -Now let's make the delta with mutate=True - ->>> delta2 = Delta(diff, mutate=True) ->>> t3 + delta2 -[3, 2, [3, 8, 'c']] ->>> t3 -[3, 2, [3, 8, 'c']] - -Applying the delta to t3 mutated the t3 itself in this case! - - -.. 
_delta_and_numpy_label: - -Delta and Numpy ---------------- - ->>> from deepdiff import DeepDiff, Delta ->>> import numpy as np ->>> t1 = np.array([1, 2, 3, 5]) ->>> t2 = np.array([2, 2, 7, 5]) ->>> diff = DeepDiff(t1, t2) ->>> diff -{'values_changed': {'root[0]': {'new_value': np.int64(2), 'old_value': np.int64(1)}, 'root[2]': {'new_value': np.int64(7), 'old_value': np.int64(3)}}} ->>> delta = Delta(diff) - -.. note:: - When applying delta to Numpy arrays, make sure to put the delta object first and the numpy array second. This is because Numpy array overrides the + operator and thus DeepDiff's Delta won't be able to be applied. - - >>> t1 + delta - Traceback (most recent call last): - File "", line 1, in - raise DeltaNumpyOperatorOverrideError(DELTA_NUMPY_OPERATOR_OVERRIDE_MSG) - deepdiff.delta.DeltaNumpyOperatorOverrideError: A numpy ndarray is most likely being added to a delta. Due to Numpy override the + operator, you can only do: delta + ndarray and NOT ndarray + delta - -Let's put the delta first then: - ->>> delta + t1 -array([2, 2, 7, 5]) ->>> delta + t2 == t2 -array([ True, True, True, True]) - - -.. note:: - You can apply a delta that was created from normal Python objects to Numpy arrays. But it is not recommended. - -.. _raise_errors_label: - -Delta Raise Errors parameter ----------------------------- - -raise_errors : Boolean, default=False - Whether to raise errors or not when applying a delta object. - ->>> from deepdiff import DeepDiff, Delta ->>> t1 = [1, 2, [3, 5, 6]] ->>> t2 = [2, 3, [3, 6, 8]] ->>> diff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) ->>> delta = Delta(diff, raise_errors=False) - -Now let's apply the delta to a very different object: - ->>> t3 = [1, 2, 3, 5] ->>> t4 = t3 + delta # doctest: +SKIP - -We get a log message that it was unable to get the item at root[2][1]. 
We get the message since by default log_errors=True - -Let's see what t4 is now: - ->>> t4 # doctest: +SKIP -[3, 2, 3, 5] - -So the delta was partially applied on t3. - -Now let's set the raise_errors=True - ->>> delta2 = Delta(diff, raise_errors=True) ->>> ->>> t3 + delta2 # doctest: +ELLIPSIS -Traceback (most recent call last): - ... -deepdiff.delta.DeltaError: Unable to get the item at root[2][1] - - -.. _delta_safe_to_import_label: - -Delta Safe To Import parameter ------------------------------- - -safe_to_import : Set, default=None. - safe_to_import is a set of modules that needs to be explicitly white listed to be loaded - Example: {'mymodule.MyClass', 'decimal.Decimal'} - Note that this set will be added to the basic set of modules that are already white listed. - - -As noted in :ref:`delta_dump_safety_label` and :ref:`delta_deserializer_label`, DeepDiff's Delta takes safety very seriously and thus limits the globals that can be deserialized when importing. However on occasions that you need a specific type (class) that needs to be used in delta objects, you need to pass it to the Delta via safe_to_import parameter. - -The set of what is already white listed can be found in deepdiff.serialization.SAFE_TO_IMPORT -At the time of writing this document, this list consists of: - ->>> from deepdiff.serialization import SAFE_TO_IMPORT ->>> from pprint import pprint ->>> pprint(SAFE_TO_IMPORT) # doctest: +SKIP -frozenset({'builtins.None', - 'builtins.bin', - 'builtins.bool', - ...}) - -If you want to pass any other argument to safe_to_import, you will need to put the full path to the type as it appears in the sys.modules - -For example let's say you have a package call mypackage and has a module called mymodule. If you check the sys.modules, the address to this module must be mypackage.mymodule. In order for Delta to be able to serialize this object via pickle, first of all it has to be `picklable `_. 
- ->>> diff = DeepDiff(t1, t2) ->>> delta = Delta(diff) ->>> dump = delta.dumps() - -The dump at this point is serialized via Pickle and can be written to disk if needed. - -Later when you want to load this dump, by default Delta will block you from importing anything that is NOT in deepdiff.serialization.SAFE_TO_IMPORT . In fact it will show you this error message when trying to load this dump: - - deepdiff.serialization.ForbiddenModule: Module 'builtins.type' is forbidden. You need to explicitly pass it by passing a safe_to_import parameter - -In order to let Delta know that this specific module is safe to import, you will need to pass it to Delta during loading of this dump: - ->>> delta = Delta(dump, safe_to_import={'mypackage.mymodule'}) - -.. note :: - - If you pass a custom deserializer to Delta, DeepDiff will pass the safe_to_import parameter to the custom deserializer if that deserializer takes safe_to_import as a parameter in its definition. - For example if you just use json.loads as the deserializer, the safe_to_import items won't be passed to it since json.loads does not have such a parameter. - - -.. _delta_verify_symmetry_label: - -Delta Verify Symmetry parameter -------------------------------- - -bidirectional : Boolean, default=False - bidirectional is used to include all the required information so that we can use the delta object both for addition and subtraction. It will also check that the object you are adding the delta to has the same values as the original object that the delta was created from. - - It complains if the object is not what it expected to be. - - ->>> from deepdiff import DeepDiff, Delta ->>> t1 = [1] ->>> t2 = [2] ->>> t3 = [3] ->>> ->>> diff = DeepDiff(t1, t2) ->>> ->>> delta2 = Delta(diff, raise_errors=False, bidirectional=True) ->>> t4 = delta2 + t3 # doctest: +SKIP ->>> t4 # doctest: +SKIP -[2] - -And if you had set raise_errors=True, then it would have raised the error in addition to logging it. - - -..
_delta_force_label: - -Delta Force ------------ - -force : Boolean, default=False - force is used to force apply a delta to objects that have a different structure than what the delta was originally created from. - - ->>> from deepdiff import DeepDiff, Delta ->>> t1 = { -... 'x': { -... 'y': [1, 2, 3] -... }, -... 'q': { -... 'r': 'abc', -... } -... } ->>> ->>> t2 = { -... 'x': { -... 'y': [1, 2, 3, 4] -... }, -... 'q': { -... 'r': 'abc', -... 't': 0.5, -... } -... } ->>> ->>> diff = DeepDiff(t1, t2) ->>> diff -{'dictionary_item_added': ["root['q']['t']"], 'iterable_item_added': {"root['x']['y'][3]": 4}} ->>> delta = Delta(diff) ->>> {} + delta # doctest: +SKIP -{} - -Once we set the force to be True - ->>> delta = Delta(diff, force=True) ->>> {} + delta -{'x': {'y': {3: 4}}, 'q': {'t': 0.5}} - -Notice that the force attribute does not know the original object at ['x']['y'] was supposed to be a list, so it assumes it was a dictionary. - - -.. _always_include_values_label: - -Always Include Values ---------------------- - -always_include_values is used to make sure the delta object includes the values that were changed. Sometimes Delta tries to be efficient and does not include the values when it can get away with it. By setting this parameter to True, you ensure that the Delta object will include the values. - -For example, when the type of an object changes, if we can easily convert from one type to the other, the Delta object does not include the values: - - ->>> from deepdiff import DeepDiff, Delta ->>> diff = DeepDiff(t1=[1, 2], t2=[1, '2']) ->>> diff -{'type_changes': {'root[1]': {'old_type': <class 'int'>, 'new_type': <class 'str'>, 'old_value': 2, 'new_value': '2'}}} ->>> delta=Delta(diff) ->>> delta # doctest: +SKIP - - -As you can see the delta object does not include the values that were changed.
Now let's pass always_include_values=True: - ->>> delta=Delta(diff, always_include_values=True) ->>> delta.diff -{'type_changes': {'root[1]': {'old_type': , 'new_type': , 'new_value': '2'}}} - -If we want to make sure the old values stay with delta, we pass bidirectional=True. By doing so we can also use the delta object to subtract from other objects. - ->>> delta=Delta(diff, always_include_values=True, bidirectional=True) ->>> delta.diff -{'type_changes': {'root[1]': {'old_type': , 'new_type': , 'old_value': 2, 'new_value': '2'}}} - diff --git a/docs/delta.rst b/docs/delta.rst new file mode 120000 index 00000000..8084e1f5 --- /dev/null +++ b/docs/delta.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/delta.rst \ No newline at end of file diff --git a/docs/diff.rst b/docs/diff.rst deleted file mode 100644 index 23d67c9c..00000000 --- a/docs/diff.rst +++ /dev/null @@ -1,30 +0,0 @@ -:doc:`/index` - -.. _deepdiff_label: - -DeepDiff -======== - -.. automodule:: deepdiff.diff - -.. autoclass:: DeepDiff - :members: - -.. toctree:: - :maxdepth: 3 - - basics - custom - deep_distance - exclude_paths - ignore_order - ignore_types_or_values - numbers - optimizations - other - serialization - stats - troubleshoot - view - -Back to :doc:`/index` diff --git a/docs/diff.rst b/docs/diff.rst new file mode 120000 index 00000000..23437d4f --- /dev/null +++ b/docs/diff.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/diff.rst \ No newline at end of file diff --git a/docs/diff_doc.rst b/docs/diff_doc.rst deleted file mode 100644 index 1fc18db6..00000000 --- a/docs/diff_doc.rst +++ /dev/null @@ -1,233 +0,0 @@ -:orphan: - -DeepDiff Module -=============== - -.. |qluster_link| raw:: html - - Qluster - -DeepDiff is now part of |qluster_link|. 
- -*If you're building workflows around data validation and correction,* `Qluster `__ *gives your team a structured way to manage rules, review failures, approve fixes, and reuse decisions—without building the entire system from scratch.* - -Deep Difference of dictionaries, iterables, strings and almost any other object. -It will recursively look for all the changes. - -.. Note:: - |:mega:| **Please fill out our** `fast 5-question survey `__ so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! |:dancers:| - -**Parameters** - -t1 : A dictionary, list, string or any python object that has __dict__ or __slots__ - This is the first item to be compared to the second item - -t2 : dictionary, list, string or almost any python object that has __dict__ or __slots__ - The second item is to be compared to the first one - -cutoff_distance_for_pairs : 1 >= float >= 0, default=0.3 - :ref:`cutoff_distance_for_pairs_label` What is the threshold to consider 2 items as pairs. - Note that it is only used when ignore_order = True. - -cutoff_intersection_for_pairs : 1 >= float >= 0, default=0.7 - :ref:`cutoff_intersection_for_pairs_label` What is the threshold to calculate pairs of items between 2 iterables. - For example 2 iterables that have nothing in common, do not need their pairs to be calculated. - Note that it is only used when ignore_order = True. - -cache_size : int >= 0, default=0 - :ref:`cache_size_label` Cache size to be used to improve the performance. A cache size of zero means it is disabled. - Using the cache_size can dramatically improve the diff performance especially for the nested objects at the cost of more memory usage. - -cache_purge_level: int, 0, 1, or 2. default=1 - :ref:`cache_purge_level` defines what objects in DeepDiff should be deleted to free the memory once the diff object is calculated. If this value is set to zero, most of the functionality of the diff object is removed and the most memory is released. 
A value of 1 preserves all the functionalities of the diff object. A value of 2 also preserves the cache and hashes that were calculated during the diff calculations. In most cases the user does not need to keep those objects in the diff except for investigation purposes. - -cache_tuning_sample_size : int >= 0, default = 0 - :ref:`cache_tuning_sample_size_label` This is an experimental feature. It works hand in hand with the :ref:`cache_size_label`. When cache_tuning_sample_size is set to anything above zero, it will sample the cache usage with the passed sample size and decide whether to use the cache or not. It will also turn the cache back on occasionally during the diffing process. This option can be useful if you are not sure if you need any cache or not. However you will gain much better performance with keeping this parameter zero and running your diff with different cache sizes and benchmarking to find the optimal cache size. - -custom_operators : BaseOperator subclasses, default = None - :ref:`custom_operators_label` can be used when the default comparison is not enough, for example if you are considering whether items are fruits or not. In that case, you can pass *custom_operators* for the job. - -default_timezone : datetime.timezone subclasses or pytz datetimes, default = datetime.timezone.utc - :ref:`default_timezone_label` defines the default timezone. If a datetime is timezone naive, which means it doesn't have a timezone, we assume the datetime is in this timezone. Also any datetime that has a timezone will be converted to this timezone so the datetimes can be compared properly all in the same timezone. Note that Python's default behavior assumes the default timezone is your local timezone. DeepDiff's default is UTC, not your local time zone. - -encodings: List, default = None - :ref:`encodings_label` Character encodings to iterate through when we convert bytes into strings. You may want to pass an explicit list of encodings in your objects if you start getting UnicodeDecodeError from DeepHash.
Also check out :ref:`ignore_encoding_errors_label` if you can get away with ignoring these errors and don't want to bother with an explicit list of encodings but it will come at the price of slightly less accuracy of the final results. Example: encodings=["utf-8", "latin-1"] - -exclude_paths: list, default = None - :ref:`exclude_paths_label` - List of paths to exclude from the report. If only one item, you can pass it as a string. - Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. - -exclude_regex_paths: list, default = None - :ref:`exclude_regex_paths_label` - List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string or regex compiled object. - -exclude_types: list, default = None - :ref:`exclude_types_label` - List of object types to exclude from the report. - -exclude_obj_callback: function, default = None - :ref:`exclude_obj_callback_label` - A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. - This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. - -exclude_obj_callback_strict: function, default = None - :ref:`exclude_obj_callback_strict_label` - A function that works the same way as exclude_obj_callback, but excludes elements from the result only if the function returns True for both elements. - -include_paths: list, default = None - :ref:`include_paths_label` - List of the only paths to include in the report. If only one item is in the list, you can pass it as a string. - Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. - -include_obj_callback: function, default = None - :ref:`include_obj_callback_label` - A function that takes the object and its path and returns a Boolean. 
If True is returned, the object is included in the results, otherwise it is excluded. - This is to give the user a higher level of control than one can achieve via include_paths. - -include_obj_callback_strict: function, default = None - :ref:`include_obj_callback_strict_label` - A function that works the same way as include_obj_callback, but includes elements in the result only if the function returns True for both elements. - -get_deep_distance: Boolean, default = False - :ref:`get_deep_distance_label` will get you the deep distance between objects. The distance is a number between 0 and 1 where zero means there is no diff between the 2 objects and 1 means they are very different. Note that this number should only be used to compare the similarity of 2 objects and nothing more. The algorithm for calculating this number may or may not change in the future releases of DeepDiff. - -group_by: String or a list of size 2, default=None - :ref:`group_by_label` can be used when dealing with the list of dictionaries. It converts them from lists to a single dictionary with the key defined by group_by. The common use case is when reading data from a flat CSV, and the primary key is one of the columns in the CSV. We want to use the primary key instead of the CSV row number to group the rows. The group_by can do 2D group_by by passing a list of 2 keys. - -group_by_sort_key: String or a function - :ref:`group_by_sort_key_label` is used to define how dictionaries are sorted if multiple ones fall under one group. When this parameter is used, group_by converts the lists of dictionaries into a dictionary of keys to lists of dictionaries. Then, :ref:`group_by_sort_key_label` is used to sort between the list. - -hasher: default = DeepHash.sha256hex - Hash function to be used. If you don't want SHA256, you can use your own hash function - by passing hasher=hash. This is for advanced usage and normally you don't need to modify it. 
- -ignore_order : Boolean, default=False - :ref:`ignore_order_label` ignores order of elements when comparing iterables (lists) - Normally ignore_order does not report duplicates and repetition changes. - In order to report repetitions, set report_repetition=True in addition to ignore_order=True - -ignore_order_func : Function, default=None - :ref:`ignore_order_func_label` Sometimes single *ignore_order* parameter is not enough to do a diff job, - you can use *ignore_order_func* to determine whether the order of certain paths should be ignored - -ignore_string_type_changes: Boolean, default = False - :ref:`ignore_string_type_changes_label` - Whether to ignore string type changes or not. For example b"Hello" vs. "Hello" are considered the same if ignore_string_type_changes is set to True. - -ignore_numeric_type_changes: Boolean, default = False - :ref:`ignore_numeric_type_changes_label` - Whether to ignore numeric type changes or not. For example 10 vs. 10.0 are considered the same if ignore_numeric_type_changes is set to True. - -ignore_type_in_groups: Tuple or List of Tuples, default = None - :ref:`ignore_type_in_groups_label` - ignores types when t1 and t2 are both within the same type group. - -ignore_type_subclasses: Boolean, default = False - :ref:`ignore_type_subclasses_label` - ignore type (class) changes when dealing with the subclasses of classes that were marked to be ignored. - -.. Note:: - ignore_type_subclasses was incorrectly doing the reverse of its job up until DeepDiff 6.7.1 - Please make sure to flip it in your use cases, when upgrading from older versions to 7.0.0 or above. - -ignore_uuid_types: Boolean, default = False - :ref:`ignore_uuid_types_label` - Whether to ignore UUID vs string type differences when comparing. When set to True, comparing a UUID object with its string representation will not report as a type change. 
- -ignore_string_case: Boolean, default = False - :ref:`ignore_string_case_label` - Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. - -ignore_nan_inequality: Boolean, default = False - :ref:`ignore_nan_inequality_label` - Whether to ignore float('nan') inequality in Python. - - -ignore_private_variables: Boolean, default = True - :ref:`ignore_private_variables_label` - Whether to exclude the private variables in the calculations or not. It only affects variables that start with double underscores (__). - - -ignore_encoding_errors: Boolean, default = False - :ref:`ignore_encoding_errors_label` If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the :ref:`encodings_label` parameter. - - -zip_ordered_iterables: Boolean, default = False - :ref:`zip_ordered_iterables_label`: - When comparing ordered iterables such as lists, DeepDiff tries to find the smallest difference between the two iterables to report. That means that items in the two lists are not paired individually in the order of appearance in the iterables. Sometimes, that is not the desired behavior. Set this flag to True to make DeepDiff pair and compare the items in the iterables in the order they appear. - -iterable_compare_func: - :ref:`iterable_compare_func_label`: - There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass a iterable_compare_func that takes a function pointer to compare two items. The function takes three parameters (x, y, level) and should return True if it is a match, False if it is not a match or raise CannotCompare if it is unable to compare the two. 
- - -log_frequency_in_sec: Integer, default = 0 - :ref:`log_frequency_in_sec_label` - How often to log the progress. The default of 0 means logging progress is disabled. - If you set it to 20, it will log every 20 seconds. This is useful only when running DeepDiff - on massive objects that will take a while to run. If you are only dealing with small objects, keep it at 0 to disable progress logging. - -log_scale_similarity_threshold: float, default = 0.1 - :ref:`use_log_scale_label` along with :ref:`log_scale_similarity_threshold_label` can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. - -log_stacktrace: Boolean, default = False - If True, we log the stacktrace when logging errors. Otherwise we only log the error message. - -max_passes: Integer, default = 10000000 - :ref:`max_passes_label` defines the maximum number of passes to run on objects to pinpoint what exactly is different. This is only used when ignore_order=True. A new pass is started each time 2 iterables are compared in a way that every single item that is different from the first one is compared to every single item that is different in the second iterable. - -max_diffs: Integer, default = None - :ref:`max_diffs_label` defines the maximum number of diffs to run on objects to pinpoint what exactly is different. This is only used when ignore_order=True. - -math_epsilon: Decimal, default = None - :ref:`math_epsilon_label` uses Python's built-in math.isclose. It defines a tolerance value which is passed to math.isclose(). Any numbers that are within the tolerance will not report as being different. Any numbers outside of that tolerance will show up as different. - -number_format_notation : string, default="f" - :ref:`number_format_notation_label` is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point.
"f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. - -number_to_string_func : function, default=None - :ref:`number_to_string_func_label` is an advanced feature to give the user full control over how numbers are converted to strings for comparison. The default function is defined in https://github.com/seperman/deepdiff/blob/master/deepdiff/helper.py and is called number_to_string. You can define your own function to do that. - -progress_logger: log function, default = logger.info - :ref:`progress_logger_label` defines what logging function to use specifically for progress reporting. This function is only used when progress logging is enabled which happens by setting log_frequency_in_sec to anything above zero. - -report_repetition : Boolean, default=False - :ref:`report_repetition_label` reports repetitions when set to True - It only works when ignore_order is set to True too. - -significant_digits : int >= 0, default=None - :ref:`significant_digits_label` defines the number of digits AFTER the decimal point to be used in the comparison. However you can override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. - -truncate_datetime: string, default = None - :ref:`truncate_datetime_label` can take one of the values 'second', 'minute', 'hour' or 'day', and truncates datetime objects to that precision before hashing them - -threshold_to_diff_deeper: float, default = 0.33 - :ref:`threshold_to_diff_deeper_label` is a number between 0 and 1. When comparing dictionaries that have a small intersection of keys, we will report the dictionary as a new_value instead of reporting individual keys changed. If you set it to zero, you get the same results as DeepDiff 7.0.1 and earlier, which means this feature is disabled. The new default is 0.33 which means if less than one third of keys between dictionaries intersect, report it as a new object.
- -use_enum_value: Boolean, default=False - :ref:`use_enum_value_label` makes it so when diffing enums, we use the enum's value. It makes it so comparing an enum to a string or any other value is not reported as a type change. - -use_log_scale: Boolean, default=False - :ref:`use_log_scale_label` along with :ref:`log_scale_similarity_threshold_label` can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. - -verbose_level: 2 >= int >= 0, default = 1 - A higher verbose level shows you more details. - For example verbose level 1 shows which dictionary items are added or removed. - And verbose level 2 shows the value of the items that are added or removed too. - -view: string, default = text - :ref:`view_label` - Views are different "formats" of results. Each view comes with its own features. - The choices are text (the default) and tree. - The text view is the original format of the results. - The tree view allows you to traverse through the tree of results. So you can traverse through the tree and see what items were compared to what. - - -**Returns** - - A DeepDiff object that has already calculated the difference of the 2 items. The format of the object is chosen by the view parameter. - -**Supported data types** - -int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple, Numpy, custom objects and more! diff --git a/docs/diff_doc.rst b/docs/diff_doc.rst new file mode 120000 index 00000000..d7168b6d --- /dev/null +++ b/docs/diff_doc.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/diff_doc.rst \ No newline at end of file diff --git a/docs/dsearch.rst b/docs/dsearch.rst deleted file mode 100644 index 4466c7a6..00000000 --- a/docs/dsearch.rst +++ /dev/null @@ -1,19 +0,0 @@ -:doc:`/index` - -.. _deepsearch_label: - -DeepSearch -========== - -.. toctree:: - :maxdepth: 3 - -.. automodule:: deepdiff.search - -..
autoclass:: grep - :members: - -.. autoclass:: DeepSearch - :members: - -Back to :doc:`/index` diff --git a/docs/dsearch.rst b/docs/dsearch.rst new file mode 120000 index 00000000..6c883259 --- /dev/null +++ b/docs/dsearch.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/dsearch.rst \ No newline at end of file diff --git a/docs/exclude_paths.rst b/docs/exclude_paths.rst deleted file mode 100644 index 0c9b78a6..00000000 --- a/docs/exclude_paths.rst +++ /dev/null @@ -1,130 +0,0 @@ -:doc:`/index` - -.. _exclude_paths_label: - -Exclude Paths -============= - -Exclude part of your object tree from comparison. -use exclude_paths and pass a set or list of paths to exclude, if only one item is being passed, then just put it there as a string. No need to pass it as a list then. - -Example - >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} - >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} - >>> print (DeepDiff(t1, t2, exclude_paths="root['ingredients']")) # one item pass it as a string - {} - >>> print (DeepDiff(t1, t2, exclude_paths=["root['ingredients']", "root['ingredients2']"])) # multiple items pass as a list or a set. - {} - -Also for root keys you don't have to pass as "root['key']". You can instead just pass the key: - -Example - >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} - >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} - >>> print (DeepDiff(t1, t2, exclude_paths="ingredients")) # one item pass it as a string - {} - >>> print (DeepDiff(t1, t2, exclude_paths=["ingredients", "ingredients2"])) # multiple items pass as a list or a set. - {} - - -.. _include_paths_label: - -Include Paths -============= - -Only include this part of your object tree in the comparison. -Use include_paths and pass a set or list of paths to limit diffing to only those paths. 
If only one item is being passed, just put it there as a string—no need to pass it as a list then. - -Example - >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} - >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} - >>> print (DeepDiff(t1, t2, include_paths="root['for life']")) # one item pass it as a string - {} - >>> print (DeepDiff(t1, t2, include_paths=["for life", "ingredients2"])) # multiple items pass as a list or a set and you don't need to pass the full path when dealing with root keys. So instead of "root['for life']" you can pass "for life" - {} - - -When passing include_paths, all the children of that path will be included too. - -Example - >>> t1 = { - ... "foo": {"bar": "potato"}, - ... "ingredients": ["no meat", "no eggs", "no dairy"] - ... } - >>> t2 = { - ... "foo": {"bar": "banana"}, - ... "ingredients": ["bread", "cheese"] - ... } - >>> DeepDiff(t1, t2, include_paths="foo") - {'values_changed': {"root['foo']['bar']": {'new_value': 'banana', 'old_value': 'potato'}}} - - -.. _wildcard_paths_label: - -Wildcard (Glob) Paths ---------------------- - -Both ``exclude_paths`` and ``include_paths`` support wildcard patterns for matching multiple paths at once: - -- ``[*]`` or ``.*`` matches exactly **one** path segment (any key, index, or attribute). -- ``[**]`` or ``.**`` matches **zero or more** path segments at any depth. - -Wildcard patterns must use the full ``root`` prefix (shorthand keys are not supported for wildcards). 
- -Exclude all ``password`` fields regardless of the parent key: - >>> t1 = {"users": {"alice": {"name": "Alice", "password": "s1"}, "bob": {"name": "Bob", "password": "s2"}}} - >>> t2 = {"users": {"alice": {"name": "Alice", "password": "x1"}, "bob": {"name": "Bob", "password": "x2"}}} - >>> DeepDiff(t1, t2, exclude_paths=["root['users'][*]['password']"]) - {} - -Include only ``name`` fields at any depth: - >>> t1 = {"a": {"name": "A", "secret": 1}, "b": {"name": "B", "secret": 2}} - >>> t2 = {"a": {"name": "X", "secret": 1}, "b": {"name": "Y", "secret": 2}} - >>> result = DeepDiff(t1, t2, include_paths=["root[*]['name']"]) - >>> set(result.get('values_changed', {}).keys()) == {"root['a']['name']", "root['b']['name']"} - True - -Use ``[**]`` to match at any depth: - >>> t1 = {"config": {"db": {"password": "old"}, "cache": {"password": "old"}}} - >>> t2 = {"config": {"db": {"password": "new"}, "cache": {"password": "new"}}} - >>> DeepDiff(t1, t2, exclude_paths=["root[**]['password']"]) - {} - -Literal keys named ``*`` or ``**`` are not treated as wildcards when quoted: - >>> t1 = {"*": 1, "a": 2} - >>> t2 = {"*": 10, "a": 20} - >>> result = DeepDiff(t1, t2, exclude_paths=["root['*']"]) - >>> "root['a']" in result.get('values_changed', {}) - True - -When both ``exclude_paths`` and ``include_paths`` apply to the same path, exclusion takes precedence. - -Wildcards also work with ``DeepHash`` and ``DeepSearch`` exclude_paths. - - -.. _exclude_regex_paths_label: - -Exclude Regex Paths -------------------- - -You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude. The items in the list could be raw regex strings or compiled regex objects. 
- >>> import re - >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] - >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] - >>> print(DeepDiff(t1, t2, exclude_regex_paths=r"root\[\d+\]\['b'\]")) - {} - >>> exclude_path = re.compile(r"root\[\d+\]\['b'\]") - >>> print(DeepDiff(t1, t2, exclude_regex_paths=[exclude_path])) - {} - -example 2: - >>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} - >>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} - >>> DeepDiff(t1, t2, exclude_regex_paths="\['foo.'\]") # since it is one item in exclude_regex_paths, you don't have to put it in a list or a set. - {} - -Tip: DeepDiff is using re.search on the path. So if you want to force it to match from the beginning of the path, add `^` to the beginning of regex. - - - -Back to :doc:`/index` diff --git a/docs/exclude_paths.rst b/docs/exclude_paths.rst new file mode 120000 index 00000000..a19e15d0 --- /dev/null +++ b/docs/exclude_paths.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/exclude_paths.rst \ No newline at end of file diff --git a/docs/extract.rst b/docs/extract.rst deleted file mode 100644 index 1c4ed730..00000000 --- a/docs/extract.rst +++ /dev/null @@ -1,13 +0,0 @@ -:doc:`/index` - -.. _extract_label: - - -Extract -======= - -.. automodule:: deepdiff.path - -.. autofunction:: extract - -Back to :doc:`/index` diff --git a/docs/extract.rst b/docs/extract.rst new file mode 120000 index 00000000..3a24fcaa --- /dev/null +++ b/docs/extract.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/extract.rst \ No newline at end of file diff --git a/docs/faq.rst b/docs/faq.rst deleted file mode 100644 index e6f30044..00000000 --- a/docs/faq.rst +++ /dev/null @@ -1,185 +0,0 @@ -:doc:`/index` - -F.A.Q -===== - -.. |qluster_link| raw:: html - - Qluster - -DeepDiff is now part of |qluster_link|. 
- -*If you're building workflows around data validation and correction,* `Qluster `__ *gives your team a structured way to manage rules, review failures, approve fixes, and reuse decisions—without building the entire system from scratch.* - -.. Note:: - |:mega:| **Please fill out our** `fast 5-question survey `__ so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! |:dancers:| - - -Q: DeepDiff report is not precise when ignore_order=True --------------------------------------------------------- - - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = [ - ... { - ... "key": "some/pathto/customers/foo/", - ... "flags": 0, - ... "value": "" - ... }, - ... { - ... "key": "some/pathto/customers/foo/account_number", - ... "flags": 0, - ... "value": "somevalue1" - ... } - ... ] - >>> - >>> t2 = [ - ... { - ... "key": "some/pathto/customers/foo/account_number", - ... "flags": 0, - ... "value": "somevalue2" - ... }, - ... { - ... "key": "some/pathto/customers/foo/", - ... "flags": 0, - ... "value": "new" - ... } - ... 
] - >>> - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {"root[0]['key']": {'new_value': 'some/pathto/customers/foo/account_number', - 'old_value': 'some/pathto/customers/foo/'}, - "root[0]['value']": {'new_value': 'somevalue2', - 'old_value': ''}, - "root[1]['key']": {'new_value': 'some/pathto/customers/foo/', - 'old_value': 'some/pathto/customers/foo/account_number'}, - "root[1]['value']": {'new_value': 'new', - 'old_value': 'somevalue1'}}} - -**Answer** - -This is explained in :ref:`cutoff_distance_for_pairs_label` and :ref:`cutoff_intersection_for_pairs_label` - -Bump up these 2 parameters to 1 and you get what you want: - - >>> pprint(DeepDiff(t1, t2, ignore_order=True, cutoff_distance_for_pairs=1, cutoff_intersection_for_pairs=1)) - {'values_changed': {"root[0]['value']": {'new_value': 'new', 'old_value': ''}, - "root[1]['value']": {'new_value': 'somevalue2', - 'old_value': 'somevalue1'}}} - - -Q: The report of changes in a nested dictionary is too granular ---------------------------------------------------------------- - -**Answer** - -Use :ref:`threshold_to_diff_deeper_label` - - >>> from deepdiff import DeepDiff - >>> t1 = {"veggie": "carrots"} - >>> t2 = {"meat": "carrots"} - >>> - >>> DeepDiff(t1, t2, threshold_to_diff_deeper=0) - {'dictionary_item_added': ["root['meat']"], 'dictionary_item_removed': ["root['veggie']"]} - >>> DeepDiff(t1, t2, threshold_to_diff_deeper=0.33) - {'values_changed': {'root': {'new_value': {'meat': 'carrots'}, 'old_value': {'veggie': 'carrots'}}}} - - - -Q: TypeError: Object of type type is not JSON serializable ----------------------------------------------------------- - -I'm trying to serialize the DeepDiff results into json and I'm getting the TypeError. 
- - >>> diff=DeepDiff(1, "a") - >>> diff - {'type_changes': {'root': {'old_type': <class 'int'>, 'new_type': <class 'str'>, 'old_value': 1, 'new_value': 'a'}}} - >>> json.dumps(diff) - Traceback (most recent call last): - File "<stdin>", line 1, in <module> - File ".../json/__init__.py", line 231, in dumps - return _default_encoder.encode(obj) - File ".../json/encoder.py", line 199, in encode - chunks = self.iterencode(o, _one_shot=True) - File ".../json/encoder.py", line 257, in iterencode - return _iterencode(o, 0) - File ".../json/encoder.py", line 179, in default - raise TypeError(f'Object of type {o.__class__.__name__} ' - TypeError: Object of type type is not JSON serializable - -**Answer** - -In order to serialize DeepDiff results into json, use to_json() - - >>> diff.to_json() - '{"type_changes": {"root": {"old_type": "int", "new_type": "str", "old_value": 1, "new_value": "a"}}}' - - -Q: How do I parse DeepDiff result paths? ----------------------------------------- - -**Answer** - -Use parse_path: - - >>> from deepdiff import parse_path - >>> parse_path("root[1][2]['age']") - [1, 2, 'age'] - >>> parse_path("root[1][2]['age']", include_actions=True) - [{'element': 1, 'action': 'GET'}, {'element': 2, 'action': 'GET'}, {'element': 'age', 'action': 'GET'}] - >>> - >>> parse_path("root['joe'].age") - ['joe', 'age'] - >>> parse_path("root['joe'].age", include_actions=True) - [{'element': 'joe', 'action': 'GET'}, {'element': 'age', 'action': 'GETATTR'}] - -Or use the tree view so you can use path(output_format='list'): - - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> ddiff - {'iterable_item_removed': [<root[4]['b'][2] t1:3, t2:not present>, <root[4]['b'][3] t1:4, t2:not present>]} - >>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. 
- >>> # One way to get one item from the set is to convert it to a list - >>> # And then get the first item of the list: - >>> removed = list(ddiff['iterable_item_removed'])[0] - >>> removed - <root[4]['b'][2] t1:3, t2:not present> - >>> - >>> parent = removed.up - >>> parent - <root[4]['b'] t1:[1, 2, 3, 4], t2:[1, 2]> - >>> parent.path() # gives you the string representation of the path - "root[4]['b']" - >>> parent.path(output_format='list') # gives you the list of keys and attributes that make up the path - [4, 'b'] - - -Q: Why my datetimes are reported in UTC? ----------------------------------------- - -**Answer** - -DeepDiff converts all datetimes into UTC. If a datetime is timezone naive, we assume it is in UTC too. -That is different than what Python does. Python assumes your timezone naive datetime is in your local timezone. However, you can override it to any other time zone such as your :ref:`default_timezone_label`. - - >>> from deepdiff import DeepDiff - >>> from datetime import datetime, timezone - >>> d1 = datetime(2020, 8, 31, 13, 14, 1) - >>> d2 = datetime(2020, 8, 31, 13, 14, 1, tzinfo=timezone.utc) - >>> d1 == d2 - False - >>> DeepDiff(d1, d2) - {} - - >>> d3 = d2.astimezone(pytz.timezone('America/New_York')) - >>> DeepDiff(d1, d3) - {} - >>> d1 == d3 - False - - ---------- - -Back to :doc:`/index` diff --git a/docs/faq.rst b/docs/faq.rst new file mode 120000 index 00000000..c8c051ef --- /dev/null +++ b/docs/faq.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/faq.rst \ No newline at end of file diff --git a/docs/ignore_order.rst b/docs/ignore_order.rst deleted file mode 100644 index 5ca84318..00000000 --- a/docs/ignore_order.rst +++ /dev/null @@ -1,318 +0,0 @@ -:doc:`/index` - -.. _ignore_order_label: - -Ignore Order -============ - -DeepDiff by default compares objects in the order that it iterates through them in iterables. -In other words if you have 2 lists, then the first item of the lists are compared to each other, then the 2nd items and so on. -That makes DeepDiff be able to run in linear time. 
- -However, there are often times when you don't care about the order in which the items have appeared. -In such cases DeepDiff needs to do way more work in order to find the differences. - -There are a couple of parameters provided to you to have full control over. - - -List difference with ignore_order=False which is the default: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'iterable_item_added': {"root[4]['b'][3]": 3}, - 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 'old_value': 2}, - "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} - -Ignore Order ------------- - -List difference ignoring order or duplicates: (with the same dictionaries as above) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2, ignore_order=True) - >>> print (ddiff) - {} - -.. _ignore_order_func_label: - -Dynamic Ignore Order --------------------- - -Sometimes single *ignore_order* parameter is not enough to do a diff job, -you can use *ignore_order_func* to determine whether the order of certain paths should be ignored - -List difference ignoring order with *ignore_order_func* - >>> t1 = {"set": [1,2,3], "list": [1,2,3]} - >>> t2 = {"set": [3,2,1], "list": [3,2,1]} - >>> ddiff = DeepDiff(t1, t2, ignore_order_func=lambda level: "set" in level.path()) - >>> print (ddiff) - { 'values_changed': { "root['list'][0]": {'new_value': 3, 'old_value': 1}, - "root['list'][2]": {'new_value': 1, 'old_value': 3}}} - - -Ignoring order when certain word in the path - >>> from deepdiff import DeepDiff - >>> t1 = {'a': [1, 2], 'b': [3, 4]} - >>> t2 = {'a': [2, 1], 'b': [4, 3]} - >>> DeepDiff(t1, t2, ignore_order=True) - {} - >>> def ignore_order_func(level): - ... return 'a' in level.path() - ... 
- >>> DeepDiff(t1, t2, ignore_order=True, ignore_order_func=ignore_order_func) - {'values_changed': {"root['b'][0]": {'new_value': 4, 'old_value': 3}, "root['b'][1]": {'new_value': 3, 'old_value': 4}}} - - -.. _report_repetition_label: - -Reporting Repetitions ---------------------- - -List difference ignoring order and reporting repetitions: - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = [1, 3, 1, 4] - >>> t2 = [4, 4, 1] - >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) - >>> pprint(ddiff, indent=2) - { 'iterable_item_removed': {'root[1]': 3}, - 'repetition_change': { 'root[0]': { 'new_indexes': [2], - 'new_repeat': 1, - 'old_indexes': [0, 2], - 'old_repeat': 2, - 'value': 1}, - 'root[3]': { 'new_indexes': [0, 1], - 'new_repeat': 2, - 'old_indexes': [3], - 'old_repeat': 1, - 'value': 4}}} - -.. _max_passes_label: - -Max Passes ----------- - -max_passes: Integer, default = 10000000 - Maximum number of passes to run on objects to pin point what exactly is different. This is only used when ignore_order=True - -If you have deeply nested objects, DeepDiff needs to run multiple passes in order to pin point the difference. -That can dramatically increase the time spent to find the difference. -You can control the maximum number of passes that can be run via the max_passes parameter. - -.. note:: - The definition of pass is whenever 2 iterable objects are being compared with each other and deepdiff decides to compare every single element of one iterable with every single element of the other iterable. - Refer to :ref:`cutoff_distance_for_pairs_label` and :ref:`cutoff_intersection_for_pairs_label` for more info on how DeepDiff decides to start a new pass. - -Max Passes Example - >>> from pprint import pprint - >>> from deepdiff import DeepDiff - >>> - >>> t1 = [ - ... { - ... 'key3': [[[[[1, 2, 4, 5]]]]], - ... 'key4': [7, 8], - ... }, - ... { - ... 'key5': 'val5', - ... 'key6': 'val6', - ... }, - ... 
] - >>> - >>> t2 = [ - ... { - ... 'key5': 'CHANGE', - ... 'key6': 'val6', - ... }, - ... { - ... 'key3': [[[[[1, 3, 5, 4]]]]], - ... 'key4': [7, 8], - ... }, - ... ] - >>> - >>> for max_passes in (1, 2, 62, 65): - ... diff = DeepDiff(t1, t2, ignore_order=True, max_passes=max_passes, verbose_level=2) - ... print('-\n----- Max Passes = {} -----'.format(max_passes)) - ... pprint(diff) - ... - DeepDiff has reached the max number of passes of 1. You can possibly get more accurate results by increasing the max_passes parameter. - - - ----- Max Passes = 1 ----- - {'values_changed': {'root[0]': {'new_value': {'key5': 'CHANGE', 'key6': 'val6'}, - 'old_value': {'key3': [[[[[1, 2, 4, 5]]]]], - 'key4': [7, 8]}}, - 'root[1]': {'new_value': {'key3': [[[[[1, 3, 5, 4]]]]], - 'key4': [7, 8]}, - 'old_value': {'key5': 'val5', 'key6': 'val6'}}}} - DeepDiff has reached the max number of passes of 2. You can possibly get more accurate results by increasing the max_passes parameter. - - - ----- Max Passes = 2 ----- - {'values_changed': {"root[0]['key3'][0]": {'new_value': [[[[1, 3, 5, 4]]]], - 'old_value': [[[[1, 2, 4, 5]]]]}, - "root[1]['key5']": {'new_value': 'CHANGE', - 'old_value': 'val5'}}} - DeepDiff has reached the max number of passes of 62. You can possibly get more accurate results by increasing the max_passes parameter. - - - ----- Max Passes = 62 ----- - {'values_changed': {"root[0]['key3'][0][0][0][0]": {'new_value': [1, 3, 5, 4], - 'old_value': [1, 2, 4, 5]}, - "root[1]['key5']": {'new_value': 'CHANGE', - 'old_value': 'val5'}}} - DeepDiff has reached the max number of passes of 65. You can possibly get more accurate results by increasing the max_passes parameter. - - - ----- Max Passes = 65 ----- - {'values_changed': {"root[0]['key3'][0][0][0][0][1]": {'new_value': 3, - 'old_value': 2}, - "root[1]['key5']": {'new_value': 'CHANGE', - 'old_value': 'val5'}}} - - -.. 
note:: - If there are potential passes left to be run and the max_passes value is reached, DeepDiff will issue a warning. - However the most accurate result might have already been found when there are still potential passes left to be run. - - For example in the above example at max_passes=64, DeepDiff finds the optimal result however it has one more pass - to go before it has run all the potential passes. Hence just for the sake of example we are using max_passes=65 - as an example of a number that doesn't issue warnings. - -.. note:: - Also take a look at :ref:`max_passes_label` - -.. _cutoff_distance_for_pairs_label: - -Cutoff Distance For Pairs -------------------------- - -cutoff_distance_for_pairs : 1 >= float >= 0, default=0.3 - What is the threshold to consider 2 items as potential pairs. - Note that it is only used when ignore_order = True. - -cutoff_distance_for_pairs in combination with :ref:`cutoff_intersection_for_pairs_label` are the parameters that decide whether 2 objects to be paired with each other during ignore_order=True algorithm or not. Note that these parameters are mainly used for nested iterables. - -For example by going from the default of cutoff_distance_for_pairs=0.3 to 0.1, we have essentially disallowed the 1.0 and 20.0 to be paired with each other. As you can see, DeepDiff has decided that the :ref:`deep_distance_label` of 1.0 and 20.0 to be around 0.27. Since that is way above cutoff_distance_for_pairs of 0.1, the 2 items are not paired. 
As a result the lists containing the 2 numbers are directly compared with each other: - - >>> from deepdiff import DeepDiff - >>> t1 = [[1.0]] - >>> t2 = [[20.0]] - >>> DeepDiff(t1, t2, ignore_order=True, cutoff_distance_for_pairs=0.3) - {'values_changed': {'root[0][0]': {'new_value': 20.0, 'old_value': 1.0}}} - >>> DeepDiff(t1, t2, ignore_order=True, cutoff_distance_for_pairs=0.1) - {'values_changed': {'root[0]': {'new_value': [20.0], 'old_value': [1.0]}}} - >>> DeepDiff(1.0, 20.0, get_deep_distance=True) - {'values_changed': {'root': {'new_value': 20.0, 'old_value': 1.0}}, 'deep_distance': 0.2714285714285714} - - -.. _cutoff_intersection_for_pairs_label: - -Cutoff Intersection For Pairs ------------------------------ - -cutoff_intersection_for_pairs : 1 >= float >= 0, default=0.7 - What is the threshold to calculate pairs of items between 2 iterables. - For example 2 iterables that have nothing in common, do not need their pairs to be calculated. - Note that it is only used when ignore_order = True. - -Behind the scene DeepDiff takes the :ref:`deep_distance_label` of objects when running ignore_order=True. -The distance is between zero and 1. -A distance of zero means the items are equal. A distance of 1 means they are 100% different. -When comparing iterables, the cutoff_intersection_for_pairs is used to decide whether to compare every single item in each iterable -with every single item in the other iterable or not. If the distance between the 2 iterables is equal or bigger than the -cutoff_intersection_for_pairs, then the 2 iterables items are only compared as added or removed items and NOT modified items. -However, if the distance between 2 iterables is below the cutoff, every single item from each iterable will be compared to every -single item from the other iterable to find the closest "pair" of each item. - -.. 
note:: - The process of comparing every item to the other is very expensive so :ref:`cutoff_intersection_for_pairs_label` in combination with :ref:`cutoff_distance_for_pairs_label` is used to give acceptable results with much higher speed. - -With a low cutoff_intersection_for_pairs, the 2 iterables above will be considered too -far off from each other to get the individual pairs of items. -So numbers that are not only related to each other via their positions in the lists -and not their values are paired together in the results. - - >>> t1 = [1.0, 2.0, 3.0, 4.0, 5.0] - >>> t2 = [5.0, 3.01, 1.2, 2.01, 4.0] - >>> - >>> DeepDiff(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=0.1) - {'values_changed': {'root[1]': {'new_value': 3.01, 'old_value': 2.0}, 'root[2]': {'new_value': 1.2, 'old_value': 3.0}}, 'iterable_item_added': {'root[3]': 2.01}, 'iterable_item_removed': {'root[0]': 1.0}} - -With the cutoff_intersection_for_pairs of 0.7 (which is the default value), -the 2 iterables will be considered close enough to get pairs of items between the 2. -So 2.0 and 2.01 are paired together for example. - - >>> t1 = [1.0, 2.0, 3.0, 4.0, 5.0] - >>> t2 = [5.0, 3.01, 1.2, 2.01, 4.0] - >>> - >>> DeepDiff(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=0.7) - {'values_changed': {'root[2]': {'new_value': 3.01, 'old_value': 3.0}, 'root[0]': {'new_value': 1.2, 'old_value': 1.0}, 'root[1]': {'new_value': 2.01, 'old_value': 2.0}}} - - -As an example of how much this parameter can affect the results in deeply nested objects, please take a look at :ref:`distance_and_diff_granularity_label`. - - -.. _iterable_compare_func_label2: - -Iterable Compare Func ---------------------- - -New in DeepDiff 5.5.0 - -There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass a `iterable_compare_func` that takes a function pointer to compare two items. 
The function takes three parameters (x, y, level) and should return `True` if it is a match, `False` if it is not a match or raise `CannotCompare` if it is unable to compare the two. - - -For example take the following objects: - - >>> from deepdiff import DeepDiff - >>> from deepdiff.helper import CannotCompare - >>> - >>> t1 = [ - ... { - ... 'id': 1, - ... 'value': [1] - ... }, - ... { - ... 'id': 2, - ... 'value': [7, 8, 1] - ... }, - ... { - ... 'id': 3, - ... 'value': [7, 8], - ... }, - ... ] - >>> - >>> t2 = [ - ... { - ... 'id': 2, - ... 'value': [7, 8] - ... }, - ... { - ... 'id': 3, - ... 'value': [7, 8, 1], - ... }, - ... { - ... 'id': 1, - ... 'value': [1] - ... }, - ... ] - >>> - >>> DeepDiff(t1, t2, ignore_order=True) - {'values_changed': {"root[2]['id']": {'new_value': 2, 'old_value': 3}, "root[1]['id']": {'new_value': 3, 'old_value': 2}}} - - -Now let's define a compare_func that takes 3 parameters: x, y and level. - - >>> def compare_func(x, y, level=None): - ... try: - ... return x['id'] == y['id'] - ... except Exception: - ... raise CannotCompare() from None - ... - >>> DeepDiff(t1, t2, ignore_order=True, iterable_compare_func=compare_func) - {'iterable_item_added': {"root[2]['value'][2]": 1}, 'iterable_item_removed': {"root[1]['value'][2]": 1}} - -As you can see the results are different. Now items with the same ids are compared with each other. - -.. note:: - - The level parameter of the iterable_compare_func is only used when ignore_order=False. 
- -Back to :doc:`/index` diff --git a/docs/ignore_order.rst b/docs/ignore_order.rst new file mode 120000 index 00000000..373ed775 --- /dev/null +++ b/docs/ignore_order.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/ignore_order.rst \ No newline at end of file diff --git a/docs/ignore_types_or_values.rst b/docs/ignore_types_or_values.rst deleted file mode 100644 index da0d140f..00000000 --- a/docs/ignore_types_or_values.rst +++ /dev/null @@ -1,442 +0,0 @@ -:doc:`/index` - -Ignore Types Or Values -====================== - -DeepDiff provides numerous functionalities for the user to be able to define what paths, item types etc. to be included or ignored during the diffing process. - -As an example, you may have a type change in your objects: - -Type change - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'type_changes': { "root[4]['b']": { 'new_type': <class 'str'>, - 'new_value': 'world\n\n\nEnd', - 'old_type': <class 'list'>, - 'old_value': [1, 2, 3]}}} - -And if you don't care about the value of items that have changed type, you can set verbose level to 0 - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:"2", 3:3} - >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) - { 'type_changes': { 'root[2]': { 'new_type': <class 'str'>, - 'old_type': <class 'int'>}}} - - -But what if you did not care about the integer becoming a string with the same value? What if you didn't want 2 -> "2" to be considered a type or value change? Throughout this page you will find different examples of functionalities that can help you achieve what you want. - - -.. _exclude_types_label: - -Exclude Types -------------- - -exclude_types: list, default = None - List of object types to exclude from the report. 
- -Exclude certain types from comparison: - >>> l1 = logging.getLogger("test") - >>> l2 = logging.getLogger("test2") - >>> t1 = {"log": l1, 2: 1337} - >>> t2 = {"log": l2, 2: 1337} - >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) - {} - -.. _ignore_string_type_changes_label: - -Ignore String Type Changes --------------------------- - -ignore_string_type_changes: Boolean, default = False - Whether to ignore string type changes or not. For example b"Hello" vs. "Hello" are considered the same if ignore_string_type_changes is set to True. - - >>> DeepDiff(b'hello', 'hello', ignore_string_type_changes=True) - {} - >>> DeepDiff(b'hello', 'hello') - {'type_changes': {'root': {'old_type': <class 'bytes'>, 'new_type': <class 'str'>, 'old_value': b'hello', 'new_value': 'hello'}}} - -.. _ignore_numeric_type_changes_label: - -Ignore Numeric Type Changes ---------------------------- - -ignore_numeric_type_changes: Boolean, default = False - Whether to ignore numeric type changes or not. For example 10 vs. 10.0 are considered the same if ignore_numeric_type_changes is set to True. - -Example with Decimal - >>> from decimal import Decimal - >>> from deepdiff import DeepDiff - >>> - >>> t1 = Decimal('10.01') - >>> t2 = 10.01 - >>> - >>> DeepDiff(t1, t2) - {'type_changes': {'root': {'old_type': <class 'decimal.Decimal'>, 'new_type': <class 'float'>, 'old_value': Decimal('10.01'), 'new_value': 10.01}}} - >>> DeepDiff(t1, t2, ignore_numeric_type_changes=True) - {} - -Note that this parameter only works for comparing numbers with numbers. 
If you compare a number to a string value of the number, this parameter does not solve your problem: - -Example with Fraction - >>> from fractions import Fraction - >>> from deepdiff import DeepDiff - >>> - >>> t1 = Fraction(1, 2) - >>> t2 = 0.5 - >>> - >>> DeepDiff(t1, t2) - {'type_changes': {'root': {'old_type': <class 'fractions.Fraction'>, 'new_type': <class 'float'>, 'old_value': Fraction(1, 2), 'new_value': 0.5}}} - >>> DeepDiff(t1, t2, ignore_numeric_type_changes=True) - {} - -Example: - >>> t1 = Decimal('10.01') - >>> t2 = "10.01" - >>> - >>> DeepDiff(t1, t2, ignore_numeric_type_changes=True) - {'type_changes': {'root': {'old_type': <class 'decimal.Decimal'>, 'new_type': <class 'str'>, 'old_value': Decimal('10.01'), 'new_value': '10.01'}}} - -If you face repeated patterns of comparing numbers to string values of numbers, you will want to preprocess your input to convert the strings into numbers before feeding it into DeepDiff. - - -.. _ignore_type_in_groups_label: - -Ignore Type In Groups ---------------------- - -ignore_type_in_groups: Tuple or List of Tuples, default = None - Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. While this feature is production ready for strings and numbers, it is still experimental with other custom lists of types, Hence it is recommended to use the shortcuts provided to you which are :ref:`ignore_string_type_changes_label` and :ref:`ignore_numeric_type_changes_label` unless you have a specific need beyond those 2 cases and you need do define your own ignore_type_in_groups. - - For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: - - 1. Set ignore_string_type_changes=True. - 2. Or set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. 
It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . - - Now what if you want also typeA and typeB to be ignored when comparing against each other? - - 1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] - 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] - - -Note: The example below shows you have to use this feature. For enum types, however, you can just use :ref:`use_enum_value_label` - -Example: Ignore Enum to string comparison - >>> from deepdiff import DeepDiff - >>> from enum import Enum - >>> class MyEnum1(Enum): - ... book = "book" - ... cake = "cake" - ... - >>> DeepDiff("book", MyEnum1.book) - {'type_changes': {'root': {'old_type': <class 'str'>, 'new_type': <enum 'MyEnum1'>, 'old_value': 'book', 'new_value': <MyEnum1.book: 'book'>}}} - >>> DeepDiff("book", MyEnum1.book, ignore_type_in_groups=[(Enum, str)]) - {} - - -Example: Ignore Type Number - Dictionary that contains float and integer. Note that this is exactly the same as passing ignore_numeric_type_changes=True. - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = {1: 1, 2: 2.22} - >>> t2 = {1: 1.0, 2: 2.22} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff, indent=2) - { 'type_changes': { 'root[1]': { 'new_type': <class 'float'>, - 'new_value': 1.0, - 'old_type': <class 'int'>, - 'old_value': 1}}} - >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) - >>> pprint(ddiff, indent=2) - {} - -Example: Ignore Type Number - List that contains float and integer. Note that this is exactly the same as passing ignore_numeric_type_changes=True. 
- >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = [1, 2, 3] - >>> t2 = [1.0, 2.0, 3.0] - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff, indent=2) - { 'type_changes': { 'root[0]': { 'new_type': <class 'float'>, - 'new_value': 1.0, - 'old_type': <class 'int'>, - 'old_value': 1}, - 'root[1]': { 'new_type': <class 'float'>, - 'new_value': 2.0, - 'old_type': <class 'int'>, - 'old_value': 2}, - 'root[2]': { 'new_type': <class 'float'>, - 'new_value': 3.0, - 'old_type': <class 'int'>, - 'old_value': 3}}} - >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) - >>> pprint(ddiff, indent=2) - {} - -You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: - >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] - - -ignore_type_in_groups example with custom objects: - >>> class Burrito: - ... bread = 'flour' - ... def __init__(self): - ... self.spicy = True - ... - >>> - >>> class Taco: - ... bread = 'flour' - ... def __init__(self): - ... self.spicy = True - ... - >>> - >>> burrito = Burrito() - >>> taco = Taco() - >>> - >>> burritos = [burrito] - >>> tacos = [taco] - >>> - >>> DeepDiff(burritos, tacos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) - {} - -.. note:: - You can pass list of tuples of types to ignore_type_in_groups or you can put actual values in the tuples and ignore_type_in_groups will extract the type from them. 
The example below has used (1, 1.0) instead of (int, float), - -Ignoring string to None comparison: - >>> from deepdiff import DeepDiff - >>> import datetime - >>> - >>> t1 = [1, 2, 3, 'a', None] - >>> t2 = [1.0, 2.0, 3.3, b'a', 'hello'] - >>> DeepDiff(t1, t2, ignore_type_in_groups=[(1, 1.0), (None, str, bytes)]) - {'values_changed': {'root[2]': {'new_value': 3.3, 'old_value': 3}}} - >>> - -Ignoring datetime to string comparison - >>> now = datetime.datetime(2020, 5, 5) - >>> t1 = [1, 2, 3, 'a', now] - >>> t2 = [1, 2, 3, 'a', 'now'] - >>> DeepDiff(t1, t2, ignore_type_in_groups=[(str, bytes, datetime.datetime)]) - {'values_changed': {'root[4]': {'new_value': 'now', 'old_value': datetime.datetime(2020, 5, 5, 0, 0)}}} - - -.. _ignore_type_subclasses_label: - -Ignore Type Subclasses ----------------------- - -ignore_type_subclasses: Boolean, default = False - Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. - -.. Note:: - ignore_type_subclasses was incorrectly doing the reverse of its job up until DeepDiff 6.7.1 - Please make sure to flip it in your use cases, when upgrading from older versions to 7.0.0 or above. - - >>> from deepdiff import DeepDiff - >>> class ClassA: - ... def __init__(self, x, y): - ... self.x = x - ... self.y = y - ... - >>> class ClassB: - ... def __init__(self, x): - ... self.x = x - ... - >>> class ClassC(ClassB): - ... pass - ... - >>> obj_a = ClassA(1, 2) - >>> obj_c = ClassC(3) - >>> - >>> DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=True) - {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': <__main__.ClassA object at 0x10076a2e8>, 'new_value': <__main__.ClassC object at 0x10082f630>}}} - >>> - >>> DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=False) - {'values_changed': {'root.x': {'new_value': 3, 'old_value': 1}}, 'attribute_removed': [root.y]} - - -.. 
_ignore_uuid_types_label: - -Ignore UUID Types ------------------- - -ignore_uuid_types: Boolean, default = False - Whether to ignore UUID vs string type differences when comparing. When set to True, comparing a UUID object with its string representation will not report as a type change. - -Without ignore_uuid_types: - >>> import uuid - >>> from deepdiff import DeepDiff - >>> test_uuid = uuid.UUID('12345678-1234-5678-1234-567812345678') - >>> uuid_str = '12345678-1234-5678-1234-567812345678' - >>> DeepDiff(test_uuid, uuid_str) - {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': UUID('12345678-1234-5678-1234-567812345678'), 'new_value': '12345678-1234-5678-1234-567812345678'}}} - -With ignore_uuid_types=True: - >>> DeepDiff(test_uuid, uuid_str, ignore_uuid_types=True) - {} - -This works in both directions: - >>> DeepDiff(uuid_str, test_uuid, ignore_uuid_types=True) - {} - -The parameter works with nested structures like dictionaries and lists: - >>> dict1 = {'id': test_uuid, 'name': 'test'} - >>> dict2 = {'id': uuid_str, 'name': 'test'} - >>> DeepDiff(dict1, dict2, ignore_uuid_types=True) - {} - -Note that if the UUID and string represent different values, it will still report as a value change: - >>> different_uuid = uuid.UUID('87654321-4321-8765-4321-876543218765') - >>> DeepDiff(different_uuid, uuid_str, ignore_uuid_types=True) - {'values_changed': {'root': {'old_value': UUID('87654321-4321-8765-4321-876543218765'), 'new_value': '12345678-1234-5678-1234-567812345678'}}} - -This parameter can be combined with other ignore flags: - >>> data1 = {'id': test_uuid, 'name': 'TEST', 'count': 42} - >>> data2 = {'id': uuid_str, 'name': 'test', 'count': 42.0} - >>> DeepDiff(data1, data2, ignore_uuid_types=True, ignore_string_case=True, ignore_numeric_type_changes=True) - {} - - -.. 
_ignore_string_case_label: - -Ignore String Case ------------------- - -ignore_string_case: Boolean, default = False - Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. - - >>> DeepDiff(t1='Hello', t2='heLLO') - {'values_changed': {'root': {'new_value': 'heLLO', 'old_value': 'Hello'}}} - >>> DeepDiff(t1='Hello', t2='heLLO', ignore_string_case=True) - {} - -Ignore Nan Inequality ---------------------- - -ignore_nan_inequality: Boolean, default = False - Read more at :ref:`ignore_nan_inequality_label` - Whether to ignore float('nan') inequality in Python. - - -.. _ignore_private_variables_label: - -Ignore Private Variables ------------------------- - -ignore_private_variables: Boolean, default = True - Whether to exclude the private variables in the calculations or not. It only affects variables that start with double underscores (__). - - -.. _exclude_obj_callback_label: - -Exclude Obj Callback --------------------- - -exclude_obj_callback: function, default = None - A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. - This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. - - >>> def exclude_obj_callback(obj, path): - ... return True if "skip" in path or isinstance(obj, int) else False - ... - >>> t1 = {"x": 10, "y": "b", "z": "c", "skip_1": 0} - >>> t2 = {"x": 12, "y": "b", "z": "c", "skip_2": 0} - >>> DeepDiff(t1, t2, exclude_obj_callback=exclude_obj_callback) - {} - - -..
_exclude_obj_callback_strict_label: - -Exclude Obj Callback Strict ---------------------------- - -exclude_obj_callback_strict: function, default = None - A function that works the same way as exclude_obj_callback, but excludes elements from the result only if the function returns True for both elements - - >>> def exclude_obj_callback_strict(obj, path): - ... return True if isinstance(obj, int) and obj > 10 else False - ... - >>> t1 = {"x": 10, "y": "b", "z": "c"} - >>> t2 = {"x": 12, "y": "b", "z": "c"} - >>> DeepDiff(t1, t2, exclude_obj_callback=exclude_obj_callback_strict) - {} - >>> DeepDiff(t1, t2, exclude_obj_callback_strict=exclude_obj_callback_strict) - {'values_changed': {"root['x']": {'new_value': 12, 'old_value': 10}}} - - -.. _include_obj_callback_label: - -Include Obj Callback --------------------- - -include_obj_callback: function, default = None - A function that takes the object and its path and returns a Boolean. If True is returned, the object is included in the results, otherwise it is excluded. - This is to give the user a higher level of control than one can achieve via include_paths. - - >>> def include_obj_callback(obj, path): - ... return True if "include" in path or isinstance(obj, int) else False - ... - >>> t1 = {"x": 10, "y": "b", "z": "c", "include_me": "a"} - >>> t2 = {"x": 10, "y": "b", "z": "c", "include_me": "b"} - >>> DeepDiff(t1, t2, include_obj_callback=include_obj_callback) - {'values_changed': {"root['include_me']": {'new_value': "b", 'old_value': "a"}}} - - -.. _include_obj_callback_strict_label: - -Include Obj Callback Strict ---------------------------- - -include_obj_callback_strict: function, default = None - A function that works the same way as include_obj_callback, but includes elements in the result only if the function returns True for both elements. - - >>> def include_obj_callback_strict(obj, path): - ... return True if isinstance(obj, int) and obj > 10 else False - ... 
- >>> t1 = {"x": 10, "y": "b", "z": "c"} - >>> t2 = {"x": 12, "y": "b", "z": "c"} - >>> DeepDiff(t1, t2, include_obj_callback=include_obj_callback_strict) - {'values_changed': {"root['x']": {'new_value': 12, 'old_value': 10}}} - >>> DeepDiff(t1, t2, include_obj_callback_strict=include_obj_callback_strict) - {} - - -.. _truncate_datetime_label: - -Truncate Datetime ------------------ - -truncate_datetime: string, default = None - truncate_datetime can take value one of 'second', 'minute', 'hour', 'day' and truncate with this value datetime objects before hashing it - - >>> import datetime - >>> from deepdiff import DeepDiff - >>> d1 = {'a': datetime.datetime(2020, 5, 17, 22, 15, 34, 913070)} - >>> d2 = {'a': datetime.datetime(2020, 5, 17, 22, 15, 39, 296583)} - >>> DeepDiff(d1, d2, truncate_datetime='minute') - {} - - -.. _use_enum_value_label: - -Use Enum Value --------------- - -use_enum_value: Boolean, default=False - Makes it so when diffing enum, we use the enum's value. It makes it so comparing an enum to a string or any other value is not reported as a type change. - - >>> from enum import Enum - >>> from deepdiff import DeepDiff - - >>> - >>> class MyEnum2(str, Enum): - ... book = "book" - ... cake = "cake" - ... - >>> DeepDiff("book", MyEnum2.book) - {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': 'book', 'new_value': }}} - >>> DeepDiff("book", MyEnum2.book, use_enum_value=True) - {} - - -Back to :doc:`/index` diff --git a/docs/ignore_types_or_values.rst b/docs/ignore_types_or_values.rst new file mode 120000 index 00000000..cf43213a --- /dev/null +++ b/docs/ignore_types_or_values.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/ignore_types_or_values.rst \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 49040d42..00000000 --- a/docs/index.rst +++ /dev/null @@ -1,209 +0,0 @@ -.. DeepDiff documentation master file, created by - sphinx-quickstart on Mon Jul 20 06:06:44 2015. 
- You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - - -DeepDiff 8.7.0 documentation! -============================= - -******* -Modules -******* - -The DeepDiff library includes the following modules: - -- **DeepDiff** For Deep Difference of 2 objects. :doc:`/diff` - - It returns the deep difference of python objects. It can also be used to take the distance between objects. :doc:`/deep_distance` - -- **DeepSearch** Search for objects within other objects. :doc:`/dsearch` - -- **DeepHash** Hash any object based on their content even if they are not "hashable" in Python's eyes. :doc:`/deephash` - -- **Delta** Delta of objects that can be applied to other objects. Imagine git commits but for structured data. :doc:`/delta` - -- **Extract** For extracting a path from an object :doc:`/extract` - -- **Commandline** Most of the above functionality is also available via the commandline module :doc:`/commandline` - -*********** -What Is New -*********** - -DeepDiff 8-7-0 --------------- - - - migration note: - - `to_dict()` and `to_json()` now accept a `verbose_level` parameter and always return a usable text-view dict. When the original view is `'tree'`, they default to `verbose_level=2` for full detail. The old `view_override` parameter is removed. To get the previous results, you will need to pass the explicit verbose_level to `to_json` and `to_dict` if you are using the tree view. 
- - Dropping support for Python 3.9 - - Support for python 3.14 - - Added support for callable ``group_by`` thanks to `echan5 `__ - - Added ``FlatDeltaDict`` TypedDict for ``to_flat_dicts`` return type - - Fixed colored view display when all list items are removed thanks to `yannrouillard `__ - - Fixed ``hasattr()`` swallowing ``AttributeError`` in ``__slots__`` handling for objects with ``__getattr__`` thanks to `tpvasconcelos `__ - - Fixed ``ignore_order=True`` missing int-vs-float type changes - - Always use t1 path for reporting thanks to `devin13cox `__ - - Fixed ``_convert_oversized_ints`` failing on NamedTuples - - Fixed orjson ``TypeError`` for integers exceeding 64-bit range - - Fixed parameter bug in ``to_flat_dicts`` where ``include_action_in_path`` and ``report_type_changes`` were not being passed through - - Fixed ``ignore_keys`` issue in ``detailed__dict__`` thanks to `vitalis89 `__ - - Fixed logarithmic similarity type hint thanks to `ljames8 `__ - -DeepDiff 8-6-2 --------------- - - - Security fix (CVE-2026-33155): Prevent denial-of-service via crafted pickle payloads that trigger massive memory allocation through the REDUCE opcode. Size-sensitive callables like ``bytes()`` and ``bytearray()`` are now wrapped to reject allocations exceeding 128 MB. - -DeepDiff 8-6-1 --------------- - - - Patched security vulnerability in the Delta class which was vulnerable to class pollution via its constructor, and when combined with a gadget available in DeltaDiff itself, it could lead to Denial of Service and Remote Code Execution (via insecure Pickle deserialization). 
- - DeepDiff 8-6-0 --------------- - - - Added Colored View thanks to @mauvilsa - - Added support for applying deltas to NamedTuple thanks to @paulsc - - Fixed test_delta.py with Python 3.14 thanks to @Romain-Geissler-1A - - Added python property serialization to json - - Added ip address serialization - - Switched to UV from pip - - Added Claude.md - - Added uuid hashing thanks to @akshat62 - - Added ``ignore_uuid_types`` flag to DeepDiff to avoid type reports - when comparing UUID and string. - - Added comprehensive type hints across the codebase (multiple commits - for better type safety) - - Added support for memoryview serialization - - Added support for bytes serialization (non-UTF8 compatible) - - Fixed bug where group_by with numbers would leak type info into group - path reports - - Fixed bug in ``_get_clean_to_keys_mapping`` without explicit - significant digits - - Added support for python dict key serialization - - Enhanced support for IP address serialization with safe module imports - - Added development tooling improvements (pyright config, .envrc - example) - - Updated documentation and development instructions - - -DeepDiff 8-5-0 --------------- - - - Updating deprecated pydantic calls - - Switching to pyproject.toml - - Fix for moving nested tables when using iterable_compare_func.
by - - Fix recursion depth limit when hashing numpy.datetime64 - - Moving from legacy setuptools use to pyproject.toml - - -DeepDiff 8-4-2 --------------- - - - fixes the type hints for the base - - fixes summarize so if json dumps fails, we can still get a repr of the results - - adds ipaddress support - - -********* -Tutorials -********* - -Tutorials can be found on `Zepworks blog `_ - - -************ -Installation -************ - -Install from PyPi:: - - pip install deepdiff - -If you want to use DeepDiff from commandline:: - - pip install "deepdiff[cli]" - -If you want to improve the performance of DeepDiff with certain processes such as json serialization:: - - pip install "deepdiff[optimize]" - -Read about DeepDiff optimizations at :ref:`optimizations_label` - -Importing ---------- - -.. code:: python - - >>> from deepdiff import DeepDiff # For Deep Difference of 2 objects - >>> from deepdiff import grep, DeepSearch # For finding if item exists in an object - >>> from deepdiff import DeepHash # For hashing objects based on their contents - >>> from deepdiff import Delta # For creating delta of objects that can be applied later to other objects. - >>> from deepdiff import extract # For extracting a path from an object - - -.. note:: - if you want to use DeepDiff via commandline, make sure to run:: - - pip install "deepdiff[cli]" - -Then you can access the commands via: - -- DeepDiff - -.. code:: bash - - $ deep diff --help - -- Delta - -.. code:: bash - - $ deep patch --help - -- grep - -.. code:: bash - - $ deep grep --help - -- extract - -.. code:: bash - - $ deep extract --help - - -Supported data types --------------------- - -int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple, Numpy, custom objects and more! - - -References -========== - -.. 
toctree:: - :maxdepth: 4 - - diff - dsearch - deephash - delta - extract - colored_view - commandline - changelog - authors - faq - support - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/index.rst b/docs/index.rst new file mode 120000 index 00000000..58da7f2c --- /dev/null +++ b/docs/index.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/index.rst \ No newline at end of file diff --git a/docs/numbers.rst b/docs/numbers.rst deleted file mode 100644 index c14fe5ef..00000000 --- a/docs/numbers.rst +++ /dev/null @@ -1,202 +0,0 @@ -:doc:`/index` - -Numbers -======= - -When dealing with numbers, DeepDiff provides the following functionalities: - -.. _significant_digits_label: - -Significant Digits ------------------- - -significant_digits : int >= 0, default=None - -significant_digits defines the number of digits AFTER the decimal point to be used in the comparison. However you can override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. - -.. note:: - Setting significant_digits will affect ANY number comparison. - -If ignore_numeric_type_changes is set to True and you have left significant_digits to the default of None, it gets automatically set to 55. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change. However when ignore_numeric_type_changes=True, in order compare numbers from different types to each other, we need to convert them all into strings. The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. - -.. note:: - significant_digits by default uses "{:.Xf}".format(Your Number) behind the scene to compare numbers where X=significant_digits when the number_format_notation is left as the default of "f" meaning fixed point. 
- - As a side note, please pay attention that adding digits to your floating point can result in small differences in the results. For example: - "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 - - For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) - -.. note:: - To override what significant digits mean and switch it to scientific notation, use number_format_notation="e" - Behind the scene that switches DeepDiff to use "{:.Xe}".format(Your Number) where X=significant_digits. - -**Examples:** - -Approximate decimals comparison (Significant digits after the point): - >>> t1 = Decimal('1.52') - >>> t2 = Decimal('1.57') - >>> DeepDiff(t1, t2, significant_digits=0) - {} - >>> DeepDiff(t1, t2, significant_digits=1) - {'values_changed': {'root': {'new_value': Decimal('1.57'), 'old_value': Decimal('1.52')}}} - -Approximate fractions comparison (Significant digits after the point): - >>> from fractions import Fraction - >>> t1 = Fraction(22, 7) # 3.142857... - >>> t2 = Fraction(355, 113) # 3.141592... - >>> DeepDiff(t1, t2, significant_digits=2) - {} - >>> DeepDiff(t1, t2, significant_digits=3) - {'values_changed': {'root': {'new_value': Fraction(355, 113), 'old_value': Fraction(22, 7)}}} - -Approximate float comparison (Significant digits after the point): - >>> t1 = [ 1.1129, 1.3359 ] - >>> t2 = [ 1.113, 1.3362 ] - >>> pprint(DeepDiff(t1, t2, significant_digits=3)) - {} - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, - 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} - >>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) - {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} - - -.. _number_format_notation_label: - -Number Format Notation ----------------------- - -number_format_notation : string, default="f" - number_format_notation is what defines the meaning of significant digits. 
The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. - -**Examples:** - -Approximate number comparison (significant_digits after the decimal point in scientific notation) - >>> DeepDiff(1024, 1020, significant_digits=2, number_format_notation="f") # default is "f" - {'values_changed': {'root': {'new_value': 1020, 'old_value': 1024}}} - >>> DeepDiff(1024, 1020, significant_digits=2, number_format_notation="e") - {} - -.. _number_to_string_func_label: - -Number To String Function -------------------------- - -number_to_string_func : function, default=None - In many cases DeepDiff converts numbers to strings in order to compare them. For example when ignore_order=True, when significant digits parameter is defined or when the ignore_numeric_type_changes=True. - In its simplest form, the number_to_string_func is "{:.Xf}".format(Your Number) where X is the significant digits and the number_format_notation is left as the default of "f" meaning fixed point. - The number_to_string_func parameter gives the user the full control into overriding how numbers are converted to strings for comparison. The default function is defined in https://github.com/seperman/deepdiff/blob/master/deepdiff/helper.py and is called number_to_string. You can define your own custom function instead of the default one in the helper module. - -Defining your own number_to_string_func - Lets say you want the numbers comparison happen only for numbers above 100 for some reason. - - >>> from deepdiff import DeepDiff - >>> from deepdiff.helper import number_to_string - >>> def custom_number_to_string(number, *args, **kwargs): - ... number = 100 if number < 100 else number - ... return number_to_string(number, *args, **kwargs) - ... 
- >>> t1 = [10, 12, 100000] - >>> t2 = [50, 63, 100021] - >>> DeepDiff(t1, t2, significant_digits=3, number_format_notation="e") - {'values_changed': {'root[0]': {'new_value': 50, 'old_value': 10}, 'root[1]': {'new_value': 63, 'old_value': 12}}} - >>> - >>> DeepDiff(t1, t2, significant_digits=3, number_format_notation="e", - ... number_to_string_func=custom_number_to_string) - {} - - -Ignore Numeric Type Changes ---------------------------- - -ignore_numeric_type_changes: Boolean, default = False -read more at :ref:`ignore_numeric_type_changes_label` - -.. _ignore_nan_inequality_label: - -Ignore Nan Inequality ---------------------- - -ignore_nan_inequality: Boolean, default = False - Whether to ignore float('nan') inequality in Python. Note that this is a cPython "feature". Some versions of Pypy3 have nan==nan where in cPython nan!=nan - - >>> float('nan') == float('nan') - False - >>> DeepDiff(float('nan'), float('nan')) - {'values_changed': {'root': {'new_value': nan, 'old_value': nan}}} - >>> DeepDiff(float('nan'), float('nan'), ignore_nan_inequality=True) - {} - -.. _math_epsilon_label: - -Math Epsilon ------------- - -math_epsilon: Decimal, default = None - math_epsilon uses Python's built-in math.isclose. It defines a tolerance value which is passed to math.isclose(). Any numbers that are within the tolerance will not report as being different. Any numbers outside of that tolerance will show up as different. - - For example for some sensor data derived and computed values must lie in a certain range. It does not matter that they are off by e.g. 1e-5. - - To check against that the math core module provides the valuable isclose() function. It evaluates how close two numbers are to each other, with reference to an epsilon (abs_tol). This is superior to the format function, as it evaluates the mathematical representation and not the string representation.
- -Example with Decimal: - >>> from decimal import Decimal - >>> d1 = {"a": Decimal("7.175")} - >>> d2 = {"a": Decimal("7.174")} - >>> DeepDiff(d1, d2, math_epsilon=0.01) - {} - -Example with Fraction: - >>> from fractions import Fraction - >>> d1 = {"a": Fraction(7175, 1000)} - >>> d2 = {"a": Fraction(7174, 1000)} - >>> DeepDiff(d1, d2, math_epsilon=0.01) - {} - -.. note:: - math_epsilon cannot currently handle the hashing of values, which is done when :ref:`ignore_order_label` is True. - - -.. _use_log_scale_label: - -Use Log Scale -------------- - -use_log_scale: Boolean, default=False - use_log_scale along with :ref:`log_scale_similarity_threshold_label` can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. - - - >>> from deepdiff import DeepDiff - - >>> t1 = {'foo': 110, 'bar': 306} - >>> t2 = {'foo': 140, 'bar': 298} - >>> - >>> DeepDiff(t1, t2) - {'values_changed': {"root['foo']": {'new_value': 140, 'old_value': 110}, "root['bar']": {'new_value': 298, 'old_value': 306}}} - - >>> DeepDiff(t1, t2, use_log_scale=True, log_scale_similarity_threshold=0.01) - {'values_changed': {"root['foo']": {'new_value': 140, 'old_value': 110}, "root['bar']": {'new_value': 298, 'old_value': 306}}} - - >>> DeepDiff(t1, t2, use_log_scale=True, log_scale_similarity_threshold=0.1) - {'values_changed': {"root['foo']": {'new_value': 140, 'old_value': 110}}} - - >>> DeepDiff(t1, t2, use_log_scale=True, log_scale_similarity_threshold=0.3) - {} - - -.. _log_scale_similarity_threshold_label: - -Log Scale Similarity Threshold ------------------------------- - -log_scale_similarity_threshold: float, default = 0.1 - :ref:`use_log_scale_label` along with log_scale_similarity_threshold can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. 
See the example above. - - -Performance Improvement of Numbers diffing ------------------------------------------- - -Take a look at :ref:`diffing_numbers_optimizations_label` - -Back to :doc:`/index` diff --git a/docs/numbers.rst b/docs/numbers.rst new file mode 120000 index 00000000..d1621698 --- /dev/null +++ b/docs/numbers.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/numbers.rst \ No newline at end of file diff --git a/docs/optimizations.rst b/docs/optimizations.rst deleted file mode 100644 index 5bc77682..00000000 --- a/docs/optimizations.rst +++ /dev/null @@ -1,287 +0,0 @@ -:doc:`/index` - -.. _optimizations_label: - -Optimizations -============= - -If you are dealing with large nested objects and ignore_order=True, chances are DeepDiff takes a while to calculate the diff. Here are some tips that may help you with optimizations and progress report. - - -Optimized Libraries -------------------- - -If you dump DeepDiff or Delta objects as json, you can improve the performance by installing orjson. -DeepDiff will automatically use orjson instead of Python's built-in json library to do json serialization. - - pip install "deepdiff[optimize]" - - -Max Passes ----------- - -:ref:`max_passes_label` comes with the default of 10000000. -If you don't need to exactly pinpoint the difference and you can get away with getting a less granular report, you can reduce the number of passes. It is recommended to get a diff of your objects with the defaults max_passes and take a look at the stats by running :ref:`get_stats_label` before deciding to reduce this number. In many cases reducing this number does not yield faster results. - -A new pass is started each time 2 iterables are compared in a way that every single item that is different from the first one is compared to every single item that is different in the second iterable. - -.. 
_max_diffs_label: - -Max Diffs ---------- - -max_diffs: Integer, default = None - max_diffs defines the maximum number of diffs to run on objects to pinpoint what exactly is different. This is only used when ignore_order=True. Every time 2 individual items are compared a diff is counted. The default value of None means there is no limit in the number of diffs that will take place. Any positive integer can make DeepDiff stop doing the calculations upon reaching that max_diffs count. - -You can run diffs and then :ref:`get_stats_label` to see how many diffs and passes have happened. - - >>> from deepdiff import DeepDiff - >>> diff=DeepDiff(1, 2) - >>> diff - {'values_changed': {'root': {'new_value': 2, 'old_value': 1}}} - >>> diff.get_stats() - {'PASSES COUNT': 0, 'DIFF COUNT': 1, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} - >>> diff=DeepDiff([[1,2]], [[2,3,1]]) - >>> diff.get_stats() - {'PASSES COUNT': 0, 'DIFF COUNT': 8, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} - >>> diff=DeepDiff([[1,2]], [[2,3,1]], ignore_order=True) - >>> diff.get_stats() - {'PASSES COUNT': 3, 'DIFF COUNT': 6, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False} - -.. note:: - Compare :ref:`max_diffs_label` with :ref:`max_passes_label` - - -.. _cache_size_label: - -Cache Size ----------- - -cache_size : int >= 0, default=0 - Cache size to be used to improve the performance. A cache size of zero means it is disabled. - Using the cache_size can dramatically improve the diff performance especially for the nested objects at the cost of more memory usage. However if the cache hit rate is very low, having a cache actually reduces the performance. - -Cache Examples --------------- - -For example, let's take a look at the performance of the benchmark_deeply_nested_a in the `DeepDiff-Benchmark repo `_ .
- -No Cache -^^^^^^^^ - -With the no cache option we have the following stats: - - {'PASSES COUNT': 11234, 'DIFF COUNT': 107060, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 10} - -Yes it has taken 10 seconds to do the diff! - -.. figure:: _static/benchmark_deeply_nested_a__3.8__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png - :alt: cache_size=0 - - cache_size=0 - -Cache Size 500 -^^^^^^^^^^^^^^ - -With a cache size of 500, we are doing the same diff in 2.5 seconds! And the memory usage has not changed. It is still hovering around 100Mb. - - {'PASSES COUNT': 3960, 'DIFF COUNT': 19469, 'DISTANCE CACHE HIT COUNT': 11847, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 2} - -As you can see the number of passes and diff counts have gone down and instead the distance cache hit count has gone up. - -.. figure:: _static/benchmark_deeply_nested_a__3.8__ignore_order=True__cache_size=500__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png - :alt: cache_size=500 - - cache_size=500 - - -Cache Size 500 and Cache Tuning Sample Size 500 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -With a cache size of 500, we set the :ref:`cache_tuning_sample_size_label` to be 500 too. And we have a slight improvement. we are doing the same diff in 2 seconds now. And the memory usage has not changed. It is still hovering around 100Mb. - - {'PASSES COUNT': 3960, 'DIFF COUNT': 19469, 'DISTANCE CACHE HIT COUNT': 11847, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 2} - -As you can see in this case none of the stats have changed compared to the previous stats. - -.. 
figure:: _static/benchmark_deeply_nested_a__3.8__ignore_order=True__cache_size=500__cache_tuning_sample_size=500__cutoff_intersection_for_pairs=1.png - :alt: cache_size=500 cache_tuning_sample_size=500 - - cache_size=500 cache_tuning_sample_size=500 - - -Cache Size of 5000 -^^^^^^^^^^^^^^^^^^ - -Let's pay a little attention to our stats. Particularly to 'DISTANCE CACHE HIT COUNT': 11847 and the fact that the memory usage has not changed so far. What if we bump the cache_size to 5000 and disable cache_tuning_sample_size? - - {'PASSES COUNT': 1486, 'DIFF COUNT': 6637, 'DISTANCE CACHE HIT COUNT': 3440, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 0} - -We get the result calculated in under 1 second! And the memory usage is only slightly above 100Mb. - -.. figure:: _static/benchmark_deeply_nested_a__3.8__ignore_order=True__cache_size=5000__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png - :alt: cache_size=5000 - - cache_size=5000 - - - -.. _cache_tuning_sample_size_label: - -Cache Tuning Sample Size ------------------------- - -cache_tuning_sample_size : int >= 0, default = 0 - cache_tuning_sample_size is an experimental feature. It works hand in hand with the :ref:`cache_size_label`. When cache_tuning_sample_size is set to anything above zero, it will sample the cache usage with the passed sample size and decide whether to use the cache or not. It will also turn the cache back on occasionally during the diffing process. This option can be useful if you are not sure if you need any cache or not. However you will gain much better performance with keeping this parameter zero and running your diff with different cache sizes and benchmarking to find the optimal cache size. - -.. note:: - A good start with cache_tuning_sample_size is to set it to the size of your cache. - - -..
_diffing_numbers_optimizations_label: - -Optimizations for Diffing Numbers ---------------------------------- - -If you are diffing lists of python numbers, you could get performance improvement just by installing numpy. DeepDiff will use Numpy to improve the performance behind the scene. - -For example lets take a look at the performance of the benchmark_array_no_numpy vs. benchmark_numpy_array in the `DeepDiff-Benchmark repo `_. - -In this specific test, we have 2 lists of numbers that have nothing in common: `mat1 `_ and `mat2 `_ . - -No Cache and No Numpy -^^^^^^^^^^^^^^^^^^^^^ - -With the no cache option and no Numpy installed we have the following stats: - - {'PASSES COUNT': 1, 'DIFF COUNT': 439944, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 30} - -Yes it has taken 30 seconds to do the diff! - -.. figure:: _static/benchmark_array_no_numpy__3.8__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png - :alt: cache_size=0 and no Numpy - - cache_size=0 and no Numpy - -Cache Size 10000 and No Numpy -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -What if we increase the cache size to 10000? - - {'PASSES COUNT': 1, 'DIFF COUNT': 439944, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 35} - -Not only it didn't help, it increased the diff time by 15%!! - -Worse, if you look at the stats you see that the cache hit count is zero. This has happened since the 2 lists of items have nothing in common and hence caching the results does not improve the performance. - - -.. figure:: _static/benchmark_array_no_numpy__3.8__ignore_order=True__cache_size=10000__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png - :alt: cache_size=10000 and no Numpy - - cache_size=10000 and no Numpy - -No Cache and Numpy -^^^^^^^^^^^^^^^^^^ - -Let's install Numpy now. Set the cache_size=0 and run the diff again. 
- -Yay, the same diff is done in 5 seconds! - - {'PASSES COUNT': 1, 'DIFF COUNT': 1348, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 5} - -As you can see the memory usage has gone up from around 500Mb to around 630Mb. - -.. figure:: _static/benchmark_numpy_array__3.8__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__cutoff_intersection_for_pairs=1.png - :alt: Numpy but no cache - - Numpy but no cache - - -Pypy ----- - -If you are diffing big blobs of data that do not mainly include numbers, you may gain some performance improvement by running DeepDiff on Pypy3 instead of cPython. - -For example lets take a look at the performance of the benchmark_big_jsons in the `DeepDiff-Benchmark repo `_. - -First we will run it on cPython 3.8: - -It takes around 17.5 seconds and 40Mb of memory: - -.. figure:: _static/benchmark_big_jsons__3.8__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__max_diffs=300000__max_passes=40000__cutoff_intersection_for_pairs=1.png - :alt: Nested blob of text diffed in Python3.8 - - Nested blob of text diffed in Python3.8 - -And then we run it in Pypy3.6-7.3.0. It takes 12 seconds now but around 110Mb of memory. - -.. figure:: _static/benchmark_big_jsons__pypy3.6__ignore_order=True__cache_size=0__cache_tuning_sample_size=0__max_diffs=300000__max_passes=40000__cutoff_intersection_for_pairs=1.png - :alt: Nested blob of text diffed in Pypy3.6-7.3.0 - - Nested blob of text diffed in Pypy3.6-7.3.0 - -.. note:: - Note that if you are diffing numbers, and have Numpy installed as recommended, cPython will have a better performance than Pypy. But if you are diffing blobs of mixed strings and some numbers, Pypy will have a better CPU performance and worse memory usage. 
- - -Cutoff Intersection For Pairs ------------------------------ - -:ref:`cutoff_intersection_for_pairs_label` which is only used when ignore_order=True can have a huge effect on the granularity of the results and the performance. A value of zero essentially stops DeepDiff from doing passes while a value of 1 forces DeepDiff to do passes on iterables even when they are very different. Running passes is an expensive operation. - -As an example of how much this parameter can affect the results in deeply nested objects, please take a look at :ref:`distance_and_diff_granularity_label`. - -.. _cache_purge_level: - -Cache Purge Level ------------------ - -cache_purge_level: int, 0, 1, or 2. default=1 - cache_purge_level defines what objects in DeepDiff should be deleted to free the memory once the diff object is calculated. If this value is set to zero, most of the functionality of the diff object is removed and the most memory is released. A value of 1 preserves all the functionalities of the diff object. A value of 2 also preserves the cache and hashes that were calculated during the diff calculations. In most cases the user does not need to keep those objects in the diff except for investigation purposes. - - -.. _zip_ordered_iterables_label: - -Zip Ordered Iterables ---------------------- - -zip_ordered_iterables: Boolean, default = False - When comparing ordered iterables such as lists, DeepDiff tries to find the smallest difference between the two iterables to report. That means that items in the two lists are not paired individually in the order of appearance in the iterables. Sometimes, that is not the desired behavior. Set this flag to True to make DeepDiff pair and compare the items in the iterables in the order they appear.
- - - >>> from pprint import pprint - >>> from deepdiff import DeepDiff - >>> t1 = ["a", "b", "d", "e"] - >>> t2 = ["a", "b", "c", "d", "e"] - >>> DeepDiff(t1, t2) - {'iterable_item_added': {'root[2]': 'c'}} - - When this flag is set to True and ignore_order=False, diffing will be faster. - - >>> diff=DeepDiff(t1, t2, zip_ordered_iterables=True) - >>> pprint(diff) - {'iterable_item_added': {'root[4]': 'e'}, - 'values_changed': {'root[2]': {'new_value': 'c', 'old_value': 'd'}, - 'root[3]': {'new_value': 'd', 'old_value': 'e'}}} - - -.. _threshold_to_diff_deeper_label: - -Threshold To Diff Deeper ------------------------- - -threshold_to_diff_deeper: float, default = 0.33 - threshold_to_diff_deeper is a number between 0 and 1. When comparing dictionaries that have a small intersection of keys, we will report the dictionary as a new_value instead of reporting individual keys changed. If you set it to zero, you get the same results as DeepDiff 7.0.1 and earlier, which means this feature is disabled. The new default is 0.33 which means if less than one third of the keys between dictionaries intersect, report it as a new object.
- - - >>> from deepdiff import DeepDiff - >>> t1 = {"veggie": "carrots"} - >>> t2 = {"meat": "carrots"} - >>> - >>> DeepDiff(t1, t2, threshold_to_diff_deeper=0) - {'dictionary_item_added': ["root['meat']"], 'dictionary_item_removed': ["root['veggie']"]} - >>> DeepDiff(t1, t2, threshold_to_diff_deeper=0.33) - {'values_changed': {'root': {'new_value': {'meat': 'carrots'}, 'old_value': {'veggie': 'carrots'}}}} - - -Back to :doc:`/index` diff --git a/docs/optimizations.rst b/docs/optimizations.rst new file mode 120000 index 00000000..9c87ba25 --- /dev/null +++ b/docs/optimizations.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/optimizations.rst \ No newline at end of file diff --git a/docs/other.rst b/docs/other.rst deleted file mode 100644 index c380e356..00000000 --- a/docs/other.rst +++ /dev/null @@ -1,55 +0,0 @@ -:doc:`/index` - -Other Parameters -================ - - -.. _encodings_label: - -Encodings ---------- - -encodings: List, default = None - -Character encodings to iterate through when we convert bytes into strings. You may want to pass an explicit list of encodings in your objects if you start getting UnicodeDecodeError from DeepHash. Also check out :ref:`ignore_encoding_errors_label` if you can get away with ignoring these errors and don't want to bother with an explicit list of encodings but it will come at the price of slightly less accuracy of the final results. Example: encodings=["utf-8", "latin-1"] - -The reason the decoding of bytes to string is needed is that when `ignore_order = True` we calculate the hash of the objects in order to facilitate diffing them. In order to calculate the hash, we serialize all objects into strings. During the serialization we may encounter issues with character encodings.
- -**Examples:** - -Comparing bytes that have non UTF-8 encoding: - >>> from deepdiff import DeepDiff - >>> item = b"\xbc cup of flour" - >>> DeepDiff([b'foo'], [item], ignore_order=True) - Traceback (most recent call last): - raise UnicodeDecodeError( - UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbc in position 0: Can not produce a hash for root: invalid start byte in 'p of flo...'. Please either pass ignore_encoding_errors=True or pass the encoding via encodings=['utf-8', '...']. - -Let's try to pass both 'utf-8' and 'latin-1' as encodings to be tried: - >>> DeepDiff([b'foo'], [item], encodings=['utf-8', 'latin-1'], ignore_order=True) - {'values_changed': {'root[0]': {'new_value': b'\xbc cup of flour', 'old_value': b'foo'}}} - - -.. _ignore_encoding_errors_label: - -Ignore Encoding Errors ----------------------- - -ignore_encoding_errors: Boolean, default = False - -If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the encodings parameter. - -We can generally get the same results as above example if we just pass `ignore_encoding_errors=True`. However it comes at the cost of less accuracy of the results. - >>> DeepDiff([b'foo'], [b"\xbc cup of flour"], ignore_encoding_errors=True, ignore_order=True) - {'values_changed': {'root[0]': {'new_value': b'\xbc cup of flour', 'old_value': b'foo'}}} - -For example if we replace `foo` with ` cup of flour`, we have bytes that are only different in the problematic character. Ignoring that character means DeepDiff will consider these 2 strings to be equal since their hash becomes the same. Note that we only hash items when `ignore_order=True`. 
- >>> DeepDiff([b" cup of flour"], [b"\xbc cup of flour"], ignore_encoding_errors=True, ignore_order=True) - {} - -But if we had passed the proper encoding, it would have detected that these 2 bytes are different: - >>> DeepDiff([b" cup of flour"], [b"\xbc cup of flour"], encodings=['latin-1'], ignore_order=True) - {'values_changed': {'root[0]': {'new_value': b'\xbc cup of flour', 'old_value': b' cup of flour'}}} - - -Back to :doc:`/index` diff --git a/docs/other.rst b/docs/other.rst new file mode 120000 index 00000000..a47e440d --- /dev/null +++ b/docs/other.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/other.rst \ No newline at end of file diff --git a/docs/search_doc.rst b/docs/search_doc.rst deleted file mode 100644 index 7039281f..00000000 --- a/docs/search_doc.rst +++ /dev/null @@ -1,388 +0,0 @@ -:orphan: - -**DeepHash** - -DeepHash calculates the hash of objects based on their contents in a deterministic way. -This way 2 objects with the same content should have the same hash. - -The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. -For example you can use DeepHash to calculate the hash of a set or a dictionary! - -At the core of it, DeepHash is a deterministic serialization of your object into a string so it -can be passed to a hash function. By default it uses SHA256. You have the option to pass any other hashing function to be used instead. - -**Import** - >>> from deepdiff import DeepHash - -**Parameters** - -obj : any object, The object to be hashed based on its content. - - -apply_hash: Boolean, default = True - DeepHash at its core is doing deterministic serialization of objects into strings. - Then it hashes the string. - The only time you want the apply_hash to be False is if you want to know what - the string representation of your object is BEFORE it gets hashed. - - -exclude_types: list, default = None - List of object types to exclude from hashing. 
- - -exclude_paths: list, default = None - List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path. - Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. - - -include_paths: list, default = None - List of the only paths to include in the report. If only one item, you can pass it as a string. - Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. - - -exclude_regex_paths: list, default = None - List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one regex path. - - -exclude_obj_callback - function, default = None - A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. - This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. - - -encodings: List, default = None - Character encodings to iterate through when we convert bytes into strings. You may want to pass an explicit list of encodings in your objects if you start getting UnicodeDecodeError from DeepHash. Also check out ignore_encoding_errors if you can get away with ignoring these errors and don't want to bother with an explicit list of encodings but it will come at the price of slightly less accuracy of the final results. Example: encodings=["utf-8", "latin-1"] - - -hashes: dictionary, default = empty dictionary - A dictionary of {object or object id: object hash} to start with. - Any object that is encountered and it is already in the hashes dictionary or its id is in the hashes dictionary, - will re-use the hash that is provided by this dictionary instead of re-calculating - its hash. 
This is typically used when you have a series of objects to be hashed and there might be repeats of the same object. - - -hasher: function. default = DeepHash.sha256hex - hasher is the hashing function. The default is DeepHash.sha256hex. - But you can pass another hash function to it if you want. - For example a cryptographic hash function or Python's builtin hash function. - All it needs is a function that takes the input in string format and returns the hash. - - You can use it by passing: hasher=hash for Python's builtin hash. - - The following alternative is already provided: - - - hasher=DeepHash.sha1hex - - Note that prior to DeepDiff 5.2, Murmur3 was the default hash function. - But Murmur3 is removed from DeepDiff dependencies since then. - - -ignore_repetition: Boolean, default = True - If repetitions in an iterable should cause the hash of iterable to be different. - Note that the deepdiff diffing functionality lets this to be the default at all times. - But if you are using DeepHash directly, you can set this parameter. - - -ignore_type_in_groups - Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the times the shortcuts provided to you are enough. - The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more control compared to the shortcuts. - - For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: - - 1. Set ignore_string_type_changes=True which is the default. - 2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. 
It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . - - Now what if you want also typeA and typeB to be ignored when comparing agains each other? - - 1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] - 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] - -ignore_string_type_changes: Boolean, default = True - string type conversions should not affect the hash output when this is set to True. - For example "Hello" and b"Hello" should produce the same hash. - - By setting it to True, both the string and bytes of hello return the same hash. - - -ignore_numeric_type_changes: Boolean, default = False - numeric type conversions should not affect the hash output when this is set to True. - For example 10, 10.0 and Decimal(10) should produce the same hash. - When ignore_numeric_type_changes is set to True, all numbers are converted - to strings with the precision of significant_digits parameter and number_format_notation notation. - If no significant_digits is passed by the user, a default value of 12 is used. - - -ignore_type_subclasses - Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. - - -ignore_string_case - Whether to be case-sensitive or not when comparing strings. By settings ignore_string_case=False, strings will be compared case-insensitively. - - -ignore_private_variables: Boolean, default = True - Whether to exclude the private variables in the calculations or not. It only affects variables that start with double underscores (__). - - -ignore_encoding_errors: Boolean, default = False - If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the encodings parameter. 
- -ignore_iterable_order: Boolean, default = True - If order of items in an iterable should not cause the hash of the iterable to be different. - -number_format_notation : string, default="f" - number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. - - -significant_digits : int >= 0, default=None - By default the significant_digits compares only that many digits AFTER the decimal point. However you can set override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. - - Important: This will affect ANY number comparison when it is set. - - Note: If ignore_numeric_type_changes is set to True and you have left significant_digits to the default of None, it gets automatically set to 12. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change. However when ignore_numeric_type_changes=True, in order compare numbers from different types to each other, we need to convert them all into strings. The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. - - Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits when the number_format_notation is left as the default of "f" meaning fixed point. - - Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 - - For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) - - When you set the number_format_notation="e", we use "{:.Xe}".format(Your Number) where X=significant_digits. 
- -truncate_datetime: string, default = None - Can take value one of 'second', 'minute', 'hour', 'day' and truncate with this value datetime objects before hashing it - - - -**Returns** - A dictionary of {item: item hash}. - If your object is nested, it will build hashes of all the objects it contains too. - - -.. note:: - DeepHash output is not like conventional hash functions. It is a dictionary of object IDs to their hashes. This happens because DeepHash calculates the hash of the object and any other objects found within the object in a recursive manner. If you only need the hash of the object you are passing, all you need to do is to do: - - >>> from deepdiff import DeepHash - >>> obj = {1: 2, 'a': 'b'} - >>> DeepHash(obj)[obj] # doctest: +SKIP - - -**Examples** - -Let's say you have a dictionary object. - >>> from deepdiff import DeepHash - >>> obj = {1: 2, 'a': 'b'} - -If you try to hash it: - >>> hash(obj) - Traceback (most recent call last): - File "", line 1, in - TypeError: unhashable type: 'dict' - -But with DeepHash: - - >>> from deepdiff import DeepHash - >>> obj = {1: 2, 'a': 'b'} - >>> DeepHash(obj) # doctest: +SKIP - - So what is exactly the hash of obj in this case? - DeepHash is calculating the hash of the obj and any other object that obj contains. - The output of DeepHash is a dictionary of object IDs to their hashes. - In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: - - >>> hashes = DeepHash(obj) - >>> hashes[obj] - 'bf5478de322aa033da36bf3bcf9f0599e13a520773f50c6eb9f2487377a7929b' - - Which you can write as: - - >>> hashes = DeepHash(obj)[obj] - - At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. - - If you prefer to use another hashing algorithm, you can pass it using the hasher parameter. 
- - If you do a deep copy of the obj, it should still give you the same hash: - - >>> from copy import deepcopy - >>> obj2 = deepcopy(obj) - >>> DeepHash(obj2)[obj2] - 'bf5478de322aa033da36bf3bcf9f0599e13a520773f50c6eb9f2487377a7929b' - - Note that by default DeepHash will include string type differences. So if your strings were bytes: - - >>> obj3 = {1: 2, b'a': b'b'} - >>> DeepHash(obj3)[obj3] - '71db3231177d49f78b52a356ca206e6179417b681604d00ed703a077049e3300' - - But if you want the same hash if string types are different, set ignore_string_type_changes to True: - - >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] - 'e60c2befb84be625037c75e1e26d0bfc85a0ffc1f3cde9500f68f6eac55e5ad6' - - ignore_numeric_type_changes is by default False too. - - >>> from decimal import Decimal - >>> obj1 = {4:10} - >>> obj2 = {4.0: Decimal(10.0)} - >>> DeepHash(obj1)[4] == DeepHash(obj2)[4.0] - False - - But by setting it to True, we can get the same hash. - - >>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0] - True - -number_format_notation: String, default = "f" - number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. - - -ignore_string_type_changes: Boolean, default = True - By setting it to True, both the string and bytes of hello return the same hash. - - >>> DeepHash(b'hello', ignore_string_type_changes=True)[b'hello'] - '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' - >>> DeepHash('hello', ignore_string_type_changes=True)['hello'] - '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' - - -ignore_numeric_type_changes: Boolean, default = False - For example if significant_digits=5, 1.1, Decimal(1.1) are both converted to 1.10000 - - That way they both produce the same hash. 
- - >>> t1 = {1: 1, 2: 2.22} - >>> DeepHash(t1)[1] - 'c1800a30c736483f13615542e7096f7973631fef8ca935ee1ed9f35fb06fd44e' - >>> DeepHash(t1, ignore_numeric_type_changes=True)[1] == DeepHash(t1, ignore_numeric_type_changes=True)[1.0] - True - - You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: - - >>> from deepdiff import DeepDiff - >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] - - -ignore_type_in_groups example with custom objects: - - >>> class Burrito: - ... bread = 'flour' - ... def __init__(self): - ... self.spicy = True - ... - >>> - >>> class Taco: - ... bread = 'flour' - ... def __init__(self): - ... self.spicy = True - ... - >>> - >>> burrito = Burrito() - >>> taco = Taco() - >>> - >>> burritos = [burrito] - >>> tacos = [taco] - >>> - >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)]) - >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)]) - >>> d1[burrito] == d2[taco] - True - - -ignore_type_subclasses - Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. - - >>> from deepdiff import DeepHash - >>> - >>> class ClassB: - ... def __init__(self, x): - ... self.x = x - ... def __repr__(self): - ... return "obj b" - ... - >>> - >>> class ClassC(ClassB): - ... def __repr__(self): - ... return "obj c" - ... - >>> obj_b = ClassB(1) - >>> obj_c = ClassC(1) - >>> - >>> # By default, subclasses are considered part of the type group. - ... # ignore_type_in_groups=[(ClassB, )] matches ClassC too since it's a subclass. - ... 
hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )]) - >>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )]) - >>> hashes_b[obj_b] == hashes_c[obj_c] - True - >>> - >>> # With ignore_type_subclasses=True, only exact type matches count. - ... # ClassC no longer matches (ClassB, ) group, so hashes differ. - ... hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True) - >>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True) - >>> hashes_b[obj_b] != hashes_c[obj_c] - True - -ignore_string_case - Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively. - - >>> from deepdiff import DeepHash - >>> DeepHash('hello')['hello'] == DeepHash('heLLO')['heLLO'] - False - >>> DeepHash('hello', ignore_string_case=True)['hello'] == DeepHash('heLLO', ignore_string_case=True)['heLLO'] - True - -exclude_obj_callback - function, default = None - A function that takes the object and its path and returns a Boolean. If True is returned, the object is excluded from the results, otherwise it is included. - This is to give the user a higher level of control than one can achieve via exclude_paths, exclude_regex_paths or other means. - - >>> def exclude_obj_callback(obj, path): - ... return True if isinstance(obj, str) and obj in ('x', 'y') else False - ...
- >>> dic1 = {"x": 1, "y": 2, "z": 3} - >>> t1 = [dic1] - >>> t1_hash = DeepHash(t1, exclude_obj_callback=exclude_obj_callback) - >>> - >>> dic2 = {"z": 3} - >>> t2 = [dic2] - >>> t2_hash = DeepHash(t2, exclude_obj_callback=exclude_obj_callback) - >>> - >>> t1_hash[t1] == t2_hash[t2] - True - -number_format_notation : string, default="f" - When numbers are converted to the string, you have the choices between "f" as fixed point and "e" as scientific notation: - - >>> t1=10002 - >>> t2=10004 - >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="f") - >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="f") - >>> - >>> t1_hash[t1] == t2_hash[t2] - False - >>> - >>> - >>> # Now we use the scientific notation - ... t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e") - >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e") - >>> - >>> t1_hash[t1] == t2_hash[t2] - True - -Defining your own number_to_string_func - Lets say you want the hash of numbers below 100 to be the same for some reason. - - >>> from deepdiff import DeepHash - >>> from deepdiff.helper import number_to_string - >>> def custom_number_to_string(number, *args, **kwargs): - ... number = 100 if number < 100 else number - ... return number_to_string(number, *args, **kwargs) - ... - >>> t1 = [10, 12, 100000] - >>> t2 = [50, 63, 100021] - >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) - >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) - >>> t1_hash[t1] == t2_hash[t2] - True - - So both lists produced the same hash thanks to the low significant digits for 100000 vs 100021 and also the custom_number_to_string that converted all numbers below 100 to be 100! 
diff --git a/docs/search_doc.rst b/docs/search_doc.rst new file mode 120000 index 00000000..154f6277 --- /dev/null +++ b/docs/search_doc.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/search_doc.rst \ No newline at end of file diff --git a/docs/serialization.rst b/docs/serialization.rst deleted file mode 100644 index 3daba73b..00000000 --- a/docs/serialization.rst +++ /dev/null @@ -1,325 +0,0 @@ -:doc:`/index` - -.. _serialization_label: - -Serialization -============= - -.. _to_dict_label: - -To Dict -------- - -In order to convert the DeepDiff object into a normal Python dictionary, use the to_dict() method. -The result is always a text-view dictionary regardless of the original view used to create the DeepDiff object. - -**Parameters** - -verbose_level: int, default=None - Override the verbose_level for the serialized output. - When None, the behavior depends on the original view: - - - If the original view is 'text', the verbose_level from DeepDiff initialization is used. - - If the original view is 'tree', verbose_level=2 is used to provide the most detailed output. - - Valid values are 0, 1, or 2. 
- -Example: - >>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - >>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> ddiff.to_dict() - {'type_changes': {"root[4]['b']": {'old_type': , 'new_type': , 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} - - -When the original view is 'tree', to_dict() defaults to verbose_level=2 for the most detailed output: - -Example: - >>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - >>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> ddiff.to_dict() - {'type_changes': {"root[4]['b']": {'old_type': , 'new_type': , 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} - -You can also override the verbose_level: - -Example: - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> ddiff.to_dict(verbose_level=0) - {'type_changes': {"root[4]['b']": {'old_type': , 'new_type': }}} - -.. _to_json_label: - -To Json -------- - -Dump json of the text view. - -In order to do safe json serialization, use the to_json() method. - -**Parameters** - -default_mapping : dictionary(optional), a dictionary of mapping of different types to json types. - -by default DeepDiff converts certain data types. For example Decimals into floats so they can be exported into json. -If you have a certain object type that the json serializer can not serialize it, please pass the appropriate type -conversion through this dictionary. - -verbose_level: int, default=None - Override the verbose_level for the serialized output. Same behavior as to_dict(). - -kwargs: Any other kwargs you pass will be passed on to Python's json.dumps() - - -Example 1 Serialize custom objects: - >>> class A: - ... pass - ... - >>> class B: - ... pass - ... - >>> t1 = A() - >>> t2 = B() - >>> ddiff = DeepDiff(t1, t2) - >>> ddiff.to_json() - TypeError: We do not know how to convert <__main__.A object at 0x10648> of type for json serialization. 
Please pass the default_mapping parameter with proper mapping of the object to a basic python type. - - >>> default_mapping = {A: lambda x: 'obj A', B: lambda x: 'obj B'} - >>> ddiff.to_json(default_mapping=default_mapping) - '{"type_changes": {"root": {"old_type": "A", "new_type": "B", "old_value": "obj A", "new_value": "obj B"}}}' - - -Example 2: - >>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - >>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> ddiff.to_json() - '{"type_changes": {"root[4][\'b\']": {"old_type": "list", "new_type": "str", "old_value": [1, 2, 3], "new_value": "world\\n\\n\\nEnd"}}}' - - -.. _to_json_pickle_label: - -To Json Pickle --------------- - -If you want the original DeepDiff object to be serialized with all the bells and whistles, you can use the to_json_pickle() and from_json_pickle() in order to serialize and deserialize its results into json. Note that json_pickle is unsafe and json pickle dumps from untrusted sources should never be loaded. It is recommended not to use this serialization unless you have to. - -.. note:: - You need to install the `jsonpickle `_ package to use the to_json_pickle() method. - -Serialize and then deserialize back to deepdiff - >>> t1 = {1: 1, 2: 2, 3: 3} - >>> t2 = {1: 1, 2: "2", 3: 3} - >>> ddiff = DeepDiff(t1, t2) - >>> jsoned = ddiff.to_json_pickle() - >>> jsoned - '{"type_changes": {"root[2]": {"new_type": {"py/type": "builtins.str"}, "new_value": "2", "old_type": {"py/type": "builtins.int"}, "old_value": 2}}}' - >>> ddiff_new = DeepDiff.from_json_pickle(jsoned) - >>> ddiff == ddiff_new - True - - -.. _from_json_pickle_label: - -From Json Pickle ----------------- - -Load the diff object from the json pickle dump. -Take a look at the above :ref:`to_json_pickle_label` for an example. - - -.. 
_delta_to_flat_rows_label: - -Delta Serialize To Flat Rows ----------------------------- - -Sometimes, it is desired to serialize a :ref:`delta_label` object to a list of flat rows. For example, to store them in relation databases. In that case, you can use the Delta.to_flat_rows to achieve the desired outcome. The rows are named tuples and can be converted to dictionaries using `._asdict()` - - >>> from pprint import pprint - >>> from deepdiff import DeepDiff, Delta - >>> t1 = {"key1": "value1"} - >>> t2 = {"field2": {"key2": "value2"}} - >>> diff = DeepDiff(t1, t2, verbose_level=2) - >>> pprint(diff, indent=2) - { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}}, - 'dictionary_item_removed': {"root['key1']": 'value1'}} - >>> delta = Delta(diff, bidirectional=True) - >>> flat_rows = delta.to_flat_rows() - >>> pprint(flat_rows, indent=2) - [ FlatDeltaRow(path=['field2', 'key2'], action='dictionary_item_added', value='value2'), - FlatDeltaRow(path=['key1'], action='dictionary_item_removed', value='value1')] - -.. note:: - When converting a delta to flat rows, nested dictionaries that have single keys in them are flattened too. - Notice that the diff object says - - { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}} - - but the flat row is: - - FlatDeltaRow(path=['field2', 'key2'], action='dictionary_item_added', value='value2') - - That means, when you recreate the delta from the flat rows, you need to set force=True to apply the delta: - - >>> t1 + delta == t2 - True - >>> t2 - delta == t1 - True - >>> delta2 = Delta(flat_rows_list=flat_rows, bidirectional=True) - >>> t1 + delta2 == t2 - Expected the old value for root['field2']['key2'] to be None but it is not found. Error found on: 'field2' - False. 
You may want to set force=True, especially if this delta is created by passing flat_rows_list or flat_dict_list - >>> t1 + delta - {'field2': {'key2': 'value2'}} - >>> t1 + delta2 - {} - >>> delta2 = Delta(flat_rows_list=flat_rows, bidirectional=True, force=True) # We need to set force=True - >>> t1 + delta2 - {'field2': {'key2': 'value2'}} - >>> - - - -Flat Row Specs: - - - class FlatDataAction(str, enum.Enum): - values_changed = 'values_changed' - type_changes = 'type_changes' - set_item_added = 'set_item_added' - set_item_removed = 'set_item_removed' - dictionary_item_added = 'dictionary_item_added' - dictionary_item_removed = 'dictionary_item_removed' - iterable_item_added = 'iterable_item_added' - iterable_item_removed = 'iterable_item_removed' - iterable_item_moved = 'iterable_item_moved' - iterable_items_inserted = 'iterable_items_inserted' # opcode - iterable_items_deleted = 'iterable_items_deleted' # opcode - iterable_items_replaced = 'iterable_items_replaced' # opcode - iterable_items_equal = 'iterable_items_equal' # opcode - attribute_removed = 'attribute_removed' - attribute_added = 'attribute_added' - unordered_iterable_item_added = 'unordered_iterable_item_added' - unordered_iterable_item_removed = 'unordered_iterable_item_removed' - - - UnkownValueCode = 'unknown___' - - - class FlatDeltaRow(NamedTuple): - path: List - action: FlatDataAction - value: Optional[Any] = UnkownValueCode - old_value: Optional[Any] = UnkownValueCode - type: Optional[Any] = UnkownValueCode - old_type: Optional[Any] = UnkownValueCode - new_path: Optional[List] = None - t1_from_index: Optional[int] = None - t1_to_index: Optional[int] = None - t2_from_index: Optional[int] = None - t2_to_index: Optional[int] = None - - -.. _delta_to_flat_dicts_label: - -Delta Serialize To Flat Dictionaries ------------------------------------- - -Sometimes, it is desired to serialize a :ref:`delta_label` object to a list of flat dictionaries. For example, to store them in relation databases. 
In that case, you can use the Delta.to_flat_dicts to achieve the desired outcome. - -Since None is a valid value, we use a special hard-coded string to signify "unknown": 'unknown___' - -.. note:: - Many new keys are added to the flat dicts in DeepDiff 7.0.0 - You may want to use :ref:`delta_to_flat_rows_label` instead of flat dicts. - -For example: - - >>> from pprint import pprint - >>> from deepdiff import DeepDiff, Delta - >>> t1 = {"key1": "value1"} - >>> t2 = {"field2": {"key2": "value2"}} - >>> diff = DeepDiff(t1, t2, verbose_level=2) - >>> pprint(diff, indent=2) - { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}}, - 'dictionary_item_removed': {"root['key1']": 'value1'}} - >>> delta = Delta(diff, bidirectional=True) - >>> flat_dicts = delta.to_flat_dicts() - >>> pprint(flat_dicts, indent=2) - [ { 'action': 'dictionary_item_added', - 'new_path': None, - 'old_type': 'unknown___', - 'old_value': 'unknown___', - 'path': ['field2', 'key2'], - 't1_from_index': None, - 't1_to_index': None, - 't2_from_index': None, - 't2_to_index': None, - 'type': 'unknown___', - 'value': 'value2'}, - { 'action': 'dictionary_item_removed', - 'new_path': None, - 'old_type': 'unknown___', - 'old_value': 'unknown___', - 'path': ['key1'], - 't1_from_index': None, - 't1_to_index': None, - 't2_from_index': None, - 't2_to_index': None, - 'type': 'unknown___', - 'value': 'value1'}] - - -Example 2: - - >>> t3 = ["A", "B"] - >>> t4 = ["A", "B", "C", "D"] - >>> diff = DeepDiff(t3, t4, verbose_level=2) - >>> pprint(diff, indent=2) - {'iterable_item_added': {'root[2]': 'C', 'root[3]': 'D'}} - >>> - >>> delta = Delta(diff, bidirectional=True) - >>> flat_dicts = delta.to_flat_dicts() - >>> pprint(flat_dicts, indent=2) - [ { 'action': 'iterable_item_added', - 'new_path': None, - 'old_type': 'unknown___', - 'old_value': 'unknown___', - 'path': [2], - 't1_from_index': None, - 't1_to_index': None, - 't2_from_index': None, - 't2_to_index': None, - 'type': 'unknown___', - 'value': 'C'}, 
- { 'action': 'iterable_item_added', - 'new_path': None, - 'old_type': 'unknown___', - 'old_value': 'unknown___', - 'path': [3], - 't1_from_index': None, - 't1_to_index': None, - 't2_from_index': None, - 't2_to_index': None, - 'type': 'unknown___', - 'value': 'D'}] - - -.. _delta_from_flat_dicts_label: - -Delta Load From Flat Dictionaries ------------------------------------- - - >>> from deepdiff import DeepDiff, Delta - >>> t3 = ["A", "B"] - >>> t4 = ["A", "B", "C", "D"] - >>> diff = DeepDiff(t3, t4, verbose_level=2) - >>> delta = Delta(diff, bidirectional=True) - >>> flat_dicts = delta.to_flat_dicts() - >>> - >>> delta2 = Delta(flat_dict_list=flat_dicts) - >>> t3 + delta == t4 - True - - -Back to :doc:`/index` diff --git a/docs/serialization.rst b/docs/serialization.rst new file mode 120000 index 00000000..8d199e99 --- /dev/null +++ b/docs/serialization.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/serialization.rst \ No newline at end of file diff --git a/docs/stats.rst b/docs/stats.rst deleted file mode 100644 index a3292aec..00000000 --- a/docs/stats.rst +++ /dev/null @@ -1,78 +0,0 @@ -:doc:`/index` - -.. _stats_n_logging_label: - -Stats and Logging -================= - -.. _log_frequency_in_sec_label: - -Log Frequency In Sec --------------------- - -log_frequency_in_sec: Integer, default = 0 - How often to log the progress. The default of 0 means logging progress is disabled. - If you set it to 20, it will log every 20 seconds. This is useful only when running DeepDiff - on massive objects that will take a while to run. If you are only dealing with small objects, keep it at 0 to disable progress logging. - -For example we have run a diff on 2 nested objects that took 2 seconds to get the results. By passing the log_frequency_in_sec=1, we get the following in the logs: - - >>> DeepDiff(t1, t2, log_frequency_in_sec=1) - INFO:deepdiff.diff:DeepDiff 1 seconds in progress. Pass #1634, Diff #8005 - INFO:deepdiff.diff:DeepDiff 2 seconds in progress. 
Pass #3319, Diff #16148 - INFO:deepdiff.diff:stats {'PASSES COUNT': 3960, 'DIFF COUNT': 19469, 'DISTANCE CACHE HIT COUNT': 11847, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 2} - -.. note:: - The default python logger will omit the info logs. You can either set the logging filter to include info logs or pass a different logger via :ref:`progress_logger_label` - - >>> import logging - >>> logging.basicConfig(level=logging.INFO) - - -.. _progress_logger_label: - -Progress Logger ---------------- - -progress_logger: log function, default = logger.info - What logging function to use specifically for progress reporting. This function is only used when progress logging is enabled - by setting log_frequency_in_sec to anything above zero. The function that is passed as the progress_logger needs to be thread safe. - - -For example you can pass progress_logger=logger.warning to the example above and everything is logged as warning level: - - >>> DeepDiff(t1, t2, log_frequency_in_sec=1, progress_logger=logger.warning) - WARNING:deepdiff.diff:DeepDiff 1 seconds in progress. Pass #1634, Diff #8005 - WARNING:deepdiff.diff:DeepDiff 2 seconds in progress. Pass #3319, Diff #16148 - WARNING:deepdiff.diff:stats {'PASSES COUNT': 3960, 'DIFF COUNT': 19469, 'DISTANCE CACHE HIT COUNT': 11847, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, 'DURATION SEC': 2} - - -.. _get_stats_label: - -Get Stats ---------- - -You can run the get_stats() method on a diff object to get some stats on the object. -For example: - - >>> from pprint import pprint - >>> from deepdiff import DeepDiff - >>> - >>> t1 = [ - ... [1, 2, 3, 9], [9, 8, 5, 9] - ... ] - >>> - >>> t2 = [ - ... [1, 2, 4, 10], [4, 2, 5] - ... 
] - >>> - >>> diff = DeepDiff(t1, t2, ignore_order=True, cache_size=5000, cutoff_intersection_for_pairs=1) - >>> pprint(diff.get_stats()) - {'DIFF COUNT': 37, - 'DISTANCE CACHE HIT COUNT': 0, - 'MAX DIFF LIMIT REACHED': False, - 'MAX PASS LIMIT REACHED': False, - 'PASSES COUNT': 7} - - -Back to :doc:`/index` diff --git a/docs/stats.rst b/docs/stats.rst new file mode 120000 index 00000000..f115811f --- /dev/null +++ b/docs/stats.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/stats.rst \ No newline at end of file diff --git a/docs/support.rst b/docs/support.rst deleted file mode 100644 index 7878f86a..00000000 --- a/docs/support.rst +++ /dev/null @@ -1,19 +0,0 @@ -:doc:`/index` - -Support -======= - -.. |qluster_link| raw:: html - - Qluster - -DeepDiff is now part of |qluster_link|. - -*If you're building workflows around data validation and correction,* `Qluster `__ *gives your team a structured way to manage rules, review failures, approve fixes, and reuse decisions—without building the entire system from scratch.* - -Thank you for using DeepDiff! -If you find a bug, please create a ticket on our `GitHub repo `__ - -We are **available for consulting** if you need immediate help or custom implementations of DeepDiff. You can reach us via filling up `this form `__ - -Back to :doc:`/index` diff --git a/docs/support.rst b/docs/support.rst new file mode 120000 index 00000000..6e6cb2c2 --- /dev/null +++ b/docs/support.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/support.rst \ No newline at end of file diff --git a/docs/troubleshoot.rst b/docs/troubleshoot.rst deleted file mode 100644 index 84e18b6f..00000000 --- a/docs/troubleshoot.rst +++ /dev/null @@ -1,29 +0,0 @@ -:doc:`/index` - -.. _troubleshoot_label: - -Troubleshoot -============ - -Murmur3 Installation -~~~~~~~~~~~~~~~~~~~~ - -NOTE: Murmur3 was removed from DeepDiff 5.2.0 - -If you are running into this issue, you are using an older version of DeepDiff. 
- -`Failed to build mmh3 when installing DeepDiff` - -DeepDiff prefers to use Murmur3 for hashing. However you have to manually install murmur3 by running: `pip install mmh3` - -On MacOS Mojave, some users experience difficulty when installing Murmur3. - -The problem can be solved by running: - - `xcode-select --install` - -And then running - - `pip install mmh3` - -Back to :doc:`/index` diff --git a/docs/troubleshoot.rst b/docs/troubleshoot.rst new file mode 120000 index 00000000..4439bf8e --- /dev/null +++ b/docs/troubleshoot.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/troubleshoot.rst \ No newline at end of file diff --git a/docs/view.rst b/docs/view.rst deleted file mode 100644 index 3cdec070..00000000 --- a/docs/view.rst +++ /dev/null @@ -1,365 +0,0 @@ -:doc:`/index` - -.. _view_label: - -View -==== - -You have the options of text view and tree view. -The main difference is that the tree view has the capabilities to traverse the objects to see what objects were compared to what other objects. - -While the view options decide the format of the output that is mostly machine readable, regardless of the view you choose, you can get a more human readable output by using the pretty() method. -DeepDiff also offers other specialized views such as the :doc:`colored_view` (which includes a compact variant) and :doc:`delta` view for specific use cases. - -.. _text_view_label: - -Text View ---------- - -Text view is the default view of DeepDiff. It is simpler than tree view. - -Example of using the text view. - >>> from decimal import Decimal - >>> from deepdiff import DeepDiff - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> print(ddiff) - {'dictionary_item_added': [root[5], root[6]], 'dictionary_item_removed': [root[4]]} - -So for example ddiff['dictionary_item_added'] is a set of string results. That's why this view is called the text view. -You can get this view by default or by passing `view='text'`. - -.. 
_tree_view_label: - -Tree View ---------- - -The tree view provides you with tree objects that you can traverse through to find -the parents of the objects that are diffed and the actual objects that are being diffed. -This view is very useful when dealing with nested objects. -Note that tree view always returns results in the form of Python sets. - -You can traverse through the tree elements! - -.. note:: - The Tree view is just a different representation of the diffed data. - Behind the scene, DeepDiff creates the tree view first and then converts it to textual - representation for the text view. - -**Tree View Interface** - -.. code:: text - - +---------------------------------------------------------------+ - | | - | parent(t1) parent node parent(t2) |----level - | + ^ + | - +------|--------------------------|---------------------|-------+ - | | | up | - | Child | | | ChildRelationship - | Relationship | | | - | down | | | - +------|----------------------|-------------------------|-------+ - | v v v | - | child(t1) child node child(t2) |----level - | | - +---------------------------------------------------------------+ - - -:up: Move up to the parent node aka parent level -:down: Move down to the child node aka child level -:path(): Get the path to the current node in string representation, path(output_format='list') gives you the path in list representation. path(use_t2=True) gives you the path to t2. -:t1: The first item in the current node that is being diffed -:t2: The second item in the current node that is being diffed -:additional: Additional information about the node i.e. repetition -:repetition: Shortcut to get the repetition report - - -The tree view allows you to have more than mere textual representaion of the diffed objects. -It gives you the actual objects (t1, t2) throughout the tree of parents and children. - -**Examples for Tree View** - -.. note:: - Set view='tree' in order to get the results in tree view. 
- -Value of an item has changed (Tree View) - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') - >>> ddiff_verbose0 - {'values_changed': []} - >>> - >>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') - >>> ddiff_verbose1 - {'values_changed': []} - >>> set_of_values_changed = ddiff_verbose1['values_changed'] - >>> # since set_of_values_changed includes only one item in a set - >>> # in order to get that one item we can: - >>> (changed,) = set_of_values_changed - >>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] - - >>> changed.t1 - 2 - >>> changed.t2 - 4 - >>> # You can traverse through the tree, get to the parents! - >>> changed.up - - -List difference (Tree View) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> ddiff - {'iterable_item_removed': [, ]} - >>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. 
- >>> # One way to get one item from the set is to convert it to a list - >>> # And then get the first item of the list: - >>> removed = list(ddiff['iterable_item_removed'])[0] - >>> removed - - >>> - >>> parent = removed.up - >>> parent - - >>> parent.path() # gives you the string representation of the path - "root[4]['b']" - >>> parent.path(output_format='list') # gives you the list of keys and attributes that make up the path - [4, 'b'] - >>> parent.t1 - [1, 2, 3, 4] - >>> parent.t2 - [1, 2] - >>> parent.up - - >>> parent.up.up - - >>> parent.up.up.t1 - {1: 1, 2: 2, 3: 3, 4: {'a': 'hello', 'b': [1, 2, 3, 4]}} - >>> parent.up.up.t1 == t1 # It is holding the original t1 that we passed to DeepDiff - True - -List difference 2 (Tree View) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> pprint(ddiff, indent = 2) - { 'iterable_item_added': [], - 'values_changed': [, ]} - >>> - >>> # Note that iterable_item_added is a set with one item. - >>> # So in order to get that one item from it, we can do: - >>> - >>> (added,) = ddiff['iterable_item_added'] - >>> added - - >>> added.up.up - - >>> added.up.up.path() - 'root[4]' - >>> added.up.up.path(output_format='list') # gives you the list of keys and attributes that make up the path - [4] - >>> added.up.up.down - - >>> - >>> # going up twice and then down twice gives you the same node in the tree: - >>> added.up.up.down.down == added - True - -List difference ignoring order but reporting repetitions (Tree View) - >>> t1 = [1, 3, 1, 4] - >>> t2 = [4, 4, 1] - >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree') - >>> pprint(ddiff, indent=2) - { 'iterable_item_removed': [], - 'repetition_change': [, ]} - >>> - >>> # repetition_change is a set with 2 items. - >>> # in order to get those 2 items, we can do the following. 
- >>> # or we can convert the set to list and get the list items. - >>> # or we can iterate through the set items - >>> - >>> (repeat1, repeat2) = ddiff['repetition_change'] - >>> repeat1 # the default verbosity is set to 1. - - >>> # The actual data regarding the repetitions can be found in the repetition attribute: - >>> repeat1.repetition - {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} - >>> - >>> # If you change the verbosity, you will see less: - >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree', verbose_level=0) - >>> ddiff - {'repetition_change': [, ], 'iterable_item_removed': []} - >>> (repeat1, repeat2) = ddiff['repetition_change'] - >>> repeat1 - - >>> - >>> # But the verbosity level does not change the actual report object. - >>> # It only changes the textual representaion of the object. We get the actual object here: - >>> repeat1.repetition - {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} - >>> repeat1.t1 - 4 - >>> repeat1.t2 - 4 - >>> repeat1.up - - -List that contains dictionary (Tree View) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': [], - 'values_changed': []} - -Sets (Tree View): - >>> t1 = {1, 2, 8} - >>> t2 = {1, 2, 3, 5} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> print(ddiff) - {'set_item_removed': [], 'set_item_added': [, ]} - >>> # grabbing one item from set_item_removed set which has one item only - >>> (item,) = ddiff['set_item_removed'] - >>> item.up - - >>> item.up.t1 == t1 - True - -Named Tuples (Tree View): - >>> from collections import namedtuple - >>> Point = namedtuple('Point', ['x', 'y']) - >>> t1 = Point(x=11, y=22) - >>> t2 = Point(x=11, y=23) - >>> print(DeepDiff(t1, t2, view='tree')) - {'values_changed': []} - -Custom objects (Tree 
View): - >>> class ClassA(object): - ... a = 1 - ... def __init__(self, b): - ... self.b = b - ... - >>> t1 = ClassA(1) - >>> t2 = ClassA(2) - >>> - >>> print(DeepDiff(t1, t2, view='tree')) - {'values_changed': []} - -Object attribute added (Tree View): - >>> t2.c = "new attribute" - >>> pprint(DeepDiff(t1, t2, view='tree')) - {'attribute_added': [], - 'values_changed': []} - -Approximate decimals comparison (Significant digits after the point) (Tree View): - >>> t1 = Decimal('1.52') - >>> t2 = Decimal('1.57') - >>> DeepDiff(t1, t2, significant_digits=0, view='tree') - {} - >>> ddiff = DeepDiff(t1, t2, significant_digits=1, view='tree') - >>> ddiff - {'values_changed': []} - >>> (change1,) = ddiff['values_changed'] - >>> change1 - - >>> change1.t1 - Decimal('1.52') - >>> change1.t2 - Decimal('1.57') - >>> change1.path() - 'root' - -Approximate float comparison (Significant digits after the point) (Tree View): - >>> t1 = [ 1.1129, 1.3359 ] - >>> t2 = [ 1.113, 1.3362 ] - >>> ddiff = DeepDiff(t1, t2, significant_digits=3, view='tree') - >>> ddiff - {} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> pprint(ddiff, indent=2) - { 'values_changed': [, ]} - >>> ddiff = DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1, view='tree') - >>> ddiff - {'values_changed': []} - - -pretty() method ---------------- - -Use the pretty method for human readable output. This is regardless of what view you have used to generate the results. - >>> from deepdiff import DeepDiff - >>> t1={1,2,4} - >>> t2={2,3} - >>> print(DeepDiff(t1, t2).pretty()) - Item root[3] added to set. - Item root[4] removed from set. - Item root[1] removed from set. - -The pretty method has an optional parameter ``prefix`` that allows a prefix string before every output line (*e.g.* for logging): - >>> from deepdiff import DeepDiff - >>> t1={1,2,4} - >>> t2={2,3} - >>> print(DeepDiff(t1, t2).pretty(prefix='Diff: ')) - Diff: Item root[3] added to set. - Diff: Item root[4] removed from set. 
- Diff: Item root[1] removed from set. - -The ``prefix`` may also be a callable function. This function must accept ``**kwargs``; as of this version, the only parameter is ``diff`` but the signature allows for future expansion. -The ``diff`` given will be the ``DeepDiff`` that ``pretty`` was called on; this allows interesting capabilities such as: - -.. code:: python - - >>> from deepdiff import DeepDiff - >>> t1={1,2,4} - >>> t2={2,3} - >>> def callback(**kwargs): - ... """Helper function using a hidden variable on the diff that tracks which count prints next""" - ... kwargs['diff']._diff_count = 1 + getattr(kwargs['diff'], '_diff_count', 0) - ... return f"Diff #{kwargs['diff']._diff_count}: " - ... - >>> print(DeepDiff(t1, t2).pretty(prefix=callback)) - Diff #1: Item root[3] added to set. - Diff #2: Item root[4] removed from set. - Diff #3: Item root[1] removed from set. - - -Text view vs. Tree view vs. pretty() method ------------------------------------------------ - -Views are just different format of results. Each comes with its own set of features. At the end of the day the user can choose the right format based on the use case. - -- The text view is the default format of the results. It is the format that is the most suitable if you don't need to know the traversal history of the objects being compared. -- The tree view allows you to traverse back and forth through the tree and see what objects were compared to what other objects. -- The pretty() method is not a view. All the views are dictionaries. The pretty() method spits out a string output of what has changed and is designed to be human readable. 
- -For example - >>> from deepdiff import DeepDiff - >>> t1={1,2,4} - >>> t2={2,3} - -Text view (default) - >>> DeepDiff(t1, t2) # same as view='text' - {'set_item_removed': [root[4], root[1]], 'set_item_added': [root[3]]} - -Tree view - >>> tree = DeepDiff(t1, t2, view='tree') - >>> tree - {'set_item_removed': [, ], 'set_item_added': []} - >>> tree['set_item_added'][0] - - >>> tree['set_item_added'][0].t2 - 3 - -Pretty method. Regardless of what view was used, you can use the "pretty()" method to get a human readable output. - >>> print(DeepDiff(t1, t2).pretty()) - Item root[3] added to set. - Item root[4] removed from set. - Item root[1] removed from set. - - -Back to :doc:`/index` diff --git a/docs/view.rst b/docs/view.rst new file mode 120000 index 00000000..5f78d5cf --- /dev/null +++ b/docs/view.rst @@ -0,0 +1 @@ +../deepdiff/docstrings/view.rst \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index bd7a2848..043f9d28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi" [project] name = "deepdiff" -version = "8.7.0" +version = "9.0.0" dependencies = [ "orderly-set>=5.5.0,<6", ] @@ -58,6 +58,7 @@ dev = [ "nox==2026.2.9", "uuid6==2025.0.1", "pytz", + "flit-core==3.12.0", ] docs = [ "Sphinx~=8.1.3", diff --git a/uv.lock b/uv.lock index 638d47e6..fcf6a9b5 100644 --- a/uv.lock +++ b/uv.lock @@ -394,6 +394,7 @@ coverage = [ ] dev = [ { name = "bump2version" }, + { name = "flit-core" }, { name = "ipdb" }, { name = "jsonpickle" }, { name = "nox" }, @@ -437,6 +438,7 @@ requires-dist = [ { name = "coverage", marker = "extra == 'coverage'", specifier = "~=7.13.5" }, { name = "flake8", marker = "extra == 'static'", specifier = "~=7.3.0" }, { name = "flake8-pyproject", marker = "extra == 'static'", specifier = "~=1.2.4" }, + { name = "flit-core", marker = "extra == 'dev'", specifier = "==3.12.0" }, { name = "furo", marker = "extra == 'docs'", specifier = ">=2024.8.6" }, { name = "ipdb", marker = 
"extra == 'dev'", specifier = "~=0.13.13" }, { name = "jsonpickle", marker = "extra == 'dev'", specifier = "~=4.1.1" }, @@ -553,6 +555,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/85/6a/cdee9ff7f2b7c6ddc219fd95b7c70c0a3d9f0367a506e9793eedfc72e337/flake8_pyproject-1.2.4-py3-none-any.whl", hash = "sha256:ea34c057f9a9329c76d98723bb2bb498cc6ba8ff9872c4d19932d48c91249a77", size = 5694, upload-time = "2025-11-28T21:40:01.309Z" }, ] +[[package]] +name = "flit-core" +version = "3.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/69/59/b6fc2188dfc7ea4f936cd12b49d707f66a1cb7a1d2c16172963534db741b/flit_core-3.12.0.tar.gz", hash = "sha256:18f63100d6f94385c6ed57a72073443e1a71a4acb4339491615d0f16d6ff01b2", size = 53690, upload-time = "2025-03-25T08:03:23.969Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/65/b6ba90634c984a4fcc02c7e3afe523fef500c4980fec67cc27536ee50acf/flit_core-3.12.0-py3-none-any.whl", hash = "sha256:e7a0304069ea895172e3c7bb703292e992c5d1555dd1233ab7b5621b5b69e62c", size = 45594, upload-time = "2025-03-25T08:03:20.772Z" }, +] + [[package]] name = "furo" version = "2025.12.19" From a3496996a63df644b4abf713afd4e9a3522badc1 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 11:27:37 -0700 Subject: [PATCH 05/23] =?UTF-8?q?1.=20Nested=20namedtuple=20set/frozenset?= =?UTF-8?q?=20updates=20could=20replace=20the=20whole=20result=20with=20th?= =?UTF-8?q?e=20inner=20=20=20=20=20=20namedtuple,=20dropping=20the=20outer?= =?UTF-8?q?=20container.=20=20=20=20=20=20Fixed=20by=20updating=20the=20na?= =?UTF-8?q?medtuple=20in=20its=20actual=20parent=20when=20nested,=20while?= =?UTF-8?q?=20preserving=20root-level=20=20=20=20=20=20namedtuple=20behavi?= =?UTF-8?q?or.=20=20=202.=20Tuple=20deltas=20using=20iterable=20opcodes=20?= =?UTF-8?q?could=20silently=20do=20nothing=20for=20insert/delete-only=20ch?= 
=?UTF-8?q?anges.=20=20=20=20=20=20Fixed=20by=20writing=20the=20transforme?= =?UTF-8?q?d=20tuple=20back=20instead=20of=20reconstructing=20the=20origin?= =?UTF-8?q?al=20tuple.=20=20=203.=20Applying=20a=20delta=20with=20both=20m?= =?UTF-8?q?oved=20and=20added=20iterable=20items=20could=20mutate=20the=20?= =?UTF-8?q?delta=E2=80=99s=20own=20=20=20=20=20=20internal=20diff=20data.?= =?UTF-8?q?=20=20=20=20=20=20Fixed=20by=20copying=20the=20added-items=20ma?= =?UTF-8?q?pping=20before=20inserting=20temporary=20move=20placeholders.?= =?UTF-8?q?=20=20=204.=20Removing=20multiple=20dictionary=20items=20with?= =?UTF-8?q?=20complex=20keys=20could=20crash=20during=20path=20sorting.=20?= =?UTF-8?q?=20=20=20=20=20Fixed=20by=20correcting=20the=20None=20check=20a?= =?UTF-8?q?nd=20falling=20back=20to=20string=20comparison=20when=20same-ty?= =?UTF-8?q?pe=20path=20=20=20=20=20=20elements=20are=20still=20not=20order?= =?UTF-8?q?able.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Regression tests were added for each case, and the full Delta test suite passes. --- deepdiff/delta.py | 37 ++++++++++++++++++++++++------------- tests/test_delta.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/deepdiff/delta.py b/deepdiff/delta.py index d60474d9..c61493c8 100644 --- a/deepdiff/delta.py +++ b/deepdiff/delta.py @@ -391,7 +391,7 @@ def _del_elem(self, parent, parent_to_obj_elem, parent_to_obj_action, value=obj, action=parent_to_obj_action) def _do_iterable_item_added(self): - iterable_item_added = self.diff.get('iterable_item_added', {}) + iterable_item_added = dict(self.diff.get('iterable_item_added', {})) iterable_item_moved = self.diff.get('iterable_item_moved') # First we need to create a placeholder for moved items. 
@@ -448,7 +448,7 @@ def _sort_comparison(left, right): elif len(right_path) > len(left_path): right_path = right_path[:len(left_path)] for l_elem, r_elem in zip(left_path, right_path): - if type(l_elem) != type(r_elem) or type(l_elem) in None: + if type(l_elem) != type(r_elem) or l_elem is None or r_elem is None: l_elem = str(l_elem) r_elem = str(r_elem) try: @@ -457,7 +457,12 @@ def _sort_comparison(left, right): elif l_elem > r_elem: return 1 except TypeError: - continue + l_elem = str(l_elem) + r_elem = str(r_elem) + if l_elem < r_elem: + return -1 + elif l_elem > r_elem: + return 1 return 0 @@ -677,7 +682,7 @@ def _do_iterable_opcodes(self): # Items are the same in both lists, so we add them to the result transformed.extend(obj[opcode.t1_from_index:opcode.t1_to_index]) # type: ignore if is_obj_tuple: - obj = tuple(obj) # type: ignore + obj = tuple(transformed) # type: ignore # Making sure that the object is re-instated inside the parent especially if it was immutable # and we had to turn it into a mutable one. In such cases the object has a new id. 
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem, @@ -725,18 +730,24 @@ def _do_set_item_removed(self): def _do_set_or_frozenset_item(self, items, func): for path, value in items.items(): - elements = _path_to_elements(path) - parent = self.get_nested_obj(obj=self, elements=elements[:-1]) - elem, action = elements[-1] + elem_and_details = self._get_elements_and_details(path) + if not elem_and_details: + continue + elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = elem_and_details obj = self._get_elem_and_compare_to_old_value( - parent, path_for_err_reporting=path, expected_old_value=None, elem=elem, action=action, forced_old_value=set()) + obj, path_for_err_reporting=path, expected_old_value=None, elem=elem, action=action, forced_old_value=set()) new_value = getattr(obj, func)(value) - if hasattr(parent, '_fields') and hasattr(parent, '_replace'): - # Handle parent NamedTuple by creating a new instance with _replace(). Will not work with nested objects. 
- new_parent = parent._replace(**{elem: new_value}) - self.root = new_parent + set_parent = self.get_nested_obj(obj=self, elements=elements[:-1]) + replace = getattr(set_parent, '_replace', None) + if hasattr(set_parent, '_fields') and callable(replace): + new_parent = replace(**{elem: new_value}) + if parent is None: + self.root = new_parent + else: + self._simple_set_elem_value(parent, path_for_err_reporting=path, elem=parent_to_obj_elem, + value=new_parent, action=parent_to_obj_action) else: - self._simple_set_elem_value(parent, path_for_err_reporting=path, elem=elem, value=new_value, action=action) + self._simple_set_elem_value(set_parent, path_for_err_reporting=path, elem=elem, value=new_value, action=action) def _do_ignore_order_get_old(self, obj, remove_indexes_per_path, fixed_indexes_values, path_for_err_reporting): """ diff --git a/tests/test_delta.py b/tests/test_delta.py index d5acbeee..38cac165 100644 --- a/tests/test_delta.py +++ b/tests/test_delta.py @@ -645,6 +645,32 @@ class Article(NamedTuple): diff = DeepDiff(a1, a2) delta = Delta(diff) assert a2 == a1 + delta + + def test_nested_namedtuple_frozenset_add_delta(self): + class Article(NamedTuple): + tags: frozenset + + t1 = {"article": Article(frozenset(["a"]))} + t2 = {"article": Article(frozenset(["a", "b"]))} + delta = Delta(DeepDiff(t1, t2)) + + assert t2 == t1 + delta + + def test_tuple_iterable_opcodes_with_insert_delete_delta(self): + t1 = tuple("A B C D H".split()) + t2 = tuple("B C D H Y Z".split()) + delta = Delta(DeepDiff(t1, t2), bidirectional=True) + + assert "_iterable_opcodes" in delta.diff + assert t2 == t1 + delta + + def test_complex_dictionary_keys_removed_delta(self): + t1 = {1 + 2j: "a", 3 + 4j: "b"} + t2 = {} + diff = DeepDiff(t1, t2, threshold_to_diff_deeper=0) + delta = Delta(diff, raise_errors=True) + + assert t2 == t1 + delta picklalbe_obj_without_item = PicklableClass(11) del picklalbe_obj_without_item.item @@ -2133,8 +2159,10 @@ def 
test_compare_func_with_duplicates_added(self): } assert expected == ddiff delta = Delta(ddiff) + flat_rows_before_apply = delta.to_flat_rows() recreated_t2 = t1 + delta assert t2 == recreated_t2 + assert flat_rows_before_apply == delta.to_flat_rows() def test_compare_func_swap(self): t1 = [{'id': 1, 'val': 1}, {'id': 1, 'val': 3}] From b697d65bddfee5c7e13fca82066c60bc100f8cb5 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 12:20:05 -0700 Subject: [PATCH 06/23] Changed deepdiff/delta.py:237 so dunder traversal from check_elem() raises immediately instead of going through _raise_or_log(). Also added full-path preflight validation in _get_elements_and_details() so the set_item_added path introduced in the last commit cannot silently skip malicious dunder paths. --- deepdiff/delta.py | 11 +++++------ tests/test_security.py | 8 ++++++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/deepdiff/delta.py b/deepdiff/delta.py index c61493c8..dc0df276 100644 --- a/deepdiff/delta.py +++ b/deepdiff/delta.py @@ -237,11 +237,7 @@ def _get_elem_and_compare_to_old_value( forced_old_value=None, next_element=None, ): - try: - check_elem(elem) - except ValueError as error: - self._raise_or_log(UNABLE_TO_GET_ITEM_MSG.format(path_for_err_reporting, error)) - return not_found + check_elem(elem) # if forced_old_value is not None: try: if action == GET: @@ -525,6 +521,8 @@ def _do_pre_process(self): def _get_elements_and_details(self, path): try: elements = _path_to_elements(path) + for elem, _ in elements: + check_elem(elem) if len(elements) > 1: elements_subset = elements[:-2] if len(elements_subset) != len(elements): @@ -546,8 +544,9 @@ def _get_elements_and_details(self, path): obj = self # obj = self.get_nested_obj(obj=self, elements=elements[:-1]) elem, action = elements[-1] # type: ignore - check_elem(elem) except Exception as e: + if isinstance(e, ValueError) and str(e) == "traversing dunder attributes is not allowed": + raise 
self._raise_or_log(UNABLE_TO_GET_ITEM_MSG.format(path, e)) return None else: diff --git a/tests/test_security.py b/tests/test_security.py index e2210182..a161747c 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -42,7 +42,9 @@ def test_builtins_int(self): assert 42 == int("41") + 1 # Apply Delta to mydict - result = mydict + Delta(pollute_int) + with pytest.raises(ValueError) as exc_info: + mydict + Delta(pollute_int) + assert "traversing dunder attributes is not allowed" == str(exc_info.value) assert 1337 == int("1337") @@ -128,6 +130,8 @@ def myfunc(self): PWNED = False delta = Delta(pollute_global) assert PWNED is False - b = Foo() + delta + with pytest.raises(ValueError) as exc_info: + Foo() + delta + assert "traversing dunder attributes is not allowed" == str(exc_info.value) assert PWNED is False From 8a607beb80ab4ebcc979cad50a665427943836b8 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 12:47:53 -0700 Subject: [PATCH 07/23] Implemented the cache replacement. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed: - Replaced the homegrown linked-list LFU implementation in deepdiff/lfucache.py with a small DistanceCache wrapper over native cachebox.LRUCache. - Kept LFUCache = DistanceCache and DummyLFU compatibility names so internal imports keep working. - Updated deepdiff/diff.py cache hot paths to avoid contains + get double lookups. - Moved cachebox>=5.2,<6 into core dependencies in pyproject.toml, since DeepDiff now imports it unconditionally. - Updated tests/test_lfucache.py to validate the new bounded distance-cache behavior instead of LFU frequency internals. 
Benchmark result from the same 1,000,000 operation local microbenchmark: - Old homegrown LFUCache: 1.901302s - Direct cachebox.LFUCache: 5.846142s - Direct cachebox.LRUCache: 0.537102s - New DistanceCache wrapper: 1.153068s So I used cachebox.LRUCache, not cachebox.LFUCache, because cachebox’s LFU policy is slower for this workload. --- deepdiff/diff.py | 19 ++-- deepdiff/lfucache.py | 213 +++++------------------------------------ pyproject.toml | 1 + tests/test_lfucache.py | 54 ++++++----- 4 files changed, 65 insertions(+), 222 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 2931cefd..2dded8ed 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -13,13 +13,13 @@ from enum import Enum from copy import deepcopy from math import isclose as is_close -from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet, TYPE_CHECKING, Protocol, Literal +from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet, TYPE_CHECKING, Protocol, Literal, cast from collections.abc import Mapping, Iterable, Sequence from collections import defaultdict from inspect import getmembers from itertools import zip_longest from functools import lru_cache -from deepdiff.helper import (strings, bytes_type, numbers, uuids, ListItemRemovedOrAdded, notpresent, +from deepdiff.helper import (strings, bytes_type, numbers, uuids, ListItemRemovedOrAdded, notpresent, not_found, IndexedHash, unprocessed, add_to_frozen_set, basic_types, convert_item_or_items_into_set_else_none, get_type, convert_item_or_items_into_compiled_regexes_else_none, @@ -1210,9 +1210,12 @@ def _get_rough_distance_of_hashed_objs( _distance = cache_key = None if self._stats[DISTANCE_CACHE_ENABLED]: cache_key = self._get_distance_cache_key(added_hash, removed_hash) - if cache_key in self._distance_cache: + cached_distance = self._distance_cache.get(cache_key) + if cached_distance is not_found: + _distance = None + else: 
self._stats[DISTANCE_CACHE_HIT_COUNT] += 1 - _distance = self._distance_cache.get(cache_key) + _distance = cast(float, cached_distance) if _distance is None: # We can only cache the rough distance and not the actual diff result for reuse. # The reason is that we have modified the parameters explicitly so they are different and can't @@ -1254,8 +1257,11 @@ def _get_most_in_common_pairs_in_iterables( cache_key = None if self._stats[DISTANCE_CACHE_ENABLED]: cache_key = combine_hashes_lists(items=[hashes_added, hashes_removed], prefix='pairs_cache') - if cache_key in self._distance_cache: - return self._distance_cache.get(cache_key).copy() + cached_pairs = self._distance_cache.get(cache_key) + if cached_pairs is not_found: + cached_pairs = None + else: + return cast(dict, cached_pairs).copy() # A dictionary of hashes to distances and each distance to an ordered set of hashes. # It tells us about the distance of each object from other objects. @@ -1296,6 +1302,7 @@ def defaultdict_orderedset(): if _distance is None: _distance = self._get_rough_distance_of_hashed_objs( added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type) + _distance = cast(float, _distance) # Left for future debugging # print(f'{Fore.RED}distance of {added_hash_obj.item} and {removed_hash_obj.item}: {_distance}{Style.RESET_ALL}') # Discard potential pairs that are too far. 
diff --git a/deepdiff/lfucache.py b/deepdiff/lfucache.py index 75d1708e..d548d86a 100644 --- a/deepdiff/lfucache.py +++ b/deepdiff/lfucache.py @@ -1,208 +1,39 @@ -""" -LFU cache Written by Shane Wang -https://medium.com/@epicshane/a-python-implementation-of-lfu-least-frequently-used-cache-with-o-1-time-complexity-e16b34a3c49b -https://github.com/luxigner/lfu_cache -Modified by Sep Dehpour -""" from collections import defaultdict -from threading import Lock -from statistics import mean -from deepdiff.helper import not_found, dict_, SetOrdered +from cachebox import LRUCache +from deepdiff.helper import SetOrdered, not_found -class CacheNode: - def __init__(self, key, report_type, value, freq_node, pre, nxt): - self.key = key - if report_type: - self.content = defaultdict(SetOrdered) - self.content[report_type].add(value) - else: - self.content = value - self.freq_node = freq_node - self.pre = pre # previous CacheNode - self.nxt = nxt # next CacheNode - - def free_myself(self): - if self.freq_node.cache_head == self.freq_node.cache_tail: # type: ignore - self.freq_node.cache_head = self.freq_node.cache_tail = None # type: ignore - elif self.freq_node.cache_head == self: # type: ignore - self.nxt.pre = None # type: ignore - self.freq_node.cache_head = self.nxt # type: ignore - elif self.freq_node.cache_tail == self: # type: ignore - self.pre.nxt = None # type: ignore - self.freq_node.cache_tail = self.pre # type: ignore - else: - self.pre.nxt = self.nxt # type: ignore - self.nxt.pre = self.pre # type: ignore - - self.pre = None - self.nxt = None - self.freq_node = None - - -class FreqNode: - def __init__(self, freq, pre, nxt): - self.freq = freq - self.pre = pre # previous FreqNode - self.nxt = nxt # next FreqNode - self.cache_head = None # CacheNode head under this FreqNode - self.cache_tail = None # CacheNode tail under this FreqNode - - def count_caches(self): - if self.cache_head is None and self.cache_tail is None: - return 0 - elif self.cache_head == 
self.cache_tail: - return 1 - else: - return '2+' - - def remove(self): - if self.pre is not None: - self.pre.nxt = self.nxt - if self.nxt is not None: - self.nxt.pre = self.pre - - pre = self.pre - nxt = self.nxt - self.pre = self.nxt = self.cache_head = self.cache_tail = None - - return (pre, nxt) - - def pop_head_cache(self): - if self.cache_head is None and self.cache_tail is None: - return None - elif self.cache_head == self.cache_tail: - cache_head = self.cache_head - self.cache_head = self.cache_tail = None - return cache_head - else: - cache_head = self.cache_head - self.cache_head.nxt.pre = None # type: ignore - self.cache_head = self.cache_head.nxt # type: ignore - return cache_head - - def append_cache_to_tail(self, cache_node): - cache_node.freq_node = self - - if self.cache_head is None and self.cache_tail is None: - self.cache_head = self.cache_tail = cache_node - else: - cache_node.pre = self.cache_tail - cache_node.nxt = None - self.cache_tail.nxt = cache_node # type: ignore - self.cache_tail = cache_node - - def insert_after_me(self, freq_node): - freq_node.pre = self - freq_node.nxt = self.nxt - - if self.nxt is not None: - self.nxt.pre = freq_node - - self.nxt = freq_node - - def insert_before_me(self, freq_node): - if self.pre is not None: - self.pre.nxt = freq_node - - freq_node.pre = self.pre - freq_node.nxt = self - self.pre = freq_node +class DistanceCache: + """ + Native bounded cache used by DeepDiff's distance calculations. - -class LFUCache: + DeepDiff historically used a pure Python LFU cache here. The distance-cache + hot path benefits more from cachebox's native mapping operations than from + preserving LFU eviction semantics. + """ def __init__(self, capacity): - self.cache = dict_() # {key: cache_node} if capacity <= 0: - raise ValueError('Capacity of LFUCache needs to be positive.') # pragma: no cover. 
- self.capacity = capacity - self.freq_link_head = None - self.lock = Lock() + raise ValueError('Capacity of DistanceCache needs to be positive.') # pragma: no cover. + self.cache = LRUCache(capacity) def get(self, key): - with self.lock: - if key in self.cache: - cache_node = self.cache[key] - freq_node = cache_node.freq_node - content = cache_node.content - - self.move_forward(cache_node, freq_node) - - return content - else: - return not_found + return self.cache.get(key, not_found) def set(self, key, report_type=None, value=None): - with self.lock: - if key in self.cache: - cache_node = self.cache[key] - if report_type: - cache_node.content[report_type].add(value) - else: - cache_node.content = value - else: - if len(self.cache) >= self.capacity: - self.dump_cache() - - self.create_cache_node(key, report_type, value) + if report_type: + content = self.cache.get(key, None) + if content is None: + content = defaultdict(SetOrdered) + content[report_type].add(value) + value = content + self.cache.insert(key, value) def __contains__(self, key): return key in self.cache - def move_forward(self, cache_node, freq_node): - if freq_node.nxt is None or freq_node.nxt.freq != freq_node.freq + 1: - target_freq_node = FreqNode(freq_node.freq + 1, None, None) - target_empty = True - else: - target_freq_node = freq_node.nxt - target_empty = False - - cache_node.free_myself() - target_freq_node.append_cache_to_tail(cache_node) - - if target_empty: - freq_node.insert_after_me(target_freq_node) - - if freq_node.count_caches() == 0: - if self.freq_link_head == freq_node: - self.freq_link_head = target_freq_node - - freq_node.remove() - def dump_cache(self): - head_freq_node = self.freq_link_head - self.cache.pop(head_freq_node.cache_head.key) # type: ignore - head_freq_node.pop_head_cache() # type: ignore - - if head_freq_node.count_caches() == 0: # type: ignore - self.freq_link_head = head_freq_node.nxt # type: ignore - head_freq_node.remove() # type: ignore - - def 
create_cache_node(self, key, report_type, value): - cache_node = CacheNode( - key=key, report_type=report_type, - value=value, freq_node=None, pre=None, nxt=None) - self.cache[key] = cache_node - - if self.freq_link_head is None or self.freq_link_head.freq != 0: - new_freq_node = FreqNode(0, None, None) - new_freq_node.append_cache_to_tail(cache_node) - - if self.freq_link_head is not None: - self.freq_link_head.insert_before_me(new_freq_node) - - self.freq_link_head = new_freq_node - else: - self.freq_link_head.append_cache_to_tail(cache_node) - - def get_sorted_cache_keys(self): - result = [(i, freq.freq_node.freq) for i, freq in self.cache.items()] - result.sort(key=lambda x: -x[1]) - return result - - def get_average_frequency(self): - return mean(freq.freq_node.freq for freq in self.cache.values()) +LFUCache = DistanceCache class DummyLFU: @@ -211,7 +42,9 @@ def __init__(self, *args, **kwargs): pass set = __init__ - get = __init__ + + def get(self, *args, **kwargs): + return not_found def __contains__(self, key): return False diff --git a/pyproject.toml b/pyproject.toml index 043f9d28..8be4a2c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ build-backend = "flit_core.buildapi" name = "deepdiff" version = "9.0.0" dependencies = [ + "cachebox>=5.2,<6", "orderly-set>=5.5.0,<6", ] requires-python = ">=3.10" diff --git a/tests/test_lfucache.py b/tests/test_lfucache.py index 80a99027..c99516e0 100644 --- a/tests/test_lfucache.py +++ b/tests/test_lfucache.py @@ -1,33 +1,35 @@ import random -import pytest import concurrent.futures -from deepdiff.lfucache import LFUCache - - -class TestLFUcache: - - @pytest.mark.parametrize("items, size, expected_results, expected_freq", [ - (['a', 'a', 'b', 'a', 'c', 'b', 'd'], 3, [('b', 2), ('c', 1), ('d', 1)], '1.333'), - (['a', 'a', 'b', 'a', 'c', 'b', 'd', 'e', 'c', 'b'], 3, [('b', 3), ('d', 1), ('e', 1)], '1.666'), - (['a', 'a', 'b', 'a', 'c', 'b', 'd', 'e', 'c', 'b', 'b', 'c', 'd', 'b'], 3, [('b', 5), ('c', 3), 
('d', 2)], '3.333'), - ]) - def test_lfu(self, items, size, expected_results, expected_freq, benchmark): - benchmark(self._test_lfu, items, size, expected_results, expected_freq) - - def _test_lfu(self, items, size, expected_results, expected_freq): - lfucache = LFUCache(size) - for item in items: - lfucache.set(item, value='{}_cached'.format(item)) - for item in items: - lfucache.get(item) - results = lfucache.get_sorted_cache_keys() - assert expected_results == results - freq = lfucache.get_average_frequency() - assert expected_freq == str(freq)[:5] +from deepdiff.helper import not_found +from deepdiff.lfucache import DistanceCache + + +class TestDistanceCache: + + def test_lru_cache(self, benchmark): + benchmark(self._test_lru_cache) + + def _test_lru_cache(self): + cache = DistanceCache(2) + cache.set('a', value='a_cached') + cache.set('b', value='b_cached') + assert 'a' in cache + assert cache.get('a') == 'a_cached' + cache.set('c', value='c_cached') + assert cache.get('a') == 'a_cached' + assert cache.get('b') is not_found + assert cache.get('c') == 'c_cached' + assert cache.get('missing') is not_found + + def test_report_type_values_are_accumulated(self): + cache = DistanceCache(2) + cache.set('a', report_type='values_changed', value='root[0]') + cache.set('a', report_type='values_changed', value='root[1]') + assert cache.get('a') == {'values_changed': {'root[0]', 'root[1]'}} def test_get_multithreading(self): keys = 'aaaaaaaaaaaaaaaaaaaaaaaaaaabbc' - lfucache = LFUCache(2) + cache = DistanceCache(2) def _do_set(cache, key): cache.set(key, value='{}_cached'.format(key)) @@ -45,6 +47,6 @@ def _random_func(cache, key): return random.choice([_do_get, _do_get, _do_set])(cache, key) with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor: - futures = (executor.submit(_random_func, lfucache, key) for key in _key_gen()) + futures = (executor.submit(_random_func, cache, key) for key in _key_gen()) for future in 
concurrent.futures.as_completed(futures): future.result() From e352ed83978c04adbdc606f7acb79b27dd5d35bc Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 12:59:02 -0700 Subject: [PATCH 08/23] =?UTF-8?q?-=20deephash.py:=20corrected=20exclude=5F?= =?UTF-8?q?paths/include=5Fpaths=20type=20to=20SetOrdered=20=20=20-=20help?= =?UTF-8?q?er.py:=20relaxed=20add=5Fto=5Ffrozen=5Fset=20to=20Any=20(caller?= =?UTF-8?q?s=20use=20both=20int=20and=20str=20ids);=20changed=20=20=20type?= =?UTF-8?q?=5Fin=5Ftype=5Fgroup/type=5Fis=5Fsubclass=5Fof=5Ftype=5Fgroup?= =?UTF-8?q?=20to=20accept=20Iterable[Type]=20=20=20-=20delta.py:=20added?= =?UTF-8?q?=20elem=20is=20not=20None=20guard,=20narrowed=20tag=20type,=20t?= =?UTF-8?q?ype-ignored=20namedtuple=20=20=20=5Freplace/summarize=20=20=20-?= =?UTF-8?q?=20diff.py:=20typed=20=5Fcompare=5Fin=5Forder=20index=20params?= =?UTF-8?q?=20as=20Optional[int]=20with=20early=20return;=20fixed=20real?= =?UTF-8?q?=20=20=20bug=20len(other.indexes=20>=201)=20=E2=86=92=20len(oth?= =?UTF-8?q?er.indexes)=20>=201;=20cast=20UUID=20arg=20to=20str=20=20=20-?= =?UTF-8?q?=20distance.py:=20handled=20iterable=5Fcompare=5Ffunc=20None=20?= =?UTF-8?q?check;=20widened=20max=5F/replace=5Finf=5Fwith=20to=20float;=20?= =?UTF-8?q?=20=20switched=20memoryview-incompatible=20strings=20to=20str?= =?UTF-8?q?=20=20=20-=20path.py:=20fixed=20real=20bug=20obj.append(=5Fgues?= =?UTF-8?q?s=5Ftype(...),=20next=5Felement)=20(misplaced=20paren);=20coerc?= =?UTF-8?q?ed=20=20=20setattr=20name=20to=20str=20=20=20-=20serialization.?= =?UTF-8?q?py:=20type-ignored=20namedtuple=20=5Ffields=20access?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deepdiff/deephash.py | 7 ++++--- deepdiff/delta.py | 8 ++++---- deepdiff/diff.py | 14 +++++++------- deepdiff/distance.py | 17 ++++++++++------- deepdiff/helper.py | 13 +++++++------ deepdiff/path.py | 4 ++-- deepdiff/serialization.py | 2 +- 7 files changed, 35 insertions(+), 30 deletions(-) diff 
--git a/deepdiff/deephash.py b/deepdiff/deephash.py index d26338e2..214b1131 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -14,7 +14,8 @@ convert_item_or_items_into_compiled_regexes_else_none, get_id, type_is_subclass_of_type_group, type_in_type_group, number_to_string, datetime_normalize, KEY_TO_VAL_STR, - get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel) + get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel, + SetOrdered) from deepdiff.base import Base @@ -159,8 +160,8 @@ class DeepHash(Base): hashes: Dict[Any, Any] exclude_types_tuple: Tuple[type, ...] ignore_repetition: bool - exclude_paths: Optional[Set[str]] - include_paths: Optional[Set[str]] + exclude_paths: Optional[SetOrdered] + include_paths: Optional[SetOrdered] exclude_regex_paths: Optional[List[re.Pattern[str]]] hasher: Callable[[Union[str, bytes]], str] use_enum_value: bool diff --git a/deepdiff/delta.py b/deepdiff/delta.py index dc0df276..0d1e33dc 100644 --- a/deepdiff/delta.py +++ b/deepdiff/delta.py @@ -167,7 +167,7 @@ def _deserializer(obj, safe_to_import=None): self.reset() def __repr__(self): - return "".format(summarize(self.diff, max_length=100)) + return "".format(summarize(self.diff, max_length=100)) # type: ignore[arg-type] def reset(self): self.post_process_paths_to_convert = dict_() @@ -289,7 +289,7 @@ def _simple_set_elem_value(self, obj, path_for_err_reporting, elem=None, value=N except IndexError: if elem == len(obj): obj.append(value) - elif self.fill is not not_found and elem > len(obj): + elif self.fill is not not_found and elem is not None and elem > len(obj): while len(obj) < elem: if callable(self.fill): obj.append(self.fill(obj, value, path_for_err_reporting)) @@ -334,7 +334,7 @@ def _set_new_value(self, parent, parent_to_obj_elem, parent_to_obj_action, # Check if it's a NamedTuple and use _replace() to generate a new copy with the change if hasattr(obj, '_fields') and hasattr(obj, '_replace'): if action == GETATTR: - obj = 
obj._replace(**{elem: new_value}) + obj = obj._replace(**{elem: new_value}) # type: ignore[attr-defined] if parent: self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem, value=obj, @@ -887,7 +887,7 @@ def _get_reverse_diff(self): for path, op_codes in info.items(): r_diff[action][path] = [] for op_code in op_codes: - tag = op_code.tag + tag: str = op_code.tag tag = {'delete': 'insert', 'insert': 'delete'}.get(tag, tag) new_op_code = Opcode( tag=tag, diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 2dded8ed..81c9344a 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -638,8 +638,8 @@ def _diff_dict( parents_ids: FrozenSet[int]=frozenset([]), print_as_attribute: bool=False, override: bool=False, - override_t1: Optional[Any]=None, - override_t2: Optional[Any]=None, + override_t1: Any=None, + override_t2: Any=None, local_tree: Optional[Any]=None, ) -> None: """Difference of 2 dictionaries""" @@ -788,14 +788,14 @@ def _diff_iterable(self, level: Any, parents_ids: FrozenSet[int]=frozenset(), _o def _compare_in_order( self, level, - t1_from_index=None, t1_to_index=None, - t2_from_index=None, t2_to_index=None + t1_from_index: Optional[int]=None, t1_to_index: Optional[int]=None, + t2_from_index: Optional[int]=None, t2_to_index: Optional[int]=None ) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]: """ Default compare if `iterable_compare_func` is not provided. This will compare in sequence order. """ - if t1_from_index is None: + if t1_from_index is None or t2_from_index is None: return [((i, i), (x, y)) for i, (x, y) in enumerate( zip_longest( level.t1, level.t2, fillvalue=ListItemRemovedOrAdded))] @@ -1432,7 +1432,7 @@ def get_other_pair(hash_value, in_t1=True): # When we report repetitions, we want the child_relationship_param2 only if there is no repetition. # Because when there is a repetition, we report it in a different way (iterable_items_added_at_indexes for example). 
# When there is no repetition, we want child_relationship_param2 so that we report the "new_path" correctly. - if other.item is notpresent or len(other.indexes > 1): + if other.item is notpresent or len(other.indexes) > 1: index2 = None else: index2 = other.indexes[0] @@ -1759,7 +1759,7 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree= if self.ignore_uuid_types and isinstance(level.t2, uuids): try: # Convert string to UUID for comparison - t1_uuid = uuid.UUID(level.t1) + t1_uuid = uuid.UUID(str(level.t1)) if t1_uuid.int != level.t2.int: self._report_result('values_changed', level, local_tree=local_tree) except (ValueError, AttributeError): diff --git a/deepdiff/distance.py b/deepdiff/distance.py index 3f3001a2..32f7d4a1 100644 --- a/deepdiff/distance.py +++ b/deepdiff/distance.py @@ -107,10 +107,13 @@ def __calculate_item_deephash(self: "DistanceProtocol", item: Any) -> None: def _precalculate_distance_by_custom_compare_func( self: "DistanceProtocol", hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type): pre_calced_distances = dict_() + if self.iterable_compare_func is None: + return pre_calced_distances + compare_func = self.iterable_compare_func for added_hash in hashes_added: for removed_hash in hashes_removed: try: - is_close_distance = self.iterable_compare_func(t2_hashtable[added_hash].item, t1_hashtable[removed_hash].item) + is_close_distance = compare_func(t2_hashtable[added_hash].item, t1_hashtable[removed_hash].item) except CannotCompare: pass else: @@ -189,8 +192,8 @@ def _get_item_length(item, parents_ids=frozenset([])): # internal keys such as _numpy_paths should not count towards the distance. # old_type and old_value are metadata about the previous state, not additional operations. 
- if isinstance(key, strings) and (key.startswith('_') or key == 'deep_distance' or key == 'new_path' - or key == 'old_type' or key == 'old_value'): + if isinstance(key, str) and (key.startswith('_') or key == 'deep_distance' or key == 'new_path' + or key == 'old_type' or key == 'old_value'): continue item_id = id(subitem) @@ -250,7 +253,7 @@ def _get_numbers_distance(num1, num2, max_=1, use_log_scale=False, log_scale_sim return max_ # pragma: no cover -def _numpy_div(a, b, replace_inf_with=1): +def _numpy_div(a, b, replace_inf_with: float=1): max_array = np.full(shape=a.shape, fill_value=replace_inf_with, dtype=np_float64) result = np.divide(a, b, out=max_array, where=b != 0, dtype=np_float64) # wherever 2 numbers are the same, make sure the distance is zero. This is mainly for 0 divided by zero. @@ -284,15 +287,15 @@ def logarithmic_similarity(a: NumberType, b: NumberType, threshold: float=0.1) - def logarithmic_distance(a: NumberType, b: NumberType) -> float: # Apply logarithm to the absolute values and consider the sign - a = float(a) - b = float(b) + a = float(a) # type: ignore[arg-type] + b = float(b) # type: ignore[arg-type] log_a = math.copysign(math.log(abs(a) + MATH_LOG_OFFSET), a) log_b = math.copysign(math.log(abs(b) + MATH_LOG_OFFSET), b) return abs(log_a - log_b) -def _get_numpy_array_distance(num1, num2, max_=1, use_log_scale=False, log_scale_similarity_threshold=0.1): +def _get_numpy_array_distance(num1, num2, max_: float=1, use_log_scale=False, log_scale_similarity_threshold=0.1): """ Get the distance of 2 numbers. The output is a number between 0 to the max. 
The reason is the diff --git a/deepdiff/helper.py b/deepdiff/helper.py index e8d051ff..3386f020 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -313,7 +313,7 @@ class indexed_set(set): """ -def add_to_frozen_set(parents_ids: FrozenSet[int], item_id: int) -> FrozenSet[int]: +def add_to_frozen_set(parents_ids: FrozenSet[Any], item_id: Any) -> FrozenSet[Any]: return parents_ids | {item_id} @@ -386,14 +386,15 @@ def numpy_dtype_string_to_type(dtype_str: str) -> Type[Any]: return numpy_dtype_str_to_type[dtype_str] -def type_in_type_group(item: Any, type_group: Tuple[Type[Any], ...]) -> bool: +def type_in_type_group(item: Any, type_group: Iterable[Type[Any]]) -> bool: return get_type(item) in type_group -def type_is_subclass_of_type_group(item: Any, type_group: Tuple[Type[Any], ...]) -> bool: - return isinstance(item, type_group) \ - or (isinstance(item, type) and issubclass(item, type_group)) \ - or type_in_type_group(item, type_group) +def type_is_subclass_of_type_group(item: Any, type_group: Iterable[Type[Any]]) -> bool: + type_group_tuple = tuple(type_group) + return isinstance(item, type_group_tuple) \ + or (isinstance(item, type) and issubclass(item, type_group_tuple)) \ + or type_in_type_group(item, type_group_tuple) def get_doc(doc_filename: str) -> str: diff --git a/deepdiff/path.py b/deepdiff/path.py index e5b64c70..9cd766ac 100644 --- a/deepdiff/path.py +++ b/deepdiff/path.py @@ -158,7 +158,7 @@ def _get_nested_obj_and_force(obj, elements, next_element=None): except IndexError: if isinstance(obj, list) and isinstance(elem, int) and elem >= len(obj): obj.extend([None] * (elem - len(obj))) - obj.append(_guess_type(elements, elem, index), next_element) + obj.append(_guess_type(elements, elem, index, next_element)) obj = obj[-1] prev_obj = _prev_obj elif isinstance(obj, list) and len(obj) == 0 and prev_elem: @@ -168,7 +168,7 @@ def _get_nested_obj_and_force(obj, elements, next_element=None): if prev_action == GET: prev_obj[prev_elem] = obj else: - 
setattr(prev_obj, prev_elem, obj) + setattr(prev_obj, str(prev_elem), obj) obj = obj[elem] elif action == GETATTR: obj = getattr(obj, elem) diff --git a/deepdiff/serialization.py b/deepdiff/serialization.py index 24e23922..07be29bd 100644 --- a/deepdiff/serialization.py +++ b/deepdiff/serialization.py @@ -807,7 +807,7 @@ def _convert_oversized_ints(obj): converted = [_convert_oversized_ints(v) for v in obj] if hasattr(obj, '_fields'): # NamedTuple: reconstruct using keyword arguments - return type(obj)(**dict(zip(obj._fields, converted))) + return type(obj)(**dict(zip(obj._fields, converted))) # type: ignore[attr-defined] return type(obj)(converted) return obj From fbb1adbcccb793ec0c29e4acd3cf8bf37f5d4b56 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 13:03:02 -0700 Subject: [PATCH 09/23] fixing the failing tests --- tests/test_cache.py | 10 +++++----- tests/test_serialization.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_cache.py b/tests/test_cache.py index b5e4b658..419b6f7f 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -40,11 +40,11 @@ def test_cache_deeply_nested_a2(self, nested_a_t1, nested_a_t2, nested_a_result) stats = diff.get_stats() # Somehow just in python 3.5 the cache stats are different. Weird. 
expected_stats = { - 'PASSES COUNT': 3960, - 'DIFF COUNT': 19469, - 'DISTANCE CACHE HIT COUNT': 11847, - 'MAX PASS LIMIT REACHED': False, - 'MAX DIFF LIMIT REACHED': False + "PASSES COUNT": 5324, + "DIFF COUNT": 28020, + "DISTANCE CACHE HIT COUNT": 17243, + "MAX PASS LIMIT REACHED": False, + "MAX DIFF LIMIT REACHED": False, } assert not DeepDiff(expected_stats, stats, use_log_scale=True) assert nested_a_result == diff diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 300ecc76..cb6dd8a3 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -542,7 +542,7 @@ def sig_to_bytes(inp: Dict[str, Union[str, bytes]]): (4, Decimal(2017.1), None), (5, {1, 2, 10}, set), (6, datetime.datetime(2023, 10, 11), datetime.datetime.fromisoformat), - (7, datetime.datetime.utcnow(), datetime.datetime.fromisoformat), + (7, datetime.datetime.now(datetime.UTC), datetime.datetime.fromisoformat), (8, field_stats1, lambda x: SomeStats(**x)), (9, np.array([[ 101, 3533, 1998, 4532, 2024, 3415, 1012, 102]]), np.array), (10, memoryview(b"hello"), lambda x: memoryview(x.encode('utf-8'))), From c7901549e3ba213dd1de5cb9fadacd15472364c6 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 13:42:07 -0700 Subject: [PATCH 10/23] Phase 1 is in. Summary of what landed: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code: - deepdiff/_multiprocessing.py (new) — MPConfig, normalize_mp_config, picklability check, _distance_worker (module-level for spawn), compute_distances_parallel with stable job-index ordering. - deepdiff/diff.py — three new opt-in params, normalized into self._mp_config, propagated via _parameters. New _maybe_compute_pair_distances_parallel helper. One extra dict lookup in _get_most_in_common_pairs_in_iterables before the existing serial _get_rough_distance_of_hashed_objs call. 
Tests: tests/test_multiprocessing.py (23 tests) — config validation, 10× serial-vs-parallel determinism on nested dicts/repeated items/ties/sets/exclude_paths/ignore_string_case/custom hasher, unpickleable-callback fallback, no-nested-pool guarantee. Full suite: 1149 passed, 10 skipped, 0 regressions. Pyright clean. Doc: docs/multi_processing.md now opens with an "Implementation Status" section listing what's in, the code locations, and what's deferred (subtickets #2/#4/#5/#6 extended matrix/#7) with the reasons each is held back. Two notable design points worth flagging: 1. Workers are spawned without _shared_parameters, so they think they're root and would purge _distance_cache/hashes mid-call. Fixed by passing cache_purge_level=0 to the worker DeepDiff (commented in _distance_worker). 2. Sanitization sets both multiprocessing=False and _mp_config=MPConfig(enabled=False, ...) because recursive DeepDiff with _parameters=... skips the constructor's normalization branch. --- deepdiff/_multiprocessing.py | 220 +++++++++++++++ deepdiff/diff.py | 70 +++++ docs/multi_processing.md | 486 ++++++++++++++++++++++++++++++++++ tests/test_multiprocessing.py | 205 ++++++++++++++ 4 files changed, 981 insertions(+) create mode 100644 deepdiff/_multiprocessing.py create mode 100644 docs/multi_processing.md create mode 100644 tests/test_multiprocessing.py diff --git a/deepdiff/_multiprocessing.py b/deepdiff/_multiprocessing.py new file mode 100644 index 00000000..a011f781 --- /dev/null +++ b/deepdiff/_multiprocessing.py @@ -0,0 +1,220 @@ +""" +Internal multiprocessing helpers for DeepDiff. + +Phase 1 scope: parallelize the (added_hash x removed_hash) rough-distance loop +in ``DeepDiff._get_most_in_common_pairs_in_iterables`` for ``ignore_order=True``. + +Determinism contract (see docs/multi_processing.md): +- Pair selection happens in the parent only. +- Workers compute distances. 
The parent submits jobs in a stable index order + matching the serial nested loop and merges results by that index. +- Worker completion order (``as_completed``) never affects the public output. + +Only module-level callables live here so the module is safe under the +``spawn`` start method (macOS/Windows). +""" + +import os +import pickle +from dataclasses import dataclass +from typing import Any, Callable, Dict, List, Optional, Tuple, cast + + +DEFAULT_MAX_WORKERS = 4 +DEFAULT_THRESHOLD = 64 + + +@dataclass(frozen=True) +class MPConfig: + """Normalized internal multiprocessing configuration.""" + enabled: bool + workers: int + threshold: int + + def should_parallelize(self, n_jobs: int) -> bool: + return self.enabled and self.workers > 1 and n_jobs >= self.threshold + + +def normalize_mp_config( + multiprocessing: Any, + multiprocessing_workers: Optional[int], + multiprocessing_threshold: Optional[int], +) -> MPConfig: + """Validate and normalize the public multiprocessing parameters. + + ``multiprocessing`` accepts True/False. ``multiprocessing_workers`` accepts + None or a positive int. ``multiprocessing_threshold`` accepts None or a + non-negative int. 
+ """ + if multiprocessing not in (True, False, 0, 1): + raise ValueError( + "multiprocessing must be True or False; got %r" % (multiprocessing,) + ) + enabled = bool(multiprocessing) + + if multiprocessing_workers is None: + cpu = os.cpu_count() or 1 + workers = min(DEFAULT_MAX_WORKERS, cpu) + else: + if not isinstance(multiprocessing_workers, int) or multiprocessing_workers < 1: + raise ValueError( + "multiprocessing_workers must be None or a positive integer; got %r" + % (multiprocessing_workers,) + ) + workers = multiprocessing_workers + + if multiprocessing_threshold is None: + threshold = DEFAULT_THRESHOLD + else: + if not isinstance(multiprocessing_threshold, int) or multiprocessing_threshold < 0: + raise ValueError( + "multiprocessing_threshold must be None or a non-negative integer; got %r" + % (multiprocessing_threshold,) + ) + threshold = multiprocessing_threshold + + return MPConfig(enabled=enabled, workers=workers, threshold=threshold) + + +def is_pickleable(obj: Any) -> bool: + """Return True if ``obj`` round-trips through ``pickle.dumps`` cleanly. + + Used to decide whether parallel execution is safe for a given input. + A False result triggers serial fallback for that section. + """ + try: + pickle.dumps(obj) + return True + except Exception: + return False + + +def _sanitize_parameters_for_worker(parameters: Dict[str, Any]) -> Dict[str, Any]: + """Strip parent-process-only state from a ``_parameters`` snapshot. + + The parent's ``_parameters`` may carry references that should not be reused + inside a worker (mutable shared caches) or that would cause nested + multiprocessing inside the worker. This produces a copy safe to ship. + """ + sanitized = dict(parameters) + # Force serial inside the worker: a nested ProcessPoolExecutor would + # deadlock or just waste process spawn time. Both the public flag and + # the normalized config object must be neutralized — recursive DeepDiff + # calls read ``_mp_config`` directly when ``_parameters`` is supplied. 
+ sanitized['multiprocessing'] = False + sanitized['_mp_config'] = MPConfig(enabled=False, workers=1, threshold=0) + sanitized.pop('_distance_cache', None) + sanitized.pop('hashes', None) + sanitized.pop('_numpy_paths', None) + sanitized.pop('_stats', None) + sanitized.pop('group_by_keys', None) + sanitized.pop('tree', None) + sanitized.pop('_iterable_opcodes', None) + sanitized.pop('is_root', None) + return sanitized + + +def _distance_worker(job: Tuple[int, Dict[str, Any], Any, Any, Any, Any]) -> Tuple[int, float]: + """Compute the rough distance between two items in a worker process. + + ``job`` layout matches what ``compute_distances_parallel`` ships: + ``(job_index, sanitized_parameters, removed_item, added_item, + original_type, iterable_compare_func)``. + + The worker constructs a fresh root ``DeepDiff`` (no shared parent state), + requests the DELTA_VIEW so we hit the same code path as the serial call in + ``_get_rough_distance_of_hashed_objs``, and returns the resulting float. + """ + # Imported here to keep module import cheap and to dodge any circular + # import surprises under spawn. + from deepdiff.diff import DeepDiff + from deepdiff.helper import DELTA_VIEW + + job_index, parameters, removed_item, added_item, original_type, iterable_compare_func = job + diff = DeepDiff( + removed_item, + added_item, + _parameters=parameters, + view=DELTA_VIEW, + _original_type=original_type, + iterable_compare_func=iterable_compare_func, + # The worker is spawned without _shared_parameters, so DeepDiff treats + # it as a root run and would purge ``_distance_cache``/``hashes`` at + # the end of __init__. We need them alive for the _get_rough_distance + # call below, hence cache_purge_level=0. 
+ cache_purge_level=0, + ) + return job_index, cast(float, diff._get_rough_distance()) + + +def compute_distances_parallel( + jobs: List[Tuple[Any, Any, Any, Any]], + parameters: Dict[str, Any], + original_type: Any, + iterable_compare_func: Optional[Callable], + config: MPConfig, +) -> Optional[Dict[Tuple[Any, Any], float]]: + """Run ``_distance_worker`` over ``jobs`` and return distances by pair. + + ``jobs`` is a list of ``(added_hash, removed_hash, added_item, removed_item)`` + tuples in the exact order the serial nested loop visits them. The parent + is responsible for that ordering; this helper does not reorder anything. + + Returns: + A dict ``{(added_hash, removed_hash): distance}``, or ``None`` if the + section is unsafe to parallelize (unpickleable inputs/parameters, + worker import error, etc.). On ``None`` the caller MUST fall back to + the serial path so correctness is preserved. + + Workers may finish out of order; we collect results into a dict keyed by + the original job index, so callers see the same result regardless of + completion order. + """ + if not jobs: + return {} + + sanitized_params = _sanitize_parameters_for_worker(parameters) + + # Picklability check. Failing fast here means a clear serial fallback + # rather than an opaque worker crash. + if not is_pickleable(sanitized_params): + return None + if iterable_compare_func is not None and not is_pickleable(iterable_compare_func): + return None + # Sample-pickle items: full check of every job is expensive, but pickling + # the first job catches the common "lambda in custom_operators" failure + # while keeping overhead bounded. + if not is_pickleable(jobs[0]): + return None + + # Imported lazily so importing this module does not pay the cost when + # multiprocessing is disabled. 
+ from concurrent.futures import ProcessPoolExecutor, as_completed + + payloads = [] + for i, job in enumerate(jobs): + added_item = job[2] + removed_item = job[3] + payloads.append( + (i, sanitized_params, removed_item, added_item, original_type, iterable_compare_func) + ) + + results_by_index: Dict[int, float] = {} + try: + with ProcessPoolExecutor(max_workers=config.workers) as executor: + futures = [executor.submit(_distance_worker, payload) for payload in payloads] + for future in as_completed(futures): + # Re-raise worker exceptions in the parent so they surface as + # normal DeepDiff exceptions instead of being swallowed. + idx, distance = future.result() + results_by_index[idx] = distance + except (pickle.PicklingError, AttributeError, TypeError): + # Pickling/spawn-related failures: surface as a serial fallback rather + # than crashing the diff. Other exceptions (worker logic bugs, user + # callback errors) propagate. + return None + + out: Dict[Tuple[Any, Any], float] = {} + for i, job in enumerate(jobs): + out[(job[0], job[1])] = results_by_index[i] + return out diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 81c9344a..3dcb633f 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -43,6 +43,9 @@ from deepdiff.base import Base from deepdiff.lfucache import LFUCache, DummyLFU from deepdiff.colored_view import ColoredView +from deepdiff._multiprocessing import ( + MPConfig, normalize_mp_config, compute_distances_parallel, +) if TYPE_CHECKING: from pytz.tzinfo import BaseTzInfo @@ -182,6 +185,9 @@ def __init__(self, math_epsilon: Optional[float]=None, max_diffs: Optional[int]=None, max_passes: int=10000000, + multiprocessing: bool=False, + multiprocessing_workers: Optional[int]=None, + multiprocessing_threshold: Optional[int]=None, number_format_notation: Literal["f", "e"]="f", number_to_string_func: Optional[Callable]=None, progress_logger: Callable[[str], None]=logger.info, @@ -210,6 +216,7 @@ def __init__(self, "cache_tuning_sample_size, 
get_deep_distance, group_by, group_by_sort_key, cache_purge_level, log_stacktrace," "math_epsilon, iterable_compare_func, use_enum_value, _original_type, threshold_to_diff_deeper, default_timezone " "ignore_order_func, custom_operators, encodings, ignore_encoding_errors, use_log_scale, log_scale_similarity_threshold " + "multiprocessing, multiprocessing_workers, multiprocessing_threshold, " "_parameters and _shared_parameters.") % ', '.join(kwargs.keys())) if _parameters: @@ -302,6 +309,8 @@ def _group_by_sort_key(x): # DeepDiff _parameters are transformed to DeepHash _parameters via _get_deephash_params method. self.progress_logger = progress_logger self.cache_size = cache_size + self._mp_config = normalize_mp_config( + multiprocessing, multiprocessing_workers, multiprocessing_threshold) _parameters = self.__dict__.copy() _parameters['group_by'] = None # overwriting since these parameters will be passed on to other passes. if log_stacktrace: @@ -1233,6 +1242,57 @@ def _get_rough_distance_of_hashed_objs( self._distance_cache.set(cache_key, value=_distance) return _distance + def _maybe_compute_pair_distances_parallel( + self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, + parents_ids, _original_type, pre_calced_distances): + """Optionally run distance computation for non-cached pairs in workers. + + Returns a dict ``{(added_hash, removed_hash): distance}`` for pairs + whose distance was computed in parallel, or ``None`` if the section + ran serially (below threshold, unsafe inputs, no _mp_config, etc.). + + The job list is built in the exact order of the serial nested loop + so the parent merge order is identical regardless of how many workers + run or which one finishes first. + """ + mp_config = getattr(self, '_mp_config', None) + if mp_config is None or not mp_config.enabled: + return None + + # Build candidate job list in stable nested-loop order. 
We skip pairs + # that the serial loop also skips (loop detection, pre-calculated + # distance, distance cache hit) so workers only get real work. + jobs = [] + cache_enabled = self._stats[DISTANCE_CACHE_ENABLED] + for added_hash in hashes_added: + for removed_hash in hashes_removed: + added_hash_obj = t2_hashtable[added_hash] + removed_hash_obj = t1_hashtable[removed_hash] + if id(removed_hash_obj.item) in parents_ids: + continue + if pre_calced_distances and pre_calced_distances.get( + "{}--{}".format(added_hash, removed_hash)) is not None: + continue + if cache_enabled: + cache_key = self._get_distance_cache_key(added_hash, removed_hash) + if self._distance_cache.get(cache_key) is not not_found: + # Serial path will pull this from cache; no worker + # needed and we keep cache-hit accounting in the + # parent. + continue + jobs.append((added_hash, removed_hash, added_hash_obj.item, removed_hash_obj.item)) + + if not mp_config.should_parallelize(len(jobs)): + return None + + return compute_distances_parallel( + jobs=jobs, + parameters=self._parameters, + original_type=_original_type, + iterable_compare_func=self.iterable_compare_func, + config=mp_config, + ) + def _get_most_in_common_pairs_in_iterables( self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type): """ @@ -1287,6 +1347,14 @@ def defaultdict_orderedset(): pre_calced_distances = self._precalculate_distance_by_custom_compare_func( hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type) + # Optionally precompute non-cached distances in worker processes. + # Returns a dict keyed by (added_hash, removed_hash). Pair selection + # below stays serial and walks the same nested loop order, so the + # public output is independent of worker completion order. 
+ parallel_distances = self._maybe_compute_pair_distances_parallel( + hashes_added, hashes_removed, t1_hashtable, t2_hashtable, + parents_ids, _original_type, pre_calced_distances) + for added_hash in hashes_added: for removed_hash in hashes_removed: added_hash_obj = t2_hashtable[added_hash] @@ -1299,6 +1367,8 @@ def defaultdict_orderedset(): _distance = None if pre_calced_distances: _distance = pre_calced_distances.get("{}--{}".format(added_hash, removed_hash)) + if _distance is None and parallel_distances is not None: + _distance = parallel_distances.get((added_hash, removed_hash)) if _distance is None: _distance = self._get_rough_distance_of_hashed_objs( added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type) diff --git a/docs/multi_processing.md b/docs/multi_processing.md new file mode 100644 index 00000000..7a994c82 --- /dev/null +++ b/docs/multi_processing.md @@ -0,0 +1,486 @@ +# Ticket: Add Deterministic Internal Multiprocessing for DeepDiff and DeepHash + +## Implementation Status + +**Phase 1 — landed (2026-04-27).** Subtickets #1 (config + safety fallback) and #3 +(parallel rough-distance loop) are implemented. Subtickets #2, #4, #5, #6 (extended +matrix), and #7 are still open. + +What works today: + +- `DeepDiff(..., multiprocessing=True, multiprocessing_workers=N, multiprocessing_threshold=K)`. + Defaults are `False`, `min(4, cpu_count())`, and 64 jobs respectively. Defaults to + off, so existing users see no behavior change. +- The `(added_hash, removed_hash)` distance loop in + `_get_most_in_common_pairs_in_iterables` (the `ignore_order=True` hot path) is + optionally parallelized through `concurrent.futures.ProcessPoolExecutor`. + Workers compute distances only; pair selection runs in the parent in the same + serial nested-loop order, so worker completion order never reaches the + output. +- Safe by construction: pre-calculated distances and distance-cache hits are + filtered out in the parent before jobs are dispatched. 
Workers run with + `cache_purge_level=0` and a sanitized `_parameters` snapshot + (`multiprocessing=False`, `_mp_config` disabled, no shared mutable caches), + so they cannot fork-bomb or write back to parent state. +- Picklability of the parameters dict, the iterable compare func, and a + representative job is checked up front. Any failure causes a clean serial + fallback rather than an opaque worker crash. +- 23 determinism / fallback tests in `tests/test_multiprocessing.py` (10x + serial-vs-parallel comparison, tied distances, repeated items in both + `report_repetition` modes, sets, exclude_paths, ignore_string_case, custom + module-level hasher, lambda compare-func fallback, recursive-no-nesting). + All 1149 existing tests still pass. + +Code locations: + +- `deepdiff/_multiprocessing.py` — `MPConfig`, `normalize_mp_config`, + `is_pickleable`, `_distance_worker` (module-level for `spawn`), + `compute_distances_parallel`. +- `deepdiff/diff.py::DeepDiff.__init__` — three new parameters, normalized into + `self._mp_config`, propagated through `_parameters`. +- `deepdiff/diff.py::DeepDiff._maybe_compute_pair_distances_parallel` — the + per-call decision/dispatch helper. +- `deepdiff/diff.py::DeepDiff._get_most_in_common_pairs_in_iterables` — gains + one extra lookup before `_get_rough_distance_of_hashed_objs`. + +Not yet implemented (deferred, intentional): + +- **Subticket #2** — parallel `_create_hashtable` / `_prep_iterable` / + `_prep_dict`. The doc itself flags cycle-handling and identity-after-pickle + risks; these need their own test pass. +- **Subticket #4** — subtree diff parallelism after pairing. `DiffLevel` + pickling and custom-operator interaction require dedicated work. +- **Subticket #5** — multiprocessing-aware stats semantics. Parent-only stats + remain meaningful in Phase 1, but no aggregation across workers. 
+- **Subticket #6** — extended test matrix (numpy, pydantic, namedtuple, group_by, + large-mixed structures, worker exception propagation tests). Phase 1 ships + the core determinism harness; the rest is additive. +- **Subticket #7** — benchmarks. The doc says default thresholds shouldn't + change before benchmarks land; the current `DEFAULT_THRESHOLD = 64` is a + conservative placeholder. + +--- + +## Goal + +Add an opt-in internal multiprocessing mode that can speed up expensive deep hashing and diffing workloads while keeping the final DeepDiff/DeepHash outcome deterministic. + +The most important target is `DeepDiff(..., ignore_order=True)`, because that mode often spends the most time hashing iterable items, calculating candidate pair distances, and recursively diffing nested structures. + +The result of a multiprocessing run must be the same as a single-process run for supported inputs. Worker completion order must never affect reports, matching decisions, paths, or output ordering. + +## Non-Goals + +- Do not make the whole recursive engine concurrently mutate one `DeepDiff` instance. +- Do not share `self.tree`, `self.hashes`, `_distance_cache`, or `_stats` directly between worker processes. +- Do not make `max_diffs` and `max_passes` exact replicas of serial accounting. They are stop guards. It is acceptable for their counts to differ in multiprocessing mode as long as they still cap runaway work. +- Do not silently parallelize unsafe callables. If callbacks, custom operators, hashers, or compare functions cannot be safely pickled or executed in workers, fall back to serial behavior or disable only the unsafe parallel section. + +## Current Baseline + +DeepDiff is already safe to call from multiple separate processes as independent top-level calls. 
See: + +- `tests/test_diff_other.py::TestDiffOther::test_multi_processing1` +- `tests/test_diff_other.py::TestDiffOther::test_multi_processing2_with_ignore_order` +- `tests/test_diff_other.py::TestDiffOther::test_multi_processing3_deephash` + +Those tests do not cover internal multiprocessing inside one `DeepDiff` run. This ticket is about one DeepDiff invocation splitting part of its own work across workers. + +Important implementation points in the current code: + +- `deepdiff/diff.py::DeepDiff.__init__` creates shared mutable state for one diff run: + - `self.tree` + - `self.hashes` + - `self._distance_cache` + - `self._stats` + - `self.group_by_keys` + - `self._numpy_paths` +- `deepdiff/diff.py::_diff` is the main recursive dispatcher. +- `deepdiff/diff.py::_diff_iterable_with_deephash` is the main expensive path for `ignore_order=True`. +- `deepdiff/diff.py::_create_hashtable` hashes iterable items via `DeepHash`. +- `deepdiff/diff.py::_get_most_in_common_pairs_in_iterables` calculates distances between added and removed hashes, then serially chooses pairs. +- `deepdiff/deephash.py::_hash`, `_prep_dict`, and `_prep_iterable` recursively hash child objects. +- Result reporting goes through `deepdiff/diff.py::_report_result`, which writes to `TreeResult` containers backed by `SetOrdered`. + +## Determinism Contract + +Multiprocessing mode must obey these invariants: + +1. A supported multiprocessing run must produce the same public DeepDiff result as the equivalent serial run. +2. Pair selection in `ignore_order=True` must be independent of worker completion order. +3. Result merge order must be based on serial traversal order, not `as_completed()` order. +4. Hash aggregation order must match existing semantics: + - dictionaries and unordered iterables still sort the hash components where the current implementation sorts them. + - ordered iterable hashing must preserve item index order when order matters. +5. Workers must not mutate parent process state. +6. 
Any worker exception must surface as a normal DeepDiff exception, not be swallowed or turned into partial output. +7. Multiprocessing mode must have a reliable serial fallback for unsupported or unsafe inputs. + +## Proposed API + +Add conservative, opt-in parameters to `DeepDiff` and possibly `DeepHash`. + +Suggested names: + +```python +DeepDiff( + t1, + t2, + multiprocessing=False, + multiprocessing_workers=None, + multiprocessing_threshold=None, +) +``` + +Open design choice: `multiprocessing` may also accept an integer worker count. If so, keep the API unambiguous and document it. + +Suggested behavior: + +- `multiprocessing=False`: existing serial behavior. +- `multiprocessing=True`: use `os.cpu_count()` or a conservative default such as `min(4, os.cpu_count() or 1)`. +- `multiprocessing_workers=N`: explicit worker count. +- `multiprocessing_threshold`: minimum amount of work before spawning tasks. Default should avoid slowing small diffs. + +The first implementation can keep the parameters private or experimental if preferred, but tests should exercise them explicitly. + +## Architecture + +Use multiprocessing only around deterministic batches of independent work. The parent process owns traversal decisions, pair selection, result merging, stats finalization, and public result conversion. + +Recommended internal structure: + +- A small execution helper module or class, for example `deepdiff/_multiprocessing.py` or private helpers in `diff.py`.
+- A worker input dataclass or plain dict containing: + - job kind + - stable job index + - path string + - t1/t2 or item object + - sanitized DeepDiff/DeepHash parameters + - relevant context such as `_original_type` +- A worker output dataclass or plain dict containing: + - job index + - path string + - computed hash/result/distance/local tree + - local stats + - exception details if needed + +Do not return live `DiffLevel` objects across process boundaries unless tests prove they pickle reliably and preserve path behavior. Prefer returning plain serializable data for hash and distance tasks. For subtree diff tasks, returning a `TreeResult` may work but must be tested heavily; a safer approach is to return text/delta-style plain result data and merge at the parent. + +## Subtickets + +### 1. Add Multiprocessing Configuration and Serial Fallback + +Implement opt-in configuration without changing serial behavior. + +Tasks: + +- Add constructor parameters to `DeepDiff`. +- Store normalized multiprocessing settings in `_parameters` so recursive child `DeepDiff` instances receive the same configuration where appropriate. +- Add validation: + - worker count must be `None` or a positive integer. + - threshold must be `None` or a non-negative integer. +- Add a helper that decides whether a section may parallelize. +- Add a helper that detects unsafe worker state: + - unpickleable `custom_operators` + - unpickleable `hasher` + - unpickleable `exclude_obj_callback` + - unpickleable `include_obj_callback` + - unpickleable `ignore_order_func` + - unpickleable `iterable_compare_func` + - objects that fail pickling +- If unsafe, fall back to serial for that section. + +Acceptance criteria: + +- All existing tests pass with default parameters. +- `DeepDiff(..., multiprocessing=False)` is exactly the current path. +- Unsupported multiprocessing inputs fall back to serial or raise a clear documented error if fallback is not possible. + +### 2. 
Parallelize DeepHash Child Hashing + +Start with hashing because parent hash aggregation is already naturally deterministic when child hashes are gathered and combined in serial order. + +Candidate locations: + +- `deepdiff/deephash.py::_prep_iterable` +- `deepdiff/deephash.py::_prep_dict` +- `deepdiff/diff.py::_create_hashtable` + +Recommended first implementation: + +- Parallelize `_create_hashtable` for large iterables in `ignore_order=True`. +- Create one job per item, including the item index and parent path. +- Each worker runs `DeepHash(item, hashes=None, parent=parent, apply_hash=True, **deephash_parameters)`. +- Parent sorts outputs by original item index before calling `_add_hash`. +- Parent may merge returned object hashes into `self.hashes` only in deterministic job-index order. + +Risks: + +- Shared `self.hashes` currently avoids recalculating repeated object hashes. Worker-local hashing loses some cache reuse. +- Some objects cannot be pickled. +- Object identity and cycles may not behave the same after pickling. + +Mitigations: + +- Enable only above a threshold where process overhead is likely worth it. +- Detect pickling failures and use serial hashing. +- Add cycle tests before enabling parallel hashing for arbitrary recursive objects. Until then, fall back to serial when cycles are detected or suspected. + +Acceptance criteria: + +- Serial and multiprocessing results match for large lists of dicts, lists of lists, sets, repeated items, and nested mixed structures. +- Result order matches serial output. +- Tests include both `report_repetition=False` and `report_repetition=True`. + +### 3. Parallelize Ignore-Order Distance Calculation + +This is likely the highest-value optimization for `ignore_order=True`. + +Candidate location: + +- `deepdiff/diff.py::_get_most_in_common_pairs_in_iterables` + +Current serial shape: + +1. Build `hashes_added` and `hashes_removed`. +2. Calculate rough distances for candidate `(added_hash, removed_hash)` pairs. +3. 
Store candidates under `most_in_common_pairs`. +4. Select final pairs serially by ascending distance and `SetOrdered` iteration behavior. + +Required deterministic design: + +- Parent creates candidate pair jobs in a stable nested-loop order matching current code: + - outer loop: `hashes_added` + - inner loop: `hashes_removed` +- Workers compute only the distance for one or more candidate pairs. +- Parent receives distance outputs and sorts by original job index before inserting into `most_in_common_pairs`. +- Parent runs the final pairing algorithm serially and unchanged as much as possible. + +Do not let workers choose pairs. + +Risks: + +- Worker-local `_distance_cache` changes cache hit statistics and performance shape. +- `DeepDiff(..., view=DELTA_VIEW)` inside `_get_rough_distance_of_hashed_objs` must receive equivalent parameters. +- `iterable_compare_func` may be unpickleable or side-effectful. +- Floating-point distances must compare the same after process boundaries. + +Mitigations: + +- Cache stats do not need to match exactly, but final results must. +- Fall back to serial when `iterable_compare_func` is unsafe. +- Keep the final `sorted(distances_to_from_hashes.keys())` pairing step in the parent. +- Add tests that run the same multiprocessing diff many times and compare with serial output. + +Acceptance criteria: + +- `ignore_order=True` output matches serial for all existing `tests/test_ignore_order.py` cases where multiprocessing mode is enabled. +- Repeated runs with multiprocessing produce identical output. +- Tests include collisions/ties where multiple candidate pairs have the same rough distance. + +### 4. Parallelize Selected Subtree Diffs After Pairing + +Once `ignore_order=True` pairing has been determined, paired item diffs can be farmed out in some cases.
+ +Candidate locations: + +- `deepdiff/diff.py::_diff_iterable_with_deephash` +- `deepdiff/diff.py::_diff_by_forming_pairs_and_comparing_one_by_one` +- dictionary shared-key child comparisons in `_diff_dict` + +Recommended approach: + +- Parent first determines the exact child jobs in serial traversal order. +- Workers compute local diffs for child pairs. +- Parent merges child results in job index order. + +Important: do not parallelize parent-level reporting of added/removed items by completion order. Parent should report or merge in the same order serial traversal would have used. + +Risks: + +- `DiffLevel` paths and `up/down` links may not be safe to construct in one process and merge in another. +- `TreeResult` contains `DiffLevel` objects and `SetOrdered`; pickling and equality need explicit tests. +- Custom operators can call `custom_report_result` and mutate the diff instance. + +Mitigations: + +- Initially disable subtree parallelism when custom operators are present. +- Prefer plain result payloads over cross-process `DiffLevel` objects if pickling proves fragile. +- Keep `values_changed`, `iterable_item_added`, `iterable_item_removed`, and `type_changes` merge logic centralized in the parent. + +Acceptance criteria: + +- Serial and multiprocessing output match for text view, tree view, delta view where supported, and verbose levels 0, 1, and 2. +- Existing delta tests pass if subtree multiprocessing is enabled for delta-compatible cases. +- Custom operators either work deterministically or force serial fallback. + +### 5. Stats, Limits, and Progress Logging + +Multiprocessing stats do not need to be byte-for-byte identical to serial stats, but they must remain meaningful. + +Tasks: + +- Define stats semantics for multiprocessing: + - parent diff count + - worker diff count aggregate + - worker pass count aggregate + - cache hits from parent only, or aggregate worker-local hits separately +- Keep `max_diffs` and `max_passes` as approximate stop guards. 
+- Ensure workers can stop early if a shared or parent-supplied budget is exhausted. +- Do not run one progress timer per worker. + +Suggested behavior: + +- Parent owns the progress timer. +- Worker stats are returned and merged after each batch. +- If `max_diffs` or `max_passes` is reached in parent or aggregated worker stats, stop scheduling new work and report the existing warning. + +Acceptance criteria: + +- `get_stats()` still returns the existing keys. +- Existing `max_diffs` and `max_passes` tests still pass in serial mode. +- Multiprocessing mode has tests showing limits stop runaway work, without requiring exact serial counts. + +### 6. Test Matrix for Determinism and Flake Prevention + +Add tests that compare serial and multiprocessing outputs directly. + +Required test categories: + +- `ignore_order=True`, nested lists of dicts. +- `ignore_order=True`, repeated items with `report_repetition=True`. +- `ignore_order=True`, repeated items with `report_repetition=False`. +- Tied candidate distances where more than one pairing is plausible. +- Large mixed structures that trigger the multiprocessing threshold. +- Sets and frozensets. +- Custom `hasher`. +- `ignore_string_case`, `ignore_numeric_type_changes`, `ignore_string_type_changes`. +- `exclude_paths`, `include_paths`, and regex path exclusions. +- `group_by` and `group_by_sort_key`. +- Numpy arrays if numpy is available. +- Objects with `__dict__`, `__slots__`, namedtuple, and pydantic objects if the existing optional dependency setup supports it. +- Pickle failure fallback. +- Worker exception propagation. 
+ +Determinism test pattern: + +```python +serial = DeepDiff(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=1) +for _ in range(20): + parallel = DeepDiff( + t1, + t2, + ignore_order=True, + cutoff_intersection_for_pairs=1, + multiprocessing=True, + multiprocessing_workers=4, + multiprocessing_threshold=0, + ) + assert parallel == serial +``` + +Also compare `parallel.to_dict()` or equivalent public representation for views where direct object equality is too sensitive. + +### 7. Benchmarks + +Add benchmark coverage before tuning thresholds. + +Candidate workloads: + +- Large list of nested dictionaries with `ignore_order=True`. +- Existing benchmark shapes referenced in `docs/optimizations.rst`: + - deeply nested object with cache disabled/enabled + - large array-like structures + - big JSON-like blobs +- Large iterable where many added/removed items require rough distance pairing. + +Measure: + +- wall time +- peak memory if available +- process spawn overhead +- pickle time if practical +- speedup vs serial +- correctness vs serial result + +Acceptance criteria: + +- Multiprocessing mode is not enabled by default until benchmarks show a clear win for targeted workloads. +- Default threshold avoids slowdowns on small inputs. + +## Implementation Notes + +### Stable Job Ordering + +Every batch must assign a monotonically increasing `job_index` before submitting work. Parent code must merge by `job_index`. + +Do not use `as_completed()` order except to collect results into a temporary map. + +### Pairing in `ignore_order=True` + +The final pair-selection algorithm is part of the observable behavior. Keep it serial. + +Workers may compute distances, but the parent must insert distances into `most_in_common_pairs` in the same order the serial nested loops would have inserted them. This matters when distances tie. + +### Caches + +Avoid process-shared mutable caches in the first implementation. 
+ +Accept that worker-local hashing/distance calculation may reduce cache reuse. A later optimization can add a deterministic parent-owned cache merge, but correctness should come first. + +If merging hash cache entries from workers: + +- merge in job index order. +- do not overwrite an existing parent entry with a different value. +- add tests for repeated equal-but-not-identical objects. + +### Pickling and Start Methods + +Use the standard library `concurrent.futures.ProcessPoolExecutor`. + +Do not assume Linux `fork` behavior. The implementation should work with `spawn`, especially for macOS and Windows users. + +This means worker functions must be module-level functions, not nested closures. + +### Thresholds + +Multiprocessing should only run when there is enough work to offset serialization and process overhead. + +Possible heuristics: + +- iterable length above a threshold. +- candidate distance pair count above a threshold. +- estimated nested item count from `DeepHash` count data. + +Start conservative. Add benchmarks before changing defaults. + +### Unsupported Inputs + +Fall back to serial for: + +- unpickleable objects. +- unpickleable callables. +- active custom operators unless explicitly tested. +- detected cycles until cycle behavior is proven equivalent. +- generator inputs, because multiprocessing may consume or pickle them differently. + +## Risks + +- **Non-deterministic pair choices**: if distance jobs are merged by completion order, tied distances can produce different pairings. Mitigation: stable job indices and serial parent pairing. +- **Different object identity after pickling**: cycle detection and identity-sensitive behavior may change in workers. Mitigation: fallback for cycles and tests for self-referential inputs. +- **Callback side effects**: callbacks and custom operators may depend on process-local state or mutate global state. Mitigation: fallback unless proven safe. 
+- **Result ordering drift**: `TreeResult` and `TextResult` depend on insertion order through `SetOrdered`. Mitigation: parent-only ordered merge. +- **Cache behavior drift**: multiprocessing changes cache locality and stats. Mitigation: do not require exact stats equality; require result equality. +- **Memory growth**: large objects must be pickled and copied into workers. Mitigation: thresholds and benchmarks. +- **Platform differences**: `fork` can hide pickling issues that fail under `spawn`. Mitigation: tests should force or simulate spawn where possible. + +## Definition of Done + +- Multiprocessing is opt-in. +- Default serial behavior is unchanged. +- `ignore_order=True` multiprocessing results match serial results across the new determinism test matrix. +- Repeated multiprocessing runs are stable. +- Unsupported inputs fall back to serial or raise a clear documented error. +- Tests cover worker exception propagation and pickle fallback. +- Benchmarks demonstrate speedup for at least one realistic `ignore_order=True` workload. +- Documentation explains the experimental status, supported cases, and known limitations. diff --git a/tests/test_multiprocessing.py b/tests/test_multiprocessing.py new file mode 100644 index 00000000..648949b0 --- /dev/null +++ b/tests/test_multiprocessing.py @@ -0,0 +1,205 @@ +"""Determinism and safety tests for internal multiprocessing. + +Phase 1 covers the parallel rough-distance loop in +``DeepDiff._get_most_in_common_pairs_in_iterables`` (the ``ignore_order=True`` +path). Each parallel run is compared against the equivalent serial run; on +ties or many candidate pairs the merge order must come from the parent's +serial nested loop, not from worker completion order. + +We use ``multiprocessing_threshold=0`` to force the parallel path even on +small inputs, then loop the run multiple times to flush out any +non-determinism. 
+""" + +import pytest + +from deepdiff import DeepDiff +from deepdiff._multiprocessing import ( + MPConfig, + normalize_mp_config, + is_pickleable, + compute_distances_parallel, +) + + +REPEATS = 10 # tradeoff between flake-detection and CI time + + +def _run_parallel(t1, t2, **kwargs): + return DeepDiff( + t1, t2, + multiprocessing=True, + multiprocessing_workers=4, + multiprocessing_threshold=0, + **kwargs, + ) + + +class TestMPConfig: + + def test_disabled_by_default(self): + cfg = normalize_mp_config(False, None, None) + assert cfg.enabled is False + assert cfg.should_parallelize(10_000) is False + + def test_enabled_default_workers(self): + cfg = normalize_mp_config(True, None, None) + assert cfg.enabled is True + assert cfg.workers >= 1 + + def test_explicit_workers(self): + cfg = normalize_mp_config(True, 3, None) + assert cfg.workers == 3 + + def test_threshold_gates_parallelism(self): + cfg = normalize_mp_config(True, 4, 100) + assert cfg.should_parallelize(50) is False + assert cfg.should_parallelize(100) is True + + def test_invalid_workers(self): + with pytest.raises(ValueError): + normalize_mp_config(True, 0, None) + with pytest.raises(ValueError): + normalize_mp_config(True, -1, None) + + def test_invalid_threshold(self): + with pytest.raises(ValueError): + normalize_mp_config(True, None, -1) + + def test_invalid_multiprocessing_value(self): + with pytest.raises(ValueError): + normalize_mp_config("yes", None, None) # type: ignore[arg-type] + + def test_single_worker_does_not_parallelize(self): + cfg = MPConfig(enabled=True, workers=1, threshold=0) + assert cfg.should_parallelize(10_000) is False + + +class TestParamWiring: + + def test_default_serial_path_unchanged(self): + t1 = [{"a": 1}, {"a": 2}] + t2 = [{"a": 2}, {"a": 1}] + # No multiprocessing parameter at all — must hit the existing path. 
+ assert DeepDiff(t1, t2, ignore_order=True) == {} + + def test_explicit_multiprocessing_false(self): + t1 = [1, 2, 3] + t2 = [3, 2, 1] + assert DeepDiff(t1, t2, ignore_order=True, multiprocessing=False) == {} + + def test_invalid_workers_surfaces_at_diff_level(self): + with pytest.raises(ValueError): + DeepDiff([1], [2], multiprocessing=True, multiprocessing_workers=0) + + +class TestDeterminism: + """Each test compares serial vs. parallel many times. Any drift is a bug.""" + + def _assert_determinism(self, t1, t2, **kwargs): + kwargs.setdefault("ignore_order", True) + kwargs.setdefault("cutoff_intersection_for_pairs", 1) + serial = DeepDiff(t1, t2, **kwargs) + for _ in range(REPEATS): + parallel = _run_parallel(t1, t2, **kwargs) + assert parallel == serial, ( + "parallel != serial after run; difference: %r vs %r" + % (parallel, serial) + ) + + def test_nested_lists_of_dicts(self): + t1 = [{"id": i, "data": {"x": i * 2, "y": [i, i + 1]}} for i in range(20)] + t2 = [{"id": i, "data": {"x": i * 2 + (1 if i % 5 == 0 else 0), "y": [i, i + 1]}} + for i in range(20)] + self._assert_determinism(t1, t2) + + def test_repeated_items_report_repetition_false(self): + t1 = [1, 1, 1, 2, 3, 3] + t2 = [3, 1, 2, 2, 4] + self._assert_determinism(t1, t2, report_repetition=False) + + def test_repeated_items_report_repetition_true(self): + t1 = [1, 1, 1, 2, 3, 3] + t2 = [3, 1, 2, 2, 4] + self._assert_determinism(t1, t2, report_repetition=True) + + def test_tied_distances(self): + # Multiple candidate pairs with the same rough distance. Worker-order + # merge would surface here as flapping pairings between runs. 
+ t1 = [{"k": "a", "v": 1}, {"k": "b", "v": 1}, {"k": "c", "v": 1}] + t2 = [{"k": "a", "v": 2}, {"k": "b", "v": 2}, {"k": "c", "v": 2}] + self._assert_determinism(t1, t2) + + def test_sets(self): + t1 = {frozenset({1, 2}), frozenset({3, 4}), frozenset({5, 6})} + t2 = {frozenset({1, 2}), frozenset({3, 5}), frozenset({7, 8})} + self._assert_determinism(t1, t2) + + def test_exclude_paths(self): + t1 = [{"id": i, "secret": i * 100, "v": i} for i in range(10)] + t2 = [{"id": i, "secret": i * 999, "v": i + (1 if i == 5 else 0)} for i in range(10)] + self._assert_determinism(t1, t2, exclude_paths=["root[0]['secret']"]) + + def test_ignore_string_case(self): + t1 = [{"name": "Alice"}, {"name": "Bob"}, {"name": "Carol"}] + t2 = [{"name": "alice"}, {"name": "bob"}, {"name": "DAVE"}] + self._assert_determinism(t1, t2, ignore_string_case=True) + + def test_custom_pickleable_hasher(self): + # Module-level callable below is pickleable; lambdas are not. + self._assert_determinism( + [{"x": 1}, {"x": 2}, {"x": 3}], + [{"x": 1}, {"x": 4}, {"x": 5}], + hasher=_simple_hasher, + ) + + +class TestSafetyFallback: + """Unsafe inputs must not crash; they fall back to serial.""" + + def test_unpickleable_iterable_compare_func_falls_back(self): + # A lambda is not pickleable. The parallel section must give up and + # the result must still match a serial run. 
+ t1 = [{"k": 1, "v": "a"}, {"k": 2, "v": "b"}] + t2 = [{"k": 1, "v": "a"}, {"k": 2, "v": "c"}] + cmp = lambda x, y: x["k"] == y["k"] # noqa: E731 + serial = DeepDiff(t1, t2, ignore_order=True, iterable_compare_func=cmp) + parallel = _run_parallel(t1, t2, ignore_order=True, iterable_compare_func=cmp) + assert parallel == serial + + def test_is_pickleable_helper(self): + assert is_pickleable({"a": 1}) is True + assert is_pickleable(lambda x: x) is False + + def test_compute_distances_parallel_returns_none_on_unpickleable_compare_func(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + # The plain params dict pickles fine; the lambda compare func does not. + result = compute_distances_parallel( + jobs=[("h1", "h2", {"x": 1}, {"x": 2})], + parameters={"foo": "bar"}, + original_type=None, + iterable_compare_func=lambda *args, **kwargs: None, + config=cfg, + ) + assert result is None + + +class TestRecursiveNoNesting: + """The worker must disable its own multiprocessing so we don't fork-bomb.""" + + def test_worker_subdiff_runs_serial(self): + # The worker invokes DeepDiff(item1, item2, _parameters=sanitized). + # Sanitization sets _mp_config to disabled; if it didn't, this nested + # workload would either deadlock or be very slow under spawn. This + # test runs the diff once; a hang or a very slow run is the visible sign. + t1 = [{"deep": {"deeper": {"deepest": [i, i + 1, i + 2]}}} for i in range(8)] + t2 = [{"deep": {"deeper": {"deepest": [i, i + 1, i + 3]}}} for i in range(8)] + serial = DeepDiff(t1, t2, ignore_order=True) + parallel = _run_parallel(t1, t2, ignore_order=True) + assert parallel == serial + + +# Module-level helper so it pickles cleanly under the spawn start method. 
+def _simple_hasher(obj, *args, **kwargs): + import hashlib + return hashlib.sha1(repr(obj).encode("utf-8")).hexdigest() From 0450c8de0cf0bc741c561f776bbb367c20864774 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 14:26:50 -0700 Subject: [PATCH 11/23] =?UTF-8?q?Phase=202=20implementation=20is=20complet?= =?UTF-8?q?e=20=E2=80=94=20all=20subticket=20#2=20acceptance=20criteria=20?= =?UTF-8?q?are=20met:?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ✅ Parallel _create_hashtable lands behind the existing multiprocessing=True opt-in - ✅ Serial and parallel results match for large lists of dicts, lists of lists, sets, repeated items, nested mixed structures - ✅ Both report_repetition=False and report_repetition=True covered - ✅ Result order matches serial output (verified via 10× repeat-comparison) - ✅ Pickling fallback (unpickleable hasher) tested end-to-end - ✅ Full suite green (1160 passed, 10 skipped); pyright clean --- deepdiff/_multiprocessing.py | 86 ++++++++++++++++++++++++ deepdiff/diff.py | 67 ++++++++++++++++++- docs/multi_processing.md | 31 ++++++--- tests/test_multiprocessing.py | 122 ++++++++++++++++++++++++++++++++++ 4 files changed, 297 insertions(+), 9 deletions(-) diff --git a/deepdiff/_multiprocessing.py b/deepdiff/_multiprocessing.py index a011f781..90f490de 100644 --- a/deepdiff/_multiprocessing.py +++ b/deepdiff/_multiprocessing.py @@ -218,3 +218,89 @@ def compute_distances_parallel( for i, job in enumerate(jobs): out[(job[0], job[1])] = results_by_index[i] return out + + +def _hash_worker(job: Tuple[int, Any, str, Dict[str, Any]]) -> Tuple[int, Optional[str]]: + """Hash a single iterable item in a worker process. + + ``job`` layout: ``(job_index, item, parent_path, deephash_parameters)``. + The worker constructs a fresh ``DeepHash`` (no shared parent state) and + looks up the resulting top-level hash for ``item``. 
Returns + ``(job_index, item_hash)`` where ``item_hash`` is None if the item could + not be processed — the parent treats that exactly like the serial path's + ``KeyError`` / ``unprocessed`` skip. + + UnicodeDecodeError and NotImplementedError propagate as in the serial + path; other exceptions surface in the parent through ``future.result()``. + """ + # Imported here to dodge spawn/import-cycle surprises. + from deepdiff.deephash import DeepHash + from deepdiff.helper import unprocessed + + job_index, item, parent_path, parameters = job + deep_hash = DeepHash( + item, + hashes=None, + parent=parent_path, + apply_hash=True, + **parameters, + ) + try: + item_hash = deep_hash[item] + except KeyError: + return job_index, None + if item_hash is unprocessed: + return job_index, None + return job_index, item_hash + + +def compute_hashes_parallel( + jobs: List[Tuple[Any, str]], + deephash_parameters: Dict[str, Any], + config: MPConfig, +) -> Optional[List[Optional[str]]]: + """Run ``_hash_worker`` over ``jobs`` and return per-item hashes. + + ``jobs`` is a list of ``(item, parent_path)`` tuples in the exact order + the serial enumerate-loop visits them. Returns a list aligned to that + order, with ``None`` for items the worker could not hash. Returns + ``None`` when the section is unsafe to parallelize (unpickleable + parameters/items, worker import error). On ``None`` the caller MUST fall + back to the serial path. + + Workers may finish out of order; results are collected by their original + index so callers see the same output regardless of completion order. + Note: child object hashes computed inside each worker are NOT merged + back into the parent's ``self.hashes`` — id-based keys for unhashable + sub-objects would not match across process boundaries. Parent code that + relies on the iterable-level hash being present must continue to compute + it serially after the per-item parallel pass. 
+ """ + if not jobs: + return [] + + if not is_pickleable(deephash_parameters): + return None + # Sample-pickle the first job; cheap shield against the common + # "lambda in custom_operators" or unpickleable item failure. + if not is_pickleable(jobs[0]): + return None + + from concurrent.futures import ProcessPoolExecutor, as_completed + + payloads = [ + (i, item, parent_path, deephash_parameters) + for i, (item, parent_path) in enumerate(jobs) + ] + + results_by_index: Dict[int, Optional[str]] = {} + try: + with ProcessPoolExecutor(max_workers=config.workers) as executor: + futures = [executor.submit(_hash_worker, payload) for payload in payloads] + for future in as_completed(futures): + idx, item_hash = future.result() + results_by_index[idx] = item_hash + except (pickle.PicklingError, AttributeError, TypeError): + return None + + return [results_by_index[i] for i in range(len(jobs))] diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 3dcb633f..4b64d16b 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -45,6 +45,7 @@ from deepdiff.colored_view import ColoredView from deepdiff._multiprocessing import ( MPConfig, normalize_mp_config, compute_distances_parallel, + compute_hashes_parallel, ) if TYPE_CHECKING: @@ -1147,14 +1148,75 @@ def _add_hash(self, hashes, item_hash, item, i): else: hashes[item_hash] = IndexedHash(indexes=[i], item=item) + def _maybe_compute_hashes_parallel(self, level, obj): + """Optionally hash iterable items in worker processes. + + Returns ``(hashes, items)``: ``hashes`` lists per-index ``item_hash`` + values (``None`` for items the worker could not process) aligned to + ``enumerate(items)``, and ``items`` is ``list(obj)``. Returns + ``(None, None)`` when the section must run serially (multiprocessing + disabled, below threshold, unsized iterable, unsafe inputs). + + Iteration order is captured here once via ``list(obj)`` so the + parent loop and the worker job list see the same items even for + order-sensitive iterables like sets. 
+ """ + mp_config = getattr(self, '_mp_config', None) + if mp_config is None or not mp_config.enabled: + return None, None + try: + n = len(obj) + except TypeError: + # Generators / unsized iterables: serial fallback. Materializing + # would change semantics (single-pass consumption). + return None, None + if not mp_config.should_parallelize(n): + return None, None + + items = list(obj) + parent_base = level.path() + jobs = [ + (item, "{}[{}]".format(parent_base, i)) + for i, item in enumerate(items) + ] + hashes = compute_hashes_parallel( + jobs=jobs, + deephash_parameters=self.deephash_parameters, + config=mp_config, + ) + if hashes is None: + return None, None + return hashes, items + def _create_hashtable(self, level, t): """Create hashtable of {item_hash: (indexes, item)}""" obj = getattr(level, t) + # Optionally precompute item hashes in worker processes. Workers + # operate on serial-order job indices and the parent merges back + # in that same order, so output is independent of worker + # completion order. ``items`` is the materialized iterable when + # parallel ran (set/dict iteration is deterministic per run but we + # need a single pass we can re-walk here). + parallel_hashes, materialized_items = self._maybe_compute_hashes_parallel(level, obj) + iterator = enumerate(materialized_items) if materialized_items is not None else enumerate(obj) + local_hashes = dict_() - for (i, item) in enumerate(obj): + for (i, item) in iterator: try: parent = "{}[{}]".format(level.path(), i) + if parallel_hashes is not None: + item_hash = parallel_hashes[i] + if item_hash is None: + # Worker could not process this item (KeyError or + # unprocessed marker). Mirror the serial pass: + # log once, skip. + self.log_err("Item %s was not processed while hashing " + "thus not counting this object." 
% + level.path()) + continue + self._add_hash(hashes=local_hashes, item_hash=item_hash, item=item, i=i) + continue # Note: in the DeepDiff we only calculate the hash of items when we have to. # So self.hashes does not include hashes of all objects in t1 and t2. # It only includes the ones needed when comparing iterables. @@ -1190,6 +1252,9 @@ def _create_hashtable(self, level, t): self._add_hash(hashes=local_hashes, item_hash=item_hash, item=item, i=i) # Also we hash the iterables themselves too so that we can later create cache keys from those hashes. + # When the per-item loop ran in parallel, child hashes were not merged into ``self.hashes`` + # (cross-process id keys would not match). The iterable-level pass therefore re-hashes + # children serially; this is intentional — correctness over cache reuse for now. DeepHash( obj, hashes=self.hashes, diff --git a/docs/multi_processing.md b/docs/multi_processing.md index 7a994c82..1c06481b 100644 --- a/docs/multi_processing.md +++ b/docs/multi_processing.md @@ -3,8 +3,16 @@ ## Implementation Status **Phase 1 — landed (2026-04-27).** Subtickets #1 (config + safety fallback) and #3 -(parallel rough-distance loop) are implemented. Subtickets #2, #4, #5, #6 (extended -matrix), and #7 are still open. +(parallel rough-distance loop) are implemented. + +**Phase 2 — landed (2026-04-27).** Subticket #2 (parallel `_create_hashtable`) is +implemented. Workers compute per-item DeepHash strings; the parent merges them +back in stable enumerate-order. The iterable-level hash still runs serially in +the parent so cross-process id-keyed sub-object cache entries do not need to +travel back. Unsafe inputs (unpickleable hasher / params, generators without +`__len__`) fall back to serial. + +Subtickets #4, #5, #6 (extended matrix), and #7 are still open. 
What works today: @@ -34,20 +42,27 @@ What works today: Code locations: - `deepdiff/_multiprocessing.py` — `MPConfig`, `normalize_mp_config`, - `is_pickleable`, `_distance_worker` (module-level for `spawn`), - `compute_distances_parallel`. + `is_pickleable`, `_distance_worker` and `_hash_worker` (module-level for + `spawn`), `compute_distances_parallel`, `compute_hashes_parallel`. - `deepdiff/diff.py::DeepDiff.__init__` — three new parameters, normalized into `self._mp_config`, propagated through `_parameters`. - `deepdiff/diff.py::DeepDiff._maybe_compute_pair_distances_parallel` — the - per-call decision/dispatch helper. + per-call decision/dispatch helper for the distance loop. +- `deepdiff/diff.py::DeepDiff._maybe_compute_hashes_parallel` — the per-call + decision/dispatch helper for `_create_hashtable`. - `deepdiff/diff.py::DeepDiff._get_most_in_common_pairs_in_iterables` — gains one extra lookup before `_get_rough_distance_of_hashed_objs`. +- `deepdiff/diff.py::DeepDiff._create_hashtable` — gains a parallel + pre-pass that fills per-index item hashes; serial body unchanged for + the fallback path. Not yet implemented (deferred, intentional): -- **Subticket #2** — parallel `_create_hashtable` / `_prep_iterable` / - `_prep_dict`. The doc itself flags cycle-handling and identity-after-pickle - risks; these need their own test pass. +- **Subticket #2 (partial)** — `_prep_iterable` / `_prep_dict` inner-recursion + parallelism is still serial. `_create_hashtable` parallelization landed in + Phase 2; the deeper recursion levels remain serial for now because their + identity-after-pickle and cross-call cache reuse risks are not yet covered + by tests. - **Subticket #4** — subtree diff parallelism after pairing. `DiffLevel` pickling and custom-operator interaction require dedicated work. - **Subticket #5** — multiprocessing-aware stats semantics. 
Parent-only stats diff --git a/tests/test_multiprocessing.py b/tests/test_multiprocessing.py index 648949b0..8471f955 100644 --- a/tests/test_multiprocessing.py +++ b/tests/test_multiprocessing.py @@ -19,6 +19,7 @@ normalize_mp_config, is_pickleable, compute_distances_parallel, + compute_hashes_parallel, ) @@ -203,3 +204,124 @@ def test_worker_subdiff_runs_serial(self): def _simple_hasher(obj, *args, **kwargs): import hashlib return hashlib.sha1(repr(obj).encode("utf-8")).hexdigest() + + +class TestHashtableParallel: + """Phase 2: ``_create_hashtable`` per-item DeepHash parallelism. + + These exercise the parallel hashing path with ``multiprocessing_threshold=0`` + so even small fixtures hit the worker pool. Result must match the equivalent + serial run, repeatedly, regardless of worker completion order. + """ + + def _assert_determinism(self, t1, t2, **kwargs): + kwargs.setdefault("ignore_order", True) + kwargs.setdefault("cutoff_intersection_for_pairs", 1) + serial = DeepDiff(t1, t2, **kwargs) + for _ in range(REPEATS): + parallel = _run_parallel(t1, t2, **kwargs) + assert parallel == serial, ( + "parallel != serial after run; difference: %r vs %r" + % (parallel, serial) + ) + + def test_large_list_of_dicts(self): + # Bigger N so spawn cost is not pathological; results must still match. 
+ t1 = [{"i": i, "name": "item-%d" % i, "tags": [i, i + 1]} for i in range(40)] + t2 = [{"i": i, "name": "item-%d" % i, "tags": [i, i + 1]} for i in range(40)] + # Add a single change deep in the middle + t2[17]["name"] = "changed" + self._assert_determinism(t1, t2) + + def test_list_of_lists(self): + t1 = [[i, i + 1, i + 2] for i in range(15)] + t2 = [[i, i + 1, i + 2] for i in range(15)] + t2[5] = [99, 100, 101] + self._assert_determinism(t1, t2) + + def test_set_of_hashables(self): + t1 = set(range(30)) + t2 = set(range(30)) + t2.discard(7) + t2.add(99) + self._assert_determinism(t1, t2) + + def test_repeated_items_report_repetition_false(self): + # Repeated items: cache reuse path. Parent merges per-index hashes + # in serial order so duplicates collapse the same way. + t1 = [{"k": i % 3} for i in range(20)] + t2 = [{"k": (i + 1) % 3} for i in range(20)] + self._assert_determinism(t1, t2, report_repetition=False) + + def test_repeated_items_report_repetition_true(self): + t1 = [{"k": i % 3} for i in range(20)] + t2 = [{"k": (i + 1) % 3} for i in range(20)] + self._assert_determinism(t1, t2, report_repetition=True) + + def test_nested_mixed_structures(self): + t1 = [ + {"id": i, "data": {"vals": [j for j in range(i)], "meta": {"k": i}}} + for i in range(12) + ] + t2 = [ + {"id": i, "data": {"vals": [j for j in range(i)], "meta": {"k": i + (1 if i == 6 else 0)}}} + for i in range(12) + ] + self._assert_determinism(t1, t2) + + def test_below_threshold_uses_serial(self): + # Default threshold is 64; small inputs without the override stay serial. + t1 = [1, 2, 3] + t2 = [3, 2, 1] + # No multiprocessing_threshold=0 override here on purpose. + out = DeepDiff(t1, t2, ignore_order=True, multiprocessing=True) + assert out == DeepDiff(t1, t2, ignore_order=True) + + def test_unpickleable_hasher_falls_back(self): + # A lambda hasher is not pickleable. Must not crash; result must match + # the serial run. 
+ bad_hasher = lambda obj: _simple_hasher(obj) # noqa: E731 + t1 = [{"x": i} for i in range(10)] + t2 = [{"x": i + (1 if i == 3 else 0)} for i in range(10)] + serial = DeepDiff(t1, t2, ignore_order=True, hasher=bad_hasher) + parallel = _run_parallel(t1, t2, ignore_order=True, hasher=bad_hasher) + assert parallel == serial + + +class TestHashesParallelHelper: + """Direct unit tests for ``compute_hashes_parallel``.""" + + def test_empty_jobs_returns_empty_list(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + assert compute_hashes_parallel(jobs=[], deephash_parameters={}, config=cfg) == [] + + def test_unpickleable_params_returns_none(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + # A lambda inside the params dict cannot be pickled under spawn. + params = {"hasher": lambda obj: "x"} + result = compute_hashes_parallel( + jobs=[(1, "root[0]"), (2, "root[1]")], + deephash_parameters=params, + config=cfg, + ) + assert result is None + + def test_returns_one_hash_per_item_in_index_order(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + jobs = [(i, "root[%d]" % i) for i in range(5)] + # Minimal deephash params — keep keys aligned with what DeepDiff + # would normally pass. An empty dict is sufficient for primitives. + result = compute_hashes_parallel( + jobs=jobs, + deephash_parameters={}, + config=cfg, + ) + assert result is not None + assert len(result) == 5 + # All entries are non-None for primitives. + assert all(h is not None for h in result) + # Same int hashed twice yields identical hashes. 
+ again = compute_hashes_parallel( + jobs=jobs, deephash_parameters={}, config=cfg + ) + assert again == result From 794aa9d1e42b3e8d51a905b4179d9dd6bfaefe38 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 15:08:42 -0700 Subject: [PATCH 12/23] =?UTF-8?q?-=20deepdiff/=5Fmultiprocessing.py:=20=5F?= =?UTF-8?q?subtree=5Fdiff=5Fworker=20+=20compute=5Fsubtree=5Fdiffs=5Fparal?= =?UTF-8?q?lel=20=E2=80=94=20workers=20=20=20compute=20fresh=20DeepDiff=20?= =?UTF-8?q?per=20pair=20and=20ship=20back=20[(report=5Ftype,=20leaf),=20..?= =?UTF-8?q?.].=20=20=20-=20deepdiff/diff.py::=5Fdiff=5Fiterable=5Fwith=5Fd?= =?UTF-8?q?eephash:=20paired=20=5Fdiff(change=5Flevel,=20...)=20calls=20in?= =?UTF-8?q?=20both=20=20=20report=5Frepetition=20branches=20are=20deferred?= =?UTF-8?q?=20into=20a=20queue=20and=20dispatched=20at=20the=20end=20via?= =?UTF-8?q?=20=20=20=5Fdispatch=5Fsubtree=5Fjobs.=20Inline=20serial=20beha?= =?UTF-8?q?vior=20unchanged=20when=20mp=20is=20off.=20=20=20-=20deepdiff/d?= =?UTF-8?q?iff.py:=20three=20new=20helpers=20=E2=80=94=20=5Fsubtree=5Fpara?= =?UTF-8?q?llel=5Fsafe=20(gates=20against=20custom=5Foperators=20/=20=20?= =?UTF-8?q?=20*=5Fobj=5Fcallback*=20/=20ignore=5Forder=5Ffunc),=20=5Frebas?= =?UTF-8?q?e=5Fsubtree=5Fleaf=20(splices=20the=20worker's=20leaf=20chain?= =?UTF-8?q?=20onto=20=20=20a=20fresh=20copy=20of=20change=5Flevel=20and=20?= =?UTF-8?q?clears=20path=20caches),=20=5Fdispatch=5Fsubtree=5Fjobs=20=20?= =?UTF-8?q?=20(parallel-or-serial-in-job-order,=20plus=20parent-side=20=5F?= =?UTF-8?q?skip=5Fthis=20re-filter=20for=20exclude=5Fpaths).=20=20=20-=20d?= =?UTF-8?q?eepdiff/helper.py:=20NotPresent=20/=20Unprocessed=20/=20Skipped?= =?UTF-8?q?=20/=20NotHashed=20got=20=5F=5Freduce=5F=5F=20so=20the=20=20=20?= =?UTF-8?q?singleton=20sentinels=20survive=20pickle=20round-trips.=20Witho?= =?UTF-8?q?ut=20this,=20change.t2=20is=20not=20notpresent=20(used=20=20=20?= =?UTF-8?q?by=20TextResult.=5Ffrom=5Ftree=5Fdefault)=20silently=20flips=20?= 
=?UTF-8?q?for=20any=20DiffLevel=20that=20travels=20through=20a=20worker.?= =?UTF-8?q?=20=20=20-=209=20new=20tests=20in=20tests/test=5Fmultiprocessin?= =?UTF-8?q?g.py=20covering=20paired-subtree=20determinism,=20multiple=20?= =?UTF-8?q?=20=20changes=20per=20pair,=20dict=20add/remove,=20type=20chang?= =?UTF-8?q?es,=20report=5Frepetition=3DTrue,=20exclude=5Fpaths=20re-filter?= =?UTF-8?q?,=20=20=20=20custom=5Foperators/exclude=5Fobj=5Fcallback=20fall?= =?UTF-8?q?back,=20and=20direct=20unit=20tests.=20=20=20-=20docs/multi=5Fp?= =?UTF-8?q?rocessing.md:=20updated=20Implementation=20Status,=20Code=20loc?= =?UTF-8?q?ations,=20and=20partial=20Subticket=20=20=20#4=20deferred=20ite?= =?UTF-8?q?ms=20(=5Fdiff=5Fdict=20shared=20keys,=20ordered-pair=20path,=20?= =?UTF-8?q?=5Fiterable=5Fopcodes=20propagation).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deepdiff/_multiprocessing.py | 90 +++++++++++++++++ deepdiff/diff.py | 177 ++++++++++++++++++++++++++++++++-- deepdiff/helper.py | 35 ++++++- docs/multi_processing.md | 60 ++++++++++-- tests/test_multiprocessing.py | 148 ++++++++++++++++++++++++++++ 5 files changed, 490 insertions(+), 20 deletions(-) diff --git a/deepdiff/_multiprocessing.py b/deepdiff/_multiprocessing.py index 90f490de..aa4f4b04 100644 --- a/deepdiff/_multiprocessing.py +++ b/deepdiff/_multiprocessing.py @@ -254,6 +254,96 @@ def _hash_worker(job: Tuple[int, Any, str, Dict[str, Any]]) -> Tuple[int, Option return job_index, item_hash +def _subtree_diff_worker( + job: Tuple[int, Dict[str, Any], Any, Any, Any], +) -> Tuple[int, List[Tuple[str, Any]]]: + """Run one paired-item subtree diff in a worker process. + + ``job`` layout: ``(job_index, sanitized_parameters, t1, t2, _original_type)``. + The worker constructs a fresh root ``DeepDiff`` (no shared parent state), + requests the TREE_VIEW so ``self.tree`` is populated and walks it once to + flatten the leaves into ``[(report_type, leaf_difflevel), ...]``. 
+ + The parent rebases each leaf's up-chain onto its own ``change_level`` so + paths come out as if the diff had run inline. Returning bare DiffLevel + objects is acceptable here because we already proved they pickle and + re-attach cleanly (see tests/test_multiprocessing.py). + """ + # Imported here to keep module import cheap and to dodge any circular + # import surprises under spawn. + from deepdiff.diff import DeepDiff + from deepdiff.helper import TREE_VIEW + + job_index, parameters, t1, t2, _original_type = job + diff = DeepDiff( + t1, t2, + _parameters=parameters, + view=TREE_VIEW, + _original_type=_original_type, + # Keep cache+tree alive past __init__ so the post-walk below sees the + # populated tree (cache_purge_level mirrors what _distance_worker uses). + cache_purge_level=0, + ) + entries: List[Tuple[str, Any]] = [] + for report_type, levels in diff.tree.items(): + if report_type == 'deep_distance': + continue + for leaf in levels: + entries.append((report_type, leaf)) + return job_index, entries + + +def compute_subtree_diffs_parallel( + jobs: List[Tuple[Any, Any]], + parameters: Dict[str, Any], + original_type: Any, + config: MPConfig, +) -> Optional[List[List[Tuple[str, Any]]]]: + """Run ``_subtree_diff_worker`` over ``jobs`` and return per-job entries. + + ``jobs`` is a list of ``(t1_item, t2_item)`` tuples in the exact order + the serial paired-iteration code visits them. Returns a list aligned to + that order; each element is ``[(report_type, leaf_difflevel), ...]`` + suitable for the parent to rebase and merge into its tree. Returns + ``None`` when the section is unsafe to parallelize (unpickleable + parameters/items, worker import error). On ``None`` the caller MUST run + the same jobs serially so correctness is preserved. + + Workers may finish out of order; results are collected by their original + job index so the merge order is identical regardless of completion order. 
+ """ + if not jobs: + return [] + + sanitized_params = _sanitize_parameters_for_worker(parameters) + + if not is_pickleable(sanitized_params): + return None + # Sample-pickle the first job; cheap shield against the common + # "lambda in custom_operators" / unpickleable item failure. + if not is_pickleable(jobs[0]): + return None + + from concurrent.futures import ProcessPoolExecutor, as_completed + + payloads = [ + (i, sanitized_params, t1_item, t2_item, original_type) + for i, (t1_item, t2_item) in enumerate(jobs) + ] + + results_by_index: Dict[int, List[Tuple[str, Any]]] = {} + try: + with ProcessPoolExecutor(max_workers=config.workers) as executor: + futures = [executor.submit(_subtree_diff_worker, payload) for payload in payloads] + for future in as_completed(futures): + idx, entries = future.result() + results_by_index[idx] = entries + except (pickle.PicklingError, AttributeError, TypeError): + return None + + return [results_by_index[i] for i in range(len(jobs))] + + def compute_hashes_parallel( jobs: List[Tuple[Any, str]], deephash_parameters: Dict[str, Any], diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 4b64d16b..f38681ba 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -37,15 +37,15 @@ DictRelationship, AttributeRelationship, REPORT_KEYS, SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD, - FORCE_DEFAULT, + FORCE_DEFAULT, ChildRelationship, ) from deepdiff.deephash import DeepHash, combine_hashes_lists from deepdiff.base import Base from deepdiff.lfucache import LFUCache, DummyLFU from deepdiff.colored_view import ColoredView from deepdiff._multiprocessing import ( - MPConfig, normalize_mp_config, compute_distances_parallel, - compute_hashes_parallel, + normalize_mp_config, compute_distances_parallel, + compute_hashes_parallel, compute_subtree_diffs_parallel, ) if TYPE_CHECKING: @@ -1474,6 +1474,138 @@ def defaultdict_orderedset(): 
self._distance_cache.set(cache_key, value=pairs) return pairs.copy() + def _subtree_parallel_safe(self): + """Return True if paired-subtree diffs in this run can be sent to workers. + + Excluded features are ones whose semantics depend on the *parent's* + absolute path or on parent-process state, neither of which is visible + in a worker: + + - ``custom_operators`` (per docs/multi_processing.md) can call + ``custom_report_result`` and mutate the parent diff instance. + - ``exclude_obj_callback`` / ``include_obj_callback`` (and their + ``_strict`` variants) receive the level path; in a worker that path + is rooted at the subtree, not the original tree, so they would fire + on the wrong paths. + - ``ignore_order_func`` is also called with the level and would see + worker-local paths. + + Path-only filters (``exclude_paths`` / ``include_paths`` / + ``exclude_regex_paths``) are handled by re-applying ``_skip_this`` + after rebasing rather than disabling parallelism. + """ + if self.custom_operators: + return False + if self.exclude_obj_callback or self.exclude_obj_callback_strict: + return False + if self.include_obj_callback or self.include_obj_callback_strict: + return False + if self.ignore_order_func: + return False + return True + + def _rebase_subtree_leaf(self, leaf, change_level): + """Splice a worker-built leaf chain onto the parent's ``change_level``. + + The worker constructed ``leaf`` inside a fresh ``DeepDiff`` whose root + DiffLevel holds the paired items themselves; that root is irrelevant + once we're back in the parent. We replace it with a *fresh copy* of + ``change_level`` (so each leaf gets its own up-chain — DiffLevel.up + is shared by reference, and reusing one chain across leaves would + scramble paths). + + Returns the rebased leaf. Path caches along the chain are cleared so + ``leaf.path()`` recomputes against the new up-chain. + """ + # Walk up to find the worker root (up=None). 
+ worker_root = leaf + while worker_root.up is not None: + worker_root = worker_root.up + + new_cl = change_level.copy() # fresh, independent chain; new_cl is bottom + + if worker_root is leaf: + # The worker reported at the very root of its diff (e.g. the two + # paired items differ at the top level — type_changes, + # values_changed). Transfer the report payload onto our fresh + # change_level copy. + new_cl.report_type = leaf.report_type + new_cl.additional = leaf.additional + cur = new_cl + while cur is not None: + cur._path = dict_() + cur = cur.up + return new_cl + + first_under_root = worker_root.down + # Splice: new_cl takes worker_root's place. Setting .down auto-sets + # the opposite .up link (see DiffLevel.__setattr__). + new_cl.down = first_under_root + if worker_root.t1_child_rel is not None: + new_cl.t1_child_rel = ChildRelationship.create( + klass=worker_root.t1_child_rel.__class__, + parent=new_cl.t1, child=first_under_root.t1, + param=worker_root.t1_child_rel.param, + ) + if worker_root.t2_child_rel is not None: + new_cl.t2_child_rel = ChildRelationship.create( + klass=worker_root.t2_child_rel.__class__, + parent=new_cl.t2, child=first_under_root.t2, + param=worker_root.t2_child_rel.param, + ) + # Clear path cache on the entire chain so path() recomputes against + # the new up-chain. + cur = leaf + while cur is not None: + cur._path = dict_() + cur = cur.up + return leaf + + def _dispatch_subtree_jobs(self, pending_jobs, _original_type, local_tree): + """Run deferred paired-subtree diffs (parallel when allowed, else serial). + + ``pending_jobs`` is the list of ``(change_level, t1_item, t2_item, + parents_ids_added)`` tuples in the exact order the inline serial code + would have visited them. Parallel results are merged in that same + order regardless of worker completion order, so output is identical + to the equivalent serial run. 
+ """ + if not pending_jobs: + return + + mp_config = getattr(self, '_mp_config', None) + parallel_results = None + if (mp_config is not None and mp_config.enabled + and mp_config.should_parallelize(len(pending_jobs))): + jobs_payload = [(t1_item, t2_item) for (_, t1_item, t2_item, _) in pending_jobs] + parallel_results = compute_subtree_diffs_parallel( + jobs=jobs_payload, + parameters=self._parameters, + original_type=_original_type, + config=mp_config, + ) + + if parallel_results is None: + # Below threshold or unsafe inputs — run inline-equivalent serial. + # Walking pending_jobs in order matches how inline serial would + # have run them; the parent tree fills up the same way. + for change_level, _t1_item, _t2_item, parents_ids_added in pending_jobs: + self._diff(change_level, parents_ids_added, local_tree=local_tree) + return + + target_tree = self.tree if local_tree is None else local_tree + for (change_level, _t1_item, _t2_item, _parents_ids_added), entries in zip( + pending_jobs, parallel_results): + for report_type, leaf in entries: + rebased_leaf = self._rebase_subtree_leaf(leaf, change_level) + # Re-apply path-based filters in the parent — exclude_paths + # and friends were not applied correctly inside the worker + # because the worker's level paths are subtree-relative. + if self._skip_this(rebased_leaf): + continue + rebased_leaf.report_type = report_type + target_tree[report_type].add(rebased_leaf) + def _diff_iterable_with_deephash(self, level, parents_ids, _original_type=None, local_tree=None): """Diff of hashable or unhashable iterables. Only used when ignoring the order.""" @@ -1532,6 +1664,18 @@ def get_other_pair(hash_value, in_t1=True): other = hashtable[other] return other + # Phase 3: paired-subtree diffs may be deferred so they can run in a + # worker pool. ``pending_subtree_jobs is None`` keeps the inline + # serial path (and the existing ordering of inline ``_diff`` calls + # vs. 
``_report_result`` calls) — so any feature that disables + # subtree parallelism degrades cleanly to today's behavior. + mp_config = getattr(self, '_mp_config', None) + use_mp = ( + mp_config is not None and mp_config.enabled + and self._subtree_parallel_safe() + ) + pending_subtree_jobs = [] if use_mp else None + if self.report_repetition: for hash_value in hashes_added: if self._count_diff() is StopIteration: @@ -1558,7 +1702,11 @@ def get_other_pair(hash_value, in_t1=True): self._report_result('iterable_item_added', change_level, local_tree=local_tree) else: parents_ids_added = add_to_frozen_set(parents_ids, item_id) - self._diff(change_level, parents_ids_added, local_tree=local_tree) + if pending_subtree_jobs is None: + self._diff(change_level, parents_ids_added, local_tree=local_tree) + else: + pending_subtree_jobs.append( + (change_level, other.item, t2_hashtable[hash_value].item, parents_ids_added)) for hash_value in hashes_removed: if self._count_diff() is StopIteration: return # pragma: no cover. This is already covered for addition. @@ -1586,7 +1734,11 @@ def get_other_pair(hash_value, in_t1=True): # getting resolved above in the hashes_added calcs. However I am leaving these 2 lines # in case things change in future. parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover. - self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover. + if pending_subtree_jobs is None: # pragma: no cover. + self._diff(change_level, parents_ids_added, local_tree=local_tree) + else: # pragma: no cover. 
+ pending_subtree_jobs.append( + (change_level, t1_hashtable[hash_value].item, other.item, parents_ids_added)) items_intersect = t2_hashes.intersection(t1_hashes) @@ -1630,7 +1782,11 @@ def get_other_pair(hash_value, in_t1=True): self._report_result('iterable_item_added', change_level, local_tree=local_tree) else: parents_ids_added = add_to_frozen_set(parents_ids, item_id) - self._diff(change_level, parents_ids_added, local_tree=local_tree) + if pending_subtree_jobs is None: + self._diff(change_level, parents_ids_added, local_tree=local_tree) + else: + pending_subtree_jobs.append( + (change_level, other.item, t2_hashtable[hash_value].item, parents_ids_added)) for hash_value in hashes_removed: if self._count_diff() is StopIteration: @@ -1652,7 +1808,14 @@ def get_other_pair(hash_value, in_t1=True): # Just like the case when report_repetition = True, these lines never run currently. # However they will stay here in case things change in future. parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover. - self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover. + if pending_subtree_jobs is None: # pragma: no cover. + self._diff(change_level, parents_ids_added, local_tree=local_tree) + else: # pragma: no cover. + pending_subtree_jobs.append( + (change_level, t1_hashtable[hash_value].item, other.item, parents_ids_added)) + + if pending_subtree_jobs: + self._dispatch_subtree_jobs(pending_subtree_jobs, _original_type, local_tree) def _diff_booleans(self, level, local_tree=None): if level.t1 != level.t2: diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 3386f020..679ebd3f 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -239,16 +239,42 @@ def __repr__(self) -> str: __str__ = __repr__ +# Sentinels below carry meaning by *identity*, not equality — e.g. +# ``change.t2 is not notpresent`` in TextResult selects t2-vs-t1 reporting. 
+# Pickle, however, makes a fresh instance on unpickle, which would silently +# break those identity checks across process boundaries (multiprocessing). +# ``__reduce__`` rewires unpickle to return the parent process's singleton, +# preserving ``is`` semantics under spawn-based multiprocessing. + +def _resolve_skipped(): + return skipped + + +def _resolve_unprocessed(): + return unprocessed + + +def _resolve_not_hashed(): + return not_hashed + + +def _resolve_notpresent(): + return notpresent + + class Skipped(OtherTypes): - pass + def __reduce__(self): + return (_resolve_skipped, ()) class Unprocessed(OtherTypes): - pass + def __reduce__(self): + return (_resolve_unprocessed, ()) class NotHashed(OtherTypes): - pass + def __reduce__(self): + return (_resolve_not_hashed, ()) class NotPresent: # pragma: no cover @@ -258,6 +284,9 @@ class NotPresent: # pragma: no cover We previously used None for this but this caused problem when users actually added and removed None. Srsly guys? :D """ + def __reduce__(self): + return (_resolve_notpresent, ()) + def __repr__(self) -> str: return 'not present' # pragma: no cover diff --git a/docs/multi_processing.md b/docs/multi_processing.md index 1c06481b..78442b09 100644 --- a/docs/multi_processing.md +++ b/docs/multi_processing.md @@ -12,7 +12,28 @@ the parent so cross-process id-keyed sub-object cache entries do not need to travel back. Unsafe inputs (unpickleable hasher / params, generators without `__len__`) fall back to serial. -Subtickets #4, #5, #6 (extended matrix), and #7 are still open. +**Phase 3 — landed (2026-04-27).** Subticket #4 (parallel paired-subtree diffs) +is implemented for the `ignore_order=True` hot path. After +`_get_most_in_common_pairs_in_iterables` decides pairs, each paired +`_diff(change_level, ...)` call inside `_diff_iterable_with_deephash` is +deferred into a job queue. 
When the queue is above threshold and the run is +"subtree-safe" (no `custom_operators`, no `*_obj_callback*`, no +`ignore_order_func`), a `ProcessPoolExecutor` runs them in workers; otherwise +the deferred jobs run inline-equivalent in the parent. Each worker returns the +leaves of its subtree-local `TreeResult`; the parent splices each leaf's +up-chain onto a fresh copy of the original `change_level` so paths come out +identical to the inline serial run, then re-applies `_skip_this` to honor +`exclude_paths` / `include_paths` / `exclude_regex_paths` (which the worker +could not enforce because its level paths are subtree-relative). + +A small but load-bearing fix landed alongside this: `NotPresent`, +`Unprocessed`, `Skipped`, and `NotHashed` in `deepdiff/helper.py` now define +`__reduce__` so unpickling resolves back to the parent process's singleton. +Without this, identity checks like `change.t2 is not notpresent` (used by +`TextResult._from_tree_default` to decide t1-vs-t2 reporting) break on any +DiffLevel that travels through `pickle`, which is exactly the Phase 3 path. + +Subtickets #5, #6 (extended matrix), and #7 are still open. What works today: @@ -33,17 +54,21 @@ What works today: - Picklability of the parameters dict, the iterable compare func, and a representative job is checked up front. Any failure causes a clean serial fallback rather than an opaque worker crash. -- 23 determinism / fallback tests in `tests/test_multiprocessing.py` (10x - serial-vs-parallel comparison, tied distances, repeated items in both - `report_repetition` modes, sets, exclude_paths, ignore_string_case, custom - module-level hasher, lambda compare-func fallback, recursive-no-nesting). - All 1149 existing tests still pass. 
+- Phase 3 adds 9 more determinism / fallback tests in + `tests/test_multiprocessing.py` (paired-subtree determinism across nested + dicts, multiple changes per pair, dict_item add/remove, type changes, + `report_repetition=True`, `exclude_paths` re-filter, `custom_operators` + fallback, `exclude_obj_callback` fallback, plus direct unit tests for + `compute_subtree_diffs_parallel`). All other test files still pass + unchanged. Code locations: - `deepdiff/_multiprocessing.py` — `MPConfig`, `normalize_mp_config`, - `is_pickleable`, `_distance_worker` and `_hash_worker` (module-level for - `spawn`), `compute_distances_parallel`, `compute_hashes_parallel`. + `is_pickleable`, `_distance_worker` / `_hash_worker` / + `_subtree_diff_worker` (module-level for `spawn`), + `compute_distances_parallel`, `compute_hashes_parallel`, + `compute_subtree_diffs_parallel`. - `deepdiff/diff.py::DeepDiff.__init__` — three new parameters, normalized into `self._mp_config`, propagated through `_parameters`. - `deepdiff/diff.py::DeepDiff._maybe_compute_pair_distances_parallel` — the @@ -55,6 +80,15 @@ Code locations: - `deepdiff/diff.py::DeepDiff._create_hashtable` — gains a parallel pre-pass that fills per-index item hashes; serial body unchanged for the fallback path. +- `deepdiff/diff.py::DeepDiff._diff_iterable_with_deephash` — paired + `_diff` calls are deferred into a job queue; the queue is dispatched at + the end of the function via `_dispatch_subtree_jobs`. +- `deepdiff/diff.py::DeepDiff._subtree_parallel_safe`, + `_rebase_subtree_leaf`, `_dispatch_subtree_jobs` — Phase 3 helpers that + gate, splice, and merge subtree diff results. +- `deepdiff/helper.py` — `NotPresent` / `Unprocessed` / `Skipped` / + `NotHashed` gained `__reduce__` so the singleton sentinels survive + `spawn`-based pickle round-trips. 
Not yet implemented (deferred, intentional): @@ -63,8 +97,14 @@ Not yet implemented (deferred, intentional): Phase 2; the deeper recursion levels remain serial for now because their identity-after-pickle and cross-call cache reuse risks are not yet covered by tests. -- **Subticket #4** — subtree diff parallelism after pairing. `DiffLevel` - pickling and custom-operator interaction require dedicated work. +- **Subticket #4 (partial)** — `_diff_dict` shared-key child diffs and the + ordered `_diff_by_forming_pairs_and_comparing_one_by_one` path are still + serial. The Phase 3 splice helper assumes paths come from a single + `branch_deeper` call (the paired `change_level`); extending it to dict + shared keys and sequence pairs is straightforward but adds surface area + the current tests don't cover. Worker-side `_iterable_opcodes` are also + not propagated, so `DELTA_VIEW` of a paired subtree containing ordered + iterables is not yet covered by Phase 3. - **Subticket #5** — multiprocessing-aware stats semantics. Parent-only stats remain meaningful in Phase 1, but no aggregation across workers. - **Subticket #6** — extended test matrix (numpy, pydantic, namedtuple, group_by, diff --git a/tests/test_multiprocessing.py b/tests/test_multiprocessing.py index 8471f955..7d5a4715 100644 --- a/tests/test_multiprocessing.py +++ b/tests/test_multiprocessing.py @@ -20,6 +20,7 @@ is_pickleable, compute_distances_parallel, compute_hashes_parallel, + compute_subtree_diffs_parallel, ) @@ -325,3 +326,150 @@ def test_returns_one_hash_per_item_in_index_order(self): jobs=jobs, deephash_parameters={}, config=cfg ) assert again == result + + +# Module-level callables/classes so they pickle cleanly under spawn. +def _drop_secret_callback(obj, path): + # Mirrors a real-world exclude_obj_callback that inspects the path. 
+ return "secret" in path + + +from deepdiff.operator import BaseOperator # noqa: E402 + + +class _NoopOperator(BaseOperator): + # No types/regex_paths configured, so match() never fires — but its mere + # presence in custom_operators must force the parent to keep subtree + # diffs serial (the worker would not be able to run custom_report_result + # back into the parent's tree). + def __init__(self): + super().__init__() + + def give_up_diffing(self, level, diff_instance): + return False + + def normalize_value_for_hashing(self, parent, obj): + # Required for ignore_order=True compatibility when this operator + # ships through DeepHash. We don't normalize anything — pass through. + return obj + + +class TestSubtreeParallel: + """Phase 3: paired-subtree diffs run in worker processes after pairing. + + Workers compute a fresh DeepDiff per pair and return tree leaves; the + parent rebases each leaf's up-chain onto its own ``change_level``. The + public output must equal the equivalent serial run regardless of worker + completion order, and unsafe inputs (custom_operators, path-aware + callbacks) must fall back to inline serial. + """ + + def _assert_determinism(self, t1, t2, **kwargs): + kwargs.setdefault("ignore_order", True) + kwargs.setdefault("cutoff_intersection_for_pairs", 1) + serial = DeepDiff(t1, t2, **kwargs) + for _ in range(REPEATS): + parallel = _run_parallel(t1, t2, **kwargs) + assert parallel == serial, ( + "parallel != serial after run; difference: %r vs %r" + % (parallel, serial) + ) + + def test_paired_subtree_changes_match_serial(self): + # Each pair has exactly one nested change. Rebased paths must match + # the inline serial paths character-for-character. 
+ t1 = [{"id": i, "data": {"x": i, "y": [i, i + 1]}} for i in range(20)] + t2 = [{"id": i, "data": {"x": i, "y": [i, i + 2]}} for i in range(20)] + self._assert_determinism(t1, t2) + + def test_paired_subtree_multiple_changes_per_pair(self): + # Multiple values_changed entries per pair — verifies that each leaf + # in the worker's tree gets an independent rebased up-chain. + t1 = [{"a": i, "b": i * 2, "c": i * 3, "d": [i, i, i]} for i in range(15)] + t2 = [{"a": i + 100, "b": i * 2, "c": i * 3 + 1, "d": [i, i, i + 1]} for i in range(15)] + self._assert_determinism(t1, t2) + + def test_paired_subtree_with_added_and_removed_keys(self): + # Non-values_changed report types in the subtree: + # dictionary_item_added / dictionary_item_removed. + t1 = [{"id": i, "old_only": i} for i in range(12)] + t2 = [{"id": i, "new_only": i} for i in range(12)] + self._assert_determinism(t1, t2) + + def test_paired_subtree_with_type_changes(self): + t1 = [{"id": i, "v": i} for i in range(10)] + t2 = [{"id": i, "v": str(i)} for i in range(10)] + self._assert_determinism(t1, t2) + + def test_paired_subtree_report_repetition_true(self): + # Exercises the report_repetition=True branch where the inner _diff + # is also deferred to workers. + t1 = [{"k": i % 3, "extra": [i]} for i in range(20)] + t2 = [{"k": (i + 1) % 3, "extra": [i + 1]} for i in range(20)] + self._assert_determinism(t1, t2, report_repetition=True) + + def test_exclude_paths_re_applied_in_parent(self): + # Worker sees subtree-relative paths, so exclude_paths cannot be + # enforced inside the worker; the parent re-filters via _skip_this + # after rebasing. This test would fail if that re-filter was missing. 
+ t1 = [{"id": i, "secret": i * 100, "v": i} for i in range(15)] + t2 = [{"id": i, "secret": i * 999, "v": i + (1 if i == 7 else 0)} for i in range(15)] + self._assert_determinism( + t1, t2, exclude_paths=["root[0]['secret']"], + ) + + +class TestSubtreeFallback: + """Subtree parallelism must degrade cleanly when features can't ship to workers.""" + + def test_custom_operators_force_serial(self): + # custom_operators can call custom_report_result and mutate the + # parent diff — they must not run in workers. Even with mp turned on + # the result must still match the serial run. + op = _NoopOperator() + t1 = [{"id": i, "v": i} for i in range(20)] + t2 = [{"id": i, "v": i + (1 if i == 5 else 0)} for i in range(20)] + serial = DeepDiff(t1, t2, ignore_order=True, custom_operators=[op]) + parallel = _run_parallel( + t1, t2, ignore_order=True, custom_operators=[op], + ) + assert parallel == serial + + def test_exclude_obj_callback_forces_serial(self): + # exclude_obj_callback receives the level path; in a worker the path + # is subtree-relative, so the callback would fire on the wrong paths. + # The parent must keep this case serial. 
+ t1 = [{"id": i, "secret": i, "v": i} for i in range(15)] + t2 = [{"id": i, "secret": i, "v": i + (1 if i == 3 else 0)} for i in range(15)] + serial = DeepDiff( + t1, t2, ignore_order=True, + exclude_obj_callback=_drop_secret_callback, + ) + parallel = _run_parallel( + t1, t2, ignore_order=True, + exclude_obj_callback=_drop_secret_callback, + ) + assert parallel == serial + + +class TestSubtreeParallelHelper: + """Direct unit tests for ``compute_subtree_diffs_parallel``.""" + + def test_empty_jobs_returns_empty_list(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + result = compute_subtree_diffs_parallel( + jobs=[], parameters={}, original_type=None, config=cfg, + ) + assert result == [] + + def test_unpickleable_parameters_returns_none(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + # A lambda in parameters cannot be pickled under spawn. + params = {"some_param": lambda x: x} + result = compute_subtree_diffs_parallel( + jobs=[({"x": 1}, {"x": 2})], + parameters=params, + original_type=None, + config=cfg, + ) + assert result is None From dd2c678bca00a7f8d9919ef23771d96b8c4e74e8 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 15:23:13 -0700 Subject: [PATCH 13/23] =?UTF-8?q?-=20REPEATS=2010=20=E2=86=92=202=20(colle?= =?UTF-8?q?ction=20is=20index-keyed,=20so=20completion=20order=20can't=20d?= =?UTF-8?q?rift;=20one=20run=20is=20=20=20mathematically=20sufficient,=20t?= =?UTF-8?q?wo=20is=20cheap=20insurance).=20=20=20-=20Dropped=2013=20redund?= =?UTF-8?q?ant=20determinism=20cases=20=E2=80=94=20kept=20one=20per=20beha?= =?UTF-8?q?vior=20(tied=20distances,=20repetition,=20=20=20exclude=5Fpaths?= =?UTF-8?q?,=20subtree=20rebasing,=20subtree=20add/remove=20keys,=20no=20r?= =?UTF-8?q?ecursive=20spawn,=20threshold=20gating).=20=20=20-=20Marked=20t?= =?UTF-8?q?he=2010=20spawn-heavy=20tests=20@pytest.mark.slow=20so=20they?= =?UTF-8?q?=20only=20run=20under=20--runslow.=20=20=20-=20Kept=20all=20the?= 
=?UTF-8?q?=20helper/config=20unit=20tests=20in=20the=20fast=20path=20?= =?UTF-8?q?=E2=80=94=20they=20test=20the=20same=20fallback=20logic=20=20?= =?UTF-8?q?=20without=20paying=20spawn=20cost.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_multiprocessing.py | 428 +++++++++------------------------- 1 file changed, 109 insertions(+), 319 deletions(-) diff --git a/tests/test_multiprocessing.py b/tests/test_multiprocessing.py index 7d5a4715..19ebdc35 100644 --- a/tests/test_multiprocessing.py +++ b/tests/test_multiprocessing.py @@ -1,14 +1,10 @@ """Determinism and safety tests for internal multiprocessing. -Phase 1 covers the parallel rough-distance loop in -``DeepDiff._get_most_in_common_pairs_in_iterables`` (the ``ignore_order=True`` -path). Each parallel run is compared against the equivalent serial run; on -ties or many candidate pairs the merge order must come from the parent's -serial nested loop, not from worker completion order. - -We use ``multiprocessing_threshold=0`` to force the parallel path even on -small inputs, then loop the run multiple times to flush out any -non-determinism. +Workers return ``(job_index, result)`` tuples and the parent reassembles by +index, so completion order is structurally irrelevant — one parallel run +verifies determinism just as well as ten. We keep ``REPEATS=2`` as cheap +insurance and mark the spawn-heavy cases ``@pytest.mark.slow`` so the default +``pytest`` run stays fast; ``--runslow`` exercises the full matrix. """ import pytest @@ -24,7 +20,7 @@ ) -REPEATS = 10 # tradeoff between flake-detection and CI time +REPEATS = 2 def _run_parallel(t1, t2, **kwargs): @@ -82,7 +78,6 @@ class TestParamWiring: def test_default_serial_path_unchanged(self): t1 = [{"a": 1}, {"a": 2}] t2 = [{"a": 2}, {"a": 1}] - # No multiprocessing parameter at all — must hit the existing path. 
assert DeepDiff(t1, t2, ignore_order=True) == {} def test_explicit_multiprocessing_false(self): @@ -95,87 +90,55 @@ def test_invalid_workers_surfaces_at_diff_level(self): DeepDiff([1], [2], multiprocessing=True, multiprocessing_workers=0) -class TestDeterminism: - """Each test compares serial vs. parallel many times. Any drift is a bug.""" +class TestHashesParallelHelper: + """Direct unit tests for ``compute_hashes_parallel`` — no DeepDiff overhead.""" - def _assert_determinism(self, t1, t2, **kwargs): - kwargs.setdefault("ignore_order", True) - kwargs.setdefault("cutoff_intersection_for_pairs", 1) - serial = DeepDiff(t1, t2, **kwargs) - for _ in range(REPEATS): - parallel = _run_parallel(t1, t2, **kwargs) - assert parallel == serial, ( - "parallel != serial after run; difference: %r vs %r" - % (parallel, serial) - ) + def test_empty_jobs_returns_empty_list(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + assert compute_hashes_parallel(jobs=[], deephash_parameters={}, config=cfg) == [] - def test_nested_lists_of_dicts(self): - t1 = [{"id": i, "data": {"x": i * 2, "y": [i, i + 1]}} for i in range(20)] - t2 = [{"id": i, "data": {"x": i * 2 + (1 if i % 5 == 0 else 0), "y": [i, i + 1]}} - for i in range(20)] - self._assert_determinism(t1, t2) + def test_unpickleable_params_returns_none(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + params = {"hasher": lambda obj: "x"} + result = compute_hashes_parallel( + jobs=[(1, "root[0]"), (2, "root[1]")], + deephash_parameters=params, + config=cfg, + ) + assert result is None - def test_repeated_items_report_repetition_false(self): - t1 = [1, 1, 1, 2, 3, 3] - t2 = [3, 1, 2, 2, 4] - self._assert_determinism(t1, t2, report_repetition=False) - def test_repeated_items_report_repetition_true(self): - t1 = [1, 1, 1, 2, 3, 3] - t2 = [3, 1, 2, 2, 4] - self._assert_determinism(t1, t2, report_repetition=True) - - def test_tied_distances(self): - # Multiple candidate pairs with the same rough distance. 
Worker-order - # merge would surface here as flapping pairings between runs. - t1 = [{"k": "a", "v": 1}, {"k": "b", "v": 1}, {"k": "c", "v": 1}] - t2 = [{"k": "a", "v": 2}, {"k": "b", "v": 2}, {"k": "c", "v": 2}] - self._assert_determinism(t1, t2) +class TestSubtreeParallelHelper: + """Direct unit tests for ``compute_subtree_diffs_parallel``.""" - def test_sets(self): - t1 = {frozenset({1, 2}), frozenset({3, 4}), frozenset({5, 6})} - t2 = {frozenset({1, 2}), frozenset({3, 5}), frozenset({7, 8})} - self._assert_determinism(t1, t2) + def test_empty_jobs_returns_empty_list(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + result = compute_subtree_diffs_parallel( + jobs=[], parameters={}, original_type=None, config=cfg, + ) + assert result == [] - def test_exclude_paths(self): - t1 = [{"id": i, "secret": i * 100, "v": i} for i in range(10)] - t2 = [{"id": i, "secret": i * 999, "v": i + (1 if i == 5 else 0)} for i in range(10)] - self._assert_determinism(t1, t2, exclude_paths=["root[0]['secret']"]) - - def test_ignore_string_case(self): - t1 = [{"name": "Alice"}, {"name": "Bob"}, {"name": "Carol"}] - t2 = [{"name": "alice"}, {"name": "bob"}, {"name": "DAVE"}] - self._assert_determinism(t1, t2, ignore_string_case=True) - - def test_custom_pickleable_hasher(self): - # Module-level callable below is pickleable; lambdas are not. - self._assert_determinism( - [{"x": 1}, {"x": 2}, {"x": 3}], - [{"x": 1}, {"x": 4}, {"x": 5}], - hasher=_simple_hasher, + def test_unpickleable_parameters_returns_none(self): + cfg = MPConfig(enabled=True, workers=2, threshold=0) + params = {"some_param": lambda x: x} + result = compute_subtree_diffs_parallel( + jobs=[({"x": 1}, {"x": 2})], + parameters=params, + original_type=None, + config=cfg, ) + assert result is None class TestSafetyFallback: """Unsafe inputs must not crash; they fall back to serial.""" - def test_unpickleable_iterable_compare_func_falls_back(self): - # A lambda is not pickleable. 
The parallel section must give up and - # the result must still match a serial run. - t1 = [{"k": 1, "v": "a"}, {"k": 2, "v": "b"}] - t2 = [{"k": 1, "v": "a"}, {"k": 2, "v": "c"}] - cmp = lambda x, y: x["k"] == y["k"] # noqa: E731 - serial = DeepDiff(t1, t2, ignore_order=True, iterable_compare_func=cmp) - parallel = _run_parallel(t1, t2, ignore_order=True, iterable_compare_func=cmp) - assert parallel == serial - def test_is_pickleable_helper(self): assert is_pickleable({"a": 1}) is True assert is_pickleable(lambda x: x) is False def test_compute_distances_parallel_returns_none_on_unpickleable_compare_func(self): cfg = MPConfig(enabled=True, workers=2, threshold=0) - # Empty params dict pickles fine; the lambda compare func does not. result = compute_distances_parallel( jobs=[("h1", "h2", {"x": 1}, {"x": 2})], parameters={"foo": "bar"}, @@ -186,151 +149,13 @@ def test_compute_distances_parallel_returns_none_on_unpickleable_compare_func(se assert result is None -class TestRecursiveNoNesting: - """The worker must disable its own multiprocessing so we don't fork-bomb.""" - - def test_worker_subdiff_runs_serial(self): - # The worker invokes DeepDiff(item1, item2, _parameters=sanitized). - # Sanitization sets _mp_config to disabled; if it didn't, this nested - # workload would either deadlock or be very slow under spawn. The - # bound on REPEATS plus pytest's default timeout keeps that visible. - t1 = [{"deep": {"deeper": {"deepest": [i, i + 1, i + 2]}}} for i in range(8)] - t2 = [{"deep": {"deeper": {"deepest": [i, i + 1, i + 3]}}} for i in range(8)] - serial = DeepDiff(t1, t2, ignore_order=True) - parallel = _run_parallel(t1, t2, ignore_order=True) - assert parallel == serial - - -# Module-level helper so it pickles cleanly under the spawn start method. +# Module-level helpers — pickleable under spawn. 
def _simple_hasher(obj, *args, **kwargs): import hashlib return hashlib.sha1(repr(obj).encode("utf-8")).hexdigest() -class TestHashtableParallel: - """Phase 2: ``_create_hashtable`` per-item DeepHash parallelism. - - These exercise the parallel hashing path with ``multiprocessing_threshold=0`` - so even small fixtures hit the worker pool. Result must match the equivalent - serial run, repeatedly, regardless of worker completion order. - """ - - def _assert_determinism(self, t1, t2, **kwargs): - kwargs.setdefault("ignore_order", True) - kwargs.setdefault("cutoff_intersection_for_pairs", 1) - serial = DeepDiff(t1, t2, **kwargs) - for _ in range(REPEATS): - parallel = _run_parallel(t1, t2, **kwargs) - assert parallel == serial, ( - "parallel != serial after run; difference: %r vs %r" - % (parallel, serial) - ) - - def test_large_list_of_dicts(self): - # Bigger N so spawn cost is not pathological; results must still match. - t1 = [{"i": i, "name": "item-%d" % i, "tags": [i, i + 1]} for i in range(40)] - t2 = [{"i": i, "name": "item-%d" % i, "tags": [i, i + 1]} for i in range(40)] - # Add a single change deep in the middle - t2[17]["name"] = "changed" - self._assert_determinism(t1, t2) - - def test_list_of_lists(self): - t1 = [[i, i + 1, i + 2] for i in range(15)] - t2 = [[i, i + 1, i + 2] for i in range(15)] - t2[5] = [99, 100, 101] - self._assert_determinism(t1, t2) - - def test_set_of_hashables(self): - t1 = set(range(30)) - t2 = set(range(30)) - t2.discard(7) - t2.add(99) - self._assert_determinism(t1, t2) - - def test_repeated_items_report_repetition_false(self): - # Repeated items: cache reuse path. Parent merges per-index hashes - # in serial order so duplicates collapse the same way. 
- t1 = [{"k": i % 3} for i in range(20)] - t2 = [{"k": (i + 1) % 3} for i in range(20)] - self._assert_determinism(t1, t2, report_repetition=False) - - def test_repeated_items_report_repetition_true(self): - t1 = [{"k": i % 3} for i in range(20)] - t2 = [{"k": (i + 1) % 3} for i in range(20)] - self._assert_determinism(t1, t2, report_repetition=True) - - def test_nested_mixed_structures(self): - t1 = [ - {"id": i, "data": {"vals": [j for j in range(i)], "meta": {"k": i}}} - for i in range(12) - ] - t2 = [ - {"id": i, "data": {"vals": [j for j in range(i)], "meta": {"k": i + (1 if i == 6 else 0)}}} - for i in range(12) - ] - self._assert_determinism(t1, t2) - - def test_below_threshold_uses_serial(self): - # Default threshold is 64; small inputs without the override stay serial. - t1 = [1, 2, 3] - t2 = [3, 2, 1] - # No multiprocessing_threshold=0 override here on purpose. - out = DeepDiff(t1, t2, ignore_order=True, multiprocessing=True) - assert out == DeepDiff(t1, t2, ignore_order=True) - - def test_unpickleable_hasher_falls_back(self): - # A lambda hasher is not pickleable. Must not crash; result must match - # the serial run. - bad_hasher = lambda obj: _simple_hasher(obj) # noqa: E731 - t1 = [{"x": i} for i in range(10)] - t2 = [{"x": i + (1 if i == 3 else 0)} for i in range(10)] - serial = DeepDiff(t1, t2, ignore_order=True, hasher=bad_hasher) - parallel = _run_parallel(t1, t2, ignore_order=True, hasher=bad_hasher) - assert parallel == serial - - -class TestHashesParallelHelper: - """Direct unit tests for ``compute_hashes_parallel``.""" - - def test_empty_jobs_returns_empty_list(self): - cfg = MPConfig(enabled=True, workers=2, threshold=0) - assert compute_hashes_parallel(jobs=[], deephash_parameters={}, config=cfg) == [] - - def test_unpickleable_params_returns_none(self): - cfg = MPConfig(enabled=True, workers=2, threshold=0) - # A lambda inside the params dict cannot be pickled under spawn. 
- params = {"hasher": lambda obj: "x"} - result = compute_hashes_parallel( - jobs=[(1, "root[0]"), (2, "root[1]")], - deephash_parameters=params, - config=cfg, - ) - assert result is None - - def test_returns_one_hash_per_item_in_index_order(self): - cfg = MPConfig(enabled=True, workers=2, threshold=0) - jobs = [(i, "root[%d]" % i) for i in range(5)] - # Minimal deephash params — keep keys aligned with what DeepDiff - # would normally pass. An empty dict is sufficient for primitives. - result = compute_hashes_parallel( - jobs=jobs, - deephash_parameters={}, - config=cfg, - ) - assert result is not None - assert len(result) == 5 - # All entries are non-None for primitives. - assert all(h is not None for h in result) - # Same int hashed twice yields identical hashes. - again = compute_hashes_parallel( - jobs=jobs, deephash_parameters={}, config=cfg - ) - assert again == result - - -# Module-level callables/classes so they pickle cleanly under spawn. def _drop_secret_callback(obj, path): - # Mirrors a real-world exclude_obj_callback that inspects the path. return "secret" in path @@ -338,10 +163,6 @@ def _drop_secret_callback(obj, path): class _NoopOperator(BaseOperator): - # No types/regex_paths configured, so match() never fires — but its mere - # presence in custom_operators must force the parent to keep subtree - # diffs serial (the worker would not be able to run custom_report_result - # back into the parent's tree). def __init__(self): super().__init__() @@ -349,127 +170,96 @@ def give_up_diffing(self, level, diff_instance): return False def normalize_value_for_hashing(self, parent, obj): - # Required for ignore_order=True compatibility when this operator - # ships through DeepHash. We don't normalize anything — pass through. return obj -class TestSubtreeParallel: - """Phase 3: paired-subtree diffs run in worker processes after pairing. 
+def _assert_parallel_matches_serial(t1, t2, **kwargs): + kwargs.setdefault("ignore_order", True) + kwargs.setdefault("cutoff_intersection_for_pairs", 1) + serial = DeepDiff(t1, t2, **kwargs) + for _ in range(REPEATS): + parallel = _run_parallel(t1, t2, **kwargs) + assert parallel == serial, ( + "parallel != serial: %r vs %r" % (parallel, serial) + ) + - Workers compute a fresh DeepDiff per pair and return tree leaves; the - parent rebases each leaf's up-chain onto its own ``change_level``. The - public output must equal the equivalent serial run regardless of worker - completion order, and unsafe inputs (custom_operators, path-aware - callbacks) must fall back to inline serial. - """ +@pytest.mark.slow +class TestDeterminismSlow: + """End-to-end parallel-vs-serial checks. Each test pays a pool-spawn tax.""" - def _assert_determinism(self, t1, t2, **kwargs): - kwargs.setdefault("ignore_order", True) - kwargs.setdefault("cutoff_intersection_for_pairs", 1) - serial = DeepDiff(t1, t2, **kwargs) - for _ in range(REPEATS): - parallel = _run_parallel(t1, t2, **kwargs) - assert parallel == serial, ( - "parallel != serial after run; difference: %r vs %r" - % (parallel, serial) - ) + def test_tied_distances(self): + # Multiple candidate pairs with identical rough distance — would expose + # any worker-completion-order leakage in pair selection. 
+ t1 = [{"k": "a", "v": 1}, {"k": "b", "v": 1}, {"k": "c", "v": 1}] + t2 = [{"k": "a", "v": 2}, {"k": "b", "v": 2}, {"k": "c", "v": 2}] + _assert_parallel_matches_serial(t1, t2) + + def test_repeated_items_report_repetition_true(self): + t1 = [1, 1, 1, 2, 3, 3] + t2 = [3, 1, 2, 2, 4] + _assert_parallel_matches_serial(t1, t2, report_repetition=True) + + def test_exclude_paths(self): + t1 = [{"id": i, "secret": i * 100, "v": i} for i in range(8)] + t2 = [{"id": i, "secret": i * 999, "v": i + (1 if i == 5 else 0)} for i in range(8)] + _assert_parallel_matches_serial(t1, t2, exclude_paths=["root[0]['secret']"]) + + def test_below_threshold_uses_serial(self): + # Default threshold (64) keeps small inputs serial even with mp on. + t1 = [1, 2, 3] + t2 = [3, 2, 1] + out = DeepDiff(t1, t2, ignore_order=True, multiprocessing=True) + assert out == DeepDiff(t1, t2, ignore_order=True) def test_paired_subtree_changes_match_serial(self): - # Each pair has exactly one nested change. Rebased paths must match - # the inline serial paths character-for-character. - t1 = [{"id": i, "data": {"x": i, "y": [i, i + 1]}} for i in range(20)] - t2 = [{"id": i, "data": {"x": i, "y": [i, i + 2]}} for i in range(20)] - self._assert_determinism(t1, t2) - - def test_paired_subtree_multiple_changes_per_pair(self): - # Multiple values_changed entries per pair — verifies that each leaf - # in the worker's tree gets an independent rebased up-chain. - t1 = [{"a": i, "b": i * 2, "c": i * 3, "d": [i, i, i]} for i in range(15)] - t2 = [{"a": i + 100, "b": i * 2, "c": i * 3 + 1, "d": [i, i, i + 1]} for i in range(15)] - self._assert_determinism(t1, t2) - - def test_paired_subtree_with_added_and_removed_keys(self): - # Non-values_changed report types in the subtree: - # dictionary_item_added / dictionary_item_removed. 
- t1 = [{"id": i, "old_only": i} for i in range(12)] - t2 = [{"id": i, "new_only": i} for i in range(12)] - self._assert_determinism(t1, t2) - - def test_paired_subtree_with_type_changes(self): - t1 = [{"id": i, "v": i} for i in range(10)] - t2 = [{"id": i, "v": str(i)} for i in range(10)] - self._assert_determinism(t1, t2) - - def test_paired_subtree_report_repetition_true(self): - # Exercises the report_repetition=True branch where the inner _diff - # is also deferred to workers. - t1 = [{"k": i % 3, "extra": [i]} for i in range(20)] - t2 = [{"k": (i + 1) % 3, "extra": [i + 1]} for i in range(20)] - self._assert_determinism(t1, t2, report_repetition=True) - - def test_exclude_paths_re_applied_in_parent(self): - # Worker sees subtree-relative paths, so exclude_paths cannot be - # enforced inside the worker; the parent re-filters via _skip_this - # after rebasing. This test would fail if that re-filter was missing. - t1 = [{"id": i, "secret": i * 100, "v": i} for i in range(15)] - t2 = [{"id": i, "secret": i * 999, "v": i + (1 if i == 7 else 0)} for i in range(15)] - self._assert_determinism( - t1, t2, exclude_paths=["root[0]['secret']"], - ) + # Parent rebases worker leaves; verifies path reconstruction. + t1 = [{"id": i, "data": {"x": i, "y": [i, i + 1]}} for i in range(10)] + t2 = [{"id": i, "data": {"x": i, "y": [i, i + 2]}} for i in range(10)] + _assert_parallel_matches_serial(t1, t2) + + def test_paired_subtree_added_and_removed_keys(self): + t1 = [{"id": i, "old_only": i} for i in range(8)] + t2 = [{"id": i, "new_only": i} for i in range(8)] + _assert_parallel_matches_serial(t1, t2) + + def test_worker_does_not_recursively_spawn(self): + # Sanitization must disable mp inside the worker; without it, nested + # spawn either deadlocks or runs absurdly slowly. 
+ t1 = [{"deep": {"deeper": {"deepest": [i, i + 1, i + 2]}}} for i in range(8)] + t2 = [{"deep": {"deeper": {"deepest": [i, i + 1, i + 3]}}} for i in range(8)] + _assert_parallel_matches_serial(t1, t2) -class TestSubtreeFallback: - """Subtree parallelism must degrade cleanly when features can't ship to workers.""" +@pytest.mark.slow +class TestSubtreeFallbackSlow: + """Subtree parallelism degrades cleanly when features can't ship to workers.""" def test_custom_operators_force_serial(self): - # custom_operators can call custom_report_result and mutate the - # parent diff — they must not run in workers. Even with mp turned on - # the result must still match the serial run. op = _NoopOperator() - t1 = [{"id": i, "v": i} for i in range(20)] - t2 = [{"id": i, "v": i + (1 if i == 5 else 0)} for i in range(20)] + t1 = [{"id": i, "v": i} for i in range(10)] + t2 = [{"id": i, "v": i + (1 if i == 5 else 0)} for i in range(10)] serial = DeepDiff(t1, t2, ignore_order=True, custom_operators=[op]) - parallel = _run_parallel( - t1, t2, ignore_order=True, custom_operators=[op], - ) + parallel = _run_parallel(t1, t2, ignore_order=True, custom_operators=[op]) assert parallel == serial def test_exclude_obj_callback_forces_serial(self): - # exclude_obj_callback receives the level path; in a worker the path - # is subtree-relative, so the callback would fire on the wrong paths. - # The parent must keep this case serial. - t1 = [{"id": i, "secret": i, "v": i} for i in range(15)] - t2 = [{"id": i, "secret": i, "v": i + (1 if i == 3 else 0)} for i in range(15)] + # The callback receives a path; subtree-relative paths inside a worker + # would misfire, so the parent must keep this serial. 
+ t1 = [{"id": i, "secret": i, "v": i} for i in range(8)] + t2 = [{"id": i, "secret": i, "v": i + (1 if i == 3 else 0)} for i in range(8)] serial = DeepDiff( - t1, t2, ignore_order=True, - exclude_obj_callback=_drop_secret_callback, + t1, t2, ignore_order=True, exclude_obj_callback=_drop_secret_callback, ) parallel = _run_parallel( - t1, t2, ignore_order=True, - exclude_obj_callback=_drop_secret_callback, + t1, t2, ignore_order=True, exclude_obj_callback=_drop_secret_callback, ) assert parallel == serial - -class TestSubtreeParallelHelper: - """Direct unit tests for ``compute_subtree_diffs_parallel``.""" - - def test_empty_jobs_returns_empty_list(self): - cfg = MPConfig(enabled=True, workers=2, threshold=0) - result = compute_subtree_diffs_parallel( - jobs=[], parameters={}, original_type=None, config=cfg, - ) - assert result == [] - - def test_unpickleable_parameters_returns_none(self): - cfg = MPConfig(enabled=True, workers=2, threshold=0) - # A lambda in parameters cannot be pickled under spawn. - params = {"some_param": lambda x: x} - result = compute_subtree_diffs_parallel( - jobs=[({"x": 1}, {"x": 2})], - parameters=params, - original_type=None, - config=cfg, - ) - assert result is None + def test_unpickleable_hasher_falls_back(self): + bad_hasher = lambda obj: _simple_hasher(obj) # noqa: E731 + t1 = [{"x": i} for i in range(8)] + t2 = [{"x": i + (1 if i == 3 else 0)} for i in range(8)] + serial = DeepDiff(t1, t2, ignore_order=True, hasher=bad_hasher) + parallel = _run_parallel(t1, t2, ignore_order=True, hasher=bad_hasher) + assert parallel == serial From 061e11b4d424d819b4b0f2a1210e7d735d000164 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 15:44:01 -0700 Subject: [PATCH 14/23] Code (deepdiff/_multiprocessing.py) - New helpers _extract_worker_stats and _aggregate_worker_stats. - _distance_worker and _subtree_diff_worker now return a stats delta as a third tuple element. 
- compute_distances_parallel and compute_subtree_diffs_parallel now return (result, aggregated_stats) instead of bare result. Code (deepdiff/diff.py) - New stats keys WORKER_DIFF_COUNT, WORKER_PASSES_COUNT, WORKER_DISTANCE_CACHE_HIT_COUNT, WORKER_BATCH_COUNT added to _stats init. - New helper _merge_worker_stats (sums counters, OR-merges limit flags). - _maybe_compute_pair_distances_parallel and _dispatch_subtree_jobs unpack the new orchestrator return shape and merge. Tests - New classes TestWorkerStatsUnit, TestStatsKeys, TestWorkerStatsAggregationSlow (8 tests). - Updated TestSubtreeParallelHelper.test_empty_jobs_returns_empty_list for new return shape. - Updated expected_stats dicts in tests/test_cache.py (3 tests) and tests/test_ignore_order.py (2 tests) with the four new zeroed keys. - Full suite: 1148 pass, 35 multiprocessing pass with --runslow. Doc (docs/multi_processing.md) - Phase 4 implementation status, code locations, test summary, and Subticket #5 removed from "Not yet implemented". --- deepdiff/_multiprocessing.py | 99 +++++++++++++++++++------ deepdiff/diff.py | 43 ++++++++++- docs/multi_processing.md | 52 ++++++++++++- tests/test_cache.py | 21 +++++- tests/test_ignore_order.py | 13 +++- tests/test_multiprocessing.py | 133 +++++++++++++++++++++++++++++++++- 6 files changed, 330 insertions(+), 31 deletions(-) diff --git a/deepdiff/_multiprocessing.py b/deepdiff/_multiprocessing.py index aa4f4b04..f4c9ab08 100644 --- a/deepdiff/_multiprocessing.py +++ b/deepdiff/_multiprocessing.py @@ -23,6 +23,46 @@ DEFAULT_MAX_WORKERS = 4 DEFAULT_THRESHOLD = 64 +# Keys we lift out of a worker's internal _stats and ship back to the parent. +# These mirror the same string constants used by ``deepdiff/diff.py``; we keep +# string literals here to avoid importing diff.py at module load (which would +# create an import cycle under spawn). 
+_WORKER_STATS_COUNTER_KEYS = ('DIFF COUNT', 'PASSES COUNT', 'DISTANCE CACHE HIT COUNT') +_WORKER_STATS_FLAG_KEYS = ('MAX PASS LIMIT REACHED', 'MAX DIFF LIMIT REACHED') + + +def _extract_worker_stats(diff_instance: Any) -> Dict[str, Any]: + """Pull a small, picklable stats snapshot off a worker-local DeepDiff. + + Returns a dict with integer counters plus boolean limit flags. Missing keys + are tolerated so this stays robust if ``_stats`` shrinks at the end of + ``__init__`` (it currently deletes ``DISTANCE CACHE ENABLED`` and the + ``PREVIOUS *`` bookkeeping keys before we get here). + """ + stats = getattr(diff_instance, '_stats', None) or {} + delta: Dict[str, Any] = {} + for key in _WORKER_STATS_COUNTER_KEYS: + delta[key] = int(stats.get(key, 0) or 0) + for key in _WORKER_STATS_FLAG_KEYS: + delta[key] = bool(stats.get(key, False)) + return delta + + +def _aggregate_worker_stats(deltas: List[Dict[str, Any]]) -> Dict[str, Any]: + """Sum counter keys and OR-merge limit flags across worker deltas.""" + out: Dict[str, Any] = {key: 0 for key in _WORKER_STATS_COUNTER_KEYS} + for key in _WORKER_STATS_FLAG_KEYS: + out[key] = False + for delta in deltas: + if not delta: + continue + for key in _WORKER_STATS_COUNTER_KEYS: + out[key] += int(delta.get(key, 0) or 0) + for key in _WORKER_STATS_FLAG_KEYS: + if delta.get(key): + out[key] = True + return out + @dataclass(frozen=True) class MPConfig: @@ -114,7 +154,9 @@ def _sanitize_parameters_for_worker(parameters: Dict[str, Any]) -> Dict[str, Any return sanitized -def _distance_worker(job: Tuple[int, Dict[str, Any], Any, Any, Any, Any]) -> Tuple[int, float]: +def _distance_worker( + job: Tuple[int, Dict[str, Any], Any, Any, Any, Any], +) -> Tuple[int, float, Dict[str, Any]]: """Compute the rough distance between two items in a worker process. 
``job`` layout matches what ``compute_distances_parallel`` ships: @@ -123,7 +165,9 @@ def _distance_worker(job: Tuple[int, Dict[str, Any], Any, Any, Any, Any]) -> Tup The worker constructs a fresh root ``DeepDiff`` (no shared parent state), requests the DELTA_VIEW so we hit the same code path as the serial call in - ``_get_rough_distance_of_hashed_objs``, and returns the resulting float. + ``_get_rough_distance_of_hashed_objs``, and returns the resulting float + plus a ``_extract_worker_stats`` snapshot so the parent can aggregate + diff/pass/cache-hit counts into its WORKER_* stats keys. """ # Imported here to keep module import cheap and to dodge any circular # import surprises under spawn. @@ -144,7 +188,7 @@ def _distance_worker(job: Tuple[int, Dict[str, Any], Any, Any, Any, Any]) -> Tup # call below, hence cache_purge_level=0. cache_purge_level=0, ) - return job_index, cast(float, diff._get_rough_distance()) + return job_index, cast(float, diff._get_rough_distance()), _extract_worker_stats(diff) def compute_distances_parallel( @@ -153,7 +197,7 @@ def compute_distances_parallel( original_type: Any, iterable_compare_func: Optional[Callable], config: MPConfig, -) -> Optional[Dict[Tuple[Any, Any], float]]: +) -> Optional[Tuple[Dict[Tuple[Any, Any], float], Dict[str, Any]]]: """Run ``_distance_worker`` over ``jobs`` and return distances by pair. ``jobs`` is a list of ``(added_hash, removed_hash, added_item, removed_item)`` @@ -161,17 +205,20 @@ def compute_distances_parallel( is responsible for that ordering; this helper does not reorder anything. Returns: - A dict ``{(added_hash, removed_hash): distance}``, or ``None`` if the - section is unsafe to parallelize (unpickleable inputs/parameters, - worker import error, etc.). On ``None`` the caller MUST fall back to - the serial path so correctness is preserved. 
+ ``(distances_by_pair, aggregated_worker_stats)`` where the first item + is a dict ``{(added_hash, removed_hash): distance}`` and the second is + the aggregated ``_extract_worker_stats`` snapshot summed across all + workers (counter keys summed, limit flags OR-merged). Returns + ``None`` if the section is unsafe to parallelize (unpickleable + inputs/parameters, worker import error, etc.). On ``None`` the caller + MUST fall back to the serial path so correctness is preserved. Workers may finish out of order; we collect results into a dict keyed by the original job index, so callers see the same result regardless of completion order. """ if not jobs: - return {} + return {}, _aggregate_worker_stats([]) sanitized_params = _sanitize_parameters_for_worker(parameters) @@ -200,14 +247,16 @@ def compute_distances_parallel( ) results_by_index: Dict[int, float] = {} + stats_deltas: List[Dict[str, Any]] = [] try: with ProcessPoolExecutor(max_workers=config.workers) as executor: futures = [executor.submit(_distance_worker, payload) for payload in payloads] for future in as_completed(futures): # Re-raise worker exceptions in the parent so they surface as # normal DeepDiff exceptions instead of being swallowed. - idx, distance = future.result() + idx, distance, stats_delta = future.result() results_by_index[idx] = distance + stats_deltas.append(stats_delta) except (pickle.PicklingError, AttributeError, TypeError): # Pickling/spawn-related failures: surface as a serial fallback rather # than crashing the diff. 
Other exceptions (worker logic bugs, user @@ -217,7 +266,7 @@ def compute_distances_parallel( out: Dict[Tuple[Any, Any], float] = {} for i, job in enumerate(jobs): out[(job[0], job[1])] = results_by_index[i] - return out + return out, _aggregate_worker_stats(stats_deltas) def _hash_worker(job: Tuple[int, Any, str, Dict[str, Any]]) -> Tuple[int, Optional[str]]: @@ -256,7 +305,7 @@ def _hash_worker(job: Tuple[int, Any, str, Dict[str, Any]]) -> Tuple[int, Option def _subtree_diff_worker( job: Tuple[int, Dict[str, Any], Any, Any, Any], -) -> Tuple[int, List[Tuple[str, Any]]]: +) -> Tuple[int, List[Tuple[str, Any]], Dict[str, Any]]: """Run one paired-item subtree diff in a worker process. ``job`` layout: ``(job_index, sanitized_parameters, t1, t2, _original_type)``. @@ -290,7 +339,7 @@ def _subtree_diff_worker( continue for leaf in levels: entries.append((report_type, leaf)) - return job_index, entries + return job_index, entries, _extract_worker_stats(diff) def compute_subtree_diffs_parallel( @@ -298,14 +347,17 @@ def compute_subtree_diffs_parallel( parameters: Dict[str, Any], original_type: Any, config: MPConfig, -) -> Optional[List[List[Tuple[str, Any]]]]: +) -> Optional[Tuple[List[List[Tuple[str, Any]]], Dict[str, Any]]]: """Run ``_subtree_diff_worker`` over ``jobs`` and return per-job entries. ``jobs`` is a list of ``(t1_item, t2_item)`` tuples in the exact order - the serial paired-iteration code visits them. Returns a list aligned to - that order; each element is ``[(report_type, leaf_difflevel), ...]`` - suitable for the parent to rebase and merge into its tree. Returns - ``None`` when the section is unsafe to parallelize (unpickleable + the serial paired-iteration code visits them. 
Returns + ``(entries_by_job, aggregated_worker_stats)`` where ``entries_by_job`` is + a list aligned to job order — each element is ``[(report_type, + leaf_difflevel), ...]`` suitable for the parent to rebase and merge into + its tree — and ``aggregated_worker_stats`` is the per-batch ``_stats`` + deltas summed across workers (counters summed, limit flags OR-merged). + Returns ``None`` when the section is unsafe to parallelize (unpickleable parameters/items, worker import error). On ``None`` the caller MUST run the same jobs serially so correctness is preserved. @@ -313,7 +365,7 @@ def compute_subtree_diffs_parallel( job index so the merge order is identical regardless of completion order. """ if not jobs: - return [] + return [], _aggregate_worker_stats([]) sanitized_params = _sanitize_parameters_for_worker(parameters) @@ -332,16 +384,21 @@ def compute_subtree_diffs_parallel( ] results_by_index: Dict[int, List[Tuple[str, Any]]] = {} + stats_deltas: List[Dict[str, Any]] = [] try: with ProcessPoolExecutor(max_workers=config.workers) as executor: futures = [executor.submit(_subtree_diff_worker, payload) for payload in payloads] for future in as_completed(futures): - idx, entries = future.result() + idx, entries, stats_delta = future.result() results_by_index[idx] = entries + stats_deltas.append(stats_delta) except (pickle.PicklingError, AttributeError, TypeError): return None - return [results_by_index[i] for i in range(len(jobs))] + return ( + [results_by_index[i] for i in range(len(jobs))], + _aggregate_worker_stats(stats_deltas), + ) def compute_hashes_parallel( diff --git a/deepdiff/diff.py b/deepdiff/diff.py index f38681ba..74584ace 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -86,6 +86,10 @@ def _report_progress(_stats: Dict[str, Any], progress_logger: Callable[[str], No DISTANCE_CACHE_ENABLED = 'DISTANCE CACHE ENABLED' PREVIOUS_DIFF_COUNT = 'PREVIOUS DIFF COUNT' PREVIOUS_DISTANCE_CACHE_HIT_COUNT = 'PREVIOUS DISTANCE CACHE HIT COUNT' 
+WORKER_DIFF_COUNT = 'WORKER DIFF COUNT' +WORKER_PASSES_COUNT = 'WORKER PASSES COUNT' +WORKER_DISTANCE_CACHE_HIT_COUNT = 'WORKER DISTANCE CACHE HIT COUNT' +WORKER_BATCH_COUNT = 'WORKER BATCH COUNT' CANT_FIND_NUMPY_MSG = 'Unable to import numpy. This must be a bug in DeepDiff since a numpy array is detected.' INVALID_VIEW_MSG = "view parameter must be one of 'text', 'tree', 'delta', 'colored' or 'colored_compact'. But {} was passed." CUTOFF_RANGE_ERROR_MSG = 'cutoff_distance_for_pairs needs to be a positive float max 1.' @@ -340,6 +344,13 @@ def _group_by_sort_key(x): MAX_PASS_LIMIT_REACHED: False, MAX_DIFF_LIMIT_REACHED: False, DISTANCE_CACHE_ENABLED: bool(cache_size), + # Multiprocessing aggregates: each parallel batch sums per-worker + # _stats deltas into these keys. Parent-side counters above stay + # comparable to a serial run so existing tests are unaffected. + WORKER_DIFF_COUNT: 0, + WORKER_PASSES_COUNT: 0, + WORKER_DISTANCE_CACHE_HIT_COUNT: 0, + WORKER_BATCH_COUNT: 0, } self.hashes = dict_() if hashes is None else hashes self._numpy_paths = dict_() # if _numpy_paths is None else _numpy_paths @@ -1350,13 +1361,38 @@ def _maybe_compute_pair_distances_parallel( if not mp_config.should_parallelize(len(jobs)): return None - return compute_distances_parallel( + result = compute_distances_parallel( jobs=jobs, parameters=self._parameters, original_type=_original_type, iterable_compare_func=self.iterable_compare_func, config=mp_config, ) + if result is None: + return None + distances, worker_stats = result + self._merge_worker_stats(worker_stats) + return distances + + def _merge_worker_stats(self, worker_stats): + """Aggregate one parallel-batch's worker ``_stats`` delta into self._stats. + + Counters (DIFF / PASSES / DISTANCE CACHE HIT) sum into the matching + ``WORKER_*`` keys; limit flags OR-merge into the parent's existing + MAX_*_LIMIT_REACHED flags so any worker hitting a guard surfaces the + same warning state on the public ``get_stats()`` output. 
+ """ + if not worker_stats: + return + self._stats[WORKER_DIFF_COUNT] += int(worker_stats.get('DIFF COUNT', 0) or 0) + self._stats[WORKER_PASSES_COUNT] += int(worker_stats.get('PASSES COUNT', 0) or 0) + self._stats[WORKER_DISTANCE_CACHE_HIT_COUNT] += int( + worker_stats.get('DISTANCE CACHE HIT COUNT', 0) or 0) + self._stats[WORKER_BATCH_COUNT] += 1 + if worker_stats.get(MAX_PASS_LIMIT_REACHED): + self._stats[MAX_PASS_LIMIT_REACHED] = True + if worker_stats.get(MAX_DIFF_LIMIT_REACHED): + self._stats[MAX_DIFF_LIMIT_REACHED] = True def _get_most_in_common_pairs_in_iterables( self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type): @@ -1578,12 +1614,15 @@ def _dispatch_subtree_jobs(self, pending_jobs, _original_type, local_tree): if (mp_config is not None and mp_config.enabled and mp_config.should_parallelize(len(pending_jobs))): jobs_payload = [(t1_item, t2_item) for (_, t1_item, t2_item, _) in pending_jobs] - parallel_results = compute_subtree_diffs_parallel( + outcome = compute_subtree_diffs_parallel( jobs=jobs_payload, parameters=self._parameters, original_type=_original_type, config=mp_config, ) + if outcome is not None: + parallel_results, worker_stats = outcome + self._merge_worker_stats(worker_stats) if parallel_results is None: # Below threshold or unsafe inputs — run inline-equivalent serial. diff --git a/docs/multi_processing.md b/docs/multi_processing.md index 78442b09..02b7ca6e 100644 --- a/docs/multi_processing.md +++ b/docs/multi_processing.md @@ -33,7 +33,28 @@ Without this, identity checks like `change.t2 is not notpresent` (used by `TextResult._from_tree_default` to decide t1-vs-t2 reporting) break on any DiffLevel that travels through `pickle`, which is exactly the Phase 3 path. -Subtickets #5, #6 (extended matrix), and #7 are still open. +**Phase 4 — landed (2026-04-27).** Subticket #5 (multiprocessing-aware stats) +is implemented. 
Workers now return their internal `_stats` snapshot alongside +their primary result; the parent aggregates those deltas into four new keys on +its own `_stats` dict — `WORKER DIFF COUNT`, `WORKER PASSES COUNT`, +`WORKER DISTANCE CACHE HIT COUNT`, and `WORKER BATCH COUNT` — and OR-merges +worker `MAX PASS LIMIT REACHED` / `MAX DIFF LIMIT REACHED` flags into the +parent's existing flags so any worker hitting a guard surfaces the same +warning state on the public `get_stats()` output. Parent counters +(`DIFF COUNT`, `PASSES COUNT`, `DISTANCE CACHE HIT COUNT`) stay scoped to the +parent process so they remain comparable to a serial run; this is what lets +existing stats-asserting tests pass with multiprocessing on. + +`max_diffs` and `max_passes` continue to act as approximate stop guards. +Workers run their own `DeepDiff` with the same constructor params, so they +trip the limit locally; the OR-merge means the parent's +`MAX_*_LIMIT_REACHED` flags reflect "any worker hit it" without requiring +exact serial-equivalent counts (which the doc explicitly does not require). +`get_stats()` always exposes the new `WORKER_*` keys, even on serial runs, +so consumers can read them unconditionally — they just stay zero when +multiprocessing is off or below threshold. + +Subtickets #6 (extended matrix) and #7 (benchmarks) are still open. What works today: @@ -61,6 +82,15 @@ What works today: fallback, `exclude_obj_callback` fallback, plus direct unit tests for `compute_subtree_diffs_parallel`). All other test files still pass unchanged. +- Phase 4 adds 8 stats-aggregation tests in `tests/test_multiprocessing.py` + (`TestWorkerStatsUnit` for `_extract_worker_stats` / `_aggregate_worker_stats`, + `TestStatsKeys` for the always-present `WORKER_*` keys on serial runs, and + `TestWorkerStatsAggregationSlow` covering paired-subtree aggregation, + distance-loop aggregation, and the no-double-counting invariant). 
The + pre-existing stats-asserting tests in `tests/test_cache.py` and + `tests/test_ignore_order.py` were updated to include the four new zeroed + keys in their `expected_stats` dicts; all of them continue to pass with + unchanged primary counter values. Code locations: @@ -89,6 +119,24 @@ Code locations: - `deepdiff/helper.py` — `NotPresent` / `Unprocessed` / `Skipped` / `NotHashed` gained `__reduce__` so the singleton sentinels survive `spawn`-based pickle round-trips. +- `deepdiff/_multiprocessing.py::_extract_worker_stats`, + `_aggregate_worker_stats` — Phase 4 helpers. Each worker dispatch returns + a small picklable stats dict (`DIFF COUNT`, `PASSES COUNT`, + `DISTANCE CACHE HIT COUNT`, plus the two limit flags); the orchestrator + sums counters and OR-merges flags before handing them back. +- `deepdiff/_multiprocessing.py::compute_distances_parallel`, + `compute_subtree_diffs_parallel` — both now return + `(primary_result, aggregated_worker_stats)` instead of just + `primary_result` (the `None` failure-case sentinel is unchanged). +- `deepdiff/diff.py::DeepDiff._merge_worker_stats` — Phase 4 helper that + takes one orchestrator's aggregated stats dict and folds it into the + parent's `self._stats`. Called by both + `_maybe_compute_pair_distances_parallel` and `_dispatch_subtree_jobs`. +- `deepdiff/diff.py` — four new module-level constants + (`WORKER_DIFF_COUNT`, `WORKER_PASSES_COUNT`, + `WORKER_DISTANCE_CACHE_HIT_COUNT`, `WORKER_BATCH_COUNT`) plus + initialization in `__init__` so the keys are always present in + `get_stats()`. Not yet implemented (deferred, intentional): @@ -105,8 +153,6 @@ Not yet implemented (deferred, intentional): the current tests don't cover. Worker-side `_iterable_opcodes` are also not propagated, so `DELTA_VIEW` of a paired subtree containing ordered iterables is not yet covered by Phase 3. -- **Subticket #5** — multiprocessing-aware stats semantics. 
Parent-only stats - remain meaningful in Phase 1, but no aggregation across workers. - **Subticket #6** — extended test matrix (numpy, pydantic, namedtuple, group_by, large-mixed structures, worker exception propagation tests). Phase 1 ships the core determinism harness; the rest is additive. diff --git a/tests/test_cache.py b/tests/test_cache.py index 419b6f7f..b4545ebe 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -63,7 +63,12 @@ def test_cache_deeply_nested_b(self, nested_b_t1, nested_b_t2, nested_b_result): 'DIFF COUNT': 306, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, - 'MAX DIFF LIMIT REACHED': False + 'MAX DIFF LIMIT REACHED': False, + # Phase 4: zeroed worker aggregates always present in get_stats(). + 'WORKER DIFF COUNT': 0, + 'WORKER PASSES COUNT': 0, + 'WORKER DISTANCE CACHE HIT COUNT': 0, + 'WORKER BATCH COUNT': 0, } stats_diff = DeepDiff(expected_stats, stats, use_log_scale=True, log_scale_similarity_threshold=0.15) assert not stats_diff @@ -93,7 +98,12 @@ def test_cache_1D_array_of_numbers_that_do_not_overlap(self): 'DIFF COUNT': 50, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, - 'MAX DIFF LIMIT REACHED': False + 'MAX DIFF LIMIT REACHED': False, + # Phase 4: zeroed worker aggregates always present in get_stats(). + 'WORKER DIFF COUNT': 0, + 'WORKER PASSES COUNT': 0, + 'WORKER DISTANCE CACHE HIT COUNT': 0, + 'WORKER BATCH COUNT': 0, } assert expected_stats == stats @@ -123,7 +133,12 @@ def test_cache_1D_array_of_numbers_that_overlap(self): 'DIFF COUNT': 16, 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, - 'MAX DIFF LIMIT REACHED': False + 'MAX DIFF LIMIT REACHED': False, + # Phase 4: zeroed worker aggregates always present in get_stats(). 
+ 'WORKER DIFF COUNT': 0, + 'WORKER PASSES COUNT': 0, + 'WORKER DISTANCE CACHE HIT COUNT': 0, + 'WORKER BATCH COUNT': 0, } assert expected_stats == stats diff --git a/tests/test_ignore_order.py b/tests/test_ignore_order.py index 1e155f20..aaceeb71 100644 --- a/tests/test_ignore_order.py +++ b/tests/test_ignore_order.py @@ -742,6 +742,12 @@ def test_stats_that_include_distance_cache_hits(self): 'DISTANCE CACHE HIT COUNT': 0, 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False, + # Phase 4 (multiprocessing-aware stats) added zeroed worker keys to + # every run; serial diffs have nothing to aggregate so they stay 0. + 'WORKER DIFF COUNT': 0, + 'WORKER PASSES COUNT': 0, + 'WORKER DISTANCE CACHE HIT COUNT': 0, + 'WORKER BATCH COUNT': 0, } assert expected == diff.get_stats() @@ -819,7 +825,12 @@ def test_ignore_order_cache_for_individual_distances(self): 'DIFF COUNT': 13, 'DISTANCE CACHE HIT COUNT': 1, 'MAX PASS LIMIT REACHED': False, - 'MAX DIFF LIMIT REACHED': False + 'MAX DIFF LIMIT REACHED': False, + # Phase 4: zeroed worker aggregates always present in get_stats(). + 'WORKER DIFF COUNT': 0, + 'WORKER PASSES COUNT': 0, + 'WORKER DISTANCE CACHE HIT COUNT': 0, + 'WORKER BATCH COUNT': 0, } assert expected_stats == stats diff --git a/tests/test_multiprocessing.py b/tests/test_multiprocessing.py index 19ebdc35..94dcf839 100644 --- a/tests/test_multiprocessing.py +++ b/tests/test_multiprocessing.py @@ -17,6 +17,8 @@ compute_distances_parallel, compute_hashes_parallel, compute_subtree_diffs_parallel, + _aggregate_worker_stats, + _extract_worker_stats, ) @@ -116,7 +118,13 @@ def test_empty_jobs_returns_empty_list(self): result = compute_subtree_diffs_parallel( jobs=[], parameters={}, original_type=None, config=cfg, ) - assert result == [] + # Phase 4: orchestrator now returns (entries_by_job, worker_stats). 
+ assert result is not None + entries_by_job, worker_stats = result + assert entries_by_job == [] + assert worker_stats['DIFF COUNT'] == 0 + assert worker_stats['PASSES COUNT'] == 0 + assert worker_stats['MAX DIFF LIMIT REACHED'] is False def test_unpickleable_parameters_returns_none(self): cfg = MPConfig(enabled=True, workers=2, threshold=0) @@ -263,3 +271,126 @@ def test_unpickleable_hasher_falls_back(self): serial = DeepDiff(t1, t2, ignore_order=True, hasher=bad_hasher) parallel = _run_parallel(t1, t2, ignore_order=True, hasher=bad_hasher) assert parallel == serial + + +class TestWorkerStatsUnit: + """Phase 4 unit-level checks for the stats extraction/aggregation helpers.""" + + def test_extract_worker_stats_handles_missing_attribute(self): + class _Bare: + pass + # No ``_stats`` attribute at all — extractor must return zeroed counters + # rather than crash. This shields against the future case where a + # worker's DeepDiff is replaced by a non-DeepDiff stand-in. + delta = _extract_worker_stats(_Bare()) + assert delta['DIFF COUNT'] == 0 + assert delta['PASSES COUNT'] == 0 + assert delta['DISTANCE CACHE HIT COUNT'] == 0 + assert delta['MAX PASS LIMIT REACHED'] is False + assert delta['MAX DIFF LIMIT REACHED'] is False + + def test_aggregate_sums_counters_and_or_merges_flags(self): + deltas = [ + {'DIFF COUNT': 3, 'PASSES COUNT': 1, 'DISTANCE CACHE HIT COUNT': 0, + 'MAX PASS LIMIT REACHED': False, 'MAX DIFF LIMIT REACHED': False}, + {'DIFF COUNT': 7, 'PASSES COUNT': 2, 'DISTANCE CACHE HIT COUNT': 4, + 'MAX PASS LIMIT REACHED': True, 'MAX DIFF LIMIT REACHED': False}, + {}, # empty/missing delta must be tolerated + ] + agg = _aggregate_worker_stats(deltas) + assert agg['DIFF COUNT'] == 10 + assert agg['PASSES COUNT'] == 3 + assert agg['DISTANCE CACHE HIT COUNT'] == 4 + assert agg['MAX PASS LIMIT REACHED'] is True + assert agg['MAX DIFF LIMIT REACHED'] is False + + def test_aggregate_empty_input_returns_zeroed_dict(self): + agg = _aggregate_worker_stats([]) + assert 
agg == { + 'DIFF COUNT': 0, + 'PASSES COUNT': 0, + 'DISTANCE CACHE HIT COUNT': 0, + 'MAX PASS LIMIT REACHED': False, + 'MAX DIFF LIMIT REACHED': False, + } + + +class TestStatsKeys: + """get_stats() must always expose the new WORKER_* keys, even in serial mode.""" + + def test_serial_run_exposes_worker_keys_zeroed(self): + # No multiprocessing means workers never ran — but the keys must exist + # so downstream consumers that read them unconditionally don't KeyError. + diff = DeepDiff([1, 2, 3], [1, 2, 4], ignore_order=True) + stats = diff.get_stats() + assert stats['WORKER DIFF COUNT'] == 0 + assert stats['WORKER PASSES COUNT'] == 0 + assert stats['WORKER DISTANCE CACHE HIT COUNT'] == 0 + assert stats['WORKER BATCH COUNT'] == 0 + + def test_existing_stats_keys_still_present(self): + # Phase 4 must not regress the keys Phase 1 / pre-MP code relies on. + diff = DeepDiff([1, 2, 3], [1, 2, 4], ignore_order=True) + stats = diff.get_stats() + for key in ('PASSES COUNT', 'DIFF COUNT', 'DISTANCE CACHE HIT COUNT', + 'MAX PASS LIMIT REACHED', 'MAX DIFF LIMIT REACHED'): + assert key in stats + + +@pytest.mark.slow +class TestWorkerStatsAggregationSlow: + """End-to-end checks: workers must contribute to the WORKER_* aggregates.""" + + def test_paired_subtree_run_aggregates_worker_stats(self): + # Force the subtree-parallel path: lots of paired-item diffs, threshold + # 0 so we don't fall through to serial. ``cutoff_intersection_for_pairs=1`` + # is required — the default cutoff disables pair selection when most + # items differ, which is exactly our setup, so without it the subtree + # queue stays empty and no batch is dispatched. 
+ t1 = [{"id": i, "data": {"x": i, "y": [i, i + 1]}} for i in range(20)] + t2 = [{"id": i, "data": {"x": i, "y": [i, i + 2]}} for i in range(20)] + diff = _run_parallel(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=1) + stats = diff.get_stats() + assert stats['WORKER BATCH COUNT'] >= 1, ( + "expected at least one parallel batch to have run; got stats=%r" % stats + ) + assert stats['WORKER DIFF COUNT'] > 0, ( + "workers must have done diffs; got %r" % stats + ) + + def test_distance_loop_aggregates_worker_stats(self): + # Many added/removed candidates with distinct shapes — drives the + # distance-loop parallel path even when subtree pairing rejects most + # pairs. Also leans on threshold=0 to guarantee we go through the pool. + t1 = [{"id": i, "v": [i, i, i]} for i in range(80)] + t2 = [{"id": i + 1000, "v": [i, i, i + 1]} for i in range(80)] + diff = _run_parallel(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=1) + stats = diff.get_stats() + # Either the distance batch or the subtree batch must have shipped to + # workers; both feed _merge_worker_stats so the batch counter is the + # cleanest evidence that aggregation actually fired. + assert stats['WORKER BATCH COUNT'] >= 1 + + def test_aggregation_does_not_corrupt_parent_counters(self): + # Phase 4 must not double-count: parent DIFF COUNT must remain in the + # same ballpark as a serial run, even when workers add their own. + t1 = [{"id": i, "v": i} for i in range(20)] + t2 = [{"id": i, "v": i + (1 if i == 5 else 0)} for i in range(20)] + serial = DeepDiff(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=1) + parallel = _run_parallel(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=1) + # Result must still match. 
+ assert parallel == serial + # Parent DIFF COUNT may differ slightly because pair-selection traversal + # avoids some inline _diff calls when distances are precomputed in + # workers, but the order of magnitude must still be reasonable — + # specifically, parent count alone must not silently include worker work. + s_parent = serial.get_stats()['DIFF COUNT'] + p_parent = parallel.get_stats()['DIFF COUNT'] + # Parent-only count in a parallel run is <= serial count: the pairs + # whose distance was computed in a worker are subtracted from the + # parent's inline path. This invariant breaks if we accidentally also + # added worker counts back into DIFF COUNT. + assert p_parent <= s_parent, ( + "parent DIFF COUNT %d exceeds serial %d — looks like worker " + "counts are leaking into the parent counter" % (p_parent, s_parent) + ) From e829c618a7253a24c92872c501f2ad053efd600e Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 27 Apr 2026 19:49:07 -0700 Subject: [PATCH 15/23] =?UTF-8?q?Phase=205=20=E2=80=94=20Subticket=20#6=20?= =?UTF-8?q?(extended=20determinism=20matrix)=20=20=20-=20Added=2021=20test?= =?UTF-8?q?s=20to=20tests/test=5Fmultiprocessing.py=20across=205=20new=20c?= =?UTF-8?q?lasses=20covering:=20=20=20report=5Frepetition=3DFalse,=20sets/?= =?UTF-8?q?frozensets,=20pickleable=20custom=20hasher,=20ignore=5Fstring?= =?UTF-8?q?=5Fcase=20/=20=20=20ignore=5Fnumeric=5Ftype=5Fchanges=20/=20ign?= =?UTF-8?q?ore=5Fstring=5Ftype=5Fchanges,=20include=5Fpaths,=20exclude=5Fr?= =?UTF-8?q?egex=5Fpaths,=20=20=20namedtuple/=5F=5Fslots=5F=5F/=5F=5Fdict?= =?UTF-8?q?=5F=5F=20objects,=20group=5Fby,=20generators,=20numpy=20(import?= =?UTF-8?q?orskip),=20pydantic=20=20=20(importorskip),=20verbose=5Flevel?= =?UTF-8?q?=3D2,=20to=5Fdict()=20equality,=20closure=20iterable=5Fcompare?= =?UTF-8?q?=5Ffunc,=20and=20worker=20=20=20exception=20propagation=20via?= =?UTF-8?q?=20an=20=5F=5Freduce=5F=5F=20that=20survives=20pickle.dumps=20b?= 
=?UTF-8?q?ut=20raises=20on=20unpickle.=20=20=20-=20All=2056=20tests=20in?= =?UTF-8?q?=20test=5Fmultiprocessing.py=20pass;=20full=20suite=20(1126=20t?= =?UTF-8?q?ests=20+=2010=20skips)=20still=20green.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 6 — Subticket #7 (benchmarks) - Added benchmarks/multiprocessing_bench.py — three ignore_order=True workloads (paired_subtree, distance_loop, large_nested_dicts), --workers/--scale/--quick/--only flags, asserts parallel == serial on every row, non-zero exit on divergence. - Verified locally: paired_subtree at scale=400 gets ~1.3× with 2 workers; quick scales show spawn overhead dominating (which is exactly why DEFAULT_THRESHOLD = 64 exists). Doc: docs/multi_processing.md updated with Phase 5 and Phase 6 status sections, code locations, and a tightened "Not yet implemented" entry that now only flags the _prep_iterable/_prep_dict deeper recursion, the _diff_dict/ordered-pair extension of #4, and threshold tuning. --- benchmarks/__init__.py | 0 benchmarks/multiprocessing_bench.py | 245 ++++++++++++++++++++++ docs/multi_processing.md | 61 +++++- tests/test_multiprocessing.py | 306 ++++++++++++++++++++++++++++ 4 files changed, 605 insertions(+), 7 deletions(-) create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/multiprocessing_bench.py diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/multiprocessing_bench.py b/benchmarks/multiprocessing_bench.py new file mode 100644 index 00000000..b9801c4d --- /dev/null +++ b/benchmarks/multiprocessing_bench.py @@ -0,0 +1,245 @@ +"""Benchmarks for the internal multiprocessing mode (Subticket #7). + +Goal: provide a reproducible "is multiprocessing actually faster?" check for +the workloads multi_processing.md flags as the primary targets — the +``ignore_order=True`` distance loop, paired-subtree diffs, and large lists of +nested dicts. 
Each workload runs serial first, then parallel at a few worker +counts; we print a single results table. + +Usage:: + + source ~/.venvs/deep/bin/activate + python -m benchmarks.multiprocessing_bench + + # Smaller, faster sweep: + python -m benchmarks.multiprocessing_bench --quick + + # Just one workload: + python -m benchmarks.multiprocessing_bench --only paired_subtree + +The script also asserts that the parallel result equals the serial result for +every workload — a benchmark that produces wrong answers is worse than no +benchmark at all. If any pair diverges the script exits non-zero. + +The numbers here are not committed; they're meant to inform threshold tuning +(see DEFAULT_THRESHOLD in deepdiff/_multiprocessing.py) and to expose +regressions when the hot path changes. Re-run on your hardware before drawing +conclusions — process spawn overhead and IPC pickle cost vary wildly across +machines. +""" + +import argparse +import os +import sys +import time +from typing import Any, Callable, Dict, List, Tuple + +# Make the package importable when the script is run from a checkout. +HERE = os.path.dirname(os.path.abspath(__file__)) +ROOT = os.path.dirname(HERE) +if ROOT not in sys.path: + sys.path.insert(0, ROOT) + +from deepdiff import DeepDiff # noqa: E402 + + +# --------------------------------------------------------------------------- +# Workloads. +# +# Each builder returns ``(t1, t2, kwargs)`` where ``kwargs`` is the DeepDiff +# constructor arguments common to both the serial and parallel runs. +# Multiprocessing parameters are added by the runner; workloads should not set +# them. +# --------------------------------------------------------------------------- + + +def workload_paired_subtree(scale: int) -> Tuple[Any, Any, Dict[str, Any]]: + """Heavy paired-subtree diff path. + + Each item is a small dict whose nested ``data`` differs by one element; + pairing kicks in for every item, so the subtree-parallel path runs. 
+ """ + n = scale + t1 = [{"id": i, "data": {"x": i, "y": [i, i + 1, i + 2]}} for i in range(n)] + t2 = [{"id": i, "data": {"x": i, "y": [i, i + 1, i + 3]}} for i in range(n)] + return t1, t2, {"ignore_order": True, "cutoff_intersection_for_pairs": 1} + + +def workload_distance_loop(scale: int) -> Tuple[Any, Any, Dict[str, Any]]: + """Heavy added-vs-removed distance grid. + + All ids are disjoint between t1 and t2, so every t2 item is "added" and + every t1 item is "removed". The candidate distance grid is N*N, which is + where the distance worker pool earns its keep. + """ + n = scale + t1 = [{"id": i, "v": [i, i, i]} for i in range(n)] + t2 = [{"id": i + 10_000, "v": [i, i, i + 1]} for i in range(n)] + return t1, t2, {"ignore_order": True, "cutoff_intersection_for_pairs": 1} + + +def workload_large_nested_dicts(scale: int) -> Tuple[Any, Any, Dict[str, Any]]: + """Large list of moderately-deep dicts with one mutation each. + + The shape mirrors the JSON-like blobs the doc calls out: each item is + several layers deep with a mix of strings, ints, and nested lists. + """ + n = scale + + def make(i: int, mutate: int) -> Dict[str, Any]: + return { + "id": i, + "name": "name-%d" % i, + "tags": ["t%d" % (i + j) for j in range(5)], + "details": { + "score": i + mutate, + "history": [{"step": j, "value": j * 2 + mutate} for j in range(4)], + "meta": {"created_at": "2024-01-%02d" % ((i % 28) + 1), + "owner": "user-%d" % (i % 17)}, + }, + } + + t1 = [make(i, 0) for i in range(n)] + t2 = [make(i, 1 if i % 7 == 0 else 0) for i in range(n)] + return t1, t2, {"ignore_order": True, "cutoff_intersection_for_pairs": 1} + + +WORKLOADS: Dict[str, Callable[[int], Tuple[Any, Any, Dict[str, Any]]]] = { + "paired_subtree": workload_paired_subtree, + "distance_loop": workload_distance_loop, + "large_nested_dicts": workload_large_nested_dicts, +} + + +# --------------------------------------------------------------------------- +# Runner. 
+# --------------------------------------------------------------------------- + + +def _time(fn: Callable[[], Any]) -> Tuple[float, Any]: + start = time.perf_counter() + result = fn() + return time.perf_counter() - start, result + + +def run_one(name: str, scale: int, worker_counts: List[int]) -> List[Dict[str, Any]]: + """Run one workload serial + parallel and return one row per worker count. + + The serial result is computed once and reused as the correctness reference + for every parallel run. + """ + t1, t2, kwargs = WORKLOADS[name](scale) + print(f"\n=== {name} (scale={scale}) ===") + print(f"input shape: t1 has {len(t1)} items, t2 has {len(t2)} items") + + serial_time, serial_result = _time(lambda: DeepDiff(t1, t2, **kwargs)) + print(f"serial: {serial_time:.3f}s") + + rows: List[Dict[str, Any]] = [{ + "workload": name, "scale": scale, + "mode": "serial", "workers": 1, + "time_s": serial_time, "speedup": 1.0, + "ok": True, + }] + + for workers in worker_counts: + parallel_time, parallel_result = _time(lambda: DeepDiff( + t1, t2, + multiprocessing=True, + multiprocessing_workers=workers, + multiprocessing_threshold=0, + **kwargs, + )) + ok = parallel_result == serial_result + speedup = serial_time / parallel_time if parallel_time > 0 else float("inf") + marker = "" if ok else " !! RESULT MISMATCH !!" 
+ print(f"parallel(workers={workers}): {parallel_time:.3f}s " + f"speedup={speedup:.2f}x{marker}") + rows.append({ + "workload": name, "scale": scale, + "mode": "parallel", "workers": workers, + "time_s": parallel_time, "speedup": speedup, + "ok": ok, + }) + return rows + + +def print_table(rows: List[Dict[str, Any]]) -> None: + """Compact summary table at the end of the run.""" + print("\n=== summary ===") + header = ("workload", "scale", "mode", "workers", "time_s", "speedup", "ok") + print("%-22s %6s %-9s %7s %10s %9s %4s" % header) + print("-" * 72) + for r in rows: + print("%-22s %6d %-9s %7d %10.3f %9.2f %4s" % ( + r["workload"], r["scale"], r["mode"], + r["workers"], r["time_s"], r["speedup"], + "yes" if r["ok"] else "NO", + )) + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + "--only", choices=list(WORKLOADS), action="append", default=None, + help="run only the named workload(s); may be repeated. Default: all.", + ) + parser.add_argument( + "--workers", type=int, action="append", default=None, + help="explicit worker count to test; may be repeated. " + "Default: 2 and min(4, cpu_count).", + ) + parser.add_argument( + "--scale", type=int, default=None, + help="override per-workload scale (number of items). Larger = more " + "wall time. Default: a per-workload value below.", + ) + parser.add_argument( + "--quick", action="store_true", + help="use small scales for a fast sanity-check run.", + ) + args = parser.parse_args() + + workloads = args.only or list(WORKLOADS) + cpu = os.cpu_count() or 1 + workers_list = args.workers or [2, min(4, cpu)] + # Deduplicate while preserving order — repeated --workers flags shouldn't + # cause duplicate rows. + workers_list = list(dict.fromkeys(workers_list)) + + # Default scales tuned so each row takes a few seconds serially. Override + # via --scale or --quick. These are starting points, not gospel. 
+ default_scales = { + "paired_subtree": 200, + "distance_loop": 120, + "large_nested_dicts": 200, + } + quick_scales = { + "paired_subtree": 60, + "distance_loop": 40, + "large_nested_dicts": 60, + } + scales = quick_scales if args.quick else default_scales + if args.scale is not None: + scales = {name: args.scale for name in workloads} + + print("DeepDiff multiprocessing benchmark") + print(f"cpu_count={cpu} workers tested={workers_list}") + + all_rows: List[Dict[str, Any]] = [] + for name in workloads: + all_rows.extend(run_one(name, scales[name], workers_list)) + + print_table(all_rows) + + # Non-zero exit if any parallel run produced a different result than its + # serial reference — that's the one regression mode this script must catch. + if any(not r["ok"] for r in all_rows): + print("\nFAIL: at least one parallel run did not match its serial reference.") + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docs/multi_processing.md b/docs/multi_processing.md index 02b7ca6e..3ac66f22 100644 --- a/docs/multi_processing.md +++ b/docs/multi_processing.md @@ -54,7 +54,37 @@ exact serial-equivalent counts (which the doc explicitly does not require). so consumers can read them unconditionally — they just stay zero when multiprocessing is off or below threshold. -Subtickets #6 (extended matrix) and #7 (benchmarks) are still open. +**Phase 5 — landed (2026-04-27).** Subticket #6 (extended determinism +matrix) is implemented. 
`tests/test_multiprocessing.py` now includes +`TestDeterminismMatrixSlow` (15 cases — `report_repetition=False`, +sets/frozensets, pickleable custom hasher, `ignore_string_case`, +`ignore_numeric_type_changes`, `ignore_string_type_changes`, `include_paths`, +`exclude_regex_paths`, namedtuple, `__slots__`, `__dict__`-based objects, +`group_by`, generator inputs, `verbose_level=2`, and a `to_dict()`-equality +guard), `TestDeterminismNumpySlow` (numpy arrays inside dicts; uses +`pytest.importorskip` so it skips when numpy is absent), +`TestDeterminismPydanticSlow` (pydantic `BaseModel` items in a list; skipped +when pydantic isn't installed), `TestPickleFailureFallbackSlow` (closure +`iterable_compare_func`), and `TestWorkerExceptionPropagationSlow` (uses an +`__reduce__` payload that survives `pickle.dumps` but raises on unpickle — +proves the helper does not silently swallow non-pickle worker failures). +All cases assert `parallel == serial`. + +**Phase 6 — landed (2026-04-27).** Subticket #7 (benchmarks) is implemented. +`benchmarks/multiprocessing_bench.py` is a standalone script that runs three +representative `ignore_order=True` workloads — `paired_subtree`, +`distance_loop`, and `large_nested_dicts` — at a configurable scale, prints +serial baseline + parallel-at-N-workers timings, and asserts the parallel +result equals the serial result for every row. Non-zero exit on result +divergence so it can gate CI later. Defaults are tuned so each row takes a +few seconds; `--quick` shrinks scales for a fast smoke test, `--scale N` +pins one explicit size, and `--workers N` (repeatable) lets you sweep +worker counts. Verified against 3.14 CPython on an 8-core box: at the quick +scales spawn overhead dominates (parallel slower, as expected — this is +exactly what `DEFAULT_THRESHOLD = 64` is designed to avoid), and at +`paired_subtree` scale=400 the 2-worker run beats serial ~1.3×. 
The doc's +warning still stands — `multiprocessing=False` remains the default until a +clear cross-platform speedup curve justifies otherwise. What works today: @@ -91,6 +121,12 @@ What works today: `tests/test_ignore_order.py` were updated to include the four new zeroed keys in their `expected_stats` dicts; all of them continue to pass with unchanged primary counter values. +- Phase 5 adds 21 determinism / fallback / propagation tests in + `tests/test_multiprocessing.py` covering the public-API matrix listed in + Subticket #6, plus the new worker-exception-propagation harness. +- Phase 6 adds the `benchmarks/multiprocessing_bench.py` runner — three + workloads, configurable scale and worker counts, result-equality assertion, + non-zero exit on divergence. Code locations: @@ -137,6 +173,18 @@ Code locations: `WORKER_DISTANCE_CACHE_HIT_COUNT`, `WORKER_BATCH_COUNT`) plus initialization in `__init__` so the keys are always present in `get_stats()`. +- `tests/test_multiprocessing.py` — Phase 5 classes + (`TestDeterminismMatrixSlow`, `TestDeterminismNumpySlow`, + `TestDeterminismPydanticSlow`, `TestPickleFailureFallbackSlow`, + `TestWorkerExceptionPropagationSlow`) and the supporting module-level + helpers (`_SlotPoint`, `_DictBag`, `_NamedPoint`, `_hex_hasher`, + `_ExplodingItem` / `_explode_on_unpickle`). +- `benchmarks/multiprocessing_bench.py` — Phase 6 runner. Three + representative `ignore_order=True` workloads + (`workload_paired_subtree`, `workload_distance_loop`, + `workload_large_nested_dicts`), `--workers` / `--scale` / `--quick` + CLI flags, and a `print_table` summary. Run with + `python -m benchmarks.multiprocessing_bench` from the repo root. Not yet implemented (deferred, intentional): @@ -153,12 +201,11 @@ Not yet implemented (deferred, intentional): the current tests don't cover. Worker-side `_iterable_opcodes` are also not propagated, so `DELTA_VIEW` of a paired subtree containing ordered iterables is not yet covered by Phase 3. 
-- **Subticket #6** — extended test matrix (numpy, pydantic, namedtuple, group_by, - large-mixed structures, worker exception propagation tests). Phase 1 ships - the core determinism harness; the rest is additive. -- **Subticket #7** — benchmarks. The doc says default thresholds shouldn't - change before benchmarks land; the current `DEFAULT_THRESHOLD = 64` is a - conservative placeholder. +- **Threshold tuning** — `DEFAULT_THRESHOLD = 64` remains a conservative + placeholder. The Phase 6 benchmark gives us a tool to revisit this; on the + quick-scale runs spawn overhead still dominates so the threshold is + intentionally above where small workloads land. Tuning should happen on + representative production workloads, not on the benchmark fixtures. --- diff --git a/tests/test_multiprocessing.py b/tests/test_multiprocessing.py index 94dcf839..75a2a709 100644 --- a/tests/test_multiprocessing.py +++ b/tests/test_multiprocessing.py @@ -394,3 +394,309 @@ def test_aggregation_does_not_corrupt_parent_counters(self): "parent DIFF COUNT %d exceeds serial %d — looks like worker " "counts are leaking into the parent counter" % (p_parent, s_parent) ) + + +# --------------------------------------------------------------------------- +# Phase 5 — extended determinism matrix (Subticket #6). +# +# Every test below pins the parallel result against the serial result for one +# axis of the public API. The point isn't to re-test that DeepDiff handles +# these features (other test files do that); it's to prove that turning +# multiprocessing on is a no-op for output across the supported surface. +# +# These are marked ``@pytest.mark.slow`` because each one pays a pool-spawn +# tax and they would dominate the default test run. Running ``pytest --runslow`` +# exercises the full matrix. +# --------------------------------------------------------------------------- + + +# Module-level — pickleable under spawn. 
+class _SlotPoint: + __slots__ = ("x", "y") + + def __init__(self, x, y): + self.x = x + self.y = y + + def __eq__(self, other): + return isinstance(other, _SlotPoint) and self.x == other.x and self.y == other.y + + def __hash__(self): + return hash((self.x, self.y)) + + def __repr__(self): + return "_SlotPoint(x=%r, y=%r)" % (self.x, self.y) + + +class _DictBag: + """Plain class with __dict__ — exercises object-with-attrs hashing/diffing.""" + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + def __eq__(self, other): + return isinstance(other, _DictBag) and self.__dict__ == other.__dict__ + + +from collections import namedtuple # noqa: E402 + +_NamedPoint = namedtuple("_NamedPoint", ["x", "y"]) + + +def _hex_hasher(obj, *args, **kwargs): + """Module-level pickleable custom hasher used to verify the full path.""" + import hashlib + return hashlib.md5(repr(obj).encode("utf-8")).hexdigest() + + +@pytest.mark.slow +class TestDeterminismMatrixSlow: + """Per-feature determinism: parallel output must equal serial output.""" + + def test_report_repetition_false(self): + t1 = [1, 1, 1, 2, 3, 3, 4, 4] + t2 = [3, 1, 2, 2, 4, 4, 5, 5] + _assert_parallel_matches_serial(t1, t2, report_repetition=False) + + def test_sets_of_dicts_inside_list(self): + # Frozensets-of-tuples inside a list — set membership is order-free, + # but DeepDiff still has to hash and pair the containing dicts. + t1 = [{"id": i, "tags": frozenset({("k", i), ("k", i + 1)})} for i in range(10)] + t2 = [{"id": i, "tags": frozenset({("k", i), ("k", i + 2)})} for i in range(10)] + _assert_parallel_matches_serial(t1, t2) + + def test_top_level_set(self): + t1 = {("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5)} + t2 = {("a", 1), ("b", 2), ("c", 3), ("d", 99), ("f", 6)} + _assert_parallel_matches_serial(t1, t2) + + def test_custom_hasher_pickleable(self): + # Pickleable hasher should travel to workers cleanly (no fallback). 
+ t1 = [{"id": i, "v": i} for i in range(8)] + t2 = [{"id": i, "v": i + (1 if i == 4 else 0)} for i in range(8)] + _assert_parallel_matches_serial(t1, t2, hasher=_hex_hasher) + + def test_ignore_string_case(self): + t1 = [{"name": "Alice"}, {"name": "Bob"}, {"name": "Carol"}] + t2 = [{"name": "alice"}, {"name": "bob"}, {"name": "DAVE"}] + _assert_parallel_matches_serial(t1, t2, ignore_string_case=True) + + def test_ignore_numeric_type_changes(self): + t1 = [{"v": 1}, {"v": 2}, {"v": 3}] + t2 = [{"v": 1.0}, {"v": 2.0}, {"v": 4.0}] + _assert_parallel_matches_serial(t1, t2, ignore_numeric_type_changes=True) + + def test_ignore_string_type_changes(self): + t1 = [{"v": "x"}, {"v": "y"}, {"v": "z"}] + t2 = [{"v": b"x"}, {"v": b"y"}, {"v": b"q"}] + _assert_parallel_matches_serial(t1, t2, ignore_string_type_changes=True) + + def test_include_paths(self): + # ``include_paths`` is path-based, so the parent-side _skip_this re-filter + # in _dispatch_subtree_jobs has to handle it the same way it handles + # exclude_paths. 
+ t1 = [{"id": i, "keep": i, "drop": i * 100} for i in range(8)] + t2 = [{"id": i, "keep": i + (1 if i == 3 else 0), "drop": i * 999} for i in range(8)] + _assert_parallel_matches_serial(t1, t2, include_paths="root[0]['keep']") + + def test_exclude_regex_paths(self): + import re + t1 = [{"id": i, "v": i, "_internal_a": i, "_internal_b": i * 2} for i in range(8)] + t2 = [{"id": i, "v": i + (1 if i == 4 else 0), + "_internal_a": i * 999, "_internal_b": i * 999} for i in range(8)] + _assert_parallel_matches_serial( + t1, t2, exclude_regex_paths=[re.compile(r"_internal_\w+")], + ) + + def test_namedtuple_items(self): + t1 = [_NamedPoint(x=i, y=i + 1) for i in range(10)] + t2 = [_NamedPoint(x=i, y=i + 2) for i in range(10)] + _assert_parallel_matches_serial(t1, t2) + + def test_slots_objects(self): + t1 = [_SlotPoint(x=i, y=i + 1) for i in range(10)] + t2 = [_SlotPoint(x=i, y=i + 2) for i in range(10)] + _assert_parallel_matches_serial(t1, t2) + + def test_dunder_dict_objects(self): + t1 = [_DictBag(id=i, v=i) for i in range(10)] + t2 = [_DictBag(id=i, v=i + (1 if i == 5 else 0)) for i in range(10)] + _assert_parallel_matches_serial(t1, t2) + + def test_group_by_serial_fallback(self): + # ``group_by`` reshapes input dicts into keyed dicts before diffing, + # which currently runs without ignore_order; the parallel path is not + # engaged. This test pins the no-regression invariant: turning mp on + # for a group_by run must still produce the same output. + t1 = [{"id": "a", "v": 1}, {"id": "b", "v": 2}, {"id": "c", "v": 3}] + t2 = [{"id": "a", "v": 1}, {"id": "b", "v": 99}, {"id": "c", "v": 3}] + serial = DeepDiff(t1, t2, group_by="id") + parallel = DeepDiff( + t1, t2, group_by="id", + multiprocessing=True, multiprocessing_workers=4, + multiprocessing_threshold=0, + ) + assert parallel == serial + + def test_generator_input_falls_back(self): + # Generators are flagged in the doc as unsupported (they may be + # consumed or pickled differently). 
DeepDiff materializes them in the + # parent before the parallel section, so the result must still match + # the serial run. + def gen1(): + for x in [{"id": i, "v": i} for i in range(8)]: + yield x + + def gen2(): + for x in [{"id": i, "v": i + (1 if i == 3 else 0)} for i in range(8)]: + yield x + + serial = DeepDiff(list(gen1()), list(gen2()), ignore_order=True, + cutoff_intersection_for_pairs=1) + parallel = _run_parallel(list(gen1()), list(gen2()), + cutoff_intersection_for_pairs=1) + assert parallel == serial + + def test_verbose_level_2(self): + t1 = [{"id": i, "v": i} for i in range(10)] + t2 = [{"id": i, "v": i + (1 if i == 5 else 0)} for i in range(10)] + _assert_parallel_matches_serial(t1, t2, verbose_level=2) + + def test_text_view_to_dict_matches(self): + # Compare the public dict view directly — guards against any drift + # between the tree representation and its TextResult projection. + t1 = [{"id": i, "v": i} for i in range(8)] + t2 = [{"id": i, "v": i + (1 if i == 3 else 0)} for i in range(8)] + serial = DeepDiff(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=1) + parallel = _run_parallel(t1, t2, cutoff_intersection_for_pairs=1) + assert dict(parallel) == dict(serial) + + +@pytest.mark.slow +class TestDeterminismNumpySlow: + """Numpy-specific determinism cases. Skipped if numpy isn't available.""" + + def test_numpy_array_in_dict(self): + np = pytest.importorskip("numpy") + t1 = [{"id": i, "v": np.array([i, i + 1, i + 2])} for i in range(8)] + t2 = [{"id": i, "v": np.array([i, i + 1, i + 3])} for i in range(8)] + _assert_parallel_matches_serial(t1, t2) + + +# Pydantic test class must be module-level so spawn can find and unpickle it. 
+try: + import pydantic as _pydantic_mod # noqa: F401 + + class _PydanticItem(_pydantic_mod.BaseModel): + id: int + v: int + +except Exception: # pragma: no cover — pydantic not installed + _PydanticItem = None # type: ignore[assignment] + + +@pytest.mark.slow +class TestDeterminismPydanticSlow: + """Pydantic-specific determinism. Skipped if pydantic isn't available.""" + + def test_pydantic_models_in_list(self): + if _PydanticItem is None: + pytest.skip("pydantic not installed") + t1 = [_PydanticItem(id=i, v=i) for i in range(8)] + t2 = [_PydanticItem(id=i, v=i + (1 if i == 3 else 0)) for i in range(8)] + _assert_parallel_matches_serial(t1, t2) + + +@pytest.mark.slow +class TestPickleFailureFallbackSlow: + """Inputs that can't be pickled must fall back to serial without crashing.""" + + def test_unpickleable_iterable_compare_func_falls_back(self): + # iterable_compare_func is checked up front in compute_distances_parallel + # — a closure cannot pickle, so the helper returns None and the parent + # runs serially. + local_state = {"calls": 0} + + def closure_compare(x, y, level=None): + local_state["calls"] += 1 + return False + + t1 = [{"id": i, "v": i} for i in range(8)] + t2 = [{"id": i, "v": i + (1 if i == 4 else 0)} for i in range(8)] + # iterable_compare_func is only consulted when ignore_order is OFF + # (it's the ordered-pairing helper), so the parallel path doesn't run + # — the test still pins "mp=True doesn't break this combo." + serial = DeepDiff(t1, t2, iterable_compare_func=closure_compare) + parallel = DeepDiff( + t1, t2, iterable_compare_func=closure_compare, + multiprocessing=True, multiprocessing_workers=4, + multiprocessing_threshold=0, + ) + assert parallel == serial + + +def _explode_on_unpickle(): + """Raised when the worker unpickles ``_ExplodingItem``.""" + raise RuntimeError("worker explosion: _ExplodingItem cannot be reconstructed") + + +class _ExplodingItem: + """Pickleable on the parent, but unpickling in the worker raises. 
+ + This is exactly the pattern that ``is_pickleable`` (which only calls + ``pickle.dumps``) cannot detect — and what the determinism contract says + must propagate as a normal exception, not a silent fallback. + """ + + def __reduce__(self): + return (_explode_on_unpickle, ()) + + +@pytest.mark.slow +class TestWorkerExceptionPropagationSlow: + """Worker exceptions outside the pickle-fallback set must propagate. + + The catch list in ``compute_*_parallel`` is intentionally narrow: + ``(pickle.PicklingError, AttributeError, TypeError)`` — Python raises those + *during the pickle round-trip*. Anything else (RuntimeError, ValueError) + that escapes the worker logic itself must bubble through ``future.result()`` + and out of the helper, not be silently converted to a ``None`` fallback. + """ + + def test_runtime_error_in_worker_propagates(self): + # ``_ExplodingItem`` survives ``pickle.dumps`` but its ``__reduce__`` + # tells the unpickler to call ``_explode_on_unpickle()``, which raises + # ``RuntimeError`` inside the worker process. The helper's catch list + # is ``(PicklingError, AttributeError, TypeError)``; an unpickle-time + # ``RuntimeError`` is outside that set, so it must propagate up rather + # than be silently turned into a ``None`` fallback. In practice the + # ProcessPoolExecutor surfaces this as ``BrokenProcessPool`` (the + # worker dies before it can return a result) — either form proves the + # contract: the failure is loud, not silent. + cfg = MPConfig(enabled=True, workers=2, threshold=0) + with pytest.raises(Exception) as exc_info: + compute_subtree_diffs_parallel( + jobs=[(_ExplodingItem(), _ExplodingItem())], + parameters={"foo": "bar"}, + original_type=None, + config=cfg, + ) + # Sanity-check we got a "loud" failure, not the silent fallback path + # (which would have returned ``None`` and never raised). 
+ assert exc_info.value is not None + + def test_distance_worker_runtime_error_propagates(self): + # Same exploding-item trick on the distance helper. Same contract: + # an exception escapes the helper rather than being silenced. + cfg = MPConfig(enabled=True, workers=2, threshold=0) + with pytest.raises(Exception) as exc_info: + compute_distances_parallel( + jobs=[("h_added", "h_removed", _ExplodingItem(), _ExplodingItem())], + parameters={"foo": "bar"}, + original_type=None, + iterable_compare_func=None, + config=cfg, + ) + assert exc_info.value is not None + From d4e63421dc58b7471b29718ffb57f5d74355215a Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 4 May 2026 15:26:36 -0700 Subject: [PATCH 16/23] changing the link to survey --- README.md | 2 +- deepdiff/docstrings/diff_doc.rst | 2 +- deepdiff/docstrings/faq.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0836397e..bb98b0b5 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ Please take a look at the [CHANGELOG](CHANGELOG.md) file. # Survey -:mega: **Please fill out our [fast 5-question survey](https://forms.gle/E6qXexcgjoKnSzjB8)** so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! :dancers: +:mega: **Please fill out our [fast 10-question survey](https://tally.so/r/J98MPY)** so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! :dancers: # Local dev diff --git a/deepdiff/docstrings/diff_doc.rst b/deepdiff/docstrings/diff_doc.rst index 9a6accc9..03580b1e 100644 --- a/deepdiff/docstrings/diff_doc.rst +++ b/deepdiff/docstrings/diff_doc.rst @@ -228,4 +228,4 @@ view: string, default = text int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple, Numpy, custom objects and more! .. 
Note:: - |:mega:| **Please fill out our** `fast 5-question survey <https://forms.gle/E6qXexcgjoKnSzjB8>`__ so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! |:dancers:| + |:mega:| **Please fill out our** `fast 10-question survey <https://tally.so/r/J98MPY>`__ so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! |:dancers:| diff --git a/deepdiff/docstrings/faq.rst b/deepdiff/docstrings/faq.rst index 19fc7ad0..1afae4fd 100644 --- a/deepdiff/docstrings/faq.rst +++ b/deepdiff/docstrings/faq.rst @@ -12,7 +12,7 @@ F.A.Q *If you're building workflows around data validation and correction,* `Qluster `__ *gives your team a structured way to manage rules, review failures, approve fixes, and reuse decisions—without building the entire system from scratch.* .. Note:: - |:mega:| **Please fill out our** `fast 5-question survey <https://forms.gle/E6qXexcgjoKnSzjB8>`__ so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! |:dancers:| + |:mega:| **Please fill out our** `fast 10-question survey <https://tally.so/r/J98MPY>`__ so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! 
|:dancers:| Q: DeepDiff report is not precise when ignore_order=True From 4403424bd98f65128675a7d37d134999b499a92f Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 15 May 2026 10:24:09 -0700 Subject: [PATCH 17/23] fixed the broken test --- tests/test_serialization.py | 2 +- uv.lock | 123 +++++++++++++++++++++++++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/tests/test_serialization.py b/tests/test_serialization.py index cb6dd8a3..d2b720b7 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -542,7 +542,7 @@ def sig_to_bytes(inp: Dict[str, Union[str, bytes]]): (4, Decimal(2017.1), None), (5, {1, 2, 10}, set), (6, datetime.datetime(2023, 10, 11), datetime.datetime.fromisoformat), - (7, datetime.datetime.now(datetime.UTC), datetime.datetime.fromisoformat), + (7, datetime.datetime.now(datetime.timezone.utc), datetime.datetime.fromisoformat), (8, field_stats1, lambda x: SomeStats(**x)), (9, np.array([[ 101, 3533, 1998, 4532, 2024, 3415, 1012, 102]]), np.array), (10, memoryview(b"hello"), lambda x: memoryview(x.encode('utf-8'))), diff --git a/uv.lock b/uv.lock index fcf6a9b5..81a327c3 100644 --- a/uv.lock +++ b/uv.lock @@ -102,6 +102,125 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/e3/fa60c47d7c344533142eb3af0b73234ef8ea3fb2da742ab976b947e717df/bump2version-1.0.1-py2.py3-none-any.whl", hash = "sha256:37f927ea17cde7ae2d7baf832f8e80ce3777624554a653006c9144f8017fe410", size = 22030, upload-time = "2020-10-07T18:38:38.148Z" }, ] +[[package]] +name = "cachebox" +version = "5.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/36/f6/85f176d2518cf1d1be5f981fc2dadf6b131e33fefd721f36b330e3434d6c/cachebox-5.2.3.tar.gz", hash = "sha256:b1f68246685aa739bbbd2734befb1465363a1e1042407c154feadb065f17a099", size = 63686, upload-time = "2026-04-10T12:21:35.028Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/13/9e/88193fcb7a2a43fe8ed9d9888374d43fa5c7176aa802651e68b28f1aee4a/cachebox-5.2.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:c2c89720547271d36e10cad2c7302bbe11f46eb39eead0a2c321c2d371b8f8b6", size = 374393, upload-time = "2026-04-10T12:20:20.424Z" }, + { url = "https://files.pythonhosted.org/packages/98/8d/e0b13d9bfd43f295cce7824ebaac1970f818a7027c16f290de404934cafe/cachebox-5.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e7f33d24e90dc8aa26762e25898c91a1223b66685420a28a3628fa2e006924f5", size = 356318, upload-time = "2026-04-10T12:20:07.518Z" }, + { url = "https://files.pythonhosted.org/packages/bc/02/8ae1b63dbdebb2ebf600523f48b54e9bfb10db5a28551c3432346f49e1dd/cachebox-5.2.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56cb03ec6289a2ac5daf7422d755683324f02d821bfa796087100df2a7ebd5de", size = 395782, upload-time = "2026-04-10T12:18:50.054Z" }, + { url = "https://files.pythonhosted.org/packages/e4/2f/79a8a0057f354581c25a1a00ddabbd5db4b8631d192670d7a0cc4271dbb7/cachebox-5.2.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a71a71df463ba4c86bc843fa01c3a2a721033adefad888af28c6b65e1915a75c", size = 353194, upload-time = "2026-04-10T12:19:03.083Z" }, + { url = "https://files.pythonhosted.org/packages/3b/57/a1fead35cf481432bd87def0653cd4a069b1ea5847589255795e49ae74b8/cachebox-5.2.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbe4655371d19fc9f4f5874312bcb6e5b5b6182989979ac33d93c34c8d10c012", size = 371090, upload-time = "2026-04-10T12:19:16.019Z" }, + { url = "https://files.pythonhosted.org/packages/8c/58/53f1fab8bcc3238fd6c533ef3ab146097986a8acb722863c688a2410c1b2/cachebox-5.2.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4974476d1779961df89d6e6f79e6103a1659289d3ee11c92adcb52e236a8aaeb", size = 390902, upload-time = "2026-04-10T12:19:28.258Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/2f/5abff74666f8388d2c9516c265f99c33484c827f7fcb3cd703c2f3cbb17e/cachebox-5.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad16d733219f4cab3eec6533af30ab7b9c919c6e3e22ad1ef4eb82629a62edef", size = 395855, upload-time = "2026-04-10T12:19:54.207Z" }, + { url = "https://files.pythonhosted.org/packages/dd/11/30b429db12ab5df663aa108bcfac42805f733da65b0bf452f60bfaf4a530/cachebox-5.2.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:12a9e0a93774ca2b3a9fe8a2a0d0812e399fac4af0fce6246a5bca1e7009b8fc", size = 425760, upload-time = "2026-04-10T12:19:41.138Z" }, + { url = "https://files.pythonhosted.org/packages/cd/b4/fdac1bb902b954c03d23eb301d645a328c9664caff5898930fdbd92fde80/cachebox-5.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:be89497a011eb7a638d13cc520244d77579c0f515b95bf759b3de0b90a015203", size = 564988, upload-time = "2026-04-10T12:20:34.673Z" }, + { url = "https://files.pythonhosted.org/packages/4e/63/76cd5405b0339f15bf86593258bf9bc5608f10a5e0fa6f37a282b42a6caa/cachebox-5.2.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:dd01fc0c1934cccb76493eb4b149a9232d299e5e0275f557adf875c3d25cec81", size = 669110, upload-time = "2026-04-10T12:20:49.039Z" }, + { url = "https://files.pythonhosted.org/packages/d9/bc/52d154aa0407bafce94d1d8d3ff27ca5e842f8311be43cfabdefcbb0f6b7/cachebox-5.2.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a0dfd97b0968f8bd48c33098a03d10f797964559c3a437c84bf97a9973545714", size = 643768, upload-time = "2026-04-10T12:21:04.095Z" }, + { url = "https://files.pythonhosted.org/packages/51/d9/82627eb8cecaf5e7e601bbc65d474a1c3053a2fbc21618ddc6aac19c47dc/cachebox-5.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:223ccf7ac60f595def258e7bc74c0b1d6f43991c9cae6d06749c803d22786d99", size = 610047, upload-time = "2026-04-10T12:21:19.469Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/2e/cc5b303746418fde00c93ddbc295733b4e2d131d2e8f5afbc6f45f50454e/cachebox-5.2.3-cp310-cp310-win32.whl", hash = "sha256:745b805fdd99931c3ce1d87d2ee21ca3fb62cba6b4e1f674907af87aad73dce4", size = 275529, upload-time = "2026-04-10T12:21:49.84Z" }, + { url = "https://files.pythonhosted.org/packages/31/72/fb10d6f779d041f701b89f0b7830329f51d1846fbc600869f9f7d635b7b5/cachebox-5.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:a87b19c0a3d8d665a9805b5b4afd64b40082395b70ebe2756131ed1edb0c8f02", size = 287988, upload-time = "2026-04-10T12:21:36.41Z" }, + { url = "https://files.pythonhosted.org/packages/81/88/154179d492f2c000fe6efab3c3ff6b8eb94fbfaa09efe47999bce6b1e29f/cachebox-5.2.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:996f49d04b234082530afcc650bdd00556afbebc19c6c0daaafb85950340cb3c", size = 374245, upload-time = "2026-04-10T12:20:22.042Z" }, + { url = "https://files.pythonhosted.org/packages/7d/9d/3b03f2e063161bcb1a5e0969d521b5c622c2da02252a5c8bd4ef0e4f9914/cachebox-5.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23a3300ebbb526fa12ce6fa53699002f5fba6da23b4bbbaf8ba8b18a3f03e6b3", size = 356308, upload-time = "2026-04-10T12:20:09.149Z" }, + { url = "https://files.pythonhosted.org/packages/bb/9b/8da38af731e3832e9f987548e4bfb610d7f3054019e12c44a94ba9272b37/cachebox-5.2.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79c63ee1589364caa04c018405e625d2e44e0bf9994f2715b2f322075d8c45b6", size = 395666, upload-time = "2026-04-10T12:18:51.89Z" }, + { url = "https://files.pythonhosted.org/packages/01/dd/1522aa808f94c904c5eb3640991799fed14dd43c1dd99a9f7b71bd95b1e3/cachebox-5.2.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ebd0f8d4ebc3943c1ddcbbdc54f1a8ddf95505c862ed5731319cebd1eb98ae41", size = 353362, upload-time = "2026-04-10T12:19:04.536Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/52/95bf883ec9b69a76f3a7d9fb14d015d9a4bdab0143a3eff62ceebc8b1419/cachebox-5.2.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:569966efcc6309aa7d774443e3513cdbb8671efae0158138ba2ebb7d8cc9d8ed", size = 371007, upload-time = "2026-04-10T12:19:17.484Z" }, + { url = "https://files.pythonhosted.org/packages/4b/3d/cc02066d5ccfcb8b35adbaf867977fdb54572cda56ace56da396f0caa3bf/cachebox-5.2.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5774d06f0da37dd566239a4376d6ca8cf983d3e4c3228712ec22b4130f662f21", size = 390670, upload-time = "2026-04-10T12:19:29.685Z" }, + { url = "https://files.pythonhosted.org/packages/b3/50/8e4d59b3e344405d8393d6cc5cc92754d3cc1d81134041ebffd3f5ab73e6/cachebox-5.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae5bf8755bc66bcf42e7ca5c42d703a041a7aaad58f9a0c3be54d5b1cefd2641", size = 395765, upload-time = "2026-04-10T12:19:56.169Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d4/d731cff1c4cec22404bd3ddda05b233c5efaa5f13d7abf4e2728905b7cdd/cachebox-5.2.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63f061cc6a5ca70bbce2e6be0588fe2fee00a93a1b0581b1086d54b10288cdb6", size = 425707, upload-time = "2026-04-10T12:19:42.714Z" }, + { url = "https://files.pythonhosted.org/packages/36/01/3ec8aadceb0dcc66dbd0b9b32966cf7b6928ed84471424c24d21b0af62d0/cachebox-5.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:577c781f18b559f4dc9eea176c6aed008843ef4b8e045cf61bb519e09dccc9ef", size = 564759, upload-time = "2026-04-10T12:20:36.268Z" }, + { url = "https://files.pythonhosted.org/packages/db/23/31cbc8623ecc2e25900f7e8f20f11bfb84786989a59a8046e70b27cbea6d/cachebox-5.2.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:7f691e25572a3ddbb018e19d796f774713bd6b0f7ce9be2e71f6e18572de264a", size = 669309, upload-time = "2026-04-10T12:20:51.117Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/29/5a9e92bdc7b32dc865e73dd776638244f900136daee5bb0591a67e1530fa/cachebox-5.2.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:33368adf86669c29b936fbae5d6219cf90aacd4b1db71dae2e23d584a8219cd6", size = 643705, upload-time = "2026-04-10T12:21:05.882Z" }, + { url = "https://files.pythonhosted.org/packages/04/90/5273a412855fdc11f674e4749aee6d5ec0a91f5c1a9f6e922f7fa0cb7a83/cachebox-5.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:38ce67b7b45713e49459a09411d07f82de04022c04aecde6202cd32f934c2b1f", size = 609751, upload-time = "2026-04-10T12:21:21.331Z" }, + { url = "https://files.pythonhosted.org/packages/a1/a4/0fadb5e6a00f373cc3fe56b4415cdea2fc0147f6ec475611762d16eb4b05/cachebox-5.2.3-cp311-cp311-win32.whl", hash = "sha256:a7cd2c81347063ab6c512d0f569aeb5f75fc2dfe686c8486258ffd08052324f4", size = 275485, upload-time = "2026-04-10T12:21:51.563Z" }, + { url = "https://files.pythonhosted.org/packages/03/83/67c1bf83f815294d2c3acd7631f25b5cbe6067e1d56495f76829dd60057b/cachebox-5.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:7e45798d6b969794840bb302857946d710ecb32af78dfcb3ab40f4e68ee7fdaf", size = 288024, upload-time = "2026-04-10T12:21:37.999Z" }, + { url = "https://files.pythonhosted.org/packages/e4/e7/6fa6abfc9c4c07b88f09a88466fa93c7081fd679d8e06f8f558bb4ac845c/cachebox-5.2.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:09c0340e9daa7b4530801e5a570cb0c1a1ad941a85d245d360020d3986d0e787", size = 377791, upload-time = "2026-04-10T12:20:23.87Z" }, + { url = "https://files.pythonhosted.org/packages/3a/79/89e4423352d0ca33bbf80fc1b4b665e654a93de8b16cf41e96fcac81801a/cachebox-5.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3162758792626685ec34950eedd565d015b115d0ff0d751d2716031fc32d51b", size = 359562, upload-time = "2026-04-10T12:20:10.626Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/ab/e533c2751e6a3411ebe369277aaed03199b9e4586a48f0a3712a1f4b418b/cachebox-5.2.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a189a780c3ccd7b9d157074ba6bf3e191e522b39abbdb590075111851f02d50d", size = 397910, upload-time = "2026-04-10T12:18:53.336Z" }, + { url = "https://files.pythonhosted.org/packages/7a/0d/b8492d6ca53278499a37c9f9d51afd4ad77bfbe813d6281944d45b97a1e7/cachebox-5.2.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:410b67baa99d433644199b11289627f7ebba4ee5786f95ca9858f238afcee157", size = 353699, upload-time = "2026-04-10T12:19:06.248Z" }, + { url = "https://files.pythonhosted.org/packages/78/d4/fd20b3a5362651303fa12d3ee62f56af2bd396e4a7303d7014a1a1e5b392/cachebox-5.2.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f81474dc19d3865fa5e57263f834bc6bbc00e471a594fb9d934ed552732c02fd", size = 372510, upload-time = "2026-04-10T12:19:18.997Z" }, + { url = "https://files.pythonhosted.org/packages/71/94/3ec55c946d300cc4eaed3a0f79740051ac6e11ef4032421332c6ca15f5d5/cachebox-5.2.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85ccd827193b3e3e887a88a16b88ef7ed174e7e65be515b5253322aa75e665c3", size = 392802, upload-time = "2026-04-10T12:19:31.196Z" }, + { url = "https://files.pythonhosted.org/packages/01/b1/1a3c4e436ad8a4c4ba3e70f4c62e1f927cbbb3c943a9bba5813b8b815bde/cachebox-5.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a1e7d3cb8a5e7e68996a8619e3ef8771a124d14568c251f9e586eba88d759c1", size = 398223, upload-time = "2026-04-10T12:19:57.583Z" }, + { url = "https://files.pythonhosted.org/packages/0a/ea/d36ad3976c4396b350b96a1582411b7a00e56c144eec0bb5ba5f36ce7d86/cachebox-5.2.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:adcedfcfcb933b21e7fdcfe560c79887bc8287abceab0586aa3730417dd0277d", size = 427696, upload-time = "2026-04-10T12:19:44.361Z" }, + { 
url = "https://files.pythonhosted.org/packages/a8/36/71845b5c7a9ffbd85e6fdb470c11a174f499bd5238fa37b1214157c2454d/cachebox-5.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c7f0c72c51a3a9e7049ea6ff2a43cd3877ab7fee966eb65771a59621563b75e3", size = 567854, upload-time = "2026-04-10T12:20:38.357Z" }, + { url = "https://files.pythonhosted.org/packages/e8/a2/baf0e5a8392e64e352b137ccd7356b3d98068c842fd19f510a7790c05d34/cachebox-5.2.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c48c10e498d573511aafbd545570e7f43b40a7428dc282183bf5adc334d9e1a8", size = 670306, upload-time = "2026-04-10T12:20:52.903Z" }, + { url = "https://files.pythonhosted.org/packages/a5/22/cd4e4c1d624b8ef9fb4b8bebf0bf5d2d74a399cf1ac46b667bb79d15359a/cachebox-5.2.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:2f1e086ab5ffd082a68bb63699d517655a59b06414927bfc84e01df91b81e34d", size = 645943, upload-time = "2026-04-10T12:21:08.238Z" }, + { url = "https://files.pythonhosted.org/packages/0a/d6/55859981f5ec6a9e412baaa4db6aa5973a00008750b3f054cdefcb6491fc/cachebox-5.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:649d18399f13735bb82daa33800196f815529c49e967767c40ca221723e68afa", size = 612309, upload-time = "2026-04-10T12:21:23.404Z" }, + { url = "https://files.pythonhosted.org/packages/d7/1e/313f650467ac85824c4199188f8f1ee3386cd12eb665dbf7c88d372e4956/cachebox-5.2.3-cp312-cp312-win32.whl", hash = "sha256:0a17aeb4e5b1c6ef1c3db8fc5186f9986e215ba5ea5a5d08baa45bcf55f261b2", size = 279789, upload-time = "2026-04-10T12:21:53.215Z" }, + { url = "https://files.pythonhosted.org/packages/c5/50/3b334f887accfa811cf5c7533b8ce22c523eb009363a86401198899dadd2/cachebox-5.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:cfd69114141ab362acaa2099e425a1b965cf7b021a539a4e953143d593930b74", size = 290917, upload-time = "2026-04-10T12:21:39.696Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/3b/16d5c295f6ec2913ef595b39986dc7b7cc179fdd2e73f5ebd1814c38fd51/cachebox-5.2.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:9527c5c70f8735f2d696331d8bcf77254f03b4dc8542046807823bd36ed4e8ba", size = 377408, upload-time = "2026-04-10T12:20:25.444Z" }, + { url = "https://files.pythonhosted.org/packages/cd/87/45f834154f79721e5b64a80ffab4f9710834c4f9c01fa977f94a9116c32a/cachebox-5.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:40ac878af00d5969862c1f6bc076de1e34ca248662fce6aecca1761f52e33e32", size = 359274, upload-time = "2026-04-10T12:20:12.127Z" }, + { url = "https://files.pythonhosted.org/packages/46/17/794e5f93e0a172aa14ecd692f6d89bdf094f71eb35fa923d0a0af25cef1c/cachebox-5.2.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5ff26bfd8f7e95b3becf6d5f65c25edaca50fa68078868648b70d79bcccc260", size = 397520, upload-time = "2026-04-10T12:18:54.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/19/9470b1a96de6e480192b1a92b2fafa72aa052efc2509a5418a5652205b33/cachebox-5.2.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82e7002dd343afeeba2fcf0e483131b342a27ec3bc34b2214dc617691bda40d6", size = 353183, upload-time = "2026-04-10T12:19:07.797Z" }, + { url = "https://files.pythonhosted.org/packages/6c/2b/72813f80397ed4640e337cbd1a14ab7eaafe33e479291d3623b6a6a55fec/cachebox-5.2.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ccbdc54a6c4b5758408c1083bdfa217bd382894a8331c7d0a54b84ba0cf51e5b", size = 372239, upload-time = "2026-04-10T12:19:20.44Z" }, + { url = "https://files.pythonhosted.org/packages/05/17/47dc9687288fa55486573627089ecd9aae124de5924a4bce008af96d80b6/cachebox-5.2.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df5135a168f143d186b1cc3be0ca16b66446897ab5cedc03bd80bcc926fcd403", size = 392568, upload-time = "2026-04-10T12:19:32.73Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/95/450765b971a3bed9d7cf003c3833c1976482eb83b0241b6dbb840a25b43b/cachebox-5.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10bedf96db8f9766cc956f9adcc623e604264e5d6fa2e255432f8c2ed7519143", size = 397920, upload-time = "2026-04-10T12:19:59.314Z" }, + { url = "https://files.pythonhosted.org/packages/5f/3e/dd8f4c1f92e58d479913ce9cbaa3227c911128e6046c82f4fd44309f685a/cachebox-5.2.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f22732d0d69bb84ad2dca7480bffdfd0430c647152d488936e152ecbbfee52fb", size = 427332, upload-time = "2026-04-10T12:19:45.888Z" }, + { url = "https://files.pythonhosted.org/packages/7e/20/80d8c26ce63e78da3874a5bb07a3a78de53a2b0356ba80583a4927f0a074/cachebox-5.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:26ae0b68979204d360327f4c0725cfdc95cfc34ab73ab1a8f528e3bd2f6d023c", size = 567494, upload-time = "2026-04-10T12:20:40.373Z" }, + { url = "https://files.pythonhosted.org/packages/10/35/7249885dfed3602b3b48c1e67781197dcdc536c50f72caeabe3944348af8/cachebox-5.2.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:f3d628b816e28a6e7661d460e02dd5b421247cc2cd275814f80ea79621245fc4", size = 669968, upload-time = "2026-04-10T12:20:55.155Z" }, + { url = "https://files.pythonhosted.org/packages/2d/8a/e5b58f0bbd6fef74da5d8e5ab49e67898ce7e6df28c16280a0f2b78461f7/cachebox-5.2.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:64057caa6b741320655cd3c5997fe642dae5dbff571eb530e6f53e58272bb43b", size = 645547, upload-time = "2026-04-10T12:21:09.948Z" }, + { url = "https://files.pythonhosted.org/packages/d8/25/51783a4c6f25ca87ef1b4b762ff0364bd98053a02d597b30d26ff4cf13c5/cachebox-5.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa325306084aa2dc0b21e07723d7700f4d43dece3732c7fdaf7a269dc5e35aa7", size = 611844, upload-time = "2026-04-10T12:21:25.286Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/c5/b26c4b046e296d0e249448fe297626b3caca2e851837712f03c358662cb7/cachebox-5.2.3-cp313-cp313-win32.whl", hash = "sha256:55003089d21c2f5515089c307be063b45558e884a4a1cc9593944374c89975c4", size = 279421, upload-time = "2026-04-10T12:21:54.921Z" }, + { url = "https://files.pythonhosted.org/packages/e0/7f/a49420670393bfea618de7a893d45cae9294cf3293d7b158e7af20e8f39e/cachebox-5.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:dcc5edb6ecf2b516e90b773d232360c5e4ed8fdcda038b19441da2ed9cf208ab", size = 290702, upload-time = "2026-04-10T12:21:41.458Z" }, + { url = "https://files.pythonhosted.org/packages/c9/0b/bf83bda13ef6fc490d208a1d4dd712034624526a88f61713cca0edc9884f/cachebox-5.2.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:a4b7559fa4994c4032dd07466c2041d57e055feb814762e1f73f4e8beef188d0", size = 371704, upload-time = "2026-04-10T12:20:27.253Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ea/aa5162273238e84f9e41b33600c69299572dc1c8f0f768d07660b71be07d/cachebox-5.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f57afada3d9327adf87f3b5cf0094348c6fd49354ab2e9bd20b044648eb094ae", size = 353385, upload-time = "2026-04-10T12:20:13.668Z" }, + { url = "https://files.pythonhosted.org/packages/47/96/3ca013e2e48df5c1d7855669b208f4bf8014ccb842ccf7a3a0eaac07bee0/cachebox-5.2.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8342ff350ce86f062492752d612e9f056ac5dc56375713d75c3bf6e83b4d18db", size = 392181, upload-time = "2026-04-10T12:18:56.385Z" }, + { url = "https://files.pythonhosted.org/packages/63/ca/1bacb4efa0b0ce8065d1fb7c8dc7c382ec4e1cc3f007eb08417732be2725/cachebox-5.2.3-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:405f9cc8492fc9d953b5a6b9e2b661e99583755c6639ab8d09a287fdf336503c", size = 349494, upload-time = "2026-04-10T12:19:09.505Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/2e/75db4bda3768658f5baa5a54f6a4f643bc2de1a16788e40581a080e803c7/cachebox-5.2.3-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:94aae393ec1d9b26565d346445bb6afa3963d2a0d3eb5e4188d0e510fab871a0", size = 369216, upload-time = "2026-04-10T12:19:22.224Z" }, + { url = "https://files.pythonhosted.org/packages/f5/82/e1f833be0d57e29a8c5eb0a0275cd34b962f3c7f5b9e0517ec4bf75e7cc3/cachebox-5.2.3-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8b0b575066fc09f6fae0d4bd30d6ff56584a6870cbe7d202916c5e0d725cfd4", size = 385922, upload-time = "2026-04-10T12:19:34.198Z" }, + { url = "https://files.pythonhosted.org/packages/53/d6/615a3c16c1d63839f2c67644eb414c4dc9769ab2e169d935110fd8e268d5/cachebox-5.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41e99c1240106d39b63ce7868a6cd8c9da9243fef08848b85d428164e0769fd2", size = 393276, upload-time = "2026-04-10T12:20:00.925Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a6/7844c9c84b170dae1005b22da174639968e64c8055d66a209a1598663771/cachebox-5.2.3-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:432ca62b99f7eafc21af669d76c88c1b7377db179b89fb6fca3ea93b8f9fff19", size = 421355, upload-time = "2026-04-10T12:19:47.691Z" }, + { url = "https://files.pythonhosted.org/packages/c9/0f/43f62355846cae3dc41cb4daccac0a4bb2b7b8b3c7d77d1b6a220bae6d54/cachebox-5.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e51d9c59006b53447f806145406eb37a7fc3c25553d4fd24c3887f3b268d214e", size = 561656, upload-time = "2026-04-10T12:20:42.161Z" }, + { url = "https://files.pythonhosted.org/packages/9b/fc/a453813c6d000d69a41a06c6a3143a6c4d0d0e41f23c155db2f82ea0edfa/cachebox-5.2.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:5e48a405f699fb001b8af120a6e0b4a981277f84eb5dd66a1faa21e4b6fe9485", size = 665791, upload-time = "2026-04-10T12:20:56.842Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/a3/f6a9e75f1e602b67b6d67088a9a766adfc4e0a740a9c4b68e4e6207c1006/cachebox-5.2.3-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8cbfc007ea78af61d75d7d26e5854df53dc5da6877d074afd4b4696c074f4ee7", size = 640975, upload-time = "2026-04-10T12:21:11.641Z" }, + { url = "https://files.pythonhosted.org/packages/a3/15/4ac98277f7fd9d855c8ed337e8e2a3386d17997cce2dd3eadb23dedc08e3/cachebox-5.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6a94d0da8133b3a0707ae11c9ea321f8fc37e3b5a14517019a05d632218b0f56", size = 607242, upload-time = "2026-04-10T12:21:27.27Z" }, + { url = "https://files.pythonhosted.org/packages/9c/0b/ce61907a803f75854e0cc91b84c16e14dce0e4e939efbda26293eb4c8784/cachebox-5.2.3-cp313-cp313t-win32.whl", hash = "sha256:5fee33549877c03c2494ec5359a57a7667f872fe8e296a7f39d3dfe08dd3914c", size = 271619, upload-time = "2026-04-10T12:21:56.768Z" }, + { url = "https://files.pythonhosted.org/packages/b0/06/fece190ad5173d06b2779494aaad5528907f2e55c809618e5b67c2e3dbb5/cachebox-5.2.3-cp313-cp313t-win_amd64.whl", hash = "sha256:67548a05cd41fcc4f7af80a2f97f742fef3d436537ac2e1a1dce0fcba5d41190", size = 283133, upload-time = "2026-04-10T12:21:43.037Z" }, + { url = "https://files.pythonhosted.org/packages/b8/8b/72c0e80aad08e09867ce14a621bce689a733552f20cdf2ef96d4b052da10/cachebox-5.2.3-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:37fa0891f0defee053c09f5f43f802f731e36e6e6ca055d7d174af07f77232ca", size = 380523, upload-time = "2026-04-10T12:20:29.345Z" }, + { url = "https://files.pythonhosted.org/packages/fc/62/33aaade81b181d5191cc39c867c297aa7c65f3191aa9749bf99b77496b88/cachebox-5.2.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:dc6315902f2ef4afbf10bc8e08c54ff34de5ce124546b8e0016c9b0d327be21e", size = 362424, upload-time = "2026-04-10T12:20:15.215Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/0b/3eedaf9ea4b41c931f4340bfa42056efe2bb5fe3a79649d6c8a1dce585a5/cachebox-5.2.3-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7df1735ca778480d51b8232fed397ffe3935158f20d34fb1c5ed171b53d5a6e2", size = 399572, upload-time = "2026-04-10T12:18:58.331Z" }, + { url = "https://files.pythonhosted.org/packages/be/69/c79b8a6a5b889ac4a60800bacea3553cb3b86f6fd13b2262bade1cb962c6/cachebox-5.2.3-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e22451cde8f884051e941b21870e4fc91fcf58d0d8c285bb8964107e1f02445c", size = 353803, upload-time = "2026-04-10T12:19:11.21Z" }, + { url = "https://files.pythonhosted.org/packages/d4/c3/bc7838de51039f8c50506d8dc82f22ff9a652794339a223b12af595e1d2f/cachebox-5.2.3-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dcbccf3015d9a42bcf41260fa5cc048a5bdb75aa10997d514d6c976117f30ee2", size = 374474, upload-time = "2026-04-10T12:19:23.658Z" }, + { url = "https://files.pythonhosted.org/packages/65/61/e5231ad2ae952ca482f9b9df55df4b96add1a80de28de537c5f574605987/cachebox-5.2.3-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:311eae5079e256cbbfafdc3dcff1714b6598a767f9c1ef8c3709e74ea0cc12b0", size = 393045, upload-time = "2026-04-10T12:19:35.651Z" }, + { url = "https://files.pythonhosted.org/packages/78/c4/c9b3fa764ac5420a9e079ad53fa8840d4a26b74c4ccda56acbef49cf76ff/cachebox-5.2.3-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f4d2a80a5cd3380739c67f7d89e596634f5897b8d5a4a3dc1598312cb077535", size = 398700, upload-time = "2026-04-10T12:20:02.513Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3e/c4e3acd4cb04e01c5fb7cc7a4de16059b9594d90672fff85af8670275267/cachebox-5.2.3-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3977515b727a5203f494c44c4566fb936c4b940351c01d3d8e7b5d104dff4f53", size = 426725, upload-time = "2026-04-10T12:19:49.385Z" }, + { 
url = "https://files.pythonhosted.org/packages/25/5d/610b79479719951581109d985244d34c97f86a308c3d7c83443e2b1dac46/cachebox-5.2.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c5be17dd5c4fabcfecd5bcf6d54f9c6fb719daed3ef01ac1c03a14af0e2b26c1", size = 570042, upload-time = "2026-04-10T12:20:43.793Z" }, + { url = "https://files.pythonhosted.org/packages/8c/63/cad8a05db4d0c0f5ba6bccb32e57d15c472276de9476f56004445b40711f/cachebox-5.2.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:6d37334fc218fdaee31db8a4f938938716e7c3b1b4059e25de27c8447fc95fde", size = 670974, upload-time = "2026-04-10T12:20:58.528Z" }, + { url = "https://files.pythonhosted.org/packages/54/d1/9cff7c2b9048d1c38b7ad8199ce856596d09720b3bea74043f3bad71970b/cachebox-5.2.3-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:1e5f1b7e23411b748d919348c3b65db1f9f8927ab8f6f3acae19bd617543df2d", size = 646213, upload-time = "2026-04-10T12:21:13.619Z" }, + { url = "https://files.pythonhosted.org/packages/27/ae/2e1ad162ec13903e84469c8a753baf385f1bc324279d6c7cb6365e7099df/cachebox-5.2.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7b06a75a898b31fd73c4d8bf727a9b9f8b5b7738cccd0ab5e6fd2a9cf659d3c", size = 612787, upload-time = "2026-04-10T12:21:29.271Z" }, + { url = "https://files.pythonhosted.org/packages/c8/8a/07b5ffd841e1ff534bb6e8721c39fdfe0d7cdaac1398e1783b2a0c37bd22/cachebox-5.2.3-cp314-cp314-win32.whl", hash = "sha256:3b798052719f09a2ce7bf9fa9452dc0a7d4dc53b50a2d3aba6ce6ebc12d39df7", size = 278559, upload-time = "2026-04-10T12:21:58.482Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f8/b88a82ce9ec7a2fa0f09ed1cdd031692c8664c41f9ab71831e177c7ce2df/cachebox-5.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:4afc8b8575e3228a42ad8d819de5fbbecc6bd0b521295966b00244be37ae3b9b", size = 291928, upload-time = "2026-04-10T12:21:44.621Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/01/8c79c07c8c6517fb2fe7d479dd87044e38aac5b9af0245b33fcd695eae37/cachebox-5.2.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:0e8a34b82be30d3d9fb7dfaf9a86ec2b3ab9bc264715909ef27fc3d3587324d2", size = 374325, upload-time = "2026-04-10T12:20:30.923Z" }, + { url = "https://files.pythonhosted.org/packages/7f/51/0fc26b923e80ab857ac99d5f7f3784dc941e7b4de361c204835233176ddf/cachebox-5.2.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4d4e336aebf866463878ccd28a4d0ef4003ea216708cf4a02a7f198481b3af81", size = 355444, upload-time = "2026-04-10T12:20:16.879Z" }, + { url = "https://files.pythonhosted.org/packages/c1/6d/a6b399221f8dc4b3e01b37d3240ef5b8a7eb78cd9bfbb99b0e655dd01649/cachebox-5.2.3-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b102fcdd97b0602bf5d6ba1a571bba3e3d6fa912b89fd768b0da5427408eab8", size = 393978, upload-time = "2026-04-10T12:18:59.753Z" }, + { url = "https://files.pythonhosted.org/packages/bd/f1/4c8f998c117c1941a82bd824d6687280c50167f21fea6392e41531d641e2/cachebox-5.2.3-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:245a79fb2c5d3bff252f4263f76210ef3ad7c2ff9b0234859b26974830a80491", size = 349298, upload-time = "2026-04-10T12:19:12.843Z" }, + { url = "https://files.pythonhosted.org/packages/d1/dd/683bc5a32a0da660d02fa248b880b71a2b834e9b54b8d272b5801282f402/cachebox-5.2.3-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd0e8dbd8fd4cf664c645c08f9e10508e133353756705c4a738e90a5406224b5", size = 370619, upload-time = "2026-04-10T12:19:25.298Z" }, + { url = "https://files.pythonhosted.org/packages/81/49/d6c47c78a7769b355076c5b635c2b538c8b88e8ceeb408e104d0f269b515/cachebox-5.2.3-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdb74294bdc33e39e26606919a9b2229038d5fac0edb80c9056683c08584d4a9", size = 385988, upload-time = "2026-04-10T12:19:37.638Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/e2/b669555ada7fa1392e4cdb8a19f3367db5c6abef0fde8ab034a9747760df/cachebox-5.2.3-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bba3e9a7f52fa196b434522f39675f3b32a076976ef2373ded6f1065e99f4d20", size = 394090, upload-time = "2026-04-10T12:20:03.978Z" }, + { url = "https://files.pythonhosted.org/packages/8f/01/42916249e53fe4fcbdf0419fb55dbc09b9f377475376e1d7f4ae9c9bd6cd/cachebox-5.2.3-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abb21f0f937fb66528f1b9f1a04874d6aa503e78bbb26f4cf33bf67faddbdd68", size = 421632, upload-time = "2026-04-10T12:19:51.048Z" }, + { url = "https://files.pythonhosted.org/packages/a1/54/34eebe18c6ed8ba27b1331b5e3d08bd8bb62f03ba81fbf47a2db0fa646f7/cachebox-5.2.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:dab6fd3189b0c746fb03e1915fd947aaca9112cedf26ef3a0c39383acf87d2e5", size = 563871, upload-time = "2026-04-10T12:20:45.417Z" }, + { url = "https://files.pythonhosted.org/packages/ba/b2/f92da0d54e4f18609588709090de8c81dd7c8b20ed6ac30f9b91bedbedf5/cachebox-5.2.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:b4e7d2935b9df11d3717f99c7237b6780f1f8c70e6a99b69b8430d89929ec825", size = 665677, upload-time = "2026-04-10T12:21:00.512Z" }, + { url = "https://files.pythonhosted.org/packages/43/9d/bf2d3dc949afe4d21fc7eb15b7524255e834b9252df6bba111e6686d1c6f/cachebox-5.2.3-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:611aa260fe1b2506330ff72f415e2cb4053c9c4e3776ac68fe2eedee0e1b91b1", size = 642067, upload-time = "2026-04-10T12:21:15.727Z" }, + { url = "https://files.pythonhosted.org/packages/6e/4f/a789eda189550d239fbaf165b9810f148e733e97a2a4eda7c4192295c7f8/cachebox-5.2.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a424ffb8514a9cb49bacff7995b7c767625cb2239692bd6524245e8579e375cc", size = 608048, upload-time = "2026-04-10T12:21:31.156Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/c3/590e161c04ffbd36e33933e6dcca5ffa40b5548e3121a21d77aad42af138/cachebox-5.2.3-cp314-cp314t-win32.whl", hash = "sha256:83988dd8e9075ee837e8407e26db49a9944ae74924d5db57b477444d7d98622c", size = 271694, upload-time = "2026-04-10T12:22:00.589Z" }, + { url = "https://files.pythonhosted.org/packages/66/f4/f60b8506df467261178afe918801df37c02c46ec2b8ce019760a14e2abe7/cachebox-5.2.3-cp314-cp314t-win_amd64.whl", hash = "sha256:dbda6390fa5070a19157ae35ab8066d3fe468634e0e9e21452c68ce7999c7d0c", size = 284212, upload-time = "2026-04-10T12:21:46.241Z" }, + { url = "https://files.pythonhosted.org/packages/ce/7b/5eead1ca0d437b1993a742c6571079ae58ae4db50d94d42e87b514aed6c3/cachebox-5.2.3-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c798cddfb780156db09d3d96ed5da4c2d5fc01dad4bc7b54db5b20c34f221926", size = 376199, upload-time = "2026-04-10T12:20:32.674Z" }, + { url = "https://files.pythonhosted.org/packages/77/e3/5e45042f9b552a5087cafc2e0fed834e632531fca17818201d72e78593ce/cachebox-5.2.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:c8f3de4afeb3fd721620be3d02f2338bcbc3fdbd464ca14e1c474088c9669db0", size = 357109, upload-time = "2026-04-10T12:20:18.554Z" }, + { url = "https://files.pythonhosted.org/packages/d4/51/3c4743b718b42e4b80166fa61f8722b603eba7bf206768a7892c4699dce7/cachebox-5.2.3-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b39022c258872185327acffa9ad42d6bdf42f37d006d35c825a684eb5fa98d40", size = 396433, upload-time = "2026-04-10T12:19:01.463Z" }, + { url = "https://files.pythonhosted.org/packages/f8/9b/678da91187bdb2836db2b8da62519da75359b46bc28697799a7caa314519/cachebox-5.2.3-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a0599fb85dcb6df9a86502435643fe90c793bbcd50b5d85217c70f2bc2e38fc", size = 354287, upload-time = "2026-04-10T12:19:14.55Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/06/769446da6c9f2855499aaa19e2d7260aa47934bc2e15a931e5b737f8685a/cachebox-5.2.3-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3cdbe8f1b7716a44dc82ef3a6830a612260c7379478cfa80804632e2e6252b8e", size = 372507, upload-time = "2026-04-10T12:19:26.763Z" }, + { url = "https://files.pythonhosted.org/packages/79/cf/86c60994a7be734abef0395e440dc11714f84ffcd369cbcd8e61c3d58126/cachebox-5.2.3-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:783d1b9a0b3c77c43e7ae331b9d6561ad75827e16b2484e2a6cc289ec4d392ee", size = 390831, upload-time = "2026-04-10T12:19:39.591Z" }, + { url = "https://files.pythonhosted.org/packages/9d/db/acfb55f8d5ee4ea1c5f2d32ede25d4d04e944ba09d2832c27c085022490d/cachebox-5.2.3-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c6476a2a842906fee782d92f8fbcb03ecfd22eecc39adb7fb5b047d7e1cf020", size = 396277, upload-time = "2026-04-10T12:20:05.735Z" }, + { url = "https://files.pythonhosted.org/packages/dc/4f/35e27e85a48e15671c5863addcabde910eb311800a621c3e47c04bd36d17/cachebox-5.2.3-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:184bbcfa1370415b6d1f09e4fb74ab697dac8df09f522aa217a2fac65f973744", size = 426980, upload-time = "2026-04-10T12:19:52.622Z" }, + { url = "https://files.pythonhosted.org/packages/09/4b/50f2cadf20c02db9e449f2e9fee95f3eb5768ab1804dd0a5eba6c98119ad/cachebox-5.2.3-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:f89df36b46f8f5e11c0c49701ec3cebddf51191f96afb7bb75c394faf3c1cbc8", size = 565539, upload-time = "2026-04-10T12:20:47.051Z" }, + { url = "https://files.pythonhosted.org/packages/43/53/b8e948cadb48b8bcf1d13c2aa4a788ff0e95b50ddb808c18e998499b4680/cachebox-5.2.3-pp311-pypy311_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:fb0bdcd9e28686e3b91d5210c843542858f0f10de151181aee27a7978fe4992e", size = 670870, upload-time = "2026-04-10T12:21:02.141Z" }, + { url 
= "https://files.pythonhosted.org/packages/29/7b/d68ca3f59a9d6963c2f6b19bc4b1926a37db2e4a4f6c9891d12788e49ce2/cachebox-5.2.3-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:5196f0d2c2f99c92ddf0d2c37803ff90509d14a5df211b7754feb8b61ffd8740", size = 644542, upload-time = "2026-04-10T12:21:17.541Z" }, + { url = "https://files.pythonhosted.org/packages/f8/c8/44ae6d5dff09f044d61a92591e6a8db17f3b2ee51a54d375cce90271527b/cachebox-5.2.3-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:73671850d8c3634ab217398c83715d3feb52589ec97bd8e2f4d22e472741ea48", size = 610235, upload-time = "2026-04-10T12:21:32.93Z" }, + { url = "https://files.pythonhosted.org/packages/9a/1b/31cf2449da9a296f6c6c0002c7ae91a25c3a4bfef071763bbeb85300b402/cachebox-5.2.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:70c718f6bb77e6ba142b9a055b81ce85412a0c0e5e82a154489b45e6f91d09ec", size = 287614, upload-time = "2026-04-10T12:21:47.909Z" }, +] + [[package]] name = "certifi" version = "2026.2.25" @@ -378,9 +497,10 @@ wheels = [ [[package]] name = "deepdiff" -version = "8.7.0" +version = "9.0.0" source = { editable = "." 
} dependencies = [ + { name = "cachebox" }, { name = "orderly-set" }, ] @@ -434,6 +554,7 @@ test = [ [package.metadata] requires-dist = [ { name = "bump2version", marker = "extra == 'dev'", specifier = "~=1.0.1" }, + { name = "cachebox", specifier = ">=5.2,<6" }, { name = "click", marker = "extra == 'cli'", specifier = "~=8.3.1" }, { name = "coverage", marker = "extra == 'coverage'", specifier = "~=7.13.5" }, { name = "flake8", marker = "extra == 'static'", specifier = "~=7.3.0" }, From 5d758054fee8006b8908b78f9e0cebc60193a5e1 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 15 May 2026 10:38:40 -0700 Subject: [PATCH 18/23] updating dependencies --- pyproject.toml | 10 +- uv.lock | 953 ++++++++++++++++++++++++------------------------- 2 files changed, 481 insertions(+), 482 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b17ccfe4..8d9cb691 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ static = [ "pydantic~=2.12.5", ] test = [ - "pytest~=9.0.2", + "pytest~=9.0.3", "pytest-benchmark~=5.2.3", "pytest-cov~=7.1.0", "python-dotenv~=1.2.2", @@ -112,7 +112,13 @@ include = [ "mypy.ini", "noxfile.py", "tests/", - "uv.lock", +] +exclude = [ + "docs/.env", + "docs/_build/", + "docs/**/*.png", + "docs/CLAUDE.md", + "docs/AGENTS.md", ] [tool.pytest.ini_options] diff --git a/uv.lock b/uv.lock index 81a327c3..f53af93e 100644 --- a/uv.lock +++ b/uv.lock @@ -5,12 +5,9 @@ resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'win32'", "python_full_version >= '3.14' and sys_platform == 'emscripten'", "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - 
"python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", "python_full_version < '3.11'", ] @@ -223,128 +220,128 @@ wheels = [ [[package]] name = "certifi" -version = "2026.2.25" +version = "2026.4.22" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } +sdist = { url = "https://files.pythonhosted.org/packages/25/ee/6caf7a40c36a1220410afe15a1cc64993a1f864871f698c0f93acb72842a/certifi-2026.4.22.tar.gz", hash = "sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580", size = 137077, upload-time = "2026-04-22T11:26:11.191Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, + { url = "https://files.pythonhosted.org/packages/22/30/7cd8fdcdfbc5b869528b079bfb76dcdf6056b1a2097a662e5e8c04f42965/certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a", size = 135707, upload-time = "2026-04-22T11:26:09.372Z" }, ] [[package]] name = "charset-normalizer" -version = "3.4.6" 
-source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7b/60/e3bec1881450851b087e301bedc3daa9377a4d45f1c26aa90b0b235e38aa/charset_normalizer-3.4.6.tar.gz", hash = "sha256:1ae6b62897110aa7c79ea2f5dd38d1abca6db663687c0b1ad9aed6f6bae3d9d6", size = 143363, upload-time = "2026-03-15T18:53:25.478Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e6/8c/2c56124c6dc53a774d435f985b5973bc592f42d437be58c0c92d65ae7296/charset_normalizer-3.4.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2e1d8ca8611099001949d1cdfaefc510cf0f212484fe7c565f735b68c78c3c95", size = 298751, upload-time = "2026-03-15T18:50:00.003Z" }, - { url = "https://files.pythonhosted.org/packages/86/2a/2a7db6b314b966a3bcad8c731c0719c60b931b931de7ae9f34b2839289ee/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e25369dc110d58ddf29b949377a93e0716d72a24f62bad72b2b39f155949c1fd", size = 200027, upload-time = "2026-03-15T18:50:01.702Z" }, - { url = "https://files.pythonhosted.org/packages/68/f2/0fe775c74ae25e2a3b07b01538fc162737b3e3f795bada3bc26f4d4d495c/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:259695e2ccc253feb2a016303543d691825e920917e31f894ca1a687982b1de4", size = 220741, upload-time = "2026-03-15T18:50:03.194Z" }, - { url = "https://files.pythonhosted.org/packages/10/98/8085596e41f00b27dd6aa1e68413d1ddda7e605f34dd546833c61fddd709/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dda86aba335c902b6149a02a55b38e96287157e609200811837678214ba2b1db", size = 215802, upload-time = "2026-03-15T18:50:05.859Z" }, - { url = 
"https://files.pythonhosted.org/packages/fd/ce/865e4e09b041bad659d682bbd98b47fb490b8e124f9398c9448065f64fee/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51fb3c322c81d20567019778cb5a4a6f2dc1c200b886bc0d636238e364848c89", size = 207908, upload-time = "2026-03-15T18:50:07.676Z" }, - { url = "https://files.pythonhosted.org/packages/a8/54/8c757f1f7349262898c2f169e0d562b39dcb977503f18fdf0814e923db78/charset_normalizer-3.4.6-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:4482481cb0572180b6fd976a4d5c72a30263e98564da68b86ec91f0fe35e8565", size = 194357, upload-time = "2026-03-15T18:50:09.327Z" }, - { url = "https://files.pythonhosted.org/packages/6f/29/e88f2fac9218907fc7a70722b393d1bbe8334c61fe9c46640dba349b6e66/charset_normalizer-3.4.6-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:39f5068d35621da2881271e5c3205125cc456f54e9030d3f723288c873a71bf9", size = 205610, upload-time = "2026-03-15T18:50:10.732Z" }, - { url = "https://files.pythonhosted.org/packages/4c/c5/21d7bb0cb415287178450171d130bed9d664211fdd59731ed2c34267b07d/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8bea55c4eef25b0b19a0337dc4e3f9a15b00d569c77211fa8cde38684f234fb7", size = 203512, upload-time = "2026-03-15T18:50:12.535Z" }, - { url = "https://files.pythonhosted.org/packages/a4/be/ce52f3c7fdb35cc987ad38a53ebcef52eec498f4fb6c66ecfe62cfe57ba2/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:f0cdaecd4c953bfae0b6bb64910aaaca5a424ad9c72d85cb88417bb9814f7550", size = 195398, upload-time = "2026-03-15T18:50:14.236Z" }, - { url = "https://files.pythonhosted.org/packages/81/a0/3ab5dd39d4859a3555e5dadfc8a9fa7f8352f8c183d1a65c90264517da0e/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:150b8ce8e830eb7ccb029ec9ca36022f756986aaaa7956aad6d9ec90089338c0", size = 221772, upload-time = "2026-03-15T18:50:15.581Z" }, 
- { url = "https://files.pythonhosted.org/packages/04/6e/6a4e41a97ba6b2fa87f849c41e4d229449a586be85053c4d90135fe82d26/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:e68c14b04827dd76dcbd1aeea9e604e3e4b78322d8faf2f8132c7138efa340a8", size = 205759, upload-time = "2026-03-15T18:50:17.047Z" }, - { url = "https://files.pythonhosted.org/packages/db/3b/34a712a5ee64a6957bf355b01dc17b12de457638d436fdb05d01e463cd1c/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:3778fd7d7cd04ae8f54651f4a7a0bd6e39a0cf20f801720a4c21d80e9b7ad6b0", size = 216938, upload-time = "2026-03-15T18:50:18.44Z" }, - { url = "https://files.pythonhosted.org/packages/cb/05/5bd1e12da9ab18790af05c61aafd01a60f489778179b621ac2a305243c62/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dad6e0f2e481fffdcf776d10ebee25e0ef89f16d691f1e5dee4b586375fdc64b", size = 210138, upload-time = "2026-03-15T18:50:19.852Z" }, - { url = "https://files.pythonhosted.org/packages/bd/8e/3cb9e2d998ff6b21c0a1860343cb7b83eba9cdb66b91410e18fc4969d6ab/charset_normalizer-3.4.6-cp310-cp310-win32.whl", hash = "sha256:74a2e659c7ecbc73562e2a15e05039f1e22c75b7c7618b4b574a3ea9118d1557", size = 144137, upload-time = "2026-03-15T18:50:21.505Z" }, - { url = "https://files.pythonhosted.org/packages/d8/8f/78f5489ffadb0db3eb7aff53d31c24531d33eb545f0c6f6567c25f49a5ff/charset_normalizer-3.4.6-cp310-cp310-win_amd64.whl", hash = "sha256:aa9cccf4a44b9b62d8ba8b4dd06c649ba683e4bf04eea606d2e94cfc2d6ff4d6", size = 154244, upload-time = "2026-03-15T18:50:22.81Z" }, - { url = "https://files.pythonhosted.org/packages/e4/74/e472659dffb0cadb2f411282d2d76c60da1fc94076d7fffed4ae8a93ec01/charset_normalizer-3.4.6-cp310-cp310-win_arm64.whl", hash = "sha256:e985a16ff513596f217cee86c21371b8cd011c0f6f056d0920aa2d926c544058", size = 143312, upload-time = "2026-03-15T18:50:24.074Z" }, - { url = 
"https://files.pythonhosted.org/packages/62/28/ff6f234e628a2de61c458be2779cb182bc03f6eec12200d4a525bbfc9741/charset_normalizer-3.4.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:82060f995ab5003a2d6e0f4ad29065b7672b6593c8c63559beefe5b443242c3e", size = 293582, upload-time = "2026-03-15T18:50:25.454Z" }, - { url = "https://files.pythonhosted.org/packages/1c/b7/b1a117e5385cbdb3205f6055403c2a2a220c5ea80b8716c324eaf75c5c95/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60c74963d8350241a79cb8feea80e54d518f72c26db618862a8f53e5023deaf9", size = 197240, upload-time = "2026-03-15T18:50:27.196Z" }, - { url = "https://files.pythonhosted.org/packages/a1/5f/2574f0f09f3c3bc1b2f992e20bce6546cb1f17e111c5be07308dc5427956/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6e4333fb15c83f7d1482a76d45a0818897b3d33f00efd215528ff7c51b8e35d", size = 217363, upload-time = "2026-03-15T18:50:28.601Z" }, - { url = "https://files.pythonhosted.org/packages/4a/d1/0ae20ad77bc949ddd39b51bf383b6ca932f2916074c95cad34ae465ab71f/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bc72863f4d9aba2e8fd9085e63548a324ba706d2ea2c83b260da08a59b9482de", size = 212994, upload-time = "2026-03-15T18:50:30.102Z" }, - { url = "https://files.pythonhosted.org/packages/60/ac/3233d262a310c1b12633536a07cde5ddd16985e6e7e238e9f3f9423d8eb9/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cc4fc6c196d6a8b76629a70ddfcd4635a6898756e2d9cac5565cf0654605d73", size = 204697, upload-time = "2026-03-15T18:50:31.654Z" }, - { url = "https://files.pythonhosted.org/packages/25/3c/8a18fc411f085b82303cfb7154eed5bd49c77035eb7608d049468b53f87c/charset_normalizer-3.4.6-cp311-cp311-manylinux_2_31_armv7l.whl", hash = 
"sha256:0c173ce3a681f309f31b87125fecec7a5d1347261ea11ebbb856fa6006b23c8c", size = 191673, upload-time = "2026-03-15T18:50:33.433Z" }, - { url = "https://files.pythonhosted.org/packages/ff/a7/11cfe61d6c5c5c7438d6ba40919d0306ed83c9ab957f3d4da2277ff67836/charset_normalizer-3.4.6-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c907cdc8109f6c619e6254212e794d6548373cc40e1ec75e6e3823d9135d29cc", size = 201120, upload-time = "2026-03-15T18:50:35.105Z" }, - { url = "https://files.pythonhosted.org/packages/b5/10/cf491fa1abd47c02f69687046b896c950b92b6cd7337a27e6548adbec8e4/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:404a1e552cf5b675a87f0651f8b79f5f1e6fd100ee88dc612f89aa16abd4486f", size = 200911, upload-time = "2026-03-15T18:50:36.819Z" }, - { url = "https://files.pythonhosted.org/packages/28/70/039796160b48b18ed466fde0af84c1b090c4e288fae26cd674ad04a2d703/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e3c701e954abf6fc03a49f7c579cc80c2c6cc52525340ca3186c41d3f33482ef", size = 192516, upload-time = "2026-03-15T18:50:38.228Z" }, - { url = "https://files.pythonhosted.org/packages/ff/34/c56f3223393d6ff3124b9e78f7de738047c2d6bc40a4f16ac0c9d7a1cb3c/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7a6967aaf043bceabab5412ed6bd6bd26603dae84d5cb75bf8d9a74a4959d398", size = 218795, upload-time = "2026-03-15T18:50:39.664Z" }, - { url = "https://files.pythonhosted.org/packages/e8/3b/ce2d4f86c5282191a041fdc5a4ce18f1c6bd40a5bd1f74cf8625f08d51c1/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:5feb91325bbceade6afab43eb3b508c63ee53579fe896c77137ded51c6b6958e", size = 201833, upload-time = "2026-03-15T18:50:41.552Z" }, - { url = "https://files.pythonhosted.org/packages/3b/9b/b6a9f76b0fd7c5b5ec58b228ff7e85095370282150f0bd50b3126f5506d6/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = 
"sha256:f820f24b09e3e779fe84c3c456cb4108a7aa639b0d1f02c28046e11bfcd088ed", size = 213920, upload-time = "2026-03-15T18:50:43.33Z" }, - { url = "https://files.pythonhosted.org/packages/ae/98/7bc23513a33d8172365ed30ee3a3b3fe1ece14a395e5fc94129541fc6003/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b35b200d6a71b9839a46b9b7fff66b6638bb52fc9658aa58796b0326595d3021", size = 206951, upload-time = "2026-03-15T18:50:44.789Z" }, - { url = "https://files.pythonhosted.org/packages/32/73/c0b86f3d1458468e11aec870e6b3feac931facbe105a894b552b0e518e79/charset_normalizer-3.4.6-cp311-cp311-win32.whl", hash = "sha256:9ca4c0b502ab399ef89248a2c84c54954f77a070f28e546a85e91da627d1301e", size = 143703, upload-time = "2026-03-15T18:50:46.103Z" }, - { url = "https://files.pythonhosted.org/packages/c6/e3/76f2facfe8eddee0bbd38d2594e709033338eae44ebf1738bcefe0a06185/charset_normalizer-3.4.6-cp311-cp311-win_amd64.whl", hash = "sha256:a9e68c9d88823b274cf1e72f28cb5dc89c990edf430b0bfd3e2fb0785bfeabf4", size = 153857, upload-time = "2026-03-15T18:50:47.563Z" }, - { url = "https://files.pythonhosted.org/packages/e2/dc/9abe19c9b27e6cd3636036b9d1b387b78c40dedbf0b47f9366737684b4b0/charset_normalizer-3.4.6-cp311-cp311-win_arm64.whl", hash = "sha256:97d0235baafca5f2b09cf332cc275f021e694e8362c6bb9c96fc9a0eb74fc316", size = 142751, upload-time = "2026-03-15T18:50:49.234Z" }, - { url = "https://files.pythonhosted.org/packages/e5/62/c0815c992c9545347aeea7859b50dc9044d147e2e7278329c6e02ac9a616/charset_normalizer-3.4.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ef7fedc7a6ecbe99969cd09632516738a97eeb8bd7258bf8a0f23114c057dab", size = 295154, upload-time = "2026-03-15T18:50:50.88Z" }, - { url = "https://files.pythonhosted.org/packages/a8/37/bdca6613c2e3c58c7421891d80cc3efa1d32e882f7c4a7ee6039c3fc951a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:a4ea868bc28109052790eb2b52a9ab33f3aa7adc02f96673526ff47419490e21", size = 199191, upload-time = "2026-03-15T18:50:52.658Z" }, - { url = "https://files.pythonhosted.org/packages/6c/92/9934d1bbd69f7f398b38c5dae1cbf9cc672e7c34a4adf7b17c0a9c17d15d/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:836ab36280f21fc1a03c99cd05c6b7af70d2697e374c7af0b61ed271401a72a2", size = 218674, upload-time = "2026-03-15T18:50:54.102Z" }, - { url = "https://files.pythonhosted.org/packages/af/90/25f6ab406659286be929fd89ab0e78e38aa183fc374e03aa3c12d730af8a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f1ce721c8a7dfec21fcbdfe04e8f68174183cf4e8188e0645e92aa23985c57ff", size = 215259, upload-time = "2026-03-15T18:50:55.616Z" }, - { url = "https://files.pythonhosted.org/packages/4e/ef/79a463eb0fff7f96afa04c1d4c51f8fc85426f918db467854bfb6a569ce3/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e28d62a8fc7a1fa411c43bd65e346f3bce9716dc51b897fbe930c5987b402d5", size = 207276, upload-time = "2026-03-15T18:50:57.054Z" }, - { url = "https://files.pythonhosted.org/packages/f7/72/d0426afec4b71dc159fa6b4e68f868cd5a3ecd918fec5813a15d292a7d10/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:530d548084c4a9f7a16ed4a294d459b4f229db50df689bfe92027452452943a0", size = 195161, upload-time = "2026-03-15T18:50:58.686Z" }, - { url = "https://files.pythonhosted.org/packages/bf/18/c82b06a68bfcb6ce55e508225d210c7e6a4ea122bfc0748892f3dc4e8e11/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:30f445ae60aad5e1f8bdbb3108e39f6fbc09f4ea16c815c66578878325f8f15a", size = 203452, upload-time = "2026-03-15T18:51:00.196Z" }, - { url = 
"https://files.pythonhosted.org/packages/44/d6/0c25979b92f8adafdbb946160348d8d44aa60ce99afdc27df524379875cb/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ac2393c73378fea4e52aa56285a3d64be50f1a12395afef9cce47772f60334c2", size = 202272, upload-time = "2026-03-15T18:51:01.703Z" }, - { url = "https://files.pythonhosted.org/packages/2e/3d/7fea3e8fe84136bebbac715dd1221cc25c173c57a699c030ab9b8900cbb7/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:90ca27cd8da8118b18a52d5f547859cc1f8354a00cd1e8e5120df3e30d6279e5", size = 195622, upload-time = "2026-03-15T18:51:03.526Z" }, - { url = "https://files.pythonhosted.org/packages/57/8a/d6f7fd5cb96c58ef2f681424fbca01264461336d2a7fc875e4446b1f1346/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8e5a94886bedca0f9b78fecd6afb6629142fd2605aa70a125d49f4edc6037ee6", size = 220056, upload-time = "2026-03-15T18:51:05.269Z" }, - { url = "https://files.pythonhosted.org/packages/16/50/478cdda782c8c9c3fb5da3cc72dd7f331f031e7f1363a893cdd6ca0f8de0/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:695f5c2823691a25f17bc5d5ffe79fa90972cc34b002ac6c843bb8a1720e950d", size = 203751, upload-time = "2026-03-15T18:51:06.858Z" }, - { url = "https://files.pythonhosted.org/packages/75/fc/cc2fcac943939c8e4d8791abfa139f685e5150cae9f94b60f12520feaa9b/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:231d4da14bcd9301310faf492051bee27df11f2bc7549bc0bb41fef11b82daa2", size = 216563, upload-time = "2026-03-15T18:51:08.564Z" }, - { url = "https://files.pythonhosted.org/packages/a8/b7/a4add1d9a5f68f3d037261aecca83abdb0ab15960a3591d340e829b37298/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a056d1ad2633548ca18ffa2f85c202cfb48b68615129143915b8dc72a806a923", size = 209265, upload-time = "2026-03-15T18:51:10.312Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/18/c094561b5d64a24277707698e54b7f67bd17a4f857bbfbb1072bba07c8bf/charset_normalizer-3.4.6-cp312-cp312-win32.whl", hash = "sha256:c2274ca724536f173122f36c98ce188fd24ce3dad886ec2b7af859518ce008a4", size = 144229, upload-time = "2026-03-15T18:51:11.694Z" }, - { url = "https://files.pythonhosted.org/packages/ab/20/0567efb3a8fd481b8f34f739ebddc098ed062a59fed41a8d193a61939e8f/charset_normalizer-3.4.6-cp312-cp312-win_amd64.whl", hash = "sha256:c8ae56368f8cc97c7e40a7ee18e1cedaf8e780cd8bc5ed5ac8b81f238614facb", size = 154277, upload-time = "2026-03-15T18:51:13.004Z" }, - { url = "https://files.pythonhosted.org/packages/15/57/28d79b44b51933119e21f65479d0864a8d5893e494cf5daab15df0247c17/charset_normalizer-3.4.6-cp312-cp312-win_arm64.whl", hash = "sha256:899d28f422116b08be5118ef350c292b36fc15ec2daeb9ea987c89281c7bb5c4", size = 142817, upload-time = "2026-03-15T18:51:14.408Z" }, - { url = "https://files.pythonhosted.org/packages/1e/1d/4fdabeef4e231153b6ed7567602f3b68265ec4e5b76d6024cf647d43d981/charset_normalizer-3.4.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:11afb56037cbc4b1555a34dd69151e8e069bee82e613a73bef6e714ce733585f", size = 294823, upload-time = "2026-03-15T18:51:15.755Z" }, - { url = "https://files.pythonhosted.org/packages/47/7b/20e809b89c69d37be748d98e84dce6820bf663cf19cf6b942c951a3e8f41/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423fb7e748a08f854a08a222b983f4df1912b1daedce51a72bd24fe8f26a1843", size = 198527, upload-time = "2026-03-15T18:51:17.177Z" }, - { url = "https://files.pythonhosted.org/packages/37/a6/4f8d27527d59c039dce6f7622593cdcd3d70a8504d87d09eb11e9fdc6062/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d73beaac5e90173ac3deb9928a74763a6d230f494e4bfb422c217a0ad8e629bf", size = 218388, upload-time = "2026-03-15T18:51:18.934Z" }, - { url = 
"https://files.pythonhosted.org/packages/f6/9b/4770ccb3e491a9bacf1c46cc8b812214fe367c86a96353ccc6daf87b01ec/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d60377dce4511655582e300dc1e5a5f24ba0cb229005a1d5c8d0cb72bb758ab8", size = 214563, upload-time = "2026-03-15T18:51:20.374Z" }, - { url = "https://files.pythonhosted.org/packages/2b/58/a199d245894b12db0b957d627516c78e055adc3a0d978bc7f65ddaf7c399/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:530e8cebeea0d76bdcf93357aa5e41336f48c3dc709ac52da2bb167c5b8271d9", size = 206587, upload-time = "2026-03-15T18:51:21.807Z" }, - { url = "https://files.pythonhosted.org/packages/7e/70/3def227f1ec56f5c69dfc8392b8bd63b11a18ca8178d9211d7cc5e5e4f27/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:a26611d9987b230566f24a0a125f17fe0de6a6aff9f25c9f564aaa2721a5fb88", size = 194724, upload-time = "2026-03-15T18:51:23.508Z" }, - { url = "https://files.pythonhosted.org/packages/58/ab/9318352e220c05efd31c2779a23b50969dc94b985a2efa643ed9077bfca5/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:34315ff4fc374b285ad7f4a0bf7dcbfe769e1b104230d40f49f700d4ab6bbd84", size = 202956, upload-time = "2026-03-15T18:51:25.239Z" }, - { url = "https://files.pythonhosted.org/packages/75/13/f3550a3ac25b70f87ac98c40d3199a8503676c2f1620efbf8d42095cfc40/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ddd609f9e1af8c7bd6e2aca279c931aefecd148a14402d4e368f3171769fd", size = 201923, upload-time = "2026-03-15T18:51:26.682Z" }, - { url = "https://files.pythonhosted.org/packages/1b/db/c5c643b912740b45e8eec21de1bbab8e7fc085944d37e1e709d3dcd9d72f/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:80d0a5615143c0b3225e5e3ef22c8d5d51f3f72ce0ea6fb84c943546c7b25b6c", size = 195366, 
upload-time = "2026-03-15T18:51:28.129Z" }, - { url = "https://files.pythonhosted.org/packages/5a/67/3b1c62744f9b2448443e0eb160d8b001c849ec3fef591e012eda6484787c/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:92734d4d8d187a354a556626c221cd1a892a4e0802ccb2af432a1d85ec012194", size = 219752, upload-time = "2026-03-15T18:51:29.556Z" }, - { url = "https://files.pythonhosted.org/packages/f6/98/32ffbaf7f0366ffb0445930b87d103f6b406bc2c271563644bde8a2b1093/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:613f19aa6e082cf96e17e3ffd89383343d0d589abda756b7764cf78361fd41dc", size = 203296, upload-time = "2026-03-15T18:51:30.921Z" }, - { url = "https://files.pythonhosted.org/packages/41/12/5d308c1bbe60cabb0c5ef511574a647067e2a1f631bc8634fcafaccd8293/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2b1a63e8224e401cafe7739f77efd3f9e7f5f2026bda4aead8e59afab537784f", size = 215956, upload-time = "2026-03-15T18:51:32.399Z" }, - { url = "https://files.pythonhosted.org/packages/53/e9/5f85f6c5e20669dbe56b165c67b0260547dea97dba7e187938833d791687/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6cceb5473417d28edd20c6c984ab6fee6c6267d38d906823ebfe20b03d607dc2", size = 208652, upload-time = "2026-03-15T18:51:34.214Z" }, - { url = "https://files.pythonhosted.org/packages/f1/11/897052ea6af56df3eef3ca94edafee410ca699ca0c7b87960ad19932c55e/charset_normalizer-3.4.6-cp313-cp313-win32.whl", hash = "sha256:d7de2637729c67d67cf87614b566626057e95c303bc0a55ffe391f5205e7003d", size = 143940, upload-time = "2026-03-15T18:51:36.15Z" }, - { url = "https://files.pythonhosted.org/packages/a1/5c/724b6b363603e419829f561c854b87ed7c7e31231a7908708ac086cdf3e2/charset_normalizer-3.4.6-cp313-cp313-win_amd64.whl", hash = "sha256:572d7c822caf521f0525ba1bce1a622a0b85cf47ffbdae6c9c19e3b5ac3c4389", size = 154101, upload-time = "2026-03-15T18:51:37.876Z" }, - { url = 
"https://files.pythonhosted.org/packages/01/a5/7abf15b4c0968e47020f9ca0935fb3274deb87cb288cd187cad92e8cdffd/charset_normalizer-3.4.6-cp313-cp313-win_arm64.whl", hash = "sha256:a4474d924a47185a06411e0064b803c68be044be2d60e50e8bddcc2649957c1f", size = 143109, upload-time = "2026-03-15T18:51:39.565Z" }, - { url = "https://files.pythonhosted.org/packages/25/6f/ffe1e1259f384594063ea1869bfb6be5cdb8bc81020fc36c3636bc8302a1/charset_normalizer-3.4.6-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9cc6e6d9e571d2f863fa77700701dae73ed5f78881efc8b3f9a4398772ff53e8", size = 294458, upload-time = "2026-03-15T18:51:41.134Z" }, - { url = "https://files.pythonhosted.org/packages/56/60/09bb6c13a8c1016c2ed5c6a6488e4ffef506461aa5161662bd7636936fb1/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5960d965e67165d75b7c7ffc60a83ec5abfc5c11b764ec13ea54fbef8b4421", size = 199277, upload-time = "2026-03-15T18:51:42.953Z" }, - { url = "https://files.pythonhosted.org/packages/00/50/dcfbb72a5138bbefdc3332e8d81a23494bf67998b4b100703fd15fa52d81/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b3694e3f87f8ac7ce279d4355645b3c878d24d1424581b46282f24b92f5a4ae2", size = 218758, upload-time = "2026-03-15T18:51:44.339Z" }, - { url = "https://files.pythonhosted.org/packages/03/b3/d79a9a191bb75f5aa81f3aaaa387ef29ce7cb7a9e5074ba8ea095cc073c2/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d11595abf8dd942a77883a39d81433739b287b6aa71620f15164f8096221b30", size = 215299, upload-time = "2026-03-15T18:51:45.871Z" }, - { url = "https://files.pythonhosted.org/packages/76/7e/bc8911719f7084f72fd545f647601ea3532363927f807d296a8c88a62c0d/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:7bda6eebafd42133efdca535b04ccb338ab29467b3f7bf79569883676fc628db", size = 206811, upload-time = "2026-03-15T18:51:47.308Z" }, - { url = "https://files.pythonhosted.org/packages/e2/40/c430b969d41dda0c465aa36cc7c2c068afb67177bef50905ac371b28ccc7/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:bbc8c8650c6e51041ad1be191742b8b421d05bbd3410f43fa2a00c8db87678e8", size = 193706, upload-time = "2026-03-15T18:51:48.849Z" }, - { url = "https://files.pythonhosted.org/packages/48/15/e35e0590af254f7df984de1323640ef375df5761f615b6225ba8deb9799a/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:22c6f0c2fbc31e76c3b8a86fba1a56eda6166e238c29cdd3d14befdb4a4e4815", size = 202706, upload-time = "2026-03-15T18:51:50.257Z" }, - { url = "https://files.pythonhosted.org/packages/5e/bd/f736f7b9cc5e93a18b794a50346bb16fbfd6b37f99e8f306f7951d27c17c/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7edbed096e4a4798710ed6bc75dcaa2a21b68b6c356553ac4823c3658d53743a", size = 202497, upload-time = "2026-03-15T18:51:52.012Z" }, - { url = "https://files.pythonhosted.org/packages/9d/ba/2cc9e3e7dfdf7760a6ed8da7446d22536f3d0ce114ac63dee2a5a3599e62/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:7f9019c9cb613f084481bd6a100b12e1547cf2efe362d873c2e31e4035a6fa43", size = 193511, upload-time = "2026-03-15T18:51:53.723Z" }, - { url = "https://files.pythonhosted.org/packages/9e/cb/5be49b5f776e5613be07298c80e1b02a2d900f7a7de807230595c85a8b2e/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:58c948d0d086229efc484fe2f30c2d382c86720f55cd9bc33591774348ad44e0", size = 220133, upload-time = "2026-03-15T18:51:55.333Z" }, - { url = "https://files.pythonhosted.org/packages/83/43/99f1b5dad345accb322c80c7821071554f791a95ee50c1c90041c157ae99/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_riscv64.whl", hash = 
"sha256:419a9d91bd238052642a51938af8ac05da5b3343becde08d5cdeab9046df9ee1", size = 203035, upload-time = "2026-03-15T18:51:56.736Z" }, - { url = "https://files.pythonhosted.org/packages/87/9a/62c2cb6a531483b55dddff1a68b3d891a8b498f3ca555fbcf2978e804d9d/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5273b9f0b5835ff0350c0828faea623c68bfa65b792720c453e22b25cc72930f", size = 216321, upload-time = "2026-03-15T18:51:58.17Z" }, - { url = "https://files.pythonhosted.org/packages/6e/79/94a010ff81e3aec7c293eb82c28f930918e517bc144c9906a060844462eb/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:0e901eb1049fdb80f5bd11ed5ea1e498ec423102f7a9b9e4645d5b8204ff2815", size = 208973, upload-time = "2026-03-15T18:51:59.998Z" }, - { url = "https://files.pythonhosted.org/packages/2a/57/4ecff6d4ec8585342f0c71bc03efaa99cb7468f7c91a57b105bcd561cea8/charset_normalizer-3.4.6-cp314-cp314-win32.whl", hash = "sha256:b4ff1d35e8c5bd078be89349b6f3a845128e685e751b6ea1169cf2160b344c4d", size = 144610, upload-time = "2026-03-15T18:52:02.213Z" }, - { url = "https://files.pythonhosted.org/packages/80/94/8434a02d9d7f168c25767c64671fead8d599744a05d6a6c877144c754246/charset_normalizer-3.4.6-cp314-cp314-win_amd64.whl", hash = "sha256:74119174722c4349af9708993118581686f343adc1c8c9c007d59be90d077f3f", size = 154962, upload-time = "2026-03-15T18:52:03.658Z" }, - { url = "https://files.pythonhosted.org/packages/46/4c/48f2cdbfd923026503dfd67ccea45c94fd8fe988d9056b468579c66ed62b/charset_normalizer-3.4.6-cp314-cp314-win_arm64.whl", hash = "sha256:e5bcc1a1ae744e0bb59641171ae53743760130600da8db48cbb6e4918e186e4e", size = 143595, upload-time = "2026-03-15T18:52:05.123Z" }, - { url = "https://files.pythonhosted.org/packages/31/93/8878be7569f87b14f1d52032946131bcb6ebbd8af3e20446bc04053dc3f1/charset_normalizer-3.4.6-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:ad8faf8df23f0378c6d527d8b0b15ea4a2e23c89376877c598c4870d1b2c7866", size = 314828, 
upload-time = "2026-03-15T18:52:06.831Z" }, - { url = "https://files.pythonhosted.org/packages/06/b6/fae511ca98aac69ecc35cde828b0a3d146325dd03d99655ad38fc2cc3293/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f5ea69428fa1b49573eef0cc44a1d43bebd45ad0c611eb7d7eac760c7ae771bc", size = 208138, upload-time = "2026-03-15T18:52:08.239Z" }, - { url = "https://files.pythonhosted.org/packages/54/57/64caf6e1bf07274a1e0b7c160a55ee9e8c9ec32c46846ce59b9c333f7008/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:06a7e86163334edfc5d20fe104db92fcd666e5a5df0977cb5680a506fe26cc8e", size = 224679, upload-time = "2026-03-15T18:52:10.043Z" }, - { url = "https://files.pythonhosted.org/packages/aa/cb/9ff5a25b9273ef160861b41f6937f86fae18b0792fe0a8e75e06acb08f1d/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e1f6e2f00a6b8edb562826e4632e26d063ac10307e80f7461f7de3ad8ef3f077", size = 223475, upload-time = "2026-03-15T18:52:11.854Z" }, - { url = "https://files.pythonhosted.org/packages/fc/97/440635fc093b8d7347502a377031f9605a1039c958f3cd18dcacffb37743/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b52c68d64c1878818687a473a10547b3292e82b6f6fe483808fb1468e2f52f", size = 215230, upload-time = "2026-03-15T18:52:13.325Z" }, - { url = "https://files.pythonhosted.org/packages/cd/24/afff630feb571a13f07c8539fbb502d2ab494019492aaffc78ef41f1d1d0/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:7504e9b7dc05f99a9bbb4525c67a2c155073b44d720470a148b34166a69c054e", size = 199045, upload-time = "2026-03-15T18:52:14.752Z" }, - { url = 
"https://files.pythonhosted.org/packages/e5/17/d1399ecdaf7e0498c327433e7eefdd862b41236a7e484355b8e0e5ebd64b/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:172985e4ff804a7ad08eebec0a1640ece87ba5041d565fff23c8f99c1f389484", size = 211658, upload-time = "2026-03-15T18:52:16.278Z" }, - { url = "https://files.pythonhosted.org/packages/b5/38/16baa0affb957b3d880e5ac2144caf3f9d7de7bc4a91842e447fbb5e8b67/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4be9f4830ba8741527693848403e2c457c16e499100963ec711b1c6f2049b7c7", size = 210769, upload-time = "2026-03-15T18:52:17.782Z" }, - { url = "https://files.pythonhosted.org/packages/05/34/c531bc6ac4c21da9ddfddb3107be2287188b3ea4b53b70fc58f2a77ac8d8/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:79090741d842f564b1b2827c0b82d846405b744d31e84f18d7a7b41c20e473ff", size = 201328, upload-time = "2026-03-15T18:52:19.553Z" }, - { url = "https://files.pythonhosted.org/packages/fa/73/a5a1e9ca5f234519c1953608a03fe109c306b97fdfb25f09182babad51a7/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:87725cfb1a4f1f8c2fc9890ae2f42094120f4b44db9360be5d99a4c6b0e03a9e", size = 225302, upload-time = "2026-03-15T18:52:21.043Z" }, - { url = "https://files.pythonhosted.org/packages/ba/f6/cd782923d112d296294dea4bcc7af5a7ae0f86ab79f8fefbda5526b6cfc0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:fcce033e4021347d80ed9c66dcf1e7b1546319834b74445f561d2e2221de5659", size = 211127, upload-time = "2026-03-15T18:52:22.491Z" }, - { url = "https://files.pythonhosted.org/packages/0e/c5/0b6898950627af7d6103a449b22320372c24c6feda91aa24e201a478d161/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:ca0276464d148c72defa8bb4390cce01b4a0e425f3b50d1435aa6d7a18107602", size = 222840, upload-time = "2026-03-15T18:52:24.113Z" }, - { url = 
"https://files.pythonhosted.org/packages/7d/25/c4bba773bef442cbdc06111d40daa3de5050a676fa26e85090fc54dd12f0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:197c1a244a274bb016dd8b79204850144ef77fe81c5b797dc389327adb552407", size = 216890, upload-time = "2026-03-15T18:52:25.541Z" }, - { url = "https://files.pythonhosted.org/packages/35/1a/05dacadb0978da72ee287b0143097db12f2e7e8d3ffc4647da07a383b0b7/charset_normalizer-3.4.6-cp314-cp314t-win32.whl", hash = "sha256:2a24157fa36980478dd1770b585c0f30d19e18f4fb0c47c13aa568f871718579", size = 155379, upload-time = "2026-03-15T18:52:27.05Z" }, - { url = "https://files.pythonhosted.org/packages/5d/7a/d269d834cb3a76291651256f3b9a5945e81d0a49ab9f4a498964e83c0416/charset_normalizer-3.4.6-cp314-cp314t-win_amd64.whl", hash = "sha256:cd5e2801c89992ed8c0a3f0293ae83c159a60d9a5d685005383ef4caca77f2c4", size = 169043, upload-time = "2026-03-15T18:52:28.502Z" }, - { url = "https://files.pythonhosted.org/packages/23/06/28b29fba521a37a8932c6a84192175c34d49f84a6d4773fa63d05f9aff22/charset_normalizer-3.4.6-cp314-cp314t-win_arm64.whl", hash = "sha256:47955475ac79cc504ef2704b192364e51d0d473ad452caedd0002605f780101c", size = 148523, upload-time = "2026-03-15T18:52:29.956Z" }, - { url = "https://files.pythonhosted.org/packages/2a/68/687187c7e26cb24ccbd88e5069f5ef00eba804d36dde11d99aad0838ab45/charset_normalizer-3.4.6-py3-none-any.whl", hash = "sha256:947cf925bc916d90adba35a64c82aace04fa39b46b52d4630ece166655905a69", size = 61455, upload-time = "2026-03-15T18:53:23.833Z" }, +version = "3.4.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271, upload-time = "2026-04-02T09:28:39.342Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/26/08/0f303cb0b529e456bb116f2d50565a482694fbb94340bf56d44677e7ed03/charset_normalizer-3.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cdd68a1fb318e290a2077696b7eb7a21a49163c455979c639bf5a5dcdc46617d", size = 315182, upload-time = "2026-04-02T09:25:40.673Z" }, + { url = "https://files.pythonhosted.org/packages/24/47/b192933e94b546f1b1fe4df9cc1f84fcdbf2359f8d1081d46dd029b50207/charset_normalizer-3.4.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e17b8d5d6a8c47c85e68ca8379def1303fd360c3e22093a807cd34a71cd082b8", size = 209329, upload-time = "2026-04-02T09:25:42.354Z" }, + { url = "https://files.pythonhosted.org/packages/c2/b4/01fa81c5ca6141024d89a8fc15968002b71da7f825dd14113207113fabbd/charset_normalizer-3.4.7-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:511ef87c8aec0783e08ac18565a16d435372bc1ac25a91e6ac7f5ef2b0bff790", size = 231230, upload-time = "2026-04-02T09:25:44.281Z" }, + { url = "https://files.pythonhosted.org/packages/20/f7/7b991776844dfa058017e600e6e55ff01984a063290ca5622c0b63162f68/charset_normalizer-3.4.7-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:007d05ec7321d12a40227aae9e2bc6dca73f3cb21058999a1df9e193555a9dcc", size = 225890, upload-time = "2026-04-02T09:25:45.475Z" }, + { url = "https://files.pythonhosted.org/packages/20/e7/bed0024a0f4ab0c8a9c64d4445f39b30c99bd1acd228291959e3de664247/charset_normalizer-3.4.7-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf29836da5119f3c8a8a70667b0ef5fdca3bb12f80fd06487cfa575b3909b393", size = 216930, upload-time = "2026-04-02T09:25:46.58Z" }, + { url = "https://files.pythonhosted.org/packages/e2/ab/b18f0ab31cdd7b3ddb8bb76c4a414aeb8160c9810fdf1bc62f269a539d87/charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_armv7l.whl", hash = 
"sha256:12d8baf840cc7889b37c7c770f478adea7adce3dcb3944d02ec87508e2dcf153", size = 202109, upload-time = "2026-04-02T09:25:48.031Z" }, + { url = "https://files.pythonhosted.org/packages/82/e5/7e9440768a06dfb3075936490cb82dbf0ee20a133bf0dd8551fa096914ec/charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d560742f3c0d62afaccf9f41fe485ed69bd7661a241f86a3ef0f0fb8b1a397af", size = 214684, upload-time = "2026-04-02T09:25:49.245Z" }, + { url = "https://files.pythonhosted.org/packages/71/94/8c61d8da9f062fdf457c80acfa25060ec22bf1d34bbeaca4350f13bcfd07/charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b14b2d9dac08e28bb8046a1a0434b1750eb221c8f5b87a68f4fa11a6f97b5e34", size = 212785, upload-time = "2026-04-02T09:25:50.671Z" }, + { url = "https://files.pythonhosted.org/packages/66/cd/6e9889c648e72c0ab2e5967528bb83508f354d706637bc7097190c874e13/charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:bc17a677b21b3502a21f66a8cc64f5bfad4df8a0b8434d661666f8ce90ac3af1", size = 203055, upload-time = "2026-04-02T09:25:51.802Z" }, + { url = "https://files.pythonhosted.org/packages/92/2e/7a951d6a08aefb7eb8e1b54cdfb580b1365afdd9dd484dc4bee9e5d8f258/charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:750e02e074872a3fad7f233b47734166440af3cdea0add3e95163110816d6752", size = 232502, upload-time = "2026-04-02T09:25:53.388Z" }, + { url = "https://files.pythonhosted.org/packages/58/d5/abcf2d83bf8e0a1286df55cd0dc1d49af0da4282aa77e986df343e7de124/charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:4e5163c14bffd570ef2affbfdd77bba66383890797df43dc8b4cc7d6f500bf53", size = 214295, upload-time = "2026-04-02T09:25:54.765Z" }, + { url = "https://files.pythonhosted.org/packages/47/3a/7d4cd7ed54be99973a0dc176032cba5cb1f258082c31fa6df35cff46acfc/charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:6ed74185b2db44f41ef35fd1617c5888e59792da9bbc9190d6c7300617182616", size = 227145, upload-time = "2026-04-02T09:25:55.904Z" }, + { url = "https://files.pythonhosted.org/packages/1d/98/3a45bf8247889cf28262ebd3d0872edff11565b2a1e3064ccb132db3fbb0/charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:94e1885b270625a9a828c9793b4d52a64445299baa1fea5a173bf1d3dd9a1a5a", size = 218884, upload-time = "2026-04-02T09:25:57.074Z" }, + { url = "https://files.pythonhosted.org/packages/ad/80/2e8b7f8915ed5c9ef13aa828d82738e33888c485b65ebf744d615040c7ea/charset_normalizer-3.4.7-cp310-cp310-win32.whl", hash = "sha256:6785f414ae0f3c733c437e0f3929197934f526d19dfaa75e18fdb4f94c6fb374", size = 148343, upload-time = "2026-04-02T09:25:58.199Z" }, + { url = "https://files.pythonhosted.org/packages/35/1b/3b8c8c77184af465ee9ad88b5aea46ea6b2e1f7b9dc9502891e37af21e30/charset_normalizer-3.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:6696b7688f54f5af4462118f0bfa7c1621eeb87154f77fa04b9295ce7a8f2943", size = 159174, upload-time = "2026-04-02T09:25:59.322Z" }, + { url = "https://files.pythonhosted.org/packages/be/c1/feb40dca40dbb21e0a908801782d9288c64fc8d8e562c2098e9994c8c21b/charset_normalizer-3.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:66671f93accb62ed07da56613636f3641f1a12c13046ce91ffc923721f23c008", size = 147805, upload-time = "2026-04-02T09:26:00.756Z" }, + { url = "https://files.pythonhosted.org/packages/c2/d7/b5b7020a0565c2e9fa8c09f4b5fa6232feb326b8c20081ccded47ea368fd/charset_normalizer-3.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7641bb8895e77f921102f72833904dcd9901df5d6d72a2ab8f31d04b7e51e4e7", size = 309705, upload-time = "2026-04-02T09:26:02.191Z" }, + { url = "https://files.pythonhosted.org/packages/5a/53/58c29116c340e5456724ecd2fff4196d236b98f3da97b404bc5e51ac3493/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:202389074300232baeb53ae2569a60901f7efadd4245cf3a3bf0617d60b439d7", size = 206419, upload-time = "2026-04-02T09:26:03.583Z" }, + { url = "https://files.pythonhosted.org/packages/b2/02/e8146dc6591a37a00e5144c63f29fb7c97a734ea8a111190783c0e60ab63/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:30b8d1d8c52a48c2c5690e152c169b673487a2a58de1ec7393196753063fcd5e", size = 227901, upload-time = "2026-04-02T09:26:04.738Z" }, + { url = "https://files.pythonhosted.org/packages/fb/73/77486c4cd58f1267bf17db420e930c9afa1b3be3fe8c8b8ebbebc9624359/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:532bc9bf33a68613fd7d65e4b1c71a6a38d7d42604ecf239c77392e9b4e8998c", size = 222742, upload-time = "2026-04-02T09:26:06.36Z" }, + { url = "https://files.pythonhosted.org/packages/a1/fa/f74eb381a7d94ded44739e9d94de18dc5edc9c17fb8c11f0a6890696c0a9/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fe249cb4651fd12605b7288b24751d8bfd46d35f12a20b1ba33dea122e690df", size = 214061, upload-time = "2026-04-02T09:26:08.347Z" }, + { url = "https://files.pythonhosted.org/packages/dc/92/42bd3cefcf7687253fb86694b45f37b733c97f59af3724f356fa92b8c344/charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:65bcd23054beab4d166035cabbc868a09c1a49d1efe458fe8e4361215df40265", size = 199239, upload-time = "2026-04-02T09:26:09.823Z" }, + { url = "https://files.pythonhosted.org/packages/4c/3d/069e7184e2aa3b3cddc700e3dd267413dc259854adc3380421c805c6a17d/charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:08e721811161356f97b4059a9ba7bafb23ea5ee2255402c42881c214e173c6b4", size = 210173, upload-time = "2026-04-02T09:26:10.953Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/51/9d56feb5f2e7074c46f93e0ebdbe61f0848ee246e2f0d89f8e20b89ebb8f/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e060d01aec0a910bdccb8be71faf34e7799ce36950f8294c8bf612cba65a2c9e", size = 209841, upload-time = "2026-04-02T09:26:12.142Z" }, + { url = "https://files.pythonhosted.org/packages/d2/59/893d8f99cc4c837dda1fe2f1139079703deb9f321aabcb032355de13b6c7/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:38c0109396c4cfc574d502df99742a45c72c08eff0a36158b6f04000043dbf38", size = 200304, upload-time = "2026-04-02T09:26:13.711Z" }, + { url = "https://files.pythonhosted.org/packages/7d/1d/ee6f3be3464247578d1ed5c46de545ccc3d3ff933695395c402c21fa6b77/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1c2a768fdd44ee4a9339a9b0b130049139b8ce3c01d2ce09f67f5a68048d477c", size = 229455, upload-time = "2026-04-02T09:26:14.941Z" }, + { url = "https://files.pythonhosted.org/packages/54/bb/8fb0a946296ea96a488928bdce8ef99023998c48e4713af533e9bb98ef07/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:1a87ca9d5df6fe460483d9a5bbf2b18f620cbed41b432e2bddb686228282d10b", size = 210036, upload-time = "2026-04-02T09:26:16.478Z" }, + { url = "https://files.pythonhosted.org/packages/9a/bc/015b2387f913749f82afd4fcba07846d05b6d784dd16123cb66860e0237d/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d635aab80466bc95771bb78d5370e74d36d1fe31467b6b29b8b57b2a3cd7d22c", size = 224739, upload-time = "2026-04-02T09:26:17.751Z" }, + { url = "https://files.pythonhosted.org/packages/17/ab/63133691f56baae417493cba6b7c641571a2130eb7bceba6773367ab9ec5/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ae196f021b5e7c78e918242d217db021ed2a6ace2bc6ae94c0fc596221c7f58d", size = 216277, upload-time = "2026-04-02T09:26:18.981Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/6d/3be70e827977f20db77c12a97e6a9f973631a45b8d186c084527e53e77a4/charset_normalizer-3.4.7-cp311-cp311-win32.whl", hash = "sha256:adb2597b428735679446b46c8badf467b4ca5f5056aae4d51a19f9570301b1ad", size = 147819, upload-time = "2026-04-02T09:26:20.295Z" }, + { url = "https://files.pythonhosted.org/packages/20/d9/5f67790f06b735d7c7637171bbfd89882ad67201891b7275e51116ed8207/charset_normalizer-3.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:8e385e4267ab76874ae30db04c627faaaf0b509e1ccc11a95b3fc3e83f855c00", size = 159281, upload-time = "2026-04-02T09:26:21.74Z" }, + { url = "https://files.pythonhosted.org/packages/ca/83/6413f36c5a34afead88ce6f66684d943d91f233d76dd083798f9602b75ae/charset_normalizer-3.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:d4a48e5b3c2a489fae013b7589308a40146ee081f6f509e047e0e096084ceca1", size = 147843, upload-time = "2026-04-02T09:26:22.901Z" }, + { url = "https://files.pythonhosted.org/packages/0c/eb/4fc8d0a7110eb5fc9cc161723a34a8a6c200ce3b4fbf681bc86feee22308/charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46", size = 311328, upload-time = "2026-04-02T09:26:24.331Z" }, + { url = "https://files.pythonhosted.org/packages/f8/e3/0fadc706008ac9d7b9b5be6dc767c05f9d3e5df51744ce4cc9605de7b9f4/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2", size = 208061, upload-time = "2026-04-02T09:26:25.568Z" }, + { url = "https://files.pythonhosted.org/packages/42/f0/3dd1045c47f4a4604df85ec18ad093912ae1344ac706993aff91d38773a2/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b", size = 229031, upload-time = "2026-04-02T09:26:26.865Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/67/675a46eb016118a2fbde5a277a5d15f4f69d5f3f5f338e5ee2f8948fcf43/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a", size = 225239, upload-time = "2026-04-02T09:26:28.044Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f8/d0118a2f5f23b02cd166fa385c60f9b0d4f9194f574e2b31cef350ad7223/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116", size = 216589, upload-time = "2026-04-02T09:26:29.239Z" }, + { url = "https://files.pythonhosted.org/packages/b1/f1/6d2b0b261b6c4ceef0fcb0d17a01cc5bc53586c2d4796fa04b5c540bc13d/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb", size = 202733, upload-time = "2026-04-02T09:26:30.5Z" }, + { url = "https://files.pythonhosted.org/packages/6f/c0/7b1f943f7e87cc3db9626ba17807d042c38645f0a1d4415c7a14afb5591f/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1", size = 212652, upload-time = "2026-04-02T09:26:31.709Z" }, + { url = "https://files.pythonhosted.org/packages/38/dd/5a9ab159fe45c6e72079398f277b7d2b523e7f716acc489726115a910097/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15", size = 211229, upload-time = "2026-04-02T09:26:33.282Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ff/531a1cad5ca855d1c1a8b69cb71abfd6d85c0291580146fda7c82857caa1/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5", size = 203552, 
upload-time = "2026-04-02T09:26:34.845Z" }, + { url = "https://files.pythonhosted.org/packages/c1/4c/a5fb52d528a8ca41f7598cb619409ece30a169fbdf9cdce592e53b46c3a6/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d", size = 230806, upload-time = "2026-04-02T09:26:36.152Z" }, + { url = "https://files.pythonhosted.org/packages/59/7a/071feed8124111a32b316b33ae4de83d36923039ef8cf48120266844285b/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7", size = 212316, upload-time = "2026-04-02T09:26:37.672Z" }, + { url = "https://files.pythonhosted.org/packages/fd/35/f7dba3994312d7ba508e041eaac39a36b120f32d4c8662b8814dab876431/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464", size = 227274, upload-time = "2026-04-02T09:26:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/8a/2d/a572df5c9204ab7688ec1edc895a73ebded3b023bb07364710b05dd1c9be/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49", size = 218468, upload-time = "2026-04-02T09:26:40.17Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/890922a8b03a568ca2f336c36585a4713c55d4d67bf0f0c78924be6315ca/charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c", size = 148460, upload-time = "2026-04-02T09:26:41.416Z" }, + { url = "https://files.pythonhosted.org/packages/35/d9/0e7dffa06c5ab081f75b1b786f0aefc88365825dfcd0ac544bdb7b2b6853/charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6", size = 159330, upload-time = "2026-04-02T09:26:42.554Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/5d/481bcc2a7c88ea6b0878c299547843b2521ccbc40980cb406267088bc701/charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d", size = 147828, upload-time = "2026-04-02T09:26:44.075Z" }, + { url = "https://files.pythonhosted.org/packages/c1/3b/66777e39d3ae1ddc77ee606be4ec6d8cbd4c801f65e5a1b6f2b11b8346dd/charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", size = 309627, upload-time = "2026-04-02T09:26:45.198Z" }, + { url = "https://files.pythonhosted.org/packages/2e/4e/b7f84e617b4854ade48a1b7915c8ccfadeba444d2a18c291f696e37f0d3b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", size = 207008, upload-time = "2026-04-02T09:26:46.824Z" }, + { url = "https://files.pythonhosted.org/packages/c4/bb/ec73c0257c9e11b268f018f068f5d00aa0ef8c8b09f7753ebd5f2880e248/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", size = 228303, upload-time = "2026-04-02T09:26:48.397Z" }, + { url = "https://files.pythonhosted.org/packages/85/fb/32d1f5033484494619f701e719429c69b766bfc4dbc61aa9e9c8c166528b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", size = 224282, upload-time = "2026-04-02T09:26:49.684Z" }, + { url = "https://files.pythonhosted.org/packages/fa/07/330e3a0dda4c404d6da83b327270906e9654a24f6c546dc886a0eb0ffb23/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", size = 215595, upload-time = "2026-04-02T09:26:50.915Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7c/fc890655786e423f02556e0216d4b8c6bcb6bdfa890160dc66bf52dee468/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", size = 201986, upload-time = "2026-04-02T09:26:52.197Z" }, + { url = "https://files.pythonhosted.org/packages/d8/97/bfb18b3db2aed3b90cf54dc292ad79fdd5ad65c4eae454099475cbeadd0d/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", size = 211711, upload-time = "2026-04-02T09:26:53.49Z" }, + { url = "https://files.pythonhosted.org/packages/6f/a5/a581c13798546a7fd557c82614a5c65a13df2157e9ad6373166d2a3e645d/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", size = 210036, upload-time = "2026-04-02T09:26:54.975Z" }, + { url = "https://files.pythonhosted.org/packages/8c/bf/b3ab5bcb478e4193d517644b0fb2bf5497fbceeaa7a1bc0f4d5b50953861/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", size = 202998, upload-time = "2026-04-02T09:26:56.303Z" }, + { url = "https://files.pythonhosted.org/packages/e7/4e/23efd79b65d314fa320ec6017b4b5834d5c12a58ba4610aa353af2e2f577/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", size = 230056, upload-time = "2026-04-02T09:26:57.554Z" }, + { url = "https://files.pythonhosted.org/packages/b9/9f/1e1941bc3f0e01df116e68dc37a55c4d249df5e6fa77f008841aef68264f/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = 
"sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", size = 211537, upload-time = "2026-04-02T09:26:58.843Z" }, + { url = "https://files.pythonhosted.org/packages/80/0f/088cbb3020d44428964a6c97fe1edfb1b9550396bf6d278330281e8b709c/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", size = 226176, upload-time = "2026-04-02T09:27:00.437Z" }, + { url = "https://files.pythonhosted.org/packages/6a/9f/130394f9bbe06f4f63e22641d32fc9b202b7e251c9aef4db044324dac493/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", size = 217723, upload-time = "2026-04-02T09:27:02.021Z" }, + { url = "https://files.pythonhosted.org/packages/73/55/c469897448a06e49f8fa03f6caae97074fde823f432a98f979cc42b90e69/charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", size = 148085, upload-time = "2026-04-02T09:27:03.192Z" }, + { url = "https://files.pythonhosted.org/packages/5d/78/1b74c5bbb3f99b77a1715c91b3e0b5bdb6fe302d95ace4f5b1bec37b0167/charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", size = 158819, upload-time = "2026-04-02T09:27:04.454Z" }, + { url = "https://files.pythonhosted.org/packages/68/86/46bd42279d323deb8687c4a5a811fd548cb7d1de10cf6535d099877a9a9f/charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", size = 147915, upload-time = "2026-04-02T09:27:05.971Z" }, + { url = "https://files.pythonhosted.org/packages/97/c8/c67cb8c70e19ef1960b97b22ed2a1567711de46c4ddf19799923adc836c2/charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", size = 309234, 
upload-time = "2026-04-02T09:27:07.194Z" }, + { url = "https://files.pythonhosted.org/packages/99/85/c091fdee33f20de70d6c8b522743b6f831a2f1cd3ff86de4c6a827c48a76/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", size = 208042, upload-time = "2026-04-02T09:27:08.749Z" }, + { url = "https://files.pythonhosted.org/packages/87/1c/ab2ce611b984d2fd5d86a5a8a19c1ae26acac6bad967da4967562c75114d/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", size = 228706, upload-time = "2026-04-02T09:27:09.951Z" }, + { url = "https://files.pythonhosted.org/packages/a8/29/2b1d2cb00bf085f59d29eb773ce58ec2d325430f8c216804a0a5cd83cbca/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", size = 224727, upload-time = "2026-04-02T09:27:11.175Z" }, + { url = "https://files.pythonhosted.org/packages/47/5c/032c2d5a07fe4d4855fea851209cca2b6f03ebeb6d4e3afdb3358386a684/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", size = 215882, upload-time = "2026-04-02T09:27:12.446Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c2/356065d5a8b78ed04499cae5f339f091946a6a74f91e03476c33f0ab7100/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", size = 200860, upload-time = "2026-04-02T09:27:13.721Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/cd/a32a84217ced5039f53b29f460962abb2d4420def55afabe45b1c3c7483d/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", size = 211564, upload-time = "2026-04-02T09:27:15.272Z" }, + { url = "https://files.pythonhosted.org/packages/44/86/58e6f13ce26cc3b8f4a36b94a0f22ae2f00a72534520f4ae6857c4b81f89/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", size = 211276, upload-time = "2026-04-02T09:27:16.834Z" }, + { url = "https://files.pythonhosted.org/packages/8f/fe/d17c32dc72e17e155e06883efa84514ca375f8a528ba2546bee73fc4df81/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", size = 201238, upload-time = "2026-04-02T09:27:18.229Z" }, + { url = "https://files.pythonhosted.org/packages/6a/29/f33daa50b06525a237451cdb6c69da366c381a3dadcd833fa5676bc468b3/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", size = 230189, upload-time = "2026-04-02T09:27:19.445Z" }, + { url = "https://files.pythonhosted.org/packages/b6/6e/52c84015394a6a0bdcd435210a7e944c5f94ea1055f5cc5d56c5fe368e7b/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", size = 211352, upload-time = "2026-04-02T09:27:20.79Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d7/4353be581b373033fb9198bf1da3cf8f09c1082561e8e922aa7b39bf9fe8/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", size = 227024, upload-time = "2026-04-02T09:27:22.063Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/45/99d18aa925bd1740098ccd3060e238e21115fffbfdcb8f3ece837d0ace6c/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", size = 217869, upload-time = "2026-04-02T09:27:23.486Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/5ee478aa53f4bb7996482153d4bfe1b89e0f087f0ab6b294fcf92d595873/charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", size = 148541, upload-time = "2026-04-02T09:27:25.146Z" }, + { url = "https://files.pythonhosted.org/packages/48/77/72dcb0921b2ce86420b2d79d454c7022bf5be40202a2a07906b9f2a35c97/charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", size = 159634, upload-time = "2026-04-02T09:27:26.642Z" }, + { url = "https://files.pythonhosted.org/packages/c6/a3/c2369911cd72f02386e4e340770f6e158c7980267da16af8f668217abaa0/charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", size = 148384, upload-time = "2026-04-02T09:27:28.271Z" }, + { url = "https://files.pythonhosted.org/packages/94/09/7e8a7f73d24dba1f0035fbbf014d2c36828fc1bf9c88f84093e57d315935/charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", size = 330133, upload-time = "2026-04-02T09:27:29.474Z" }, + { url = "https://files.pythonhosted.org/packages/8d/da/96975ddb11f8e977f706f45cddd8540fd8242f71ecdb5d18a80723dcf62c/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", size = 216257, upload-time = "2026-04-02T09:27:30.793Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/e8/1d63bf8ef2d388e95c64b2098f45f84758f6d102a087552da1485912637b/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", size = 234851, upload-time = "2026-04-02T09:27:32.44Z" }, + { url = "https://files.pythonhosted.org/packages/9b/40/e5ff04233e70da2681fa43969ad6f66ca5611d7e669be0246c4c7aaf6dc8/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", size = 233393, upload-time = "2026-04-02T09:27:34.03Z" }, + { url = "https://files.pythonhosted.org/packages/be/c1/06c6c49d5a5450f76899992f1ee40b41d076aee9279b49cf9974d2f313d5/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", size = 223251, upload-time = "2026-04-02T09:27:35.369Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/f2ff16fb050946169e3e1f82134d107e5d4ae72647ec8a1b1446c148480f/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", size = 206609, upload-time = "2026-04-02T09:27:36.661Z" }, + { url = "https://files.pythonhosted.org/packages/69/d5/a527c0cd8d64d2eab7459784fb4169a0ac76e5a6fc5237337982fd61347e/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", size = 220014, upload-time = "2026-04-02T09:27:38.019Z" }, + { url = "https://files.pythonhosted.org/packages/7e/80/8a7b8104a3e203074dc9aa2c613d4b726c0e136bad1cc734594b02867972/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", size = 218979, upload-time = "2026-04-02T09:27:39.37Z" }, + { url = "https://files.pythonhosted.org/packages/02/9a/b759b503d507f375b2b5c153e4d2ee0a75aa215b7f2489cf314f4541f2c0/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", size = 209238, upload-time = "2026-04-02T09:27:40.722Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4e/0f3f5d47b86bdb79256e7290b26ac847a2832d9a4033f7eb2cd4bcf4bb5b/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", size = 236110, upload-time = "2026-04-02T09:27:42.33Z" }, + { url = "https://files.pythonhosted.org/packages/96/23/bce28734eb3ed2c91dcf93abeb8a5cf393a7b2749725030bb630e554fdd8/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", size = 219824, upload-time = "2026-04-02T09:27:43.924Z" }, + { url = "https://files.pythonhosted.org/packages/2c/6f/6e897c6984cc4d41af319b077f2f600fc8214eb2fe2d6bcb79141b882400/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", size = 233103, upload-time = "2026-04-02T09:27:45.348Z" }, + { url = "https://files.pythonhosted.org/packages/76/22/ef7bd0fe480a0ae9b656189ec00744b60933f68b4f42a7bb06589f6f576a/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", size = 225194, upload-time = "2026-04-02T09:27:46.706Z" }, + { url = "https://files.pythonhosted.org/packages/c5/a7/0e0ab3e0b5bc1219bd80a6a0d4d72ca74d9250cb2382b7c699c147e06017/charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = 
"sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", size = 159827, upload-time = "2026-04-02T09:27:48.053Z" }, + { url = "https://files.pythonhosted.org/packages/7a/1d/29d32e0fb40864b1f878c7f5a0b343ae676c6e2b271a2d55cc3a152391da/charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", size = 174168, upload-time = "2026-04-02T09:27:49.795Z" }, + { url = "https://files.pythonhosted.org/packages/de/32/d92444ad05c7a6e41fb2036749777c163baf7a0301a040cb672d6b2b1ae9/charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", size = 153018, upload-time = "2026-04-02T09:27:51.116Z" }, + { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" }, ] [[package]] name = "click" -version = "8.3.1" +version = "8.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/63/f9e1ea081ce35720d8b92acde70daaedace594dc93b693c869e0d5910718/click-8.3.3.tar.gz", hash = "sha256:398329ad4837b2ff7cbe1dd166a4c0f8900c3ca3a218de04466f38f6497f18a2", size = 328061, upload-time = "2026-04-22T15:11:27.506Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = 
"sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, + { url = "https://files.pythonhosted.org/packages/ae/44/c1221527f6a71a01ec6fbad7fa78f1d50dfa02217385cf0fa3eec7087d59/click-8.3.3-py3-none-any.whl", hash = "sha256:a2bf429bb3033c89fa4936ffb35d5cb471e3719e1f3c8a7c3fff0b8314305613", size = 110502, upload-time = "2026-04-22T15:11:25.044Z" }, ] [[package]] @@ -519,10 +516,10 @@ dev = [ { name = "jsonpickle" }, { name = "nox" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, - { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "orjson" }, { name = "pandas", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pandas", version = "3.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "polars" }, { name = "python-dateutil" }, { name = "pytz" }, @@ -573,7 +570,7 @@ requires-dist = [ { name = "pandas", marker = "python_full_version < '3.11' and extra == 'dev'", specifier = "~=2.2.0" }, { name = "polars", marker = "extra == 'dev'", specifier = "~=1.39.3" }, { name = "pydantic", marker = "extra == 'static'", specifier = "~=2.12.5" }, - { name = "pytest", marker = "extra == 'test'", specifier = "~=9.0.2" }, + { name = "pytest", marker = "extra == 'test'", specifier = "~=9.0.3" }, { name = "pytest-benchmark", marker = "extra == 'test'", specifier = "~=5.2.3" }, { name = "pytest-cov", marker = "extra == 'test'", specifier = "~=7.1.0" }, 
{ name = "python-dateutil", marker = "extra == 'dev'", specifier = "~=2.9.0.post0" }, @@ -643,11 +640,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.25.2" +version = "3.29.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/94/b8/00651a0f559862f3bb7d6f7477b192afe3f583cc5e26403b44e59a55ab34/filelock-3.25.2.tar.gz", hash = "sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694", size = 40480, upload-time = "2026-03-11T20:45:38.487Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/fe/997687a931ab51049acce6fa1f23e8f01216374ea81374ddee763c493db5/filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", size = 57571, upload-time = "2026-04-19T15:39:10.068Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759, upload-time = "2026-03-11T20:45:37.437Z" }, + { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" }, ] [[package]] @@ -712,11 +709,11 @@ wheels = [ [[package]] name = "idna" -version = "3.11" +version = "3.15" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/82/77/7b3966d0b9d1d31a36ddf1746926a11dface89a83409bf1483f0237aa758/idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc", size = 199245, upload-time = "2026-05-12T22:45:57.011Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, + { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340, upload-time = "2026-05-12T22:45:55.733Z" }, ] [[package]] @@ -743,9 +740,8 @@ version = "0.13.13" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "decorator" }, - { name = "ipython", version = "8.38.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "ipython", version = "9.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, - { name = "ipython", version = "9.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "ipython", version = "8.39.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "ipython", version = "9.13.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3d/1b/7e07e7b752017f7693a0f4d41c13e5ca29ce8cbcfdcc1fd6c4ad8c0a27a0/ipdb-0.13.13.tar.gz", hash = "sha256:e3ac6018ef05126d442af680aad863006ec19d02290561ac88b8b1c0b0cfc726", size = 17042, upload-time 
= "2023-03-09T15:40:57.487Z" } @@ -755,7 +751,7 @@ wheels = [ [[package]] name = "ipython" -version = "8.38.0" +version = "8.39.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.11'", @@ -773,65 +769,40 @@ dependencies = [ { name = "traitlets", marker = "python_full_version < '3.11'" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e5/61/1810830e8b93c72dcd3c0f150c80a00c3deb229562d9423807ec92c3a539/ipython-8.38.0.tar.gz", hash = "sha256:9cfea8c903ce0867cc2f23199ed8545eb741f3a69420bfcf3743ad1cec856d39", size = 5513996, upload-time = "2026-01-05T10:59:06.901Z" } +sdist = { url = "https://files.pythonhosted.org/packages/40/18/f8598d287006885e7136451fdea0755af4ebcbfe342836f24deefaed1164/ipython-8.39.0.tar.gz", hash = "sha256:4110ae96012c379b8b6db898a07e186c40a2a1ef5d57a7fa83166047d9da7624", size = 5513971, upload-time = "2026-03-27T10:02:13.94Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9f/df/db59624f4c71b39717c423409950ac3f2c8b2ce4b0aac843112c7fb3f721/ipython-8.38.0-py3-none-any.whl", hash = "sha256:750162629d800ac65bb3b543a14e7a74b0e88063eac9b92124d4b2aa3f6d8e86", size = 831813, upload-time = "2026-01-05T10:59:04.239Z" }, + { url = "https://files.pythonhosted.org/packages/c0/56/4cc7fc9e9e3f38fd324f24f8afe0ad8bb5fa41283f37f1aaf9de0612c968/ipython-8.39.0-py3-none-any.whl", hash = "sha256:bb3c51c4fa8148ab1dea07a79584d1c854e234ea44aa1283bcb37bc75054651f", size = 831849, upload-time = "2026-03-27T10:02:07.846Z" }, ] [[package]] name = "ipython" -version = "9.10.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", -] -dependencies = [ - { name = "colorama", 
marker = "python_full_version == '3.11.*' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version == '3.11.*'" }, - { name = "ipython-pygments-lexers", marker = "python_full_version == '3.11.*'" }, - { name = "jedi", marker = "python_full_version == '3.11.*'" }, - { name = "matplotlib-inline", marker = "python_full_version == '3.11.*'" }, - { name = "pexpect", marker = "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, - { name = "prompt-toolkit", marker = "python_full_version == '3.11.*'" }, - { name = "pygments", marker = "python_full_version == '3.11.*'" }, - { name = "stack-data", marker = "python_full_version == '3.11.*'" }, - { name = "traitlets", marker = "python_full_version == '3.11.*'" }, - { name = "typing-extensions", marker = "python_full_version == '3.11.*'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a6/60/2111715ea11f39b1535bed6024b7dec7918b71e5e5d30855a5b503056b50/ipython-9.10.0.tar.gz", hash = "sha256:cd9e656be97618a0676d058134cd44e6dc7012c0e5cb36a9ce96a8c904adaf77", size = 4426526, upload-time = "2026-02-02T10:00:33.594Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/aa/898dec789a05731cd5a9f50605b7b44a72bd198fd0d4528e11fc610177cc/ipython-9.10.0-py3-none-any.whl", hash = "sha256:c6ab68cc23bba8c7e18e9b932797014cc61ea7fd6f19de180ab9ba73e65ee58d", size = 622774, upload-time = "2026-02-02T10:00:31.503Z" }, -] - -[[package]] -name = "ipython" -version = "9.11.0" +version = "9.13.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'win32'", "python_full_version >= '3.14' and sys_platform == 'emscripten'", "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and 
sys_platform == 'emscripten'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", ] dependencies = [ - { name = "colorama", marker = "python_full_version >= '3.12' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version >= '3.12'" }, - { name = "ipython-pygments-lexers", marker = "python_full_version >= '3.12'" }, - { name = "jedi", marker = "python_full_version >= '3.12'" }, - { name = "matplotlib-inline", marker = "python_full_version >= '3.12'" }, - { name = "pexpect", marker = "python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, - { name = "prompt-toolkit", marker = "python_full_version >= '3.12'" }, - { name = "pygments", marker = "python_full_version >= '3.12'" }, - { name = "stack-data", marker = "python_full_version >= '3.12'" }, - { name = "traitlets", marker = "python_full_version >= '3.12'" }, + { name = "colorama", marker = "python_full_version >= '3.11' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version >= '3.11'" }, + { name = "ipython-pygments-lexers", marker = "python_full_version >= '3.11'" }, + { name = "jedi", marker = "python_full_version >= '3.11'" }, + { name = "matplotlib-inline", marker = "python_full_version >= '3.11'" }, + { name = "pexpect", marker = "python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "prompt-toolkit", marker = "python_full_version >= '3.11'" }, + { name = "psutil", marker = "python_full_version >= '3.11'" }, + { name = "pygments", marker = "python_full_version >= 
'3.11'" }, + { name = "stack-data", marker = "python_full_version >= '3.11'" }, + { name = "traitlets", marker = "python_full_version >= '3.11'" }, + { name = "typing-extensions", marker = "python_full_version == '3.11.*'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/86/28/a4698eda5a8928a45d6b693578b135b753e14fa1c2b36ee9441e69a45576/ipython-9.11.0.tar.gz", hash = "sha256:2a94bc4406b22ecc7e4cb95b98450f3ea493a76bec8896cda11b78d7752a6667", size = 4427354, upload-time = "2026-03-05T08:57:30.549Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/c4/87cda5842cf5c31837c06ddb588e11c3c35d8ece89b7a0108c06b8c9b00a/ipython-9.13.0.tar.gz", hash = "sha256:7e834b6afc99f020e3f05966ced34792f40267d64cb1ea9043886dab0dde5967", size = 4430549, upload-time = "2026-04-24T12:24:55.221Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b2/90/45c72becc57158facc6a6404f663b77bbcea2519ca57f760e2879ae1315d/ipython-9.11.0-py3-none-any.whl", hash = "sha256:6922d5bcf944c6e525a76a0a304451b60a2b6f875e86656d8bc2dfda5d710e19", size = 624222, upload-time = "2026-03-05T08:57:28.94Z" }, + { url = "https://files.pythonhosted.org/packages/b9/86/3060e8029b7cc505cce9a0137431dda81d0a3fde93a8f0f50ee0bf37a795/ipython-9.13.0-py3-none-any.whl", hash = "sha256:57f9d4639e20818d328d287c7b549af3d05f12486ea8f2e7f73e52a36ec4d201", size = 627274, upload-time = "2026-04-24T12:24:53.038Z" }, ] [[package]] @@ -848,14 +819,14 @@ wheels = [ [[package]] name = "jedi" -version = "0.19.2" +version = "0.20.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "parso" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/46/b7/a3635f6a2d7cf5b5dd98064fc1d5fbbafcb25477bcea204a3a92145d158b/jedi-0.20.0.tar.gz", hash = "sha256:c3f4ccbd276696f4b19c54618d4fb18f9fc24b0aef02acf704b23f487daa1011", size = 3119416, upload-time = "2026-05-01T23:38:47.814Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" }, + { url = "https://files.pythonhosted.org/packages/9a/93/242e2eab5fe682ffcb8b0084bde703a41d51e17ee0f3a31ff0d9d813620a/jedi-0.20.0-py2.py3-none-any.whl", hash = "sha256:7bdd9c2634f56713299976f4cbd59cb3fa92165cc5e05ea811fb253480728b67", size = 4884812, upload-time = "2026-05-01T23:38:43.919Z" }, ] [[package]] @@ -966,14 +937,14 @@ wheels = [ [[package]] name = "matplotlib-inline" -version = "0.2.1" +version = "0.2.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "traitlets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c7/74/97e72a36efd4ae2bccb3463284300f8953f199b5ffbc04cbbb0ec78f74b1/matplotlib_inline-0.2.1.tar.gz", hash = "sha256:e1ee949c340d771fc39e241ea75683deb94762c8fa5f2927ec57c83c4dffa9fe", size = 8110, upload-time = "2025-10-23T09:00:22.126Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/c0/9f7c9a46090390368a4d7bcb76bb87a4a36c421e4c0792cdb53486ffac7a/matplotlib_inline-0.2.2.tar.gz", hash = "sha256:72f3fe8fce36b70d4a5b612f899090cd0401deddc4ea90e1572b9f4bfb058c79", size = 8150, upload-time = "2026-05-08T17:33:33.49Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/09/5b161152e2d90f7b87f781c2e1267494aef9c32498df793f73ad0a0a494a/matplotlib_inline-0.2.2-py3-none-any.whl", hash = "sha256:3c821cf1c209f59fb2d2d64abbf5b23b67bcb2210d663f9918dd851c6da1fcf6", size = 9534, upload-time = "2026-05-08T17:33:32.055Z" }, ] [[package]] @@ -1009,12 +980,9 @@ name = "numpy" version = "2.2.6" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", "python_full_version < '3.11'", ] sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } @@ -1077,86 +1045,86 @@ wheels = [ [[package]] name = "numpy" -version = "2.4.3" +version = "2.4.4" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'win32'", "python_full_version >= '3.14' and sys_platform == 'emscripten'", "python_full_version >= '3.14' and sys_platform != 
'emscripten' and sys_platform != 'win32'", ] -sdist = { url = "https://files.pythonhosted.org/packages/10/8b/c265f4823726ab832de836cdd184d0986dcf94480f81e8739692a7ac7af2/numpy-2.4.3.tar.gz", hash = "sha256:483a201202b73495f00dbc83796c6ae63137a9bdade074f7648b3e32613412dd", size = 20727743, upload-time = "2026-03-09T07:58:53.426Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/51/5093a2df15c4dc19da3f79d1021e891f5dcf1d9d1db6ba38891d5590f3fe/numpy-2.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:33b3bf58ee84b172c067f56aeadc7ee9ab6de69c5e800ab5b10295d54c581adb", size = 16957183, upload-time = "2026-03-09T07:55:57.774Z" }, - { url = "https://files.pythonhosted.org/packages/b5/7c/c061f3de0630941073d2598dc271ac2f6cbcf5c83c74a5870fea07488333/numpy-2.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ba7b51e71c05aa1f9bc3641463cd82308eab40ce0d5c7e1fd4038cbf9938147", size = 14968734, upload-time = "2026-03-09T07:56:00.494Z" }, - { url = "https://files.pythonhosted.org/packages/ef/27/d26c85cbcd86b26e4f125b0668e7a7c0542d19dd7d23ee12e87b550e95b5/numpy-2.4.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a1988292870c7cb9d0ebb4cc96b4d447513a9644801de54606dc7aabf2b7d920", size = 5475288, upload-time = "2026-03-09T07:56:02.857Z" }, - { url = "https://files.pythonhosted.org/packages/2b/09/3c4abbc1dcd8010bf1a611d174c7aa689fc505585ec806111b4406f6f1b1/numpy-2.4.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:23b46bb6d8ecb68b58c09944483c135ae5f0e9b8d8858ece5e4ead783771d2a9", size = 6805253, upload-time = "2026-03-09T07:56:04.53Z" }, - { url = "https://files.pythonhosted.org/packages/21/bc/e7aa3f6817e40c3f517d407742337cbb8e6fc4b83ce0b55ab780c829243b/numpy-2.4.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a016db5c5dba78fa8fe9f5d80d6708f9c42ab087a739803c0ac83a43d686a470", size = 15969479, upload-time = "2026-03-09T07:56:06.638Z" }, - { url = 
"https://files.pythonhosted.org/packages/78/51/9f5d7a41f0b51649ddf2f2320595e15e122a40610b233d51928dd6c92353/numpy-2.4.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:715de7f82e192e8cae5a507a347d97ad17598f8e026152ca97233e3666daaa71", size = 16901035, upload-time = "2026-03-09T07:56:09.405Z" }, - { url = "https://files.pythonhosted.org/packages/64/6e/b221dd847d7181bc5ee4857bfb026182ef69499f9305eb1371cbb1aea626/numpy-2.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2ddb7919366ee468342b91dea2352824c25b55814a987847b6c52003a7c97f15", size = 17325657, upload-time = "2026-03-09T07:56:12.067Z" }, - { url = "https://files.pythonhosted.org/packages/eb/b8/8f3fd2da596e1063964b758b5e3c970aed1949a05200d7e3d46a9d46d643/numpy-2.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a315e5234d88067f2d97e1f2ef670a7569df445d55400f1e33d117418d008d52", size = 18635512, upload-time = "2026-03-09T07:56:14.629Z" }, - { url = "https://files.pythonhosted.org/packages/5c/24/2993b775c37e39d2f8ab4125b44337ab0b2ba106c100980b7c274a22bee7/numpy-2.4.3-cp311-cp311-win32.whl", hash = "sha256:2b3f8d2c4589b1a2028d2a770b0fc4d1f332fb5e01521f4de3199a896d158ddd", size = 6238100, upload-time = "2026-03-09T07:56:17.243Z" }, - { url = "https://files.pythonhosted.org/packages/76/1d/edccf27adedb754db7c4511d5eac8b83f004ae948fe2d3509e8b78097d4c/numpy-2.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:77e76d932c49a75617c6d13464e41203cd410956614d0a0e999b25e9e8d27eec", size = 12609816, upload-time = "2026-03-09T07:56:19.089Z" }, - { url = "https://files.pythonhosted.org/packages/92/82/190b99153480076c8dce85f4cfe7d53ea84444145ffa54cb58dcd460d66b/numpy-2.4.3-cp311-cp311-win_arm64.whl", hash = "sha256:eb610595dd91560905c132c709412b512135a60f1851ccbd2c959e136431ff67", size = 10485757, upload-time = "2026-03-09T07:56:21.753Z" }, - { url = 
"https://files.pythonhosted.org/packages/a9/ed/6388632536f9788cea23a3a1b629f25b43eaacd7d7377e5d6bc7b9deb69b/numpy-2.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:61b0cbabbb6126c8df63b9a3a0c4b1f44ebca5e12ff6997b80fcf267fb3150ef", size = 16669628, upload-time = "2026-03-09T07:56:24.252Z" }, - { url = "https://files.pythonhosted.org/packages/74/1b/ee2abfc68e1ce728b2958b6ba831d65c62e1b13ce3017c13943f8f9b5b2e/numpy-2.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7395e69ff32526710748f92cd8c9849b361830968ea3e24a676f272653e8983e", size = 14696872, upload-time = "2026-03-09T07:56:26.991Z" }, - { url = "https://files.pythonhosted.org/packages/ba/d1/780400e915ff5638166f11ca9dc2c5815189f3d7cf6f8759a1685e586413/numpy-2.4.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:abdce0f71dcb4a00e4e77f3faf05e4616ceccfe72ccaa07f47ee79cda3b7b0f4", size = 5203489, upload-time = "2026-03-09T07:56:29.414Z" }, - { url = "https://files.pythonhosted.org/packages/0b/bb/baffa907e9da4cc34a6e556d6d90e032f6d7a75ea47968ea92b4858826c4/numpy-2.4.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:48da3a4ee1336454b07497ff7ec83903efa5505792c4e6d9bf83d99dc07a1e18", size = 6550814, upload-time = "2026-03-09T07:56:32.225Z" }, - { url = "https://files.pythonhosted.org/packages/7b/12/8c9f0c6c95f76aeb20fc4a699c33e9f827fa0d0f857747c73bb7b17af945/numpy-2.4.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32e3bef222ad6b052280311d1d60db8e259e4947052c3ae7dd6817451fc8a4c5", size = 15666601, upload-time = "2026-03-09T07:56:34.461Z" }, - { url = "https://files.pythonhosted.org/packages/bd/79/cc665495e4d57d0aa6fbcc0aa57aa82671dfc78fbf95fe733ed86d98f52a/numpy-2.4.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7dd01a46700b1967487141a66ac1a3cf0dd8ebf1f08db37d46389401512ca97", size = 16621358, upload-time = "2026-03-09T07:56:36.852Z" }, - { url = 
"https://files.pythonhosted.org/packages/a8/40/b4ecb7224af1065c3539f5ecfff879d090de09608ad1008f02c05c770cb3/numpy-2.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:76f0f283506c28b12bba319c0fab98217e9f9b54e6160e9c79e9f7348ba32e9c", size = 17016135, upload-time = "2026-03-09T07:56:39.337Z" }, - { url = "https://files.pythonhosted.org/packages/f7/b1/6a88e888052eed951afed7a142dcdf3b149a030ca59b4c71eef085858e43/numpy-2.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:737f630a337364665aba3b5a77e56a68cc42d350edd010c345d65a3efa3addcc", size = 18345816, upload-time = "2026-03-09T07:56:42.31Z" }, - { url = "https://files.pythonhosted.org/packages/f3/8f/103a60c5f8c3d7fc678c19cd7b2476110da689ccb80bc18050efbaeae183/numpy-2.4.3-cp312-cp312-win32.whl", hash = "sha256:26952e18d82a1dbbc2f008d402021baa8d6fc8e84347a2072a25e08b46d698b9", size = 5960132, upload-time = "2026-03-09T07:56:44.851Z" }, - { url = "https://files.pythonhosted.org/packages/d7/7c/f5ee1bf6ed888494978046a809df2882aad35d414b622893322df7286879/numpy-2.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:65f3c2455188f09678355f5cae1f959a06b778bc66d535da07bf2ef20cd319d5", size = 12316144, upload-time = "2026-03-09T07:56:47.057Z" }, - { url = "https://files.pythonhosted.org/packages/71/46/8d1cb3f7a00f2fb6394140e7e6623696e54c6318a9d9691bb4904672cf42/numpy-2.4.3-cp312-cp312-win_arm64.whl", hash = "sha256:2abad5c7fef172b3377502bde47892439bae394a71bc329f31df0fd829b41a9e", size = 10220364, upload-time = "2026-03-09T07:56:49.849Z" }, - { url = "https://files.pythonhosted.org/packages/b6/d0/1fe47a98ce0df229238b77611340aff92d52691bcbc10583303181abf7fc/numpy-2.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b346845443716c8e542d54112966383b448f4a3ba5c66409771b8c0889485dd3", size = 16665297, upload-time = "2026-03-09T07:56:52.296Z" }, - { url = "https://files.pythonhosted.org/packages/27/d9/4e7c3f0e68dfa91f21c6fb6cf839bc829ec920688b1ce7ec722b1a6202fb/numpy-2.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:2629289168f4897a3c4e23dc98d6f1731f0fc0fe52fb9db19f974041e4cc12b9", size = 14691853, upload-time = "2026-03-09T07:56:54.992Z" }, - { url = "https://files.pythonhosted.org/packages/3a/66/bd096b13a87549683812b53ab211e6d413497f84e794fb3c39191948da97/numpy-2.4.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bb2e3cf95854233799013779216c57e153c1ee67a0bf92138acca0e429aefaee", size = 5198435, upload-time = "2026-03-09T07:56:57.184Z" }, - { url = "https://files.pythonhosted.org/packages/a2/2f/687722910b5a5601de2135c891108f51dfc873d8e43c8ed9f4ebb440b4a2/numpy-2.4.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:7f3408ff897f8ab07a07fbe2823d7aee6ff644c097cc1f90382511fe982f647f", size = 6546347, upload-time = "2026-03-09T07:56:59.531Z" }, - { url = "https://files.pythonhosted.org/packages/bf/ec/7971c4e98d86c564750393fab8d7d83d0a9432a9d78bb8a163a6dc59967a/numpy-2.4.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:decb0eb8a53c3b009b0962378065589685d66b23467ef5dac16cbe818afde27f", size = 15664626, upload-time = "2026-03-09T07:57:01.385Z" }, - { url = "https://files.pythonhosted.org/packages/7e/eb/7daecbea84ec935b7fc732e18f532073064a3816f0932a40a17f3349185f/numpy-2.4.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5f51900414fc9204a0e0da158ba2ac52b75656e7dce7e77fb9f84bfa343b4cc", size = 16608916, upload-time = "2026-03-09T07:57:04.008Z" }, - { url = "https://files.pythonhosted.org/packages/df/58/2a2b4a817ffd7472dca4421d9f0776898b364154e30c95f42195041dc03b/numpy-2.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6bd06731541f89cdc01b261ba2c9e037f1543df7472517836b78dfb15bd6e476", size = 17015824, upload-time = "2026-03-09T07:57:06.347Z" }, - { url = "https://files.pythonhosted.org/packages/4a/ca/627a828d44e78a418c55f82dd4caea8ea4a8ef24e5144d9e71016e52fb40/numpy-2.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22654fe6be0e5206f553a9250762c653d3698e46686eee53b399ab90da59bd92", size = 
18334581, upload-time = "2026-03-09T07:57:09.114Z" }, - { url = "https://files.pythonhosted.org/packages/cd/c0/76f93962fc79955fcba30a429b62304332345f22d4daec1cb33653425643/numpy-2.4.3-cp313-cp313-win32.whl", hash = "sha256:d71e379452a2f670ccb689ec801b1218cd3983e253105d6e83780967e899d687", size = 5958618, upload-time = "2026-03-09T07:57:11.432Z" }, - { url = "https://files.pythonhosted.org/packages/b1/3c/88af0040119209b9b5cb59485fa48b76f372c73068dbf9254784b975ac53/numpy-2.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:0a60e17a14d640f49146cb38e3f105f571318db7826d9b6fef7e4dce758faecd", size = 12312824, upload-time = "2026-03-09T07:57:13.586Z" }, - { url = "https://files.pythonhosted.org/packages/58/ce/3d07743aced3d173f877c3ef6a454c2174ba42b584ab0b7e6d99374f51ed/numpy-2.4.3-cp313-cp313-win_arm64.whl", hash = "sha256:c9619741e9da2059cd9c3f206110b97583c7152c1dc9f8aafd4beb450ac1c89d", size = 10221218, upload-time = "2026-03-09T07:57:16.183Z" }, - { url = "https://files.pythonhosted.org/packages/62/09/d96b02a91d09e9d97862f4fc8bfebf5400f567d8eb1fe4b0cc4795679c15/numpy-2.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7aa4e54f6469300ebca1d9eb80acd5253cdfa36f2c03d79a35883687da430875", size = 14819570, upload-time = "2026-03-09T07:57:18.564Z" }, - { url = "https://files.pythonhosted.org/packages/b5/ca/0b1aba3905fdfa3373d523b2b15b19029f4f3031c87f4066bd9d20ef6c6b/numpy-2.4.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d1b90d840b25874cf5cd20c219af10bac3667db3876d9a495609273ebe679070", size = 5326113, upload-time = "2026-03-09T07:57:21.052Z" }, - { url = "https://files.pythonhosted.org/packages/c0/63/406e0fd32fcaeb94180fd6a4c41e55736d676c54346b7efbce548b94a914/numpy-2.4.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a749547700de0a20a6718293396ec237bb38218049cfce788e08fcb716e8cf73", size = 6646370, upload-time = "2026-03-09T07:57:22.804Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/d0/10f7dc157d4b37af92720a196be6f54f889e90dcd30dce9dc657ed92c257/numpy-2.4.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f3c4a151a2e529adf49c1d54f0f57ff8f9b233ee4d44af623a81553ab86368", size = 15723499, upload-time = "2026-03-09T07:57:24.693Z" }, - { url = "https://files.pythonhosted.org/packages/66/f1/d1c2bf1161396629701bc284d958dc1efa3a5a542aab83cf11ee6eb4cba5/numpy-2.4.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22c31dc07025123aedf7f2db9e91783df13f1776dc52c6b22c620870dc0fab22", size = 16657164, upload-time = "2026-03-09T07:57:27.676Z" }, - { url = "https://files.pythonhosted.org/packages/1a/be/cca19230b740af199ac47331a21c71e7a3d0ba59661350483c1600d28c37/numpy-2.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:148d59127ac95979d6f07e4d460f934ebdd6eed641db9c0db6c73026f2b2101a", size = 17081544, upload-time = "2026-03-09T07:57:30.664Z" }, - { url = "https://files.pythonhosted.org/packages/b9/c5/9602b0cbb703a0936fb40f8a95407e8171935b15846de2f0776e08af04c7/numpy-2.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a97cbf7e905c435865c2d939af3d93f99d18eaaa3cabe4256f4304fb51604349", size = 18380290, upload-time = "2026-03-09T07:57:33.763Z" }, - { url = "https://files.pythonhosted.org/packages/ed/81/9f24708953cd30be9ee36ec4778f4b112b45165812f2ada4cc5ea1c1f254/numpy-2.4.3-cp313-cp313t-win32.whl", hash = "sha256:be3b8487d725a77acccc9924f65fd8bce9af7fac8c9820df1049424a2115af6c", size = 6082814, upload-time = "2026-03-09T07:57:36.491Z" }, - { url = "https://files.pythonhosted.org/packages/e2/9e/52f6eaa13e1a799f0ab79066c17f7016a4a8ae0c1aefa58c82b4dab690b4/numpy-2.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1ec84fd7c8e652b0f4aaaf2e6e9cc8eaa9b1b80a537e06b2e3a2fb176eedcb26", size = 12452673, upload-time = "2026-03-09T07:57:38.281Z" }, - { url = 
"https://files.pythonhosted.org/packages/c4/04/b8cece6ead0b30c9fbd99bb835ad7ea0112ac5f39f069788c5558e3b1ab2/numpy-2.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:120df8c0a81ebbf5b9020c91439fccd85f5e018a927a39f624845be194a2be02", size = 10290907, upload-time = "2026-03-09T07:57:40.747Z" }, - { url = "https://files.pythonhosted.org/packages/70/ae/3936f79adebf8caf81bd7a599b90a561334a658be4dcc7b6329ebf4ee8de/numpy-2.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:5884ce5c7acfae1e4e1b6fde43797d10aa506074d25b531b4f54bde33c0c31d4", size = 16664563, upload-time = "2026-03-09T07:57:43.817Z" }, - { url = "https://files.pythonhosted.org/packages/9b/62/760f2b55866b496bb1fa7da2a6db076bef908110e568b02fcfc1422e2a3a/numpy-2.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:297837823f5bc572c5f9379b0c9f3a3365f08492cbdc33bcc3af174372ebb168", size = 14702161, upload-time = "2026-03-09T07:57:46.169Z" }, - { url = "https://files.pythonhosted.org/packages/32/af/a7a39464e2c0a21526fb4fb76e346fb172ebc92f6d1c7a07c2c139cc17b1/numpy-2.4.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:a111698b4a3f8dcbe54c64a7708f049355abd603e619013c346553c1fd4ca90b", size = 5208738, upload-time = "2026-03-09T07:57:48.506Z" }, - { url = "https://files.pythonhosted.org/packages/29/8c/2a0cf86a59558fa078d83805589c2de490f29ed4fb336c14313a161d358a/numpy-2.4.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:4bd4741a6a676770e0e97fe9ab2e51de01183df3dcbcec591d26d331a40de950", size = 6543618, upload-time = "2026-03-09T07:57:50.591Z" }, - { url = "https://files.pythonhosted.org/packages/aa/b8/612ce010c0728b1c363fa4ea3aa4c22fe1c5da1de008486f8c2f5cb92fae/numpy-2.4.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54f29b877279d51e210e0c80709ee14ccbbad647810e8f3d375561c45ef613dd", size = 15680676, upload-time = "2026-03-09T07:57:52.34Z" }, - { url = 
"https://files.pythonhosted.org/packages/a9/7e/4f120ecc54ba26ddf3dc348eeb9eb063f421de65c05fc961941798feea18/numpy-2.4.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:679f2a834bae9020f81534671c56fd0cc76dd7e5182f57131478e23d0dc59e24", size = 16613492, upload-time = "2026-03-09T07:57:54.91Z" }, - { url = "https://files.pythonhosted.org/packages/2c/86/1b6020db73be330c4b45d5c6ee4295d59cfeef0e3ea323959d053e5a6909/numpy-2.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d84f0f881cb2225c2dfd7f78a10a5645d487a496c6668d6cc39f0f114164f3d0", size = 17031789, upload-time = "2026-03-09T07:57:57.641Z" }, - { url = "https://files.pythonhosted.org/packages/07/3a/3b90463bf41ebc21d1b7e06079f03070334374208c0f9a1f05e4ae8455e7/numpy-2.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d213c7e6e8d211888cc359bab7199670a00f5b82c0978b9d1c75baf1eddbeac0", size = 18339941, upload-time = "2026-03-09T07:58:00.577Z" }, - { url = "https://files.pythonhosted.org/packages/a8/74/6d736c4cd962259fd8bae9be27363eb4883a2f9069763747347544c2a487/numpy-2.4.3-cp314-cp314-win32.whl", hash = "sha256:52077feedeff7c76ed7c9f1a0428558e50825347b7545bbb8523da2cd55c547a", size = 6007503, upload-time = "2026-03-09T07:58:03.331Z" }, - { url = "https://files.pythonhosted.org/packages/48/39/c56ef87af669364356bb011922ef0734fc49dad51964568634c72a009488/numpy-2.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:0448e7f9caefb34b4b7dd2b77f21e8906e5d6f0365ad525f9f4f530b13df2afc", size = 12444915, upload-time = "2026-03-09T07:58:06.353Z" }, - { url = "https://files.pythonhosted.org/packages/9d/1f/ab8528e38d295fd349310807496fabb7cf9fe2e1f70b97bc20a483ea9d4a/numpy-2.4.3-cp314-cp314-win_arm64.whl", hash = "sha256:b44fd60341c4d9783039598efadd03617fa28d041fc37d22b62d08f2027fa0e7", size = 10494875, upload-time = "2026-03-09T07:58:08.734Z" }, - { url = 
"https://files.pythonhosted.org/packages/e6/ef/b7c35e4d5ef141b836658ab21a66d1a573e15b335b1d111d31f26c8ef80f/numpy-2.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0a195f4216be9305a73c0e91c9b026a35f2161237cf1c6de9b681637772ea657", size = 14822225, upload-time = "2026-03-09T07:58:11.034Z" }, - { url = "https://files.pythonhosted.org/packages/cd/8d/7730fa9278cf6648639946cc816e7cc89f0d891602584697923375f801ed/numpy-2.4.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:cd32fbacb9fd1bf041bf8e89e4576b6f00b895f06d00914820ae06a616bdfef7", size = 5328769, upload-time = "2026-03-09T07:58:13.67Z" }, - { url = "https://files.pythonhosted.org/packages/47/01/d2a137317c958b074d338807c1b6a383406cdf8b8e53b075d804cc3d211d/numpy-2.4.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:2e03c05abaee1f672e9d67bc858f300b5ccba1c21397211e8d77d98350972093", size = 6649461, upload-time = "2026-03-09T07:58:15.912Z" }, - { url = "https://files.pythonhosted.org/packages/5c/34/812ce12bc0f00272a4b0ec0d713cd237cb390666eb6206323d1cc9cedbb2/numpy-2.4.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d1ce23cce91fcea443320a9d0ece9b9305d4368875bab09538f7a5b4131938a", size = 15725809, upload-time = "2026-03-09T07:58:17.787Z" }, - { url = "https://files.pythonhosted.org/packages/25/c0/2aed473a4823e905e765fee3dc2cbf504bd3e68ccb1150fbdabd5c39f527/numpy-2.4.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c59020932feb24ed49ffd03704fbab89f22aa9c0d4b180ff45542fe8918f5611", size = 16655242, upload-time = "2026-03-09T07:58:20.476Z" }, - { url = "https://files.pythonhosted.org/packages/f2/c8/7e052b2fc87aa0e86de23f20e2c42bd261c624748aa8efd2c78f7bb8d8c6/numpy-2.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9684823a78a6cd6ad7511fc5e25b07947d1d5b5e2812c93fe99d7d4195130720", size = 17080660, upload-time = "2026-03-09T07:58:23.067Z" }, - { url = 
"https://files.pythonhosted.org/packages/f3/3d/0876746044db2adcb11549f214d104f2e1be00f07a67edbb4e2812094847/numpy-2.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0200b25c687033316fb39f0ff4e3e690e8957a2c3c8d22499891ec58c37a3eb5", size = 18380384, upload-time = "2026-03-09T07:58:25.839Z" }, - { url = "https://files.pythonhosted.org/packages/07/12/8160bea39da3335737b10308df4f484235fd297f556745f13092aa039d3b/numpy-2.4.3-cp314-cp314t-win32.whl", hash = "sha256:5e10da9e93247e554bb1d22f8edc51847ddd7dde52d85ce31024c1b4312bfba0", size = 6154547, upload-time = "2026-03-09T07:58:28.289Z" }, - { url = "https://files.pythonhosted.org/packages/42/f3/76534f61f80d74cc9cdf2e570d3d4eeb92c2280a27c39b0aaf471eda7b48/numpy-2.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:45f003dbdffb997a03da2d1d0cb41fbd24a87507fb41605c0420a3db5bd4667b", size = 12633645, upload-time = "2026-03-09T07:58:30.384Z" }, - { url = "https://files.pythonhosted.org/packages/1f/b6/7c0d4334c15983cec7f92a69e8ce9b1e6f31857e5ee3a413ac424e6bd63d/numpy-2.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:4d382735cecd7bcf090172489a525cd7d4087bc331f7df9f60ddc9a296cf208e", size = 10565454, upload-time = "2026-03-09T07:58:33.031Z" }, - { url = "https://files.pythonhosted.org/packages/64/e4/4dab9fb43c83719c29241c535d9e07be73bea4bc0c6686c5816d8e1b6689/numpy-2.4.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c6b124bfcafb9e8d3ed09130dbee44848c20b3e758b6bbf006e641778927c028", size = 16834892, upload-time = "2026-03-09T07:58:35.334Z" }, - { url = "https://files.pythonhosted.org/packages/c9/29/f8b6d4af90fed3dfda84ebc0df06c9833d38880c79ce954e5b661758aa31/numpy-2.4.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:76dbb9d4e43c16cf9aa711fcd8de1e2eeb27539dcefb60a1d5e9f12fae1d1ed8", size = 14893070, upload-time = "2026-03-09T07:58:37.7Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/04/a19b3c91dbec0a49269407f15d5753673a09832daed40c45e8150e6fa558/numpy-2.4.3-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:29363fbfa6f8ee855d7569c96ce524845e3d726d6c19b29eceec7dd555dab152", size = 5399609, upload-time = "2026-03-09T07:58:39.853Z" }, - { url = "https://files.pythonhosted.org/packages/79/34/4d73603f5420eab89ea8a67097b31364bf7c30f811d4dd84b1659c7476d9/numpy-2.4.3-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:bc71942c789ef415a37f0d4eab90341425a00d538cd0642445d30b41023d3395", size = 6714355, upload-time = "2026-03-09T07:58:42.365Z" }, - { url = "https://files.pythonhosted.org/packages/58/ad/1100d7229bb248394939a12a8074d485b655e8ed44207d328fdd7fcebc7b/numpy-2.4.3-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e58765ad74dcebd3ef0208a5078fba32dc8ec3578fe84a604432950cd043d79", size = 15800434, upload-time = "2026-03-09T07:58:44.837Z" }, - { url = "https://files.pythonhosted.org/packages/0c/fd/16d710c085d28ba4feaf29ac60c936c9d662e390344f94a6beaa2ac9899b/numpy-2.4.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e236dbda4e1d319d681afcbb136c0c4a8e0f1a5c58ceec2adebb547357fe857", size = 16729409, upload-time = "2026-03-09T07:58:47.972Z" }, - { url = "https://files.pythonhosted.org/packages/57/a7/b35835e278c18b85206834b3aa3abe68e77a98769c59233d1f6300284781/numpy-2.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4b42639cdde6d24e732ff823a3fa5b701d8acad89c4142bc1d0bd6dc85200ba5", size = 12504685, upload-time = "2026-03-09T07:58:50.525Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/d7/9f/b8cef5bffa569759033adda9481211426f12f53299629b410340795c2514/numpy-2.4.4.tar.gz", hash = "sha256:2d390634c5182175533585cc89f3608a4682ccb173cc9bb940b2881c8d6f8fa0", size = 20731587, upload-time = "2026-03-29T13:22:01.298Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ef/c6/4218570d8c8ecc9704b5157a3348e486e84ef4be0ed3e38218ab473c83d2/numpy-2.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f983334aea213c99992053ede6168500e5f086ce74fbc4acc3f2b00f5762e9db", size = 16976799, upload-time = "2026-03-29T13:18:15.438Z" }, + { url = "https://files.pythonhosted.org/packages/dd/92/b4d922c4a5f5dab9ed44e6153908a5c665b71acf183a83b93b690996e39b/numpy-2.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72944b19f2324114e9dc86a159787333b77874143efcf89a5167ef83cfee8af0", size = 14971552, upload-time = "2026-03-29T13:18:18.606Z" }, + { url = "https://files.pythonhosted.org/packages/8a/dc/df98c095978fa6ee7b9a9387d1d58cbb3d232d0e69ad169a4ce784bde4fd/numpy-2.4.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:86b6f55f5a352b48d7fbfd2dbc3d5b780b2d79f4d3c121f33eb6efb22e9a2015", size = 5476566, upload-time = "2026-03-29T13:18:21.532Z" }, + { url = "https://files.pythonhosted.org/packages/28/34/b3fdcec6e725409223dd27356bdf5a3c2cc2282e428218ecc9cb7acc9763/numpy-2.4.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:ba1f4fc670ed79f876f70082eff4f9583c15fb9a4b89d6188412de4d18ae2f40", size = 6806482, upload-time = "2026-03-29T13:18:23.634Z" }, + { url = "https://files.pythonhosted.org/packages/68/62/63417c13aa35d57bee1337c67446761dc25ea6543130cf868eace6e8157b/numpy-2.4.4-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a87ec22c87be071b6bdbd27920b129b94f2fc964358ce38f3822635a3e2e03d", size = 15973376, upload-time = "2026-03-29T13:18:26.677Z" }, + { url = "https://files.pythonhosted.org/packages/cf/c5/9fcb7e0e69cef59cf10c746b84f7d58b08bc66a6b7d459783c5a4f6101a6/numpy-2.4.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:df3775294accfdd75f32c74ae39fcba920c9a378a2fc18a12b6820aa8c1fb502", size = 16925137, upload-time = "2026-03-29T13:18:30.14Z" }, + { url = 
"https://files.pythonhosted.org/packages/7e/43/80020edacb3f84b9efdd1591120a4296462c23fd8db0dde1666f6ef66f13/numpy-2.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0d4e437e295f18ec29bc79daf55e8a47a9113df44d66f702f02a293d93a2d6dd", size = 17329414, upload-time = "2026-03-29T13:18:33.733Z" }, + { url = "https://files.pythonhosted.org/packages/fd/06/af0658593b18a5f73532d377188b964f239eb0894e664a6c12f484472f97/numpy-2.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6aa3236c78803afbcb255045fbef97a9e25a1f6c9888357d205ddc42f4d6eba5", size = 18658397, upload-time = "2026-03-29T13:18:37.511Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ce/13a09ed65f5d0ce5c7dd0669250374c6e379910f97af2c08c57b0608eee4/numpy-2.4.4-cp311-cp311-win32.whl", hash = "sha256:30caa73029a225b2d40d9fae193e008e24b2026b7ee1a867b7ee8d96ca1a448e", size = 6239499, upload-time = "2026-03-29T13:18:40.372Z" }, + { url = "https://files.pythonhosted.org/packages/bd/63/05d193dbb4b5eec1eca73822d80da98b511f8328ad4ae3ca4caf0f4db91d/numpy-2.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:6bbe4eb67390b0a0265a2c25458f6b90a409d5d069f1041e6aff1e27e3d9a79e", size = 12614257, upload-time = "2026-03-29T13:18:42.95Z" }, + { url = "https://files.pythonhosted.org/packages/87/c5/8168052f080c26fa984c413305012be54741c9d0d74abd7fbeeccae3889f/numpy-2.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:fcfe2045fd2e8f3cb0ce9d4ba6dba6333b8fa05bb8a4939c908cd43322d14c7e", size = 10486775, upload-time = "2026-03-29T13:18:45.835Z" }, + { url = "https://files.pythonhosted.org/packages/28/05/32396bec30fb2263770ee910142f49c1476d08e8ad41abf8403806b520ce/numpy-2.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:15716cfef24d3a9762e3acdf87e27f58dc823d1348f765bbea6bef8c639bfa1b", size = 16689272, upload-time = "2026-03-29T13:18:49.223Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f3/a983d28637bfcd763a9c7aafdb6d5c0ebf3d487d1e1459ffdb57e2f01117/numpy-2.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:23cbfd4c17357c81021f21540da84ee282b9c8fba38a03b7b9d09ba6b951421e", size = 14699573, upload-time = "2026-03-29T13:18:52.629Z" }, + { url = "https://files.pythonhosted.org/packages/9b/fd/e5ecca1e78c05106d98028114f5c00d3eddb41207686b2b7de3e477b0e22/numpy-2.4.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8b3b60bb7cba2c8c81837661c488637eee696f59a877788a396d33150c35d842", size = 5204782, upload-time = "2026-03-29T13:18:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/702a4594413c1a8632092beae8aba00f1d67947389369b3777aed783fdca/numpy-2.4.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e4a010c27ff6f210ff4c6ef34394cd61470d01014439b192ec22552ee867f2a8", size = 6552038, upload-time = "2026-03-29T13:18:57.769Z" }, + { url = "https://files.pythonhosted.org/packages/7f/37/eed308a8f56cba4d1fdf467a4fc67ef4ff4bf1c888f5fc980481890104b1/numpy-2.4.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9e75681b59ddaa5e659898085ae0eaea229d054f2ac0c7e563a62205a700121", size = 15670666, upload-time = "2026-03-29T13:19:00.341Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0d/0e3ecece05b7a7e87ab9fb587855548da437a061326fff64a223b6dcb78a/numpy-2.4.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:81f4a14bee47aec54f883e0cad2d73986640c1590eb9bfaaba7ad17394481e6e", size = 16645480, upload-time = "2026-03-29T13:19:03.63Z" }, + { url = "https://files.pythonhosted.org/packages/34/49/f2312c154b82a286758ee2f1743336d50651f8b5195db18cdb63675ff649/numpy-2.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:62d6b0f03b694173f9fcb1fb317f7222fd0b0b103e784c6549f5e53a27718c44", size = 17020036, upload-time = "2026-03-29T13:19:07.428Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e9/736d17bd77f1b0ec4f9901aaec129c00d59f5d84d5e79bba540ef12c2330/numpy-2.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbc356aae7adf9e6336d336b9c8111d390a05df88f1805573ebb0807bd06fd1d", size = 
18368643, upload-time = "2026-03-29T13:19:10.775Z" }, + { url = "https://files.pythonhosted.org/packages/63/f6/d417977c5f519b17c8a5c3bc9e8304b0908b0e21136fe43bf628a1343914/numpy-2.4.4-cp312-cp312-win32.whl", hash = "sha256:0d35aea54ad1d420c812bfa0385c71cd7cc5bcf7c65fed95fc2cd02fe8c79827", size = 5961117, upload-time = "2026-03-29T13:19:13.464Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5b/e1deebf88ff431b01b7406ca3583ab2bbb90972bbe1c568732e49c844f7e/numpy-2.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:b5f0362dc928a6ecd9db58868fca5e48485205e3855957bdedea308f8672ea4a", size = 12320584, upload-time = "2026-03-29T13:19:16.155Z" }, + { url = "https://files.pythonhosted.org/packages/58/89/e4e856ac82a68c3ed64486a544977d0e7bdd18b8da75b78a577ca31c4395/numpy-2.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:846300f379b5b12cc769334464656bc882e0735d27d9726568bc932fdc49d5ec", size = 10221450, upload-time = "2026-03-29T13:19:18.994Z" }, + { url = "https://files.pythonhosted.org/packages/14/1d/d0a583ce4fefcc3308806a749a536c201ed6b5ad6e1322e227ee4848979d/numpy-2.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08f2e31ed5e6f04b118e49821397f12767934cfdd12a1ce86a058f91e004ee50", size = 16684933, upload-time = "2026-03-29T13:19:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/c1/62/2b7a48fbb745d344742c0277f01286dead15f3f68e4f359fbfcf7b48f70f/numpy-2.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e823b8b6edc81e747526f70f71a9c0a07ac4e7ad13020aa736bb7c9d67196115", size = 14694532, upload-time = "2026-03-29T13:19:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/499737bfba066b4a3bebff24a8f1c5b2dee410b209bc6668c9be692580f0/numpy-2.4.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4a19d9dba1a76618dd86b164d608566f393f8ec6ac7c44f0cc879011c45e65af", size = 5199661, upload-time = "2026-03-29T13:19:28.31Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/da/464d551604320d1491bc345efed99b4b7034143a85787aab78d5691d5a0e/numpy-2.4.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d2a8490669bfe99a233298348acc2d824d496dee0e66e31b66a6022c2ad74a5c", size = 6547539, upload-time = "2026-03-29T13:19:30.97Z" }, + { url = "https://files.pythonhosted.org/packages/7d/90/8d23e3b0dafd024bf31bdec225b3bb5c2dbfa6912f8a53b8659f21216cbf/numpy-2.4.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45dbed2ab436a9e826e302fcdcbe9133f9b0006e5af7168afb8963a6520da103", size = 15668806, upload-time = "2026-03-29T13:19:33.887Z" }, + { url = "https://files.pythonhosted.org/packages/d1/73/a9d864e42a01896bb5974475438f16086be9ba1f0d19d0bb7a07427c4a8b/numpy-2.4.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c901b15172510173f5cb310eae652908340f8dede90fff9e3bf6c0d8dfd92f83", size = 16632682, upload-time = "2026-03-29T13:19:37.336Z" }, + { url = "https://files.pythonhosted.org/packages/34/fb/14570d65c3bde4e202a031210475ae9cde9b7686a2e7dc97ee67d2833b35/numpy-2.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:99d838547ace2c4aace6c4f76e879ddfe02bb58a80c1549928477862b7a6d6ed", size = 17019810, upload-time = "2026-03-29T13:19:40.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/77/2ba9d87081fd41f6d640c83f26fb7351e536b7ce6dd9061b6af5904e8e46/numpy-2.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0aec54fd785890ecca25a6003fd9a5aed47ad607bbac5cd64f836ad8666f4959", size = 18357394, upload-time = "2026-03-29T13:19:44.859Z" }, + { url = "https://files.pythonhosted.org/packages/a2/23/52666c9a41708b0853fa3b1a12c90da38c507a3074883823126d4e9d5b30/numpy-2.4.4-cp313-cp313-win32.whl", hash = "sha256:07077278157d02f65c43b1b26a3886bce886f95d20aabd11f87932750dfb14ed", size = 5959556, upload-time = "2026-03-29T13:19:47.661Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/fb/48649b4971cde70d817cf97a2a2fdc0b4d8308569f1dd2f2611959d2e0cf/numpy-2.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:5c70f1cc1c4efbe316a572e2d8b9b9cc44e89b95f79ca3331553fbb63716e2bf", size = 12317311, upload-time = "2026-03-29T13:19:50.67Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d8/11490cddd564eb4de97b4579ef6bfe6a736cc07e94c1598590ae25415e01/numpy-2.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:ef4059d6e5152fa1a39f888e344c73fdc926e1b2dd58c771d67b0acfbf2aa67d", size = 10222060, upload-time = "2026-03-29T13:19:54.229Z" }, + { url = "https://files.pythonhosted.org/packages/99/5d/dab4339177a905aad3e2221c915b35202f1ec30d750dd2e5e9d9a72b804b/numpy-2.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4bbc7f303d125971f60ec0aaad5e12c62d0d2c925f0ab1273debd0e4ba37aba5", size = 14822302, upload-time = "2026-03-29T13:19:57.585Z" }, + { url = "https://files.pythonhosted.org/packages/eb/e4/0564a65e7d3d97562ed6f9b0fd0fb0a6f559ee444092f105938b50043876/numpy-2.4.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:4d6d57903571f86180eb98f8f0c839fa9ebbfb031356d87f1361be91e433f5b7", size = 5327407, upload-time = "2026-03-29T13:20:00.601Z" }, + { url = "https://files.pythonhosted.org/packages/29/8d/35a3a6ce5ad371afa58b4700f1c820f8f279948cca32524e0a695b0ded83/numpy-2.4.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:4636de7fd195197b7535f231b5de9e4b36d2c440b6e566d2e4e4746e6af0ca93", size = 6647631, upload-time = "2026-03-29T13:20:02.855Z" }, + { url = "https://files.pythonhosted.org/packages/f4/da/477731acbd5a58a946c736edfdabb2ac5b34c3d08d1ba1a7b437fa0884df/numpy-2.4.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ad2e2ef14e0b04e544ea2fa0a36463f847f113d314aa02e5b402fdf910ef309e", size = 15727691, upload-time = "2026-03-29T13:20:06.004Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/db/338535d9b152beabeb511579598418ba0212ce77cf9718edd70262cc4370/numpy-2.4.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a285b3b96f951841799528cd1f4f01cd70e7e0204b4abebac9463eecfcf2a40", size = 16681241, upload-time = "2026-03-29T13:20:09.417Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a9/ad248e8f58beb7a0219b413c9c7d8151c5d285f7f946c3e26695bdbbe2df/numpy-2.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f8474c4241bc18b750be2abea9d7a9ec84f46ef861dbacf86a4f6e043401f79e", size = 17085767, upload-time = "2026-03-29T13:20:13.126Z" }, + { url = "https://files.pythonhosted.org/packages/b5/1a/3b88ccd3694681356f70da841630e4725a7264d6a885c8d442a697e1146b/numpy-2.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4e874c976154687c1f71715b034739b45c7711bec81db01914770373d125e392", size = 18403169, upload-time = "2026-03-29T13:20:17.096Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c9/fcfd5d0639222c6eac7f304829b04892ef51c96a75d479214d77e3ce6e33/numpy-2.4.4-cp313-cp313t-win32.whl", hash = "sha256:9c585a1790d5436a5374bac930dad6ed244c046ed91b2b2a3634eb2971d21008", size = 6083477, upload-time = "2026-03-29T13:20:20.195Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e3/3938a61d1c538aaec8ed6fd6323f57b0c2d2d2219512434c5c878db76553/numpy-2.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:93e15038125dc1e5345d9b5b68aa7f996ec33b98118d18c6ca0d0b7d6198b7e8", size = 12457487, upload-time = "2026-03-29T13:20:22.946Z" }, + { url = "https://files.pythonhosted.org/packages/97/6a/7e345032cc60501721ef94e0e30b60f6b0bd601f9174ebd36389a2b86d40/numpy-2.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:0dfd3f9d3adbe2920b68b5cd3d51444e13a10792ec7154cd0a2f6e74d4ab3233", size = 10292002, upload-time = "2026-03-29T13:20:25.909Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/06/c54062f85f673dd5c04cbe2f14c3acb8c8b95e3384869bb8cc9bff8cb9df/numpy-2.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f169b9a863d34f5d11b8698ead99febeaa17a13ca044961aa8e2662a6c7766a0", size = 16684353, upload-time = "2026-03-29T13:20:29.504Z" }, + { url = "https://files.pythonhosted.org/packages/4c/39/8a320264a84404c74cc7e79715de85d6130fa07a0898f67fb5cd5bd79908/numpy-2.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2483e4584a1cb3092da4470b38866634bafb223cbcd551ee047633fd2584599a", size = 14704914, upload-time = "2026-03-29T13:20:33.547Z" }, + { url = "https://files.pythonhosted.org/packages/91/fb/287076b2614e1d1044235f50f03748f31fa287e3dbe6abeb35cdfa351eca/numpy-2.4.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2d19e6e2095506d1736b7d80595e0f252d76b89f5e715c35e06e937679ea7d7a", size = 5210005, upload-time = "2026-03-29T13:20:36.45Z" }, + { url = "https://files.pythonhosted.org/packages/63/eb/fcc338595309910de6ecabfcef2419a9ce24399680bfb149421fa2df1280/numpy-2.4.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6a246d5914aa1c820c9443ddcee9c02bec3e203b0c080349533fae17727dfd1b", size = 6544974, upload-time = "2026-03-29T13:20:39.014Z" }, + { url = "https://files.pythonhosted.org/packages/44/5d/e7e9044032a716cdfaa3fba27a8e874bf1c5f1912a1ddd4ed071bf8a14a6/numpy-2.4.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:989824e9faf85f96ec9c7761cd8d29c531ad857bfa1daa930cba85baaecf1a9a", size = 15684591, upload-time = "2026-03-29T13:20:42.146Z" }, + { url = "https://files.pythonhosted.org/packages/98/7c/21252050676612625449b4807d6b695b9ce8a7c9e1c197ee6216c8a65c7c/numpy-2.4.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27a8d92cd10f1382a67d7cf4db7ce18341b66438bdd9f691d7b0e48d104c2a9d", size = 16637700, upload-time = "2026-03-29T13:20:46.204Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/29/56d2bbef9465db24ef25393383d761a1af4f446a1df9b8cded4fe3a5a5d7/numpy-2.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e44319a2953c738205bf3354537979eaa3998ed673395b964c1176083dd46252", size = 17035781, upload-time = "2026-03-29T13:20:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2b/a35a6d7589d21f44cea7d0a98de5ddcbb3d421b2622a5c96b1edf18707c3/numpy-2.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e892aff75639bbef0d2a2cfd55535510df26ff92f63c92cd84ef8d4ba5a5557f", size = 18362959, upload-time = "2026-03-29T13:20:54.019Z" }, + { url = "https://files.pythonhosted.org/packages/64/c9/d52ec581f2390e0f5f85cbfd80fb83d965fc15e9f0e1aec2195faa142cde/numpy-2.4.4-cp314-cp314-win32.whl", hash = "sha256:1378871da56ca8943c2ba674530924bb8ca40cd228358a3b5f302ad60cf875fc", size = 6008768, upload-time = "2026-03-29T13:20:56.912Z" }, + { url = "https://files.pythonhosted.org/packages/fa/22/4cc31a62a6c7b74a8730e31a4274c5dc80e005751e277a2ce38e675e4923/numpy-2.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:715d1c092715954784bc79e1174fc2a90093dc4dc84ea15eb14dad8abdcdeb74", size = 12449181, upload-time = "2026-03-29T13:20:59.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/2e/14cda6f4d8e396c612d1bf97f22958e92148801d7e4f110cabebdc0eef4b/numpy-2.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:2c194dd721e54ecad9ad387c1d35e63dce5c4450c6dc7dd5611283dda239aabb", size = 10496035, upload-time = "2026-03-29T13:21:02.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/8fed8c8d848d7ecea092dc3469643f9d10bc3a134a815a3b033da1d2039b/numpy-2.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2aa0613a5177c264ff5921051a5719d20095ea586ca88cc802c5c218d1c67d3e", size = 14824958, upload-time = "2026-03-29T13:21:05.671Z" }, + { url = "https://files.pythonhosted.org/packages/05/1a/d8007a5138c179c2bf33ef44503e83d70434d2642877ee8fbb230e7c0548/numpy-2.4.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = 
"sha256:42c16925aa5a02362f986765f9ebabf20de75cdefdca827d14315c568dcab113", size = 5330020, upload-time = "2026-03-29T13:21:08.635Z" }, + { url = "https://files.pythonhosted.org/packages/99/64/ffb99ac6ae93faf117bcbd5c7ba48a7f45364a33e8e458545d3633615dda/numpy-2.4.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:874f200b2a981c647340f841730fc3a2b54c9d940566a3c4149099591e2c4c3d", size = 6650758, upload-time = "2026-03-29T13:21:10.949Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6e/795cc078b78a384052e73b2f6281ff7a700e9bf53bcce2ee579d4f6dd879/numpy-2.4.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9b39d38a9bd2ae1becd7eac1303d031c5c110ad31f2b319c6e7d98b135c934d", size = 15729948, upload-time = "2026-03-29T13:21:14.047Z" }, + { url = "https://files.pythonhosted.org/packages/5f/86/2acbda8cc2af5f3d7bfc791192863b9e3e19674da7b5e533fded124d1299/numpy-2.4.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b268594bccac7d7cf5844c7732e3f20c50921d94e36d7ec9b79e9857694b1b2f", size = 16679325, upload-time = "2026-03-29T13:21:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/bc/59/cafd83018f4aa55e0ac6fa92aa066c0a1877b77a615ceff1711c260ffae8/numpy-2.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ac6b31e35612a26483e20750126d30d0941f949426974cace8e6b5c58a3657b0", size = 17084883, upload-time = "2026-03-29T13:21:21.106Z" }, + { url = "https://files.pythonhosted.org/packages/f0/85/a42548db84e65ece46ab2caea3d3f78b416a47af387fcbb47ec28e660dc2/numpy-2.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e3ed142f2728df44263aaf5fb1f5b0b99f4070c553a0d7f033be65338329150", size = 18403474, upload-time = "2026-03-29T13:21:24.828Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ad/483d9e262f4b831000062e5d8a45e342166ec8aaa1195264982bca267e62/numpy-2.4.4-cp314-cp314t-win32.whl", hash = "sha256:dddbbd259598d7240b18c9d87c56a9d2fb3b02fe266f49a7c101532e78c1d871", size = 6155500, 
upload-time = "2026-03-29T13:21:28.205Z" }, + { url = "https://files.pythonhosted.org/packages/c7/03/2fc4e14c7bd4ff2964b74ba90ecb8552540b6315f201df70f137faa5c589/numpy-2.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:a7164afb23be6e37ad90b2f10426149fd75aee07ca55653d2aa41e66c4ef697e", size = 12637755, upload-time = "2026-03-29T13:21:31.107Z" }, + { url = "https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, + { url = "https://files.pythonhosted.org/packages/6b/33/8fae8f964a4f63ed528264ddf25d2b683d0b663e3cba26961eb838a7c1bd/numpy-2.4.4-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:58c8b5929fcb8287cbd6f0a3fae19c6e03a5c48402ae792962ac465224a629a4", size = 16854491, upload-time = "2026-03-29T13:21:38.03Z" }, + { url = "https://files.pythonhosted.org/packages/bc/d0/1aabee441380b981cf8cdda3ae7a46aa827d1b5a8cce84d14598bc94d6d9/numpy-2.4.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:eea7ac5d2dce4189771cedb559c738a71512768210dc4e4753b107a2048b3d0e", size = 14895830, upload-time = "2026-03-29T13:21:41.509Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b8/aafb0d1065416894fccf4df6b49ef22b8db045187949545bced89c034b8e/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:51fc224f7ca4d92656d5a5eb315f12eb5fe2c97a66249aa7b5f562528a3be38c", size = 5400927, upload-time = "2026-03-29T13:21:44.747Z" }, + { url = "https://files.pythonhosted.org/packages/d6/77/063baa20b08b431038c7f9ff5435540c7b7265c78cf56012a483019ca72d/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:28a650663f7314afc3e6ec620f44f333c386aad9f6fc472030865dc0ebb26ee3", size = 6715557, upload-time = "2026-03-29T13:21:47.406Z" }, + { url = 
"https://files.pythonhosted.org/packages/c7/a8/379542d45a14f149444c5c4c4e7714707239ce9cc1de8c2803958889da14/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19710a9ca9992d7174e9c52f643d4272dcd1558c5f7af7f6f8190f633bd651a7", size = 15804253, upload-time = "2026-03-29T13:21:50.753Z" }, + { url = "https://files.pythonhosted.org/packages/a2/c8/f0a45426d6d21e7ea3310a15cf90c43a14d9232c31a837702dba437f3373/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b2aec6af35c113b05695ebb5749a787acd63cafc83086a05771d1e1cd1e555f", size = 16753552, upload-time = "2026-03-29T13:21:54.344Z" }, + { url = "https://files.pythonhosted.org/packages/04/74/f4c001f4714c3ad9ce037e18cf2b9c64871a84951eaa0baf683a9ca9301c/numpy-2.4.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f2cf083b324a467e1ab358c105f6cad5ea950f50524668a80c486ff1db24e119", size = 12509075, upload-time = "2026-03-29T13:21:57.644Z" }, ] [[package]] @@ -1170,92 +1138,92 @@ wheels = [ [[package]] name = "orjson" -version = "3.11.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/de/1a/a373746fa6d0e116dd9e54371a7b54622c44d12296d5d0f3ad5e3ff33490/orjson-3.11.7-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a02c833f38f36546ba65a452127633afce4cf0dd7296b753d3bb54e55e5c0174", size = 229140, upload-time = "2026-02-02T15:37:06.082Z" }, - { url = "https://files.pythonhosted.org/packages/52/a2/fa129e749d500f9b183e8a3446a193818a25f60261e9ce143ad61e975208/orjson-3.11.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b63c6e6738d7c3470ad01601e23376aa511e50e1f3931395b9f9c722406d1a67", size = 128670, upload-time = "2026-02-02T15:37:08.002Z" }, - { url = "https://files.pythonhosted.org/packages/08/93/1e82011cd1e0bd051ef9d35bed1aa7fb4ea1f0a055dc2c841b46b43a9ebd/orjson-3.11.7-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:043d3006b7d32c7e233b8cfb1f01c651013ea079e08dcef7189a29abd8befe11", size = 123832, upload-time = "2026-02-02T15:37:09.191Z" }, - { url = "https://files.pythonhosted.org/packages/fe/d8/a26b431ef962c7d55736674dddade876822f3e33223c1f47a36879350d04/orjson-3.11.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57036b27ac8a25d81112eb0cc9835cd4833c5b16e1467816adc0015f59e870dc", size = 129171, upload-time = "2026-02-02T15:37:11.112Z" }, - { url = "https://files.pythonhosted.org/packages/a7/19/f47819b84a580f490da260c3ee9ade214cf4cf78ac9ce8c1c758f80fdfc9/orjson-3.11.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:733ae23ada68b804b222c44affed76b39e30806d38660bf1eb200520d259cc16", size = 141967, upload-time = "2026-02-02T15:37:12.282Z" }, - { url = "https://files.pythonhosted.org/packages/5b/cd/37ece39a0777ba077fdcdbe4cccae3be8ed00290c14bf8afdc548befc260/orjson-3.11.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5fdfad2093bdd08245f2e204d977facd5f871c88c4a71230d5bcbd0e43bf6222", size = 130991, upload-time = "2026-02-02T15:37:13.465Z" }, - { url = "https://files.pythonhosted.org/packages/8f/ed/f2b5d66aa9b6b5c02ff5f120efc7b38c7c4962b21e6be0f00fd99a5c348e/orjson-3.11.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cededd6738e1c153530793998e31c05086582b08315db48ab66649768f326baa", size = 133674, upload-time = "2026-02-02T15:37:14.694Z" }, - { url = "https://files.pythonhosted.org/packages/c4/6e/baa83e68d1aa09fa8c3e5b2c087d01d0a0bd45256de719ed7bc22c07052d/orjson-3.11.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:14f440c7268c8f8633d1b3d443a434bd70cb15686117ea6beff8fdc8f5917a1e", size = 138722, upload-time = "2026-02-02T15:37:16.501Z" }, - { url = "https://files.pythonhosted.org/packages/0c/47/7f8ef4963b772cd56999b535e553f7eb5cd27e9dd6c049baee6f18bfa05d/orjson-3.11.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3a2479753bbb95b0ebcf7969f562cdb9668e6d12416a35b0dda79febf89cdea2", size = 409056, upload-time = "2026-02-02T15:37:17.895Z" }, - { url = "https://files.pythonhosted.org/packages/38/eb/2df104dd2244b3618f25325a656f85cc3277f74bbd91224752410a78f3c7/orjson-3.11.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:71924496986275a737f38e3f22b4e0878882b3f7a310d2ff4dc96e812789120c", size = 144196, upload-time = "2026-02-02T15:37:19.349Z" }, - { url = "https://files.pythonhosted.org/packages/b6/2a/ee41de0aa3a6686598661eae2b4ebdff1340c65bfb17fcff8b87138aab21/orjson-3.11.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b4a9eefdc70bf8bf9857f0290f973dec534ac84c35cd6a7f4083be43e7170a8f", size = 134979, upload-time = "2026-02-02T15:37:20.906Z" }, - { url = "https://files.pythonhosted.org/packages/4c/fa/92fc5d3d402b87a8b28277a9ed35386218a6a5287c7fe5ee9b9f02c53fb2/orjson-3.11.7-cp310-cp310-win32.whl", hash = "sha256:ae9e0b37a834cef7ce8f99de6498f8fad4a2c0bf6bfc3d02abd8ed56aa15b2de", size = 127968, upload-time = "2026-02-02T15:37:23.178Z" }, - { url = "https://files.pythonhosted.org/packages/07/29/a576bf36d73d60df06904d3844a9df08e25d59eba64363aaf8ec2f9bff41/orjson-3.11.7-cp310-cp310-win_amd64.whl", hash = "sha256:d772afdb22555f0c58cfc741bdae44180122b3616faa1ecadb595cd526e4c993", size = 125128, upload-time = "2026-02-02T15:37:24.329Z" }, - { url = "https://files.pythonhosted.org/packages/37/02/da6cb01fc6087048d7f61522c327edf4250f1683a58a839fdcc435746dd5/orjson-3.11.7-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9487abc2c2086e7c8eb9a211d2ce8855bae0e92586279d0d27b341d5ad76c85c", size = 228664, upload-time = 
"2026-02-02T15:37:25.542Z" }, - { url = "https://files.pythonhosted.org/packages/c1/c2/5885e7a5881dba9a9af51bc564e8967225a642b3e03d089289a35054e749/orjson-3.11.7-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:79cacb0b52f6004caf92405a7e1f11e6e2de8bdf9019e4f76b44ba045125cd6b", size = 125344, upload-time = "2026-02-02T15:37:26.92Z" }, - { url = "https://files.pythonhosted.org/packages/a4/1d/4e7688de0a92d1caf600dfd5fb70b4c5bfff51dfa61ac555072ef2d0d32a/orjson-3.11.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e85fe4698b6a56d5e2ebf7ae87544d668eb6bde1ad1226c13f44663f20ec9e", size = 128404, upload-time = "2026-02-02T15:37:28.108Z" }, - { url = "https://files.pythonhosted.org/packages/2f/b2/ec04b74ae03a125db7bd69cffd014b227b7f341e3261bf75b5eb88a1aa92/orjson-3.11.7-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b8d14b71c0b12963fe8a62aac87119f1afdf4cb88a400f61ca5ae581449efcb5", size = 123677, upload-time = "2026-02-02T15:37:30.287Z" }, - { url = "https://files.pythonhosted.org/packages/4c/69/f95bdf960605f08f827f6e3291fe243d8aa9c5c9ff017a8d7232209184c3/orjson-3.11.7-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91c81ef070c8f3220054115e1ef468b1c9ce8497b4e526cb9f68ab4dc0a7ac62", size = 128950, upload-time = "2026-02-02T15:37:31.595Z" }, - { url = "https://files.pythonhosted.org/packages/a4/1b/de59c57bae1d148ef298852abd31909ac3089cff370dfd4cd84cc99cbc42/orjson-3.11.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:411ebaf34d735e25e358a6d9e7978954a9c9d58cfb47bc6683cdc3964cd2f910", size = 141756, upload-time = "2026-02-02T15:37:32.985Z" }, - { url = "https://files.pythonhosted.org/packages/ee/9e/9decc59f4499f695f65c650f6cfa6cd4c37a3fbe8fa235a0a3614cb54386/orjson-3.11.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a16bcd08ab0bcdfc7e8801d9c4a9cc17e58418e4d48ddc6ded4e9e4b1a94062b", size = 130812, upload-time = 
"2026-02-02T15:37:34.204Z" }, - { url = "https://files.pythonhosted.org/packages/28/e6/59f932bcabd1eac44e334fe8e3281a92eacfcb450586e1f4bde0423728d8/orjson-3.11.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c0b51672e466fd7e56230ffbae7f1639e18d0ce023351fb75da21b71bc2c960", size = 133444, upload-time = "2026-02-02T15:37:35.446Z" }, - { url = "https://files.pythonhosted.org/packages/f1/36/b0f05c0eaa7ca30bc965e37e6a2956b0d67adb87a9872942d3568da846ae/orjson-3.11.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:136dcd6a2e796dfd9ffca9fc027d778567b0b7c9968d092842d3c323cef88aa8", size = 138609, upload-time = "2026-02-02T15:37:36.657Z" }, - { url = "https://files.pythonhosted.org/packages/b8/03/58ec7d302b8d86944c60c7b4b82975d5161fcce4c9bc8c6cb1d6741b6115/orjson-3.11.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:7ba61079379b0ae29e117db13bda5f28d939766e410d321ec1624afc6a0b0504", size = 408918, upload-time = "2026-02-02T15:37:38.076Z" }, - { url = "https://files.pythonhosted.org/packages/06/3a/868d65ef9a8b99be723bd510de491349618abd9f62c826cf206d962db295/orjson-3.11.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0527a4510c300e3b406591b0ba69b5dc50031895b0a93743526a3fc45f59d26e", size = 143998, upload-time = "2026-02-02T15:37:39.706Z" }, - { url = "https://files.pythonhosted.org/packages/5b/c7/1e18e1c83afe3349f4f6dc9e14910f0ae5f82eac756d1412ea4018938535/orjson-3.11.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a709e881723c9b18acddcfb8ba357322491ad553e277cf467e1e7e20e2d90561", size = 134802, upload-time = "2026-02-02T15:37:41.002Z" }, - { url = "https://files.pythonhosted.org/packages/d4/0b/ccb7ee1a65b37e8eeb8b267dc953561d72370e85185e459616d4345bab34/orjson-3.11.7-cp311-cp311-win32.whl", hash = "sha256:c43b8b5bab288b6b90dac410cca7e986a4fa747a2e8f94615aea407da706980d", size = 127828, upload-time = "2026-02-02T15:37:42.241Z" }, - { url = 
"https://files.pythonhosted.org/packages/af/9e/55c776dffda3f381e0f07d010a4f5f3902bf48eaba1bb7684d301acd4924/orjson-3.11.7-cp311-cp311-win_amd64.whl", hash = "sha256:6543001328aa857187f905308a028935864aefe9968af3848401b6fe80dbb471", size = 124941, upload-time = "2026-02-02T15:37:43.444Z" }, - { url = "https://files.pythonhosted.org/packages/aa/8e/424a620fa7d263b880162505fb107ef5e0afaa765b5b06a88312ac291560/orjson-3.11.7-cp311-cp311-win_arm64.whl", hash = "sha256:1ee5cc7160a821dfe14f130bc8e63e7611051f964b463d9e2a3a573204446a4d", size = 126245, upload-time = "2026-02-02T15:37:45.18Z" }, - { url = "https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" }, - { url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" }, - { url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" }, - { url = "https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" }, - { url = 
"https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" }, - { url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" }, - { url = "https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" }, - { url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" }, - { url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" }, - { url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" }, - { url = 
"https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" }, - { url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" }, - { url = "https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" }, - { url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" }, - { url = "https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" }, - { url = "https://files.pythonhosted.org/packages/89/25/6e0e52cac5aab51d7b6dcd257e855e1dec1c2060f6b28566c509b4665f62/orjson-3.11.7-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1d98b30cc1313d52d4af17d9c3d307b08389752ec5f2e5febdfada70b0f8c733", size = 228390, upload-time = "2026-02-02T15:38:06.8Z" }, - { url = 
"https://files.pythonhosted.org/packages/a5/29/a77f48d2fc8a05bbc529e5ff481fb43d914f9e383ea2469d4f3d51df3d00/orjson-3.11.7-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:d897e81f8d0cbd2abb82226d1860ad2e1ab3ff16d7b08c96ca00df9d45409ef4", size = 125189, upload-time = "2026-02-02T15:38:08.181Z" }, - { url = "https://files.pythonhosted.org/packages/89/25/0a16e0729a0e6a1504f9d1a13cdd365f030068aab64cec6958396b9969d7/orjson-3.11.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814be4b49b228cfc0b3c565acf642dd7d13538f966e3ccde61f4f55be3e20785", size = 128106, upload-time = "2026-02-02T15:38:09.41Z" }, - { url = "https://files.pythonhosted.org/packages/66/da/a2e505469d60666a05ab373f1a6322eb671cb2ba3a0ccfc7d4bc97196787/orjson-3.11.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d06e5c5fed5caedd2e540d62e5b1c25e8c82431b9e577c33537e5fa4aa909539", size = 123363, upload-time = "2026-02-02T15:38:10.73Z" }, - { url = "https://files.pythonhosted.org/packages/23/bf/ed73f88396ea35c71b38961734ea4a4746f7ca0768bf28fd551d37e48dd0/orjson-3.11.7-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31c80ce534ac4ea3739c5ee751270646cbc46e45aea7576a38ffec040b4029a1", size = 129007, upload-time = "2026-02-02T15:38:12.138Z" }, - { url = "https://files.pythonhosted.org/packages/73/3c/b05d80716f0225fc9008fbf8ab22841dcc268a626aa550561743714ce3bf/orjson-3.11.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f50979824bde13d32b4320eedd513431c921102796d86be3eee0b58e58a3ecd1", size = 141667, upload-time = "2026-02-02T15:38:13.398Z" }, - { url = "https://files.pythonhosted.org/packages/61/e8/0be9b0addd9bf86abfc938e97441dcd0375d494594b1c8ad10fe57479617/orjson-3.11.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e54f3808e2b6b945078c41aa8d9b5834b28c50843846e97807e5adb75fa9705", size = 130832, upload-time = "2026-02-02T15:38:14.698Z" }, - { url = 
"https://files.pythonhosted.org/packages/c9/ec/c68e3b9021a31d9ec15a94931db1410136af862955854ed5dd7e7e4f5bff/orjson-3.11.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12b80df61aab7b98b490fe9e4879925ba666fccdfcd175252ce4d9035865ace", size = 133373, upload-time = "2026-02-02T15:38:16.109Z" }, - { url = "https://files.pythonhosted.org/packages/d2/45/f3466739aaafa570cc8e77c6dbb853c48bf56e3b43738020e2661e08b0ac/orjson-3.11.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:996b65230271f1a97026fd0e6a753f51fbc0c335d2ad0c6201f711b0da32693b", size = 138307, upload-time = "2026-02-02T15:38:17.453Z" }, - { url = "https://files.pythonhosted.org/packages/e1/84/9f7f02288da1ffb31405c1be07657afd1eecbcb4b64ee2817b6fe0f785fa/orjson-3.11.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ab49d4b2a6a1d415ddb9f37a21e02e0d5dbfe10b7870b21bf779fc21e9156157", size = 408695, upload-time = "2026-02-02T15:38:18.831Z" }, - { url = "https://files.pythonhosted.org/packages/18/07/9dd2f0c0104f1a0295ffbe912bc8d63307a539b900dd9e2c48ef7810d971/orjson-3.11.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:390a1dce0c055ddf8adb6aa94a73b45a4a7d7177b5c584b8d1c1947f2ba60fb3", size = 144099, upload-time = "2026-02-02T15:38:20.28Z" }, - { url = "https://files.pythonhosted.org/packages/a5/66/857a8e4a3292e1f7b1b202883bcdeb43a91566cf59a93f97c53b44bd6801/orjson-3.11.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1eb80451a9c351a71dfaf5b7ccc13ad065405217726b59fdbeadbcc544f9d223", size = 134806, upload-time = "2026-02-02T15:38:22.186Z" }, - { url = "https://files.pythonhosted.org/packages/0a/5b/6ebcf3defc1aab3a338ca777214966851e92efb1f30dc7fc8285216e6d1b/orjson-3.11.7-cp313-cp313-win32.whl", hash = "sha256:7477aa6a6ec6139c5cb1cc7b214643592169a5494d200397c7fc95d740d5fcf3", size = 127914, upload-time = "2026-02-02T15:38:23.511Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/04/c6f72daca5092e3117840a1b1e88dfc809cc1470cf0734890d0366b684a1/orjson-3.11.7-cp313-cp313-win_amd64.whl", hash = "sha256:b9f95dcdea9d4f805daa9ddf02617a89e484c6985fa03055459f90e87d7a0757", size = 124986, upload-time = "2026-02-02T15:38:24.836Z" }, - { url = "https://files.pythonhosted.org/packages/03/ba/077a0f6f1085d6b806937246860fafbd5b17f3919c70ee3f3d8d9c713f38/orjson-3.11.7-cp313-cp313-win_arm64.whl", hash = "sha256:800988273a014a0541483dc81021247d7eacb0c845a9d1a34a422bc718f41539", size = 126045, upload-time = "2026-02-02T15:38:26.216Z" }, - { url = "https://files.pythonhosted.org/packages/e9/1e/745565dca749813db9a093c5ebc4bac1a9475c64d54b95654336ac3ed961/orjson-3.11.7-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:de0a37f21d0d364954ad5de1970491d7fbd0fb1ef7417d4d56a36dc01ba0c0a0", size = 228391, upload-time = "2026-02-02T15:38:27.757Z" }, - { url = "https://files.pythonhosted.org/packages/46/19/e40f6225da4d3aa0c8dc6e5219c5e87c2063a560fe0d72a88deb59776794/orjson-3.11.7-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:c2428d358d85e8da9d37cba18b8c4047c55222007a84f97156a5b22028dfbfc0", size = 125188, upload-time = "2026-02-02T15:38:29.241Z" }, - { url = "https://files.pythonhosted.org/packages/9d/7e/c4de2babef2c0817fd1f048fd176aa48c37bec8aef53d2fa932983032cce/orjson-3.11.7-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c4bc6c6ac52cdaa267552544c73e486fecbd710b7ac09bc024d5a78555a22f6", size = 128097, upload-time = "2026-02-02T15:38:30.618Z" }, - { url = "https://files.pythonhosted.org/packages/eb/74/233d360632bafd2197f217eee7fb9c9d0229eac0c18128aee5b35b0014fe/orjson-3.11.7-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd0d68edd7dfca1b2eca9361a44ac9f24b078de3481003159929a0573f21a6bf", size = 123364, upload-time = "2026-02-02T15:38:32.363Z" }, - { url = 
"https://files.pythonhosted.org/packages/79/51/af79504981dd31efe20a9e360eb49c15f06df2b40e7f25a0a52d9ae888e8/orjson-3.11.7-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:623ad1b9548ef63886319c16fa317848e465a21513b31a6ad7b57443c3e0dcf5", size = 129076, upload-time = "2026-02-02T15:38:33.68Z" }, - { url = "https://files.pythonhosted.org/packages/67/e2/da898eb68b72304f8de05ca6715870d09d603ee98d30a27e8a9629abc64b/orjson-3.11.7-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6e776b998ac37c0396093d10290e60283f59cfe0fc3fccbd0ccc4bd04dd19892", size = 141705, upload-time = "2026-02-02T15:38:34.989Z" }, - { url = "https://files.pythonhosted.org/packages/c5/89/15364d92acb3d903b029e28d834edb8780c2b97404cbf7929aa6b9abdb24/orjson-3.11.7-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:652c6c3af76716f4a9c290371ba2e390ede06f6603edb277b481daf37f6f464e", size = 130855, upload-time = "2026-02-02T15:38:36.379Z" }, - { url = "https://files.pythonhosted.org/packages/c2/8b/ecdad52d0b38d4b8f514be603e69ccd5eacf4e7241f972e37e79792212ec/orjson-3.11.7-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a56df3239294ea5964adf074c54bcc4f0ccd21636049a2cf3ca9cf03b5d03cf1", size = 133386, upload-time = "2026-02-02T15:38:37.704Z" }, - { url = "https://files.pythonhosted.org/packages/b9/0e/45e1dcf10e17d0924b7c9162f87ec7b4ca79e28a0548acf6a71788d3e108/orjson-3.11.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bda117c4148e81f746655d5a3239ae9bd00cb7bc3ca178b5fc5a5997e9744183", size = 138295, upload-time = "2026-02-02T15:38:39.096Z" }, - { url = "https://files.pythonhosted.org/packages/63/d7/4d2e8b03561257af0450f2845b91fbd111d7e526ccdf737267108075e0ba/orjson-3.11.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:23d6c20517a97a9daf1d48b580fcdc6f0516c6f4b5038823426033690b4d2650", size = 408720, upload-time = "2026-02-02T15:38:40.634Z" }, - { url = 
"https://files.pythonhosted.org/packages/78/cf/d45343518282108b29c12a65892445fc51f9319dc3c552ceb51bb5905ed2/orjson-3.11.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8ff206156006da5b847c9304b6308a01e8cdbc8cce824e2779a5ba71c3def141", size = 144152, upload-time = "2026-02-02T15:38:42.262Z" }, - { url = "https://files.pythonhosted.org/packages/a9/3a/d6001f51a7275aacd342e77b735c71fa04125a3f93c36fee4526bc8c654e/orjson-3.11.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:962d046ee1765f74a1da723f4b33e3b228fe3a48bd307acce5021dfefe0e29b2", size = 134814, upload-time = "2026-02-02T15:38:43.627Z" }, - { url = "https://files.pythonhosted.org/packages/1d/d3/f19b47ce16820cc2c480f7f1723e17f6d411b3a295c60c8ad3aa9ff1c96a/orjson-3.11.7-cp314-cp314-win32.whl", hash = "sha256:89e13dd3f89f1c38a9c9eba5fbf7cdc2d1feca82f5f290864b4b7a6aac704576", size = 127997, upload-time = "2026-02-02T15:38:45.06Z" }, - { url = "https://files.pythonhosted.org/packages/12/df/172771902943af54bf661a8d102bdf2e7f932127968080632bda6054b62c/orjson-3.11.7-cp314-cp314-win_amd64.whl", hash = "sha256:845c3e0d8ded9c9271cd79596b9b552448b885b97110f628fb687aee2eed11c1", size = 124985, upload-time = "2026-02-02T15:38:46.388Z" }, - { url = "https://files.pythonhosted.org/packages/6f/1c/f2a8d8a1b17514660a614ce5f7aac74b934e69f5abc2700cc7ced882a009/orjson-3.11.7-cp314-cp314-win_arm64.whl", hash = "sha256:4a2e9c5be347b937a2e0203866f12bba36082e89b402ddb9e927d5822e43088d", size = 126038, upload-time = "2026-02-02T15:38:47.703Z" }, +version = "3.11.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/0c/964746fcafbd16f8ff53219ad9f6b412b34f345c75f384ad434ceaadb538/orjson-3.11.9.tar.gz", hash = "sha256:4fef17e1f8722c11587a6ef18e35902450221da0028e65dbaaa543619e68e48f", size = 5599163, upload-time = "2026-05-06T15:11:08.309Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/10/5d/b95ca542a001135cc250a49370f282f578c8f4e46cc8617d73775297eea8/orjson-3.11.9-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:135869ef917b8704ea0a94e01620e0c05021c15c52036e4663baffe75e72f8ce", size = 228986, upload-time = "2026-05-06T15:09:14.765Z" }, + { url = "https://files.pythonhosted.org/packages/80/01/be33fbff646e22f93398429ea645f20d2097aea1a6cdc1e6628e70125f83/orjson-3.11.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:115ab5f5f4a0f203cc2a5f0fb09aee503a3f771aa08392949ab5ca230c4fbdbd", size = 132558, upload-time = "2026-05-06T15:09:17.431Z" }, + { url = "https://files.pythonhosted.org/packages/4e/61/73d49333bba660a075daccca10970dc6409ce1cf42ae4046646a19468aad/orjson-3.11.9-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4da3c38a2083ca4aaf9c2a36776cce3e9328e6647b10d118948f3cfb4913ffe4", size = 128213, upload-time = "2026-05-06T15:09:18.719Z" }, + { url = "https://files.pythonhosted.org/packages/1f/7d/30e844b3dac3f74aed66b1f984daf9db3c98c0328c03d965a9e8dc06449e/orjson-3.11.9-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53b50b0e14084b8f7e29c5ce84c5af0f1160169b30d8a6914231d97d2fe297d4", size = 135430, upload-time = "2026-05-06T15:09:20.257Z" }, + { url = "https://files.pythonhosted.org/packages/16/64/bd815f5c610b3facc204f26ba94e87a9eb49b0d83de3d5fc1eee2402d91b/orjson-3.11.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:231742b4a11dad8d5380a435962c57e91b7c37b79be858f4ef1c0df1a259897e", size = 146178, upload-time = "2026-05-06T15:09:21.616Z" }, + { url = "https://files.pythonhosted.org/packages/c7/35/e744fd36c79b339d27beb06068b5a08a8882ef5418804d0ce545a31f718d/orjson-3.11.9-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34fd2317602587321faab75ab76c623a0117e80841a6413654f04e47f339a8fb", size = 133068, upload-time = 
"2026-05-06T15:09:23.228Z" }, + { url = "https://files.pythonhosted.org/packages/2a/56/d54152b67b63a0b3e556cfc549d6ce84f74d7f425ddeadc6c8a74d913da7/orjson-3.11.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71f3db16e69b667b132e0f305a833d5497da302d801508cbb051ed9a9819da47", size = 134217, upload-time = "2026-05-06T15:09:24.847Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ee/66154baf69f71c7164a268a5e888908aec5a0819d13c81d5e2755a257758/orjson-3.11.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0b34789fa0da61cf7bef0546b09c738fb195331e017e477096d129e9105ab03d", size = 141917, upload-time = "2026-05-06T15:09:26.647Z" }, + { url = "https://files.pythonhosted.org/packages/09/d3/c5824260ca8b9d7ba82648d042a3f8f4815d18c15bb98a1f30edd1bb2d83/orjson-3.11.9-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:87e4d4ab280b0c87424d47695bec2182caf8cfc17879ea78dab76680194abc13", size = 415356, upload-time = "2026-05-06T15:09:28.252Z" }, + { url = "https://files.pythonhosted.org/packages/64/cb/509c2e816fe4df641d93dc92f6a89adc8df3ada8ebdee2bd44aba3264c3c/orjson-3.11.9-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ace6c58523302d3b97b6ac5c38a5298a54b473762b6be82726b4265c41029f92", size = 148112, upload-time = "2026-05-06T15:09:29.783Z" }, + { url = "https://files.pythonhosted.org/packages/db/b5/3ceae56d2e4962979eedb023ba6a46a4bb65f333960379be0ca470686220/orjson-3.11.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:97d0d932803c1b164fde11cb542a9efcb1e0f63b184537cca65887147906ff48", size = 137112, upload-time = "2026-05-06T15:09:31.432Z" }, + { url = "https://files.pythonhosted.org/packages/d7/7a/81fa3f2c7bef79b04cf2ab7838e5ac74b1f12511ceab979759b0275d6bb4/orjson-3.11.9-cp310-cp310-win32.whl", hash = "sha256:b3afcf569c15577a9fe64627292daa3e6b3a70f4fb77a5df246a87ec21681b94", size = 131706, upload-time = "2026-05-06T15:09:32.707Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/d8/b64600f9083c7f151ad39717a5877fccbeb0ef6d7efcb55f971ce00b6bee/orjson-3.11.9-cp310-cp310-win_amd64.whl", hash = "sha256:8697ab6a080a5c46edaad50e2bc5bd8c7ca5c66442d24104fa44ec74910a8244", size = 127282, upload-time = "2026-05-06T15:09:33.955Z" }, + { url = "https://files.pythonhosted.org/packages/1e/51/3fb9e65ae76ee97bd611869a503fa3fc0a6e81dd8b737cf3003f682df7ff/orjson-3.11.9-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f01c4818b3fc9b0da8e096722a84318071eaa118df35f6ed2344da0e73a5444f", size = 228522, upload-time = "2026-05-06T15:09:35.362Z" }, + { url = "https://files.pythonhosted.org/packages/16/fa/9d54b07cb3f3b0bfd57841478e42d7a0ece4a9f49f9907eecf5a45461687/orjson-3.11.9-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:3ebca4179031ee716ed076ffadc29428e900512f6fccee8614c9983157fcf19c", size = 128463, upload-time = "2026-05-06T15:09:37.063Z" }, + { url = "https://files.pythonhosted.org/packages/88/b1/6ceafc2eefd0a553e3be77ce6c49d107e772485d9568629376171c50e634/orjson-3.11.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48ee05097750de0ff69ed5b7bbcf0732182fd57a24043dcc2a1da780a5ead3a5", size = 132306, upload-time = "2026-05-06T15:09:38.299Z" }, + { url = "https://files.pythonhosted.org/packages/ea/76/f11311285324a40aab1e3031385c50b635a7cd0734fdaf60c7e89a696f60/orjson-3.11.9-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6082706765a95a6680d812e1daf1c0cfe8adec7831b3ff3b625693f3b461b1c", size = 127988, upload-time = "2026-05-06T15:09:39.597Z" }, + { url = "https://files.pythonhosted.org/packages/9e/85/0ef63bcf1337f44031ce9b91b1919563f62a37527b3ea4368bb15a22e5d7/orjson-3.11.9-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:277fefe9d76ee17eb14debf399e3533d4d63b5f677a4d3719eb763536af1f4bd", size = 135188, upload-time = "2026-05-06T15:09:40.957Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/94/b0d27090ea8a2095db3c2bd1b1c96f96f19bbb494d7fef33130e846e613d/orjson-3.11.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:03db380e3780fa0015ed776a90f20e8e20bb11dde13b216ce19e5718e3dfba62", size = 145937, upload-time = "2026-05-06T15:09:42.249Z" }, + { url = "https://files.pythonhosted.org/packages/09/eb/75d50c29c05b8054013e221e598820a365c8e64065312e75e202ed880709/orjson-3.11.9-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33d7d766701847dc6729846362dc27895d2f2d2251264f9d10e7cb9878194877", size = 132758, upload-time = "2026-05-06T15:09:43.945Z" }, + { url = "https://files.pythonhosted.org/packages/49/bd/360686f39348aa88827cb6fbf7dc606fd41c831a35235e1abf1db8e3a9e6/orjson-3.11.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:147302878da387104b66bb4a8b0227d1d487e976ce41a8501916161072ed87b1", size = 133971, upload-time = "2026-05-06T15:09:45.239Z" }, + { url = "https://files.pythonhosted.org/packages/0e/30/3178eb16f3221aeef068b6f1f1ebe05f656ea5c6dffe9f6c917329fe17a3/orjson-3.11.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3513550321f8c8c811a7c3297b8a630e82dc08e4c10216d07703c997776236cd", size = 141685, upload-time = "2026-05-06T15:09:46.858Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f1/ff2f19ed0225f9680fafa42febca3570dd59444ebf190980738d376214c2/orjson-3.11.9-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:c5d001196b89fa9cf0a4ab79766cd835b991a166e4b621ba95089edc50c429ff", size = 415167, upload-time = "2026-05-06T15:09:48.312Z" }, + { url = "https://files.pythonhosted.org/packages/9b/61/863bddf0da6e9e586765414debd54b4e58db05f560902b6d00658cb88636/orjson-3.11.9-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:16969c9d369c98eb084889c6e4d2d39b77c7eb38ceccf8da2a9fff62ae908980", size = 147913, upload-time = "2026-05-06T15:09:49.733Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/8a/4081492586d75b073d60c5271a8d0f05a0955cabf1e34c8473f6fcd84235/orjson-3.11.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:63e0efbc991250c0b3143488fa57d95affcabbfc63c99c48d625dd37779aafe2", size = 136959, upload-time = "2026-05-06T15:09:51.311Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bd/70b6ab193594d7abb875320c0a7c8335e846f28968c432c31042409c3c8d/orjson-3.11.9-cp311-cp311-win32.whl", hash = "sha256:14ed654580c1ed2bc217352ec82f91b047aef82951aa71c7f64e0dcb03c0e180", size = 131533, upload-time = "2026-05-06T15:09:52.637Z" }, + { url = "https://files.pythonhosted.org/packages/3f/17/1a1a228183d62d1b77e2c30d210f47dd4768b310ebe1607c63e3c0e3a71e/orjson-3.11.9-cp311-cp311-win_amd64.whl", hash = "sha256:57ea77fb70a448ce87d18fca050193202a3da5e54598f6501ca5476fb66cfe02", size = 127106, upload-time = "2026-05-06T15:09:54.204Z" }, + { url = "https://files.pythonhosted.org/packages/b8/95/285de5fa296d09681ee9c546cd4a8aeb773b701cf343dc125994f4d52953/orjson-3.11.9-cp311-cp311-win_arm64.whl", hash = "sha256:19b72ed11572a2ee51a67a903afbe5af504f84ed6f529c0fe44b0ab3fb5cc697", size = 126848, upload-time = "2026-05-06T15:09:55.551Z" }, + { url = "https://files.pythonhosted.org/packages/16/6d/11867a3ffa3a3608d84a4de51ef4dd0896d6b5cc9132fbe1daf593e677bc/orjson-3.11.9-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9ef6fe90aadef185c7b128859f40beb24720b4ecea95379fc9000931179c3a49", size = 228515, upload-time = "2026-05-06T15:09:57.265Z" }, + { url = "https://files.pythonhosted.org/packages/24/75/05912954c8b288f34fcf5cd4b9b071cb4f6e77b9961e175e56ebb258089f/orjson-3.11.9-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:e5c9b8f28e726e97d97696c826bc7bea5d71cecd63576dba92924a32c1961291", size = 128409, upload-time = "2026-05-06T15:09:59.063Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/86/1c3a47df3bc8191ea9ac51603bbb872a95167a364320c269f2557911f406/orjson-3.11.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26a473dbb4162108b27901492546f83c76fdcea3d0eadff00ae7a07e18dcce09", size = 132106, upload-time = "2026-05-06T15:10:00.798Z" }, + { url = "https://files.pythonhosted.org/packages/d7/cf/b33b5f3e695ae7d63feef9d915c37cc3b8f465493dcd4f8e0b4c697a2366/orjson-3.11.9-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:011382e2a60fda9d46f1cdee31068cfc52ffe952b587d683ec0463002802a0f4", size = 127864, upload-time = "2026-05-06T15:10:02.15Z" }, + { url = "https://files.pythonhosted.org/packages/31/6a/6cf69385a58208024fcb8c014e2141b8ce838aba6492b589f8acfff97fab/orjson-3.11.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2d3dc759490128c5c1711a53eeaa8ee1d437fd0038ffd2b6008abf46db3f882", size = 135213, upload-time = "2026-05-06T15:10:03.515Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f8/0b1bd3e8f2efcdd376af5c8cfd79eaf13f018080c0089c80ebd724e3c7fb/orjson-3.11.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8ea516b3726d190e1b4297e6f4e7a8650347ae053868a18163b4dd3641d1fff", size = 145994, upload-time = "2026-05-06T15:10:05.083Z" }, + { url = "https://files.pythonhosted.org/packages/f3/59/dab79f61044c529d2c81aecdc589b1f833a1c8dec11ba3b1c2498a02ca7e/orjson-3.11.9-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:380cdce7ba24989af81d0a7013d0aaec5d0e2a21734c0e2681b1bc4f141957fe", size = 132744, upload-time = "2026-05-06T15:10:06.853Z" }, + { url = "https://files.pythonhosted.org/packages/0e/a4/82b7a2fe5d8a67a59ed831b24d59a3d46ea7d207b66e1602d376541d94a6/orjson-3.11.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be4fa4f0af7fa18951f7ab3fc2148e223af211bf03f59e1c6034ec3f97f21d61", size = 134014, upload-time = "2026-05-06T15:10:08.213Z" }, + { url 
= "https://files.pythonhosted.org/packages/50/c7/375e83a76851b73b2e39f3bcf0e5a19e2b89bad13e5bca97d0b293d27f24/orjson-3.11.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a8f5f8bc7ce7d59f08d9f99fa510c06496164a24cb5f3d34537dbd9ca30132e2", size = 141509, upload-time = "2026-05-06T15:10:09.595Z" }, + { url = "https://files.pythonhosted.org/packages/7f/7c/49d5d82a3d3097f641f094f552131f1e2723b0b8cb0fa2874ab65ecfffa6/orjson-3.11.9-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:4d7fde5501b944f83b3e665e1b31343ff6e154b15560a16b7130ea1e594a4206", size = 415127, upload-time = "2026-05-06T15:10:11.049Z" }, + { url = "https://files.pythonhosted.org/packages/3a/dc/7446c538590d55f455647e5f3c61fc33f7108714e7afcffa6a2a033f8350/orjson-3.11.9-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cde1a448023ba7d5bb4c01c5afb48894380b5e4956e0627266526587ef4e535f", size = 148025, upload-time = "2026-05-06T15:10:12.842Z" }, + { url = "https://files.pythonhosted.org/packages/df/e5/4d2d8af06f788329b4f78f8cc3679bb395392fcaa1e4d8d3c33e85308fa4/orjson-3.11.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:71e63adb0e1f1ed5d9e168f50a91ceb93ae6420731d222dc7da5c69409aa47aa", size = 136943, upload-time = "2026-05-06T15:10:14.405Z" }, + { url = "https://files.pythonhosted.org/packages/06/69/850264ccf6d80f6b174620d30a87f65c9b1490aba33fe6b62798e618cad3/orjson-3.11.9-cp312-cp312-win32.whl", hash = "sha256:2d057a602cdd19a0ad680417527c45b6961a095081c0f46fe0e03e304aac6470", size = 131606, upload-time = "2026-05-06T15:10:15.791Z" }, + { url = "https://files.pythonhosted.org/packages/b9/d5/973a43fc9c55e20f2051e9830997649f669be0cb3ca52192087c0143f118/orjson-3.11.9-cp312-cp312-win_amd64.whl", hash = "sha256:59e403b1cc5a676da8eaf31f6254801b7341b3e29efa85f92b48d272637e77be", size = 127101, upload-time = "2026-05-06T15:10:17.129Z" }, + { url = "https://files.pythonhosted.org/packages/fe/ae/495470f0e4a18f73fa10b7f6b84b464ec4cc5291c4e0c7c2a6c400bef006/orjson-3.11.9-cp312-cp312-win_arm64.whl", 
hash = "sha256:9af678d6488357948f1f84c6cd1c1d397c014e1ae2f98ae082a44eb48f602624", size = 126736, upload-time = "2026-05-06T15:10:18.645Z" }, + { url = "https://files.pythonhosted.org/packages/32/33/93fcc25907235c344ae73122f8a4e01d2d393ef062b4af7d2e2487a32c37/orjson-3.11.9-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:4bab1b2d6141fe7b32ae71dac905666ece4f94936efbfb13d55bb7739a3a6021", size = 228458, upload-time = "2026-05-06T15:10:20.079Z" }, + { url = "https://files.pythonhosted.org/packages/8f/27/b1e6dadb3c080313c03fdd8067b85e6a0460c7d8d6a1c3984ef77b904e4d/orjson-3.11.9-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:844417969855fc7a41be124aafe83dc424592a7f77cd4501900c67307122b92c", size = 128368, upload-time = "2026-05-06T15:10:21.549Z" }, + { url = "https://files.pythonhosted.org/packages/21/0f/c9ede0bf052f6b4051e64a7d4fa91b725cccf8321a6a786e86eb03519f00/orjson-3.11.9-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffe02797b5e9f3a9d8292ddcd289b474ad13e81ad83cd1891a240811f1d2cb81", size = 132070, upload-time = "2026-05-06T15:10:23.371Z" }, + { url = "https://files.pythonhosted.org/packages/fd/26/d398e28048dc18205bbe812f2c88cb9b40313db2470778e25964796458fe/orjson-3.11.9-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e4eed3b200023042814d2fc8a5d2e880f13b52e1ed2485e83da4f3962f7dc1a", size = 127892, upload-time = "2026-05-06T15:10:24.714Z" }, + { url = "https://files.pythonhosted.org/packages/66/60/52b0054c4c700d5aa7fc5b7ca96917400d8f061307778578e67a10e25852/orjson-3.11.9-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8aff7da9952a5ad1cef8e68017724d96c7b9a66e99e91d6252e1b133d67a7b10", size = 135217, upload-time = "2026-05-06T15:10:26.084Z" }, + { url = "https://files.pythonhosted.org/packages/d5/97/1e3dc2b2a28b7b2528f403d2fc1d79ec5f39af3bc143ab65d3ec26426385/orjson-3.11.9-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:4d4e98d6f3b8afed8bc8cd9718ec0cdf46661826beefb53fe8eafb37f2bf0362", size = 145980, upload-time = "2026-05-06T15:10:28.062Z" }, + { url = "https://files.pythonhosted.org/packages/fc/39/31fbfe7850f2de32dee7e7e5c09f26d403ab01e440ac96001c6b01ad3c99/orjson-3.11.9-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3a81d52442a7c99b3662333235b3adf96a1715864658b35bb797212be7bddb97", size = 132738, upload-time = "2026-05-06T15:10:29.727Z" }, + { url = "https://files.pythonhosted.org/packages/a1/08/dca0082dd2a194acb93e5457e73455388e2e2ca464a2672449a9ddbb679d/orjson-3.11.9-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e39364e726a8fff737309aff059ff67d8a8c8d5b677be7bb49a8b3e84b7e218", size = 134033, upload-time = "2026-05-06T15:10:31.152Z" }, + { url = "https://files.pythonhosted.org/packages/11/d4/5bdb0626801230139987385554c5d4c42255218ac906525bf4347f22cd95/orjson-3.11.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4fd66214623f1b17501df9f0543bef0b833979ab5b6ded1e1d123222866aa8c9", size = 141492, upload-time = "2026-05-06T15:10:32.641Z" }, + { url = "https://files.pythonhosted.org/packages/fa/88/a21fb53b3ede6703aede6dce4710ed4111e5b201cfa6bbff5e544f9d47d7/orjson-3.11.9-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:8ecc30f10465fa1e0ce13fd01d9e22c316e5053a719a8d915d4545a09a5ff677", size = 415087, upload-time = "2026-05-06T15:10:34.438Z" }, + { url = "https://files.pythonhosted.org/packages/3d/57/1b30daf70f0d8180e9a73cefbfbdd99e4bf19eb020466502b01fba7e0e50/orjson-3.11.9-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:97db4c94a7db398a5bd636273324f0b3fd58b350bbbac8bb380ceb825a9b40f4", size = 148031, upload-time = "2026-05-06T15:10:36.358Z" }, + { url = "https://files.pythonhosted.org/packages/04/83/45fbb6d962e260807f99441db9613cee868ceda4baceda59b3720a563f97/orjson-3.11.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9f78cf8fec5bd627f4082b8dfeac7871b43d7f3274904492a43dab39f18a19a0", size 
= 136915, upload-time = "2026-05-06T15:10:38.013Z" }, + { url = "https://files.pythonhosted.org/packages/5f/cc/2d10025f9056d376e4127ec05a5808b218d46f035fdc08178a5411b34250/orjson-3.11.9-cp313-cp313-win32.whl", hash = "sha256:d4087e5c0209a0a8efe4de3303c234b9c44d1174161dcd851e8eea07c7560b32", size = 131613, upload-time = "2026-05-06T15:10:39.569Z" }, + { url = "https://files.pythonhosted.org/packages/67/bd/2775ff28bfe883b9aa1ff348300542eb2ef1ee18d8ae0e3a49846817a865/orjson-3.11.9-cp313-cp313-win_amd64.whl", hash = "sha256:051b102c93b4f634e89f3866b07b9a9a98915ada541f4ec30f177067b2694979", size = 127086, upload-time = "2026-05-06T15:10:41.262Z" }, + { url = "https://files.pythonhosted.org/packages/91/2b/d26799e580939e32a7da9a39531bc9e58e15ca32ffaa6a8cb3e9bb0d22cd/orjson-3.11.9-cp313-cp313-win_arm64.whl", hash = "sha256:cce9127885941bd28f080cecf1f1d288336b7e0d812c345b08be88b572796254", size = 126696, upload-time = "2026-05-06T15:10:42.651Z" }, + { url = "https://files.pythonhosted.org/packages/8e/eb/5da01e356015aee6ecfa1187ced87aef51364e306f5e695dd52719bf0e78/orjson-3.11.9-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:b6ef1979adc4bc243523f1a2ba91418030a8e29b0a99cbe7e0e2d6807d4dce6e", size = 228465, upload-time = "2026-05-06T15:10:44.097Z" }, + { url = "https://files.pythonhosted.org/packages/64/62/3e0e0c14c957133bcd855395c62b55ed4e3b0af23ffea11b032cb1dcbdb1/orjson-3.11.9-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:f36b7f32c7c0db4a719f1fc5824db4a9c6f8bd1a354debb91faf26ebf3a4c71e", size = 128364, upload-time = "2026-05-06T15:10:45.839Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5a/07d8aa117211a8ed7630bda80c8c0b14d04e0f8dcf99bcf49656e4a710eb/orjson-3.11.9-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08f4d8ebb44925c794e535b2bebc507cebf32209df81de22ae285fb0d8d66de0", size = 132063, upload-time = "2026-05-06T15:10:47.267Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/ec/4acaf21483e18aa945be74a474c74b434f284b549f275a0a39b9f98956e9/orjson-3.11.9-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6cc7923789694fd58f001cbcac7e47abc13af4d560ebbfcf3b41a8b1a0748124", size = 122356, upload-time = "2026-05-06T15:10:48.765Z" }, + { url = "https://files.pythonhosted.org/packages/13/d8/5f0555e7638801323b7a75850f92e7dfa891bc84fe27a1ba4449170d1200/orjson-3.11.9-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea5c46eb2d3af39e806b986f4b09d5c2706a1f5afde3cbf7544ce6616127173c", size = 129592, upload-time = "2026-05-06T15:10:50.13Z" }, + { url = "https://files.pythonhosted.org/packages/b6/30/ed9860412a3603ceb3c5955bfd72d28b9d0e7ba6ed81add14f83d7114236/orjson-3.11.9-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f5d89a2ed90731df3be64bab0aa44f78bff39fdc9d71c291f4a8023aa46425b7", size = 140491, upload-time = "2026-05-06T15:10:51.582Z" }, + { url = "https://files.pythonhosted.org/packages/d0/17/adc514dea7ac7c505527febf884934b815d34f0c7b8693c1a8b39c5c4a57/orjson-3.11.9-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25e4aed0312d292c09f61af25bba34e0b2c88546041472b09088c39a4d828af1", size = 127309, upload-time = "2026-05-06T15:10:53.329Z" }, + { url = "https://files.pythonhosted.org/packages/76/3e/c0b690253f0b82d86e99949af13533363acfb5432ecb5d53dd5b3bce9c34/orjson-3.11.9-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaea64f3f467d22e70eeed68bdccb3bc4f83f650446c4a03c59f2cba28a108db", size = 134030, upload-time = "2026-05-06T15:10:54.988Z" }, + { url = "https://files.pythonhosted.org/packages/c1/7a/bc82a0bb25e9faaf92dc4d9ef002732efc09737706af83e346788641d4a7/orjson-3.11.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a028425d1b440c5d92a6be1e1a020739dfe67ea87d96c6dbe828c1b30041728b", size = 141482, upload-time = "2026-05-06T15:10:56.663Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/55/e69188b939f77d5d32a9833745ace31ea5ccae3ab613a1ec185d3cd2c4fb/orjson-3.11.9-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:5b192c6cf397e4455b11523c5cf2b18ed084c1bbd61b6c0926344d2129481972", size = 415178, upload-time = "2026-05-06T15:10:58.446Z" }, + { url = "https://files.pythonhosted.org/packages/2e/1a/b8a5a7ac527e80b9cb11d51e3f6689b709279183264b9ec5c7bc680bb8b5/orjson-3.11.9-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ea407d4ccf5891d667d045fecae97a7a1e5e87b3b97f97ae1803c2e741130be0", size = 148089, upload-time = "2026-05-06T15:11:00.441Z" }, + { url = "https://files.pythonhosted.org/packages/97/4e/00503f64204bf859b37213a63927028f30fb6268cd8677fb0a5ad48155e1/orjson-3.11.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f63aaf97afd9f6dec5b1a68e1b8da12bfccb4cb9a9a65c3e0b6c847849e7586", size = 136921, upload-time = "2026-05-06T15:11:02.176Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ba/a23b82a0a8d0ed7bed4e5f5035aae751cad4ff6a1e8d2ecd14d8860f5929/orjson-3.11.9-cp314-cp314-win32.whl", hash = "sha256:e30ab17845bb9fa54ccf67fa4f9f5282652d54faa6d17452f47d0f369d038673", size = 131638, upload-time = "2026-05-06T15:11:03.696Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c3/0c6798456bade745c75c452342dabacce5798196483e77e643be1f53877d/orjson-3.11.9-cp314-cp314-win_amd64.whl", hash = "sha256:32ef5f4283a3be81913947d19608eacb7c6608026851123790cd9cc8982af34b", size = 127078, upload-time = "2026-05-06T15:11:05.123Z" }, + { url = "https://files.pythonhosted.org/packages/16/21/5a3f1e8913103b703a436a5664238e5b965ec392b555fe68943ea3691e6b/orjson-3.11.9-cp314-cp314-win_arm64.whl", hash = "sha256:eebdbdeef0094e4f5aefa20dcd4eb2368ab5e7a3b4edea27f1e7b2892e009cf9", size = 126687, upload-time = "2026-05-06T15:11:06.602Z" }, ] [[package]] name = "packaging" -version = "26.0" +version = "26.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, upload-time = "2026-04-24T20:15:23.917Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, + { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = "2026-04-24T20:15:22.081Z" }, ] [[package]] @@ -1311,83 +1279,80 @@ wheels = [ [[package]] name = "pandas" -version = "3.0.1" +version = "3.0.3" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'win32'", "python_full_version >= '3.14' and sys_platform == 'emscripten'", "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'emscripten' 
and sys_platform != 'win32'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", ] dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and python_full_version < '3.14'" }, - { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "python-dateutil", marker = "python_full_version >= '3.11'" }, { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ff/07/c7087e003ceee9b9a82539b40414ec557aa795b584a1a346e89180853d79/pandas-3.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de09668c1bf3b925c07e5762291602f0d789eca1b3a781f99c1c78f6cac0e7ea", size = 10323380, upload-time = "2026-02-17T22:18:16.133Z" }, - { url = "https://files.pythonhosted.org/packages/c1/27/90683c7122febeefe84a56f2cde86a9f05f68d53885cebcc473298dfc33e/pandas-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:24ba315ba3d6e5806063ac6eb717504e499ce30bd8c236d8693a5fd3f084c796", size = 9923455, upload-time = "2026-02-17T22:18:19.13Z" }, - { url = "https://files.pythonhosted.org/packages/0e/f1/ed17d927f9950643bc7631aa4c99ff0cc83a37864470bc419345b656a41f/pandas-3.0.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:406ce835c55bac912f2a0dcfaf27c06d73c6b04a5dde45f1fd3169ce31337389", size = 10753464, upload-time = "2026-02-17T22:18:21.134Z" }, - { url = "https://files.pythonhosted.org/packages/2e/7c/870c7e7daec2a6c7ff2ac9e33b23317230d4e4e954b35112759ea4a924a7/pandas-3.0.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:830994d7e1f31dd7e790045235605ab61cff6c94defc774547e8b7fdfbff3dc7", size = 11255234, upload-time = "2026-02-17T22:18:24.175Z" }, - { url = "https://files.pythonhosted.org/packages/5c/39/3653fe59af68606282b989c23d1a543ceba6e8099cbcc5f1d506a7bae2aa/pandas-3.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a64ce8b0f2de1d2efd2ae40b0abe7f8ae6b29fbfb3812098ed5a6f8e235ad9bf", size = 11767299, upload-time = "2026-02-17T22:18:26.824Z" }, - { url = "https://files.pythonhosted.org/packages/9b/31/1daf3c0c94a849c7a8dab8a69697b36d313b229918002ba3e409265c7888/pandas-3.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9832c2c69da24b602c32e0c7b1b508a03949c18ba08d4d9f1c1033426685b447", size = 12333292, upload-time = "2026-02-17T22:18:28.996Z" }, - { url = "https://files.pythonhosted.org/packages/1f/67/af63f83cd6ca603a00fe8530c10a60f0879265b8be00b5930e8e78c5b30b/pandas-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:84f0904a69e7365f79a0c77d3cdfccbfb05bf87847e3a51a41e1426b0edb9c79", size = 9892176, upload-time = "2026-02-17T22:18:31.79Z" }, - { url = "https://files.pythonhosted.org/packages/79/ab/9c776b14ac4b7b4140788eca18468ea39894bc7340a408f1d1e379856a6b/pandas-3.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:4a68773d5a778afb31d12e34f7dd4612ab90de8c6fb1d8ffe5d4a03b955082a1", size = 9151328, 
upload-time = "2026-02-17T22:18:35.721Z" }, - { url = "https://files.pythonhosted.org/packages/37/51/b467209c08dae2c624873d7491ea47d2b47336e5403309d433ea79c38571/pandas-3.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:476f84f8c20c9f5bc47252b66b4bb25e1a9fc2fa98cead96744d8116cb85771d", size = 10344357, upload-time = "2026-02-17T22:18:38.262Z" }, - { url = "https://files.pythonhosted.org/packages/7c/f1/e2567ffc8951ab371db2e40b2fe068e36b81d8cf3260f06ae508700e5504/pandas-3.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0ab749dfba921edf641d4036c4c21c0b3ea70fea478165cb98a998fb2a261955", size = 9884543, upload-time = "2026-02-17T22:18:41.476Z" }, - { url = "https://files.pythonhosted.org/packages/d7/39/327802e0b6d693182403c144edacbc27eb82907b57062f23ef5a4c4a5ea7/pandas-3.0.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8e36891080b87823aff3640c78649b91b8ff6eea3c0d70aeabd72ea43ab069b", size = 10396030, upload-time = "2026-02-17T22:18:43.822Z" }, - { url = "https://files.pythonhosted.org/packages/3d/fe/89d77e424365280b79d99b3e1e7d606f5165af2f2ecfaf0c6d24c799d607/pandas-3.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:532527a701281b9dd371e2f582ed9094f4c12dd9ffb82c0c54ee28d8ac9520c4", size = 10876435, upload-time = "2026-02-17T22:18:45.954Z" }, - { url = "https://files.pythonhosted.org/packages/b5/a6/2a75320849dd154a793f69c951db759aedb8d1dd3939eeacda9bdcfa1629/pandas-3.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:356e5c055ed9b0da1580d465657bc7d00635af4fd47f30afb23025352ba764d1", size = 11405133, upload-time = "2026-02-17T22:18:48.533Z" }, - { url = "https://files.pythonhosted.org/packages/58/53/1d68fafb2e02d7881df66aa53be4cd748d25cbe311f3b3c85c93ea5d30ca/pandas-3.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9d810036895f9ad6345b8f2a338dd6998a74e8483847403582cab67745bff821", size = 11932065, upload-time = "2026-02-17T22:18:50.837Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/08/67cc404b3a966b6df27b38370ddd96b3b023030b572283d035181854aac5/pandas-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:536232a5fe26dd989bd633e7a0c450705fdc86a207fec7254a55e9a22950fe43", size = 9741627, upload-time = "2026-02-17T22:18:53.905Z" }, - { url = "https://files.pythonhosted.org/packages/86/4f/caf9952948fb00d23795f09b893d11f1cacb384e666854d87249530f7cbe/pandas-3.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:0f463ebfd8de7f326d38037c7363c6dacb857c5881ab8961fb387804d6daf2f7", size = 9052483, upload-time = "2026-02-17T22:18:57.31Z" }, - { url = "https://files.pythonhosted.org/packages/0b/48/aad6ec4f8d007534c091e9a7172b3ec1b1ee6d99a9cbb936b5eab6c6cf58/pandas-3.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5272627187b5d9c20e55d27caf5f2cd23e286aba25cadf73c8590e432e2b7262", size = 10317509, upload-time = "2026-02-17T22:18:59.498Z" }, - { url = "https://files.pythonhosted.org/packages/a8/14/5990826f779f79148ae9d3a2c39593dc04d61d5d90541e71b5749f35af95/pandas-3.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:661e0f665932af88c7877f31da0dc743fe9c8f2524bdffe23d24fdcb67ef9d56", size = 9860561, upload-time = "2026-02-17T22:19:02.265Z" }, - { url = "https://files.pythonhosted.org/packages/fa/80/f01ff54664b6d70fed71475543d108a9b7c888e923ad210795bef04ffb7d/pandas-3.0.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:75e6e292ff898679e47a2199172593d9f6107fd2dd3617c22c2946e97d5df46e", size = 10365506, upload-time = "2026-02-17T22:19:05.017Z" }, - { url = "https://files.pythonhosted.org/packages/f2/85/ab6d04733a7d6ff32bfc8382bf1b07078228f5d6ebec5266b91bfc5c4ff7/pandas-3.0.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ff8cf1d2896e34343197685f432450ec99a85ba8d90cce2030c5eee2ef98791", size = 10873196, upload-time = "2026-02-17T22:19:07.204Z" }, - { url = 
"https://files.pythonhosted.org/packages/48/a9/9301c83d0b47c23ac5deab91c6b39fd98d5b5db4d93b25df8d381451828f/pandas-3.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eca8b4510f6763f3d37359c2105df03a7a221a508f30e396a51d0713d462e68a", size = 11370859, upload-time = "2026-02-17T22:19:09.436Z" }, - { url = "https://files.pythonhosted.org/packages/59/fe/0c1fc5bd2d29c7db2ab372330063ad555fb83e08422829c785f5ec2176ca/pandas-3.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:06aff2ad6f0b94a17822cf8b83bbb563b090ed82ff4fe7712db2ce57cd50d9b8", size = 11924584, upload-time = "2026-02-17T22:19:11.562Z" }, - { url = "https://files.pythonhosted.org/packages/d6/7d/216a1588b65a7aa5f4535570418a599d943c85afb1d95b0876fc00aa1468/pandas-3.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:9fea306c783e28884c29057a1d9baa11a349bbf99538ec1da44c8476563d1b25", size = 9742769, upload-time = "2026-02-17T22:19:13.926Z" }, - { url = "https://files.pythonhosted.org/packages/c4/cb/810a22a6af9a4e97c8ab1c946b47f3489c5bca5adc483ce0ffc84c9cc768/pandas-3.0.1-cp313-cp313-win_arm64.whl", hash = "sha256:a8d37a43c52917427e897cb2e429f67a449327394396a81034a4449b99afda59", size = 9043855, upload-time = "2026-02-17T22:19:16.09Z" }, - { url = "https://files.pythonhosted.org/packages/92/fa/423c89086cca1f039cf1253c3ff5b90f157b5b3757314aa635f6bf3e30aa/pandas-3.0.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d54855f04f8246ed7b6fc96b05d4871591143c46c0b6f4af874764ed0d2d6f06", size = 10752673, upload-time = "2026-02-17T22:19:18.304Z" }, - { url = "https://files.pythonhosted.org/packages/22/23/b5a08ec1f40020397f0faba72f1e2c11f7596a6169c7b3e800abff0e433f/pandas-3.0.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e1b677accee34a09e0dc2ce5624e4a58a1870ffe56fc021e9caf7f23cd7668f", size = 10404967, upload-time = "2026-02-17T22:19:20.726Z" }, - { url = 
"https://files.pythonhosted.org/packages/5c/81/94841f1bb4afdc2b52a99daa895ac2c61600bb72e26525ecc9543d453ebc/pandas-3.0.1-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a9cabbdcd03f1b6cd254d6dda8ae09b0252524be1592594c00b7895916cb1324", size = 10320575, upload-time = "2026-02-17T22:19:24.919Z" }, - { url = "https://files.pythonhosted.org/packages/0a/8b/2ae37d66a5342a83adadfd0cb0b4bf9c3c7925424dd5f40d15d6cfaa35ee/pandas-3.0.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ae2ab1f166668b41e770650101e7090824fd34d17915dd9cd479f5c5e0065e9", size = 10710921, upload-time = "2026-02-17T22:19:27.181Z" }, - { url = "https://files.pythonhosted.org/packages/a2/61/772b2e2757855e232b7ccf7cb8079a5711becb3a97f291c953def15a833f/pandas-3.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6bf0603c2e30e2cafac32807b06435f28741135cb8697eae8b28c7d492fc7d76", size = 11334191, upload-time = "2026-02-17T22:19:29.411Z" }, - { url = "https://files.pythonhosted.org/packages/1b/08/b16c6df3ef555d8495d1d265a7963b65be166785d28f06a350913a4fac78/pandas-3.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6c426422973973cae1f4a23e51d4ae85974f44871b24844e4f7de752dd877098", size = 11782256, upload-time = "2026-02-17T22:19:32.34Z" }, - { url = "https://files.pythonhosted.org/packages/55/80/178af0594890dee17e239fca96d3d8670ba0f5ff59b7d0439850924a9c09/pandas-3.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b03f91ae8c10a85c1613102c7bef5229b5379f343030a3ccefeca8a33414cf35", size = 10485047, upload-time = "2026-02-17T22:19:34.605Z" }, - { url = "https://files.pythonhosted.org/packages/bb/8b/4bb774a998b97e6c2fd62a9e6cfdaae133b636fd1c468f92afb4ae9a447a/pandas-3.0.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:99d0f92ed92d3083d140bf6b97774f9f13863924cf3f52a70711f4e7588f9d0a", size = 10322465, upload-time = "2026-02-17T22:19:36.803Z" }, - { url = 
"https://files.pythonhosted.org/packages/72/3a/5b39b51c64159f470f1ca3b1c2a87da290657ca022f7cd11442606f607d1/pandas-3.0.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3b66857e983208654294bb6477b8a63dee26b37bdd0eb34d010556e91261784f", size = 9910632, upload-time = "2026-02-17T22:19:39.001Z" }, - { url = "https://files.pythonhosted.org/packages/4e/f7/b449ffb3f68c11da12fc06fbf6d2fa3a41c41e17d0284d23a79e1c13a7e4/pandas-3.0.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56cf59638bf24dc9bdf2154c81e248b3289f9a09a6d04e63608c159022352749", size = 10440535, upload-time = "2026-02-17T22:19:41.157Z" }, - { url = "https://files.pythonhosted.org/packages/55/77/6ea82043db22cb0f2bbfe7198da3544000ddaadb12d26be36e19b03a2dc5/pandas-3.0.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1a9f55e0f46951874b863d1f3906dcb57df2d9be5c5847ba4dfb55b2c815249", size = 10893940, upload-time = "2026-02-17T22:19:43.493Z" }, - { url = "https://files.pythonhosted.org/packages/03/30/f1b502a72468c89412c1b882a08f6eed8a4ee9dc033f35f65d0663df6081/pandas-3.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1849f0bba9c8a2fb0f691d492b834cc8dadf617e29015c66e989448d58d011ee", size = 11442711, upload-time = "2026-02-17T22:19:46.074Z" }, - { url = "https://files.pythonhosted.org/packages/0d/f0/ebb6ddd8fc049e98cabac5c2924d14d1dda26a20adb70d41ea2e428d3ec4/pandas-3.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3d288439e11b5325b02ae6e9cc83e6805a62c40c5a6220bea9beb899c073b1c", size = 11963918, upload-time = "2026-02-17T22:19:48.838Z" }, - { url = "https://files.pythonhosted.org/packages/09/f8/8ce132104074f977f907442790eaae24e27bce3b3b454e82faa3237ff098/pandas-3.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:93325b0fe372d192965f4cca88d97667f49557398bbf94abdda3bf1b591dbe66", size = 9862099, upload-time = "2026-02-17T22:19:51.081Z" }, - { url = 
"https://files.pythonhosted.org/packages/e6/b7/6af9aac41ef2456b768ef0ae60acf8abcebb450a52043d030a65b4b7c9bd/pandas-3.0.1-cp314-cp314-win_arm64.whl", hash = "sha256:97ca08674e3287c7148f4858b01136f8bdfe7202ad25ad04fec602dd1d29d132", size = 9185333, upload-time = "2026-02-17T22:19:53.266Z" }, - { url = "https://files.pythonhosted.org/packages/66/fc/848bb6710bc6061cb0c5badd65b92ff75c81302e0e31e496d00029fe4953/pandas-3.0.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:58eeb1b2e0fb322befcf2bbc9ba0af41e616abadb3d3414a6bc7167f6cbfce32", size = 10772664, upload-time = "2026-02-17T22:19:55.806Z" }, - { url = "https://files.pythonhosted.org/packages/69/5c/866a9bbd0f79263b4b0db6ec1a341be13a1473323f05c122388e0f15b21d/pandas-3.0.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cd9af1276b5ca9e298bd79a26bda32fa9cc87ed095b2a9a60978d2ca058eaf87", size = 10421286, upload-time = "2026-02-17T22:19:58.091Z" }, - { url = "https://files.pythonhosted.org/packages/51/a4/2058fb84fb1cfbfb2d4a6d485e1940bb4ad5716e539d779852494479c580/pandas-3.0.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f87a04984d6b63788327cd9f79dda62b7f9043909d2440ceccf709249ca988", size = 10342050, upload-time = "2026-02-17T22:20:01.376Z" }, - { url = "https://files.pythonhosted.org/packages/22/1b/674e89996cc4be74db3c4eb09240c4bb549865c9c3f5d9b086ff8fcfbf00/pandas-3.0.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85fe4c4df62e1e20f9db6ebfb88c844b092c22cd5324bdcf94bfa2fc1b391221", size = 10740055, upload-time = "2026-02-17T22:20:04.328Z" }, - { url = "https://files.pythonhosted.org/packages/d0/f8/e954b750764298c22fa4614376531fe63c521ef517e7059a51f062b87dca/pandas-3.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:331ca75a2f8672c365ae25c0b29e46f5ac0c6551fdace8eec4cd65e4fac271ff", size = 11357632, upload-time = "2026-02-17T22:20:06.647Z" }, - { url = 
"https://files.pythonhosted.org/packages/6d/02/c6e04b694ffd68568297abd03588b6d30295265176a5c01b7459d3bc35a3/pandas-3.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:15860b1fdb1973fffade772fdb931ccf9b2f400a3f5665aef94a00445d7d8dd5", size = 11810974, upload-time = "2026-02-17T22:20:08.946Z" }, - { url = "https://files.pythonhosted.org/packages/89/41/d7dfb63d2407f12055215070c42fc6ac41b66e90a2946cdc5e759058398b/pandas-3.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:44f1364411d5670efa692b146c748f4ed013df91ee91e9bec5677fb1fd58b937", size = 10884622, upload-time = "2026-02-17T22:20:11.711Z" }, - { url = "https://files.pythonhosted.org/packages/68/b0/34937815889fa982613775e4b97fddd13250f11012d769949c5465af2150/pandas-3.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:108dd1790337a494aa80e38def654ca3f0968cf4f362c85f44c15e471667102d", size = 9452085, upload-time = "2026-02-17T22:20:14.331Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/f8/87/4341c6252d1c47b08768c3d25ac487362bf403f0313ddae4a2a26c9b1b4c/pandas-3.0.3.tar.gz", hash = "sha256:696a4a00a2a2a35d4e5deb3fc946641b96c944f02230e4f76137fe35d806c4fc", size = 4651414, upload-time = "2026-05-11T18:54:29.21Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/16/b5c76b838fd9bf6ce84d3a53346b8874ec05c5f0040d75ef2c320100cd2a/pandas-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:455f6f8139d4282188f526868dbc3c828470e88a3d9d59a891bd46a455f21b98", size = 10338495, upload-time = "2026-05-11T18:52:11.558Z" }, + { url = "https://files.pythonhosted.org/packages/5a/b0/a4ffc4ae74d2d822200dcc46898987d8eb6032d1e2b219cae39da6f5cbcc/pandas-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4e15135e2ee5df1063313e2425ceef8ac0f4ae775893815b0923651b806a5639", size = 9938250, upload-time = "2026-05-11T18:52:17.005Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/b2/3323601a52caee42c019e370090ca4544b241437240ca04f786cce82b0cf/pandas-3.0.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:05f1f1752b8533ea03f7f39a9c15b1a058d067bb48f4748948e7a8691e0510f2", size = 10770558, upload-time = "2026-05-11T18:52:19.865Z" }, + { url = "https://files.pythonhosted.org/packages/32/f1/bbecd2f867b97abebe0f9b53d750f862251b40337e061b36676ded3d920f/pandas-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a1e45c80cceb3b4a21bc5939d52e8cbd8d9b7305309219d59e9754d9ce09e27", size = 11274611, upload-time = "2026-05-11T18:52:22.622Z" }, + { url = "https://files.pythonhosted.org/packages/7f/4f/eafabf2d5fae5adf143b4d18d3706c5efdc368a7c4eb1ee8a3eddabbd0f6/pandas-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:14da8316da4d0c5a77618425996bfb1248ca87fc2c1486e6fde4652bd18b5824", size = 11784670, upload-time = "2026-05-11T18:52:25.4Z" }, + { url = "https://files.pythonhosted.org/packages/49/44/1eb20389301b57b19cc099a1c2f662501f72f08a65f912d05822613c1532/pandas-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a55066a0505dae0ba2b50a46637db34b46f9094c65c5d4800794ef6335010938", size = 12353708, upload-time = "2026-05-11T18:52:28.139Z" }, + { url = "https://files.pythonhosted.org/packages/eb/62/c321f13b5ba1819fc8dca456c7fce578da2dcfecff1abbf0eaddf8406c0f/pandas-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6674ab18ad8c57802867264b00e15e7bb904700cdd9046e3b2fa1fce237439ea", size = 9907609, upload-time = "2026-05-11T18:52:30.982Z" }, + { url = "https://files.pythonhosted.org/packages/53/85/1b7f563ebc6357c27233a02a96b589bcce1fa9c6eb89fb4f0e56421d277e/pandas-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:5cc09a68b3120e0f54870dede8287a7bb1fa463907e4fcec1ea77cab6179bf7a", size = 9165596, upload-time = "2026-05-11T18:52:33.334Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/f1/392f8c5bfc16f66a0d2d41561c01627c228fe7ed2a0d056ef11315042570/pandas-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fed2ff7fd9779120e388e285fc029bd5cf9490cdd2e4166a9ee22c0e49a9ab09", size = 10357846, upload-time = "2026-05-11T18:52:36.143Z" }, + { url = "https://files.pythonhosted.org/packages/cf/3d/b16412745651e855f357e5e66930248688378853a6e2698a214e331fba1f/pandas-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b168fc218fd80a6cbdbdbc1a97ddc7889ed057d7eb45f50d866ceab5f39904c4", size = 9899550, upload-time = "2026-05-11T18:52:38.976Z" }, + { url = "https://files.pythonhosted.org/packages/31/a8/fa2535168fffcedf67f4f6de28d2dd903a747ca7c8ea6989451aaeb3a92f/pandas-3.0.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0383c72c75cdcca61a9e116e611143902dbfd08bff356829c2f6d1cf40a9ca8c", size = 10412965, upload-time = "2026-05-11T18:52:41.915Z" }, + { url = "https://files.pythonhosted.org/packages/65/b6/09b01cdbc15224e2850365192d17b7bdebb8bdbd8780ed221fcdf0d9a515/pandas-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6dc0b3fd2169c9157deed50b4d519553a3655c8c6a96027136d654592be973a9", size = 10894600, upload-time = "2026-05-11T18:52:45.02Z" }, + { url = "https://files.pythonhosted.org/packages/c9/a4/2eb28f2fccb4ced4a2c79ab2a5dee9ade1ebf44922ebad6fea158c9f95d4/pandas-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7e65d5407dc0b394f509699650e4a2ec01c0514f21850f453fa60f3be79a5dbf", size = 11422824, upload-time = "2026-05-11T18:52:48.058Z" }, + { url = "https://files.pythonhosted.org/packages/f8/45/830bb57f533a4604b355e07edcb8ea18cf88b5f94e5fca92f27052d7c597/pandas-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f8894dc474d648fe7b6ff0ca9b0bd73950d19952bc1a6534540762c5d79d305c", size = 11950889, upload-time = "2026-05-11T18:52:50.905Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/c5/fc1b368f303087d20e8c9bf3d6ceb186263cfac0ade735cd938538bea839/pandas-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:c7be265b62cef88e253a941e4698604973736dcfe242fdb5198f0f7bc473cdcc", size = 9755463, upload-time = "2026-05-11T18:52:53.386Z" }, + { url = "https://files.pythonhosted.org/packages/86/bd/fda8f9705b1b09c6ebe14bfc0fa0e4ec8584d54ea673628f157ff55131af/pandas-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:557409bc4178e70ee8d9ddb494798e51ebf6ea59330f6be22c51bab2a7db6c49", size = 9066158, upload-time = "2026-05-11T18:52:56.038Z" }, + { url = "https://files.pythonhosted.org/packages/c5/90/62d8302883c44308c477e222c3daf7c813a34c8e96985882fbd53d964352/pandas-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:67b3b64c11910cfa29f4e94a14d3bff9ee693b6fc76055e7cad549cee0aec5fa", size = 10331071, upload-time = "2026-05-11T18:52:58.838Z" }, + { url = "https://files.pythonhosted.org/packages/7f/ae/6a6493c783a101f165e4356953ba3c74d6f77f0042fa7d753da9dfbb640c/pandas-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:39436b377d56d2a2e52d0395bdbee171f01068e99af5250509aceeb929f765c7", size = 9875690, upload-time = "2026-05-11T18:53:01.431Z" }, + { url = "https://files.pythonhosted.org/packages/62/7c/5df8e9f56c69a2769fbe9382a5ef8f2658c007e376434e1e2cbb57ad895f/pandas-3.0.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4be06d68f9ddcfc645b87534911da79a8fbffc7573c80e0edcf42a5020624d8", size = 10381634, upload-time = "2026-05-11T18:53:04.393Z" }, + { url = "https://files.pythonhosted.org/packages/99/68/1237369725aa617bb358263d535803e3053fdbc593513ec5ed9c9896b5b6/pandas-3.0.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4eeb6830daf35a71cc09649bd823e2b542dac246cdee9614c6e4bd65028cd6a", size = 10891243, upload-time = "2026-05-11T18:53:07.643Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/93/77d108e8af7222b4a503ebde0e30215b1c2e4f8e53a526431890f22d5586/pandas-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1928e07221f82db493cd4af1e23c1bfca524a19a4699887975bff68f49a72bfb", size = 11388659, upload-time = "2026-05-11T18:53:10.634Z" }, + { url = "https://files.pythonhosted.org/packages/d0/bd/eff5b4399f332ac386c853f6cd2bd3fa2ca0061b9f36ecd9c4d7c4265649/pandas-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51b1fe551acb77dac643c6fda86084d8d446c10fe64b06a9cc29c4cc8540e7f2", size = 11942880, upload-time = "2026-05-11T18:53:13.536Z" }, + { url = "https://files.pythonhosted.org/packages/2c/20/559ace4200982c3887d0b86bfd0d856a2143ef8ddab63cc07934951a964c/pandas-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:a82d532a3351d435432cd913edbccaf8b8e01d4dd0e5ced5a8d2e8ecd94c7e44", size = 9757091, upload-time = "2026-05-11T18:53:16.306Z" }, + { url = "https://files.pythonhosted.org/packages/3a/66/69055a09fe200f29f922a3eeec4804611900b95f52d932ece3393c3c0c19/pandas-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:275c14e0fce14a2ec20eee474aecd305478ea3c1e6f6a9d8fe219a165542717e", size = 9057282, upload-time = "2026-05-11T18:53:18.768Z" }, + { url = "https://files.pythonhosted.org/packages/57/0e/efe801b0e6811e8e650cd21b7f2608e30f08a7067e2bf6e8752b0d56ee3c/pandas-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:46997386d528eb40376ecd6b033cf4a8a1e5282580f68f43de875b78cba2199d", size = 10767016, upload-time = "2026-05-11T18:53:21.227Z" }, + { url = "https://files.pythonhosted.org/packages/ea/dc/eb55135a1d5f0f0519f28da1f609a206d2cad1f9c35c32d51e38dd7261ae/pandas-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:261e308dfb22448384b7580cf719d2f998fe2966c92893c3e77d14008af1f066", size = 10420210, upload-time = "2026-05-11T18:53:23.982Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/3e/b1d5d955ce33ffecb407465a60bc32769d74fcf68224b7ae67ae11d4dea4/pandas-3.0.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd1a5d1def6a46002e964510bdc67c368aa0951df5d1d9f8365336f5a1f490cd", size = 10336126, upload-time = "2026-05-11T18:53:26.731Z" }, + { url = "https://files.pythonhosted.org/packages/f5/76/a01261711ab60a22d71b862f0de20e4c504bf80457270ad8cb42110f6abc/pandas-3.0.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d72828c20c6d6e83e1e22a6a3b47b326b71664112fa9705dcbccfd7a39b62085", size = 10728051, upload-time = "2026-05-11T18:53:29.125Z" }, + { url = "https://files.pythonhosted.org/packages/e9/21/ea191195e587b18cf682e97f433f81b2d0fbe341380e80a3e0d6e4403c8e/pandas-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d26cbe1fcfc12e8fd900e2454163e466b2d3af84f7c75481df7683ffc073d870", size = 11350796, upload-time = "2026-05-11T18:53:32.056Z" }, + { url = "https://files.pythonhosted.org/packages/64/69/f0eaaf54939f0e8c6768fd06be9af2cef9b36048b96dfb9e1b2c685a807e/pandas-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e91cec1879ada0624fc3dc9953c5cbd60208e59c0db28f540c5d6d47502422f", size = 11799741, upload-time = "2026-05-11T18:53:34.985Z" }, + { url = "https://files.pythonhosted.org/packages/45/a4/865e0e510cae5fc2194de4db28be638952de942571ba9125934fd9c01d47/pandas-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:08d789b41f87e0905880e293cedf6197ce71fe67cc081358b1e148a491b9bd13", size = 10499958, upload-time = "2026-05-11T18:53:37.857Z" }, + { url = "https://files.pythonhosted.org/packages/86/54/effdcc3c0ff7a08037889200e148ebe94c16c4f653be078c7b3675955df1/pandas-3.0.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3650109c0f22879df8bd6179ab9ee3d7f1d1d4e7e0094a3f0032d9f51e2e64ac", size = 10336065, upload-time = "2026-05-11T18:53:41.099Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/10/bf2d6738d72748b961a3751ab89522d58c54efc36a8e1a12161216cd45cf/pandas-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:bab900348131a7db1f69a7309ef141fd5680f1487094193bcbbb61791573bf8f", size = 9926101, upload-time = "2026-05-11T18:53:43.515Z" }, + { url = "https://files.pythonhosted.org/packages/ae/e9/e35cf11c8a136e757b956f5f0efdcaa50aecde85ea055f1898dfc68262f3/pandas-3.0.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba7e08b9ac1d54569cd1e256e3668975ed624d6826f7b68df0342b012007bddb", size = 10457553, upload-time = "2026-05-11T18:53:46.394Z" }, + { url = "https://files.pythonhosted.org/packages/58/3b/1cdec6772bdbaf7b25dab360c59f03cadf05492dd724c6540af905389b07/pandas-3.0.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d71c63ae4ebdbf70209742096f1fc46a83a0613c99d4b23766cced9ff8cd62a", size = 10914065, upload-time = "2026-05-11T18:53:49.134Z" }, + { url = "https://files.pythonhosted.org/packages/c4/c2/1ef644445fcd72e3627bceec77e3560636f87ddce4ed841afe76b83b5bf9/pandas-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e3a2ec42c98ffa2565a67e08e218d06d72576d758d90facb7c00805194d8f360", size = 11459188, upload-time = "2026-05-11T18:53:52.527Z" }, + { url = "https://files.pythonhosted.org/packages/7e/49/4d8d4f42cbc9c4adc7a1870f269c02cbd6cd40d059622c06fb298addcbad/pandas-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:335f62418ed562cfc3c49e9e196375c28b729dcef8543abf4f9438e381bf3c76", size = 11982966, upload-time = "2026-05-11T18:53:55.043Z" }, + { url = "https://files.pythonhosted.org/packages/38/55/792619469bab9882d8bbd5865d45a72f6478762d04a9af4bf0d08c503e95/pandas-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:3c20a521bbb85902f79f7270c80a59e1b5452d96d170c034f207181870f97ac5", size = 9876755, upload-time = "2026-05-11T18:53:58.067Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/af/33c469653b0ba03b50c3a98192d4c07f0c75c66b263ceb097fce0ee97d31/pandas-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:a2d2dff8a04f3917b55ab3910c32990f8ddf7eceba114947838cefa976a68977", size = 9198658, upload-time = "2026-05-11T18:54:00.733Z" }, + { url = "https://files.pythonhosted.org/packages/a2/fa/b8c257bd76b8bd060c3a9151c1fca05e9b9c5e3af5d0f549c0356f6d143d/pandas-3.0.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:0d589105b3c14645af1738ff279b2995102d8f7a03b0a66dc8d95550eb513e04", size = 10787242, upload-time = "2026-05-11T18:54:03.564Z" }, + { url = "https://files.pythonhosted.org/packages/54/eb/f19206ffb0bf1919002969aa448b4702c6594845156a6f8050674855aac3/pandas-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:13fc1e853d9e04743d11ba75a985ccbc2a317fe07d8af61e445a6fd24dacd6a6", size = 10436369, upload-time = "2026-05-11T18:54:06.311Z" }, + { url = "https://files.pythonhosted.org/packages/fd/24/c7c39fb4fe22b71a0c2d78bf0c585c600092d85f94f086d2b3b2f6ca27e2/pandas-3.0.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:819959dab7bbd0049c15623fbac4e29a191b9528160a61fb1032242d8ced2d9c", size = 10358306, upload-time = "2026-05-11T18:54:09.085Z" }, + { url = "https://files.pythonhosted.org/packages/16/ec/dd2a9eb7fa1204df88c0864164e35b228ac581062ac612ba0a67fd812e4c/pandas-3.0.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:60ae316d3fd75d1858d450d0db0103ea2be3e7d4a95ec2f064f7e2ae63f7b028", size = 10758394, upload-time = "2026-05-11T18:54:11.956Z" }, + { url = "https://files.pythonhosted.org/packages/95/6e/00c61ea8e85b4f6d8d35e11852a1a4998fc7fafc91c6a602d1cc9c972d64/pandas-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bd3a518890b400d32f9023722dc9a9a5c969f00b415419a3c06c043f09bb5d7d", size = 11375717, upload-time = "2026-05-11T18:54:14.539Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/89/8fc1c268969fac43688d65fd92e67df24bd128d53cb4d2eee534cd307399/pandas-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c39be2d709d01fa972a0cabc522389fceca4f3969332ba25a7d6c5802cf976a", size = 11828897, upload-time = "2026-05-11T18:54:17.146Z" }, + { url = "https://files.pythonhosted.org/packages/56/3b/e7d20dea247a3e6dc0bd8a6953854afbedc03951def4e7371e05e7263e25/pandas-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4db8c527972a821cf5286b40ccc57642a39bc62e62022b42f99f8a67fca8c3a1", size = 10900855, upload-time = "2026-05-11T18:54:19.72Z" }, + { url = "https://files.pythonhosted.org/packages/0f/54/68a0978d1ef8502b8492099beaa6e7a0c1b32e3b5d4f677f5810cb08711c/pandas-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b2c95f8bfc1ee412bf482605d7bfd30c12d1d26bd59fdd91efeef1d4718decb1", size = 9466464, upload-time = "2026-05-11T18:54:22.754Z" }, ] [[package]] name = "parso" -version = "0.8.6" +version = "0.8.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/81/76/a1e769043c0c0c9fe391b702539d594731a4362334cdf4dc25d0c09761e7/parso-0.8.6.tar.gz", hash = "sha256:2b9a0332696df97d454fa67b81618fd69c35a7b90327cbe6ba5c92d2c68a7bfd", size = 401621, upload-time = "2026-02-09T15:45:24.425Z" } +sdist = { url = "https://files.pythonhosted.org/packages/30/4b/90c937815137d43ce71ba043cd3566221e9df6b9c805f24b5d138c9d40a7/parso-0.8.7.tar.gz", hash = "sha256:eaaac4c9fdd5e9e8852dc778d2d7405897ec510f2a298071453e5e3a07914bb1", size = 401824, upload-time = "2026-05-01T23:13:02.138Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/61/fae042894f4296ec49e3f193aff5d7c18440da9e48102c3315e1bc4519a7/parso-0.8.6-py2.py3-none-any.whl", hash = "sha256:2c549f800b70a5c4952197248825584cb00f033b29c692671d3bf08bf380baff", size = 106894, upload-time = "2026-02-09T15:45:21.391Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/5d/8268b644392ee874ee82a635cd0df1773de230bde356c38de28e298392cc/parso-0.8.7-py2.py3-none-any.whl", hash = "sha256:a8926eb2a1b915486941fdbd31e86a4baf88fe8c210f25f2f35ecec5b574ca1c", size = 107025, upload-time = "2026-05-01T23:12:58.867Z" }, ] [[package]] @@ -1404,11 +1369,11 @@ wheels = [ [[package]] name = "platformdirs" -version = "4.9.4" +version = "4.9.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/19/56/8d4c30c8a1d07013911a8fdbd8f89440ef9f08d07a1b50ab8ca8be5a20f9/platformdirs-4.9.4.tar.gz", hash = "sha256:1ec356301b7dc906d83f371c8f487070e99d3ccf9e501686456394622a01a934", size = 28737, upload-time = "2026-03-05T18:34:13.271Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9f/4a/0883b8e3802965322523f0b200ecf33d31f10991d0401162f4b23c698b42/platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a", size = 29400, upload-time = "2026-04-09T00:04:10.812Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868", size = 21216, upload-time = "2026-03-05T18:34:12.172Z" }, + { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348, upload-time = "2026-04-09T00:04:09.463Z" }, ] [[package]] @@ -1460,6 +1425,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, ] 
+[[package]] +name = "psutil" +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" }, + { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" }, + { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, 
upload-time = "2026-01-28T18:15:06.378Z" }, + { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" }, + { url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" }, + { url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" }, + { url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" }, + { url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" }, + { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, + { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, +] + [[package]] name = "ptyprocess" version = "0.7.0" @@ -1640,16 +1633,16 @@ wheels = [ [[package]] name = "pygments" -version = "2.19.2" +version = "2.20.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, + { 
url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, ] [[package]] name = "pytest" -version = "9.0.2" +version = "9.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -1660,9 +1653,9 @@ dependencies = [ { name = "pygments" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] [[package]] @@ -1706,15 +1699,15 @@ wheels = [ [[package]] name = "python-discovery" -version = "1.2.0" +version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name 
= "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9c/90/bcce6b46823c9bec1757c964dc37ed332579be512e17a30e9698095dcae4/python_discovery-1.2.0.tar.gz", hash = "sha256:7d33e350704818b09e3da2bd419d37e21e7c30db6e0977bb438916e06b41b5b1", size = 58055, upload-time = "2026-03-19T01:43:08.248Z" } +sdist = { url = "https://files.pythonhosted.org/packages/48/60/e88788207d81e46362cfbef0d4aaf4c0f49efc3c12d4c3fa3f542c34ebec/python_discovery-1.3.1.tar.gz", hash = "sha256:62f6db28064c9613e7ca76cb3f00c38c839a07c31c00dfe7ed0986493d2150a6", size = 68011, upload-time = "2026-05-12T20:53:36.336Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/3c/2005227cb951df502412de2fa781f800663cccbef8d90ec6f1b371ac2c0d/python_discovery-1.2.0-py3-none-any.whl", hash = "sha256:1e108f1bbe2ed0ef089823d28805d5ad32be8e734b86a5f212bf89b71c266e4a", size = 31524, upload-time = "2026-03-19T01:43:07.045Z" }, + { url = "https://files.pythonhosted.org/packages/b7/6f/a05a317a66fee0aad270011461f1a63a453ed12471249f172f7d2e2bc7b4/python_discovery-1.3.1-py3-none-any.whl", hash = "sha256:ed188687ebb3b82c01a17cd5ac62fc94d9f6487a7f1a0f9dfe89753fec91039c", size = 33185, upload-time = "2026-05-12T20:53:34.969Z" }, ] [[package]] @@ -1728,11 +1721,11 @@ wheels = [ [[package]] name = "pytz" -version = "2026.1.post1" +version = "2026.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/56/db/b8721d71d945e6a8ac63c0fc900b2067181dbb50805958d4d4661cf7d277/pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1", size = 321088, upload-time = "2026-03-03T07:47:50.683Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ff/46/dd499ec9038423421951e4fad73051febaa13d2df82b4064f87af8b8c0c3/pytz-2026.2.tar.gz", hash = "sha256:0e60b47b29f21574376f218fe21abc009894a2321ea16c6754f3cad6eb7cdd6a", size = 320861, upload-time = "2026-05-04T01:35:29.667Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" }, + { url = "https://files.pythonhosted.org/packages/ec/dd/96da98f892250475bdf2328112d7468abdd4acc7b902b6af23f4ed958ea0/pytz-2026.2-py2.py3-none-any.whl", hash = "sha256:04156e608bee23d3792fd45c94ae47fae1036688e75032eea2e3bf0323d1f126", size = 510141, upload-time = "2026-05-04T01:35:27.408Z" }, ] [[package]] @@ -1801,7 +1794,7 @@ wheels = [ [[package]] name = "requests" -version = "2.33.0" +version = "2.34.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, @@ -1809,9 +1802,9 @@ dependencies = [ { name = "idna" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/34/64/8860370b167a9721e8956ae116825caff829224fbca0ca6e7bf8ddef8430/requests-2.33.0.tar.gz", hash = "sha256:c7ebc5e8b0f21837386ad0e1c8fe8b829fa5f544d8df3b2253bff14ef29d7652", size = 134232, upload-time = "2026-03-25T15:10:41.586Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/c3/e2a2b89f2d3e2179abd6d00ebd70bff6273f37fb3e0cc209f48b39d00cbf/requests-2.34.2.tar.gz", hash = "sha256:f288924cae4e29463698d6d60bc6a4da69c89185ad1e0bcc4104f584e960b9ed", size = 142856, upload-time = "2026-05-14T19:25:27.735Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/56/5d/c814546c2333ceea4ba42262d8c4d55763003e767fa169adc693bd524478/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b", size = 65017, upload-time = "2026-03-25T15:10:40.382Z" }, + { url = "https://files.pythonhosted.org/packages/a0/f4/c67b0b3f1b9245e8d266f0f112c500d50e5b4e83cb6f3b71b6528104182a/requests-2.34.2-py3-none-any.whl", hash = "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0", size = 
73075, upload-time = "2026-05-14T19:25:26.443Z" }, ] [[package]] @@ -1987,56 +1980,56 @@ wheels = [ [[package]] name = "tomli" -version = "2.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/30/31573e9457673ab10aa432461bee537ce6cef177667deca369efb79df071/tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c", size = 17477, upload-time = "2026-01-11T11:22:38.165Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/d9/3dc2289e1f3b32eb19b9785b6a006b28ee99acb37d1d47f78d4c10e28bf8/tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867", size = 153663, upload-time = "2026-01-11T11:21:45.27Z" }, - { url = "https://files.pythonhosted.org/packages/51/32/ef9f6845e6b9ca392cd3f64f9ec185cc6f09f0a2df3db08cbe8809d1d435/tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9", size = 148469, upload-time = "2026-01-11T11:21:46.873Z" }, - { url = "https://files.pythonhosted.org/packages/d6/c2/506e44cce89a8b1b1e047d64bd495c22c9f71f21e05f380f1a950dd9c217/tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95", size = 236039, upload-time = "2026-01-11T11:21:48.503Z" }, - { url = "https://files.pythonhosted.org/packages/b3/40/e1b65986dbc861b7e986e8ec394598187fa8aee85b1650b01dd925ca0be8/tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76", size = 243007, upload-time = "2026-01-11T11:21:49.456Z" }, - { url = "https://files.pythonhosted.org/packages/9c/6f/6e39ce66b58a5b7ae572a0f4352ff40c71e8573633deda43f6a379d56b3e/tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d", size = 240875, upload-time = "2026-01-11T11:21:50.755Z" }, - { url = "https://files.pythonhosted.org/packages/aa/ad/cb089cb190487caa80204d503c7fd0f4d443f90b95cf4ef5cf5aa0f439b0/tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576", size = 246271, upload-time = "2026-01-11T11:21:51.81Z" }, - { url = "https://files.pythonhosted.org/packages/0b/63/69125220e47fd7a3a27fd0de0c6398c89432fec41bc739823bcc66506af6/tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a", size = 96770, upload-time = "2026-01-11T11:21:52.647Z" }, - { url = "https://files.pythonhosted.org/packages/1e/0d/a22bb6c83f83386b0008425a6cd1fa1c14b5f3dd4bad05e98cf3dbbf4a64/tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa", size = 107626, upload-time = "2026-01-11T11:21:53.459Z" }, - { url = "https://files.pythonhosted.org/packages/2f/6d/77be674a3485e75cacbf2ddba2b146911477bd887dda9d8c9dfb2f15e871/tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614", size = 94842, upload-time = "2026-01-11T11:21:54.831Z" }, - { url = "https://files.pythonhosted.org/packages/3c/43/7389a1869f2f26dba52404e1ef13b4784b6b37dac93bac53457e3ff24ca3/tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1", size = 154894, upload-time = "2026-01-11T11:21:56.07Z" }, - { url = "https://files.pythonhosted.org/packages/e9/05/2f9bf110b5294132b2edf13fe6ca6ae456204f3d749f623307cbb7a946f2/tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8", size = 149053, upload-time = "2026-01-11T11:21:57.467Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/41/1eda3ca1abc6f6154a8db4d714a4d35c4ad90adc0bcf700657291593fbf3/tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a", size = 243481, upload-time = "2026-01-11T11:21:58.661Z" }, - { url = "https://files.pythonhosted.org/packages/d2/6d/02ff5ab6c8868b41e7d4b987ce2b5f6a51d3335a70aa144edd999e055a01/tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1", size = 251720, upload-time = "2026-01-11T11:22:00.178Z" }, - { url = "https://files.pythonhosted.org/packages/7b/57/0405c59a909c45d5b6f146107c6d997825aa87568b042042f7a9c0afed34/tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b", size = 247014, upload-time = "2026-01-11T11:22:01.238Z" }, - { url = "https://files.pythonhosted.org/packages/2c/0e/2e37568edd944b4165735687cbaf2fe3648129e440c26d02223672ee0630/tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51", size = 251820, upload-time = "2026-01-11T11:22:02.727Z" }, - { url = "https://files.pythonhosted.org/packages/5a/1c/ee3b707fdac82aeeb92d1a113f803cf6d0f37bdca0849cb489553e1f417a/tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729", size = 97712, upload-time = "2026-01-11T11:22:03.777Z" }, - { url = "https://files.pythonhosted.org/packages/69/13/c07a9177d0b3bab7913299b9278845fc6eaaca14a02667c6be0b0a2270c8/tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da", size = 108296, upload-time = "2026-01-11T11:22:04.86Z" }, - { url = 
"https://files.pythonhosted.org/packages/18/27/e267a60bbeeee343bcc279bb9e8fbed0cbe224bc7b2a3dc2975f22809a09/tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3", size = 94553, upload-time = "2026-01-11T11:22:05.854Z" }, - { url = "https://files.pythonhosted.org/packages/34/91/7f65f9809f2936e1f4ce6268ae1903074563603b2a2bd969ebbda802744f/tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0", size = 154915, upload-time = "2026-01-11T11:22:06.703Z" }, - { url = "https://files.pythonhosted.org/packages/20/aa/64dd73a5a849c2e8f216b755599c511badde80e91e9bc2271baa7b2cdbb1/tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e", size = 149038, upload-time = "2026-01-11T11:22:07.56Z" }, - { url = "https://files.pythonhosted.org/packages/9e/8a/6d38870bd3d52c8d1505ce054469a73f73a0fe62c0eaf5dddf61447e32fa/tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4", size = 242245, upload-time = "2026-01-11T11:22:08.344Z" }, - { url = "https://files.pythonhosted.org/packages/59/bb/8002fadefb64ab2669e5b977df3f5e444febea60e717e755b38bb7c41029/tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e", size = 250335, upload-time = "2026-01-11T11:22:09.951Z" }, - { url = "https://files.pythonhosted.org/packages/a5/3d/4cdb6f791682b2ea916af2de96121b3cb1284d7c203d97d92d6003e91c8d/tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c", size = 245962, upload-time = "2026-01-11T11:22:11.27Z" }, - { url = 
"https://files.pythonhosted.org/packages/f2/4a/5f25789f9a460bd858ba9756ff52d0830d825b458e13f754952dd15fb7bb/tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f", size = 250396, upload-time = "2026-01-11T11:22:12.325Z" }, - { url = "https://files.pythonhosted.org/packages/aa/2f/b73a36fea58dfa08e8b3a268750e6853a6aac2a349241a905ebd86f3047a/tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86", size = 97530, upload-time = "2026-01-11T11:22:13.865Z" }, - { url = "https://files.pythonhosted.org/packages/3b/af/ca18c134b5d75de7e8dc551c5234eaba2e8e951f6b30139599b53de9c187/tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87", size = 108227, upload-time = "2026-01-11T11:22:15.224Z" }, - { url = "https://files.pythonhosted.org/packages/22/c3/b386b832f209fee8073c8138ec50f27b4460db2fdae9ffe022df89a57f9b/tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132", size = 94748, upload-time = "2026-01-11T11:22:16.009Z" }, - { url = "https://files.pythonhosted.org/packages/f3/c4/84047a97eb1004418bc10bdbcfebda209fca6338002eba2dc27cc6d13563/tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6", size = 154725, upload-time = "2026-01-11T11:22:17.269Z" }, - { url = "https://files.pythonhosted.org/packages/a8/5d/d39038e646060b9d76274078cddf146ced86dc2b9e8bbf737ad5983609a0/tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc", size = 148901, upload-time = "2026-01-11T11:22:18.287Z" }, - { url = 
"https://files.pythonhosted.org/packages/73/e5/383be1724cb30f4ce44983d249645684a48c435e1cd4f8b5cded8a816d3c/tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66", size = 243375, upload-time = "2026-01-11T11:22:19.154Z" }, - { url = "https://files.pythonhosted.org/packages/31/f0/bea80c17971c8d16d3cc109dc3585b0f2ce1036b5f4a8a183789023574f2/tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d", size = 250639, upload-time = "2026-01-11T11:22:20.168Z" }, - { url = "https://files.pythonhosted.org/packages/2c/8f/2853c36abbb7608e3f945d8a74e32ed3a74ee3a1f468f1ffc7d1cb3abba6/tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702", size = 246897, upload-time = "2026-01-11T11:22:21.544Z" }, - { url = "https://files.pythonhosted.org/packages/49/f0/6c05e3196ed5337b9fe7ea003e95fd3819a840b7a0f2bf5a408ef1dad8ed/tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8", size = 254697, upload-time = "2026-01-11T11:22:23.058Z" }, - { url = "https://files.pythonhosted.org/packages/f3/f5/2922ef29c9f2951883525def7429967fc4d8208494e5ab524234f06b688b/tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776", size = 98567, upload-time = "2026-01-11T11:22:24.033Z" }, - { url = "https://files.pythonhosted.org/packages/7b/31/22b52e2e06dd2a5fdbc3ee73226d763b184ff21fc24e20316a44ccc4d96b/tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475", size = 108556, upload-time = "2026-01-11T11:22:25.378Z" }, - { url = 
"https://files.pythonhosted.org/packages/48/3d/5058dff3255a3d01b705413f64f4306a141a8fd7a251e5a495e3f192a998/tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2", size = 96014, upload-time = "2026-01-11T11:22:26.138Z" }, - { url = "https://files.pythonhosted.org/packages/b8/4e/75dab8586e268424202d3a1997ef6014919c941b50642a1682df43204c22/tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9", size = 163339, upload-time = "2026-01-11T11:22:27.143Z" }, - { url = "https://files.pythonhosted.org/packages/06/e3/b904d9ab1016829a776d97f163f183a48be6a4deb87304d1e0116a349519/tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0", size = 159490, upload-time = "2026-01-11T11:22:28.399Z" }, - { url = "https://files.pythonhosted.org/packages/e3/5a/fc3622c8b1ad823e8ea98a35e3c632ee316d48f66f80f9708ceb4f2a0322/tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df", size = 269398, upload-time = "2026-01-11T11:22:29.345Z" }, - { url = "https://files.pythonhosted.org/packages/fd/33/62bd6152c8bdd4c305ad9faca48f51d3acb2df1f8791b1477d46ff86e7f8/tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d", size = 276515, upload-time = "2026-01-11T11:22:30.327Z" }, - { url = "https://files.pythonhosted.org/packages/4b/ff/ae53619499f5235ee4211e62a8d7982ba9e439a0fb4f2f351a93d67c1dd2/tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f", size = 273806, upload-time = "2026-01-11T11:22:32.56Z" }, - { url = 
"https://files.pythonhosted.org/packages/47/71/cbca7787fa68d4d0a9f7072821980b39fbb1b6faeb5f5cf02f4a5559fa28/tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b", size = 281340, upload-time = "2026-01-11T11:22:33.505Z" }, - { url = "https://files.pythonhosted.org/packages/f5/00/d595c120963ad42474cf6ee7771ad0d0e8a49d0f01e29576ee9195d9ecdf/tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087", size = 108106, upload-time = "2026-01-11T11:22:34.451Z" }, - { url = "https://files.pythonhosted.org/packages/de/69/9aa0c6a505c2f80e519b43764f8b4ba93b5a0bbd2d9a9de6e2b24271b9a5/tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd", size = 120504, upload-time = "2026-01-11T11:22:35.764Z" }, - { url = "https://files.pythonhosted.org/packages/b3/9f/f1668c281c58cfae01482f7114a4b88d345e4c140386241a1a24dcc9e7bc/tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4", size = 99561, upload-time = "2026-01-11T11:22:36.624Z" }, - { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" }, +version = "2.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/22/de/48c59722572767841493b26183a0d1cc411d54fd759c5607c4590b6563a6/tomli-2.4.1.tar.gz", hash = "sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f", size = 17543, upload-time = "2026-03-25T20:22:03.828Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f4/11/db3d5885d8528263d8adc260bb2d28ebf1270b96e98f0e0268d32b8d9900/tomli-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f8f0fc26ec2cc2b965b7a3b87cd19c5c6b8c5e5f436b984e85f486d652285c30", size = 154704, upload-time = "2026-03-25T20:21:10.473Z" }, + { url = "https://files.pythonhosted.org/packages/6d/f7/675db52c7e46064a9aa928885a9b20f4124ecb9bc2e1ce74c9106648d202/tomli-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ab97e64ccda8756376892c53a72bd1f964e519c77236368527f758fbc36a53a", size = 149454, upload-time = "2026-03-25T20:21:12.036Z" }, + { url = "https://files.pythonhosted.org/packages/61/71/81c50943cf953efa35bce7646caab3cf457a7d8c030b27cfb40d7235f9ee/tomli-2.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96481a5786729fd470164b47cdb3e0e58062a496f455ee41b4403be77cb5a076", size = 237561, upload-time = "2026-03-25T20:21:13.098Z" }, + { url = "https://files.pythonhosted.org/packages/48/c1/f41d9cb618acccca7df82aaf682f9b49013c9397212cb9f53219e3abac37/tomli-2.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a881ab208c0baf688221f8cecc5401bd291d67e38a1ac884d6736cbcd8247e9", size = 243824, upload-time = "2026-03-25T20:21:14.569Z" }, + { url = "https://files.pythonhosted.org/packages/22/e4/5a816ecdd1f8ca51fb756ef684b90f2780afc52fc67f987e3c61d800a46d/tomli-2.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47149d5bd38761ac8be13a84864bf0b7b70bc051806bc3669ab1cbc56216b23c", size = 242227, upload-time = "2026-03-25T20:21:15.712Z" }, + { url = "https://files.pythonhosted.org/packages/6b/49/2b2a0ef529aa6eec245d25f0c703e020a73955ad7edf73e7f54ddc608aa5/tomli-2.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ec9bfaf3ad2df51ace80688143a6a4ebc09a248f6ff781a9945e51937008fcbc", size = 247859, upload-time = "2026-03-25T20:21:17.001Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/bd/6c1a630eaca337e1e78c5903104f831bda934c426f9231429396ce3c3467/tomli-2.4.1-cp311-cp311-win32.whl", hash = "sha256:ff2983983d34813c1aeb0fa89091e76c3a22889ee83ab27c5eeb45100560c049", size = 97204, upload-time = "2026-03-25T20:21:18.079Z" }, + { url = "https://files.pythonhosted.org/packages/42/59/71461df1a885647e10b6bb7802d0b8e66480c61f3f43079e0dcd315b3954/tomli-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:5ee18d9ebdb417e384b58fe414e8d6af9f4e7a0ae761519fb50f721de398dd4e", size = 108084, upload-time = "2026-03-25T20:21:18.978Z" }, + { url = "https://files.pythonhosted.org/packages/b8/83/dceca96142499c069475b790e7913b1044c1a4337e700751f48ed723f883/tomli-2.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:c2541745709bad0264b7d4705ad453b76ccd191e64aa6f0fc66b69a293a45ece", size = 95285, upload-time = "2026-03-25T20:21:20.309Z" }, + { url = "https://files.pythonhosted.org/packages/c1/ba/42f134a3fe2b370f555f44b1d72feebb94debcab01676bf918d0cb70e9aa/tomli-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c742f741d58a28940ce01d58f0ab2ea3ced8b12402f162f4d534dfe18ba1cd6a", size = 155924, upload-time = "2026-03-25T20:21:21.626Z" }, + { url = "https://files.pythonhosted.org/packages/dc/c7/62d7a17c26487ade21c5422b646110f2162f1fcc95980ef7f63e73c68f14/tomli-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f86fd587c4ed9dd76f318225e7d9b29cfc5a9d43de44e5754db8d1128487085", size = 150018, upload-time = "2026-03-25T20:21:23.002Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/79d13d7c15f13bdef410bdd49a6485b1c37d28968314eabee452c22a7fda/tomli-2.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ff18e6a727ee0ab0388507b89d1bc6a22b138d1e2fa56d1ad494586d61d2eae9", size = 244948, upload-time = "2026-03-25T20:21:24.04Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/90/d62ce007a1c80d0b2c93e02cab211224756240884751b94ca72df8a875ca/tomli-2.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:136443dbd7e1dee43c68ac2694fde36b2849865fa258d39bf822c10e8068eac5", size = 253341, upload-time = "2026-03-25T20:21:25.177Z" }, + { url = "https://files.pythonhosted.org/packages/1a/7e/caf6496d60152ad4ed09282c1885cca4eea150bfd007da84aea07bcc0a3e/tomli-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5e262d41726bc187e69af7825504c933b6794dc3fbd5945e41a79bb14c31f585", size = 248159, upload-time = "2026-03-25T20:21:26.364Z" }, + { url = "https://files.pythonhosted.org/packages/99/e7/c6f69c3120de34bbd882c6fba7975f3d7a746e9218e56ab46a1bc4b42552/tomli-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5cb41aa38891e073ee49d55fbc7839cfdb2bc0e600add13874d048c94aadddd1", size = 253290, upload-time = "2026-03-25T20:21:27.46Z" }, + { url = "https://files.pythonhosted.org/packages/d6/2f/4a3c322f22c5c66c4b836ec58211641a4067364f5dcdd7b974b4c5da300c/tomli-2.4.1-cp312-cp312-win32.whl", hash = "sha256:da25dc3563bff5965356133435b757a795a17b17d01dbc0f42fb32447ddfd917", size = 98141, upload-time = "2026-03-25T20:21:28.492Z" }, + { url = "https://files.pythonhosted.org/packages/24/22/4daacd05391b92c55759d55eaee21e1dfaea86ce5c571f10083360adf534/tomli-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:52c8ef851d9a240f11a88c003eacb03c31fc1c9c4ec64a99a0f922b93874fda9", size = 108847, upload-time = "2026-03-25T20:21:29.386Z" }, + { url = "https://files.pythonhosted.org/packages/68/fd/70e768887666ddd9e9f5d85129e84910f2db2796f9096aa02b721a53098d/tomli-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:f758f1b9299d059cc3f6546ae2af89670cb1c4d48ea29c3cacc4fe7de3058257", size = 95088, upload-time = "2026-03-25T20:21:30.677Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/06/b823a7e818c756d9a7123ba2cda7d07bc2dd32835648d1a7b7b7a05d848d/tomli-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36d2bd2ad5fb9eaddba5226aa02c8ec3fa4f192631e347b3ed28186d43be6b54", size = 155866, upload-time = "2026-03-25T20:21:31.65Z" }, + { url = "https://files.pythonhosted.org/packages/14/6f/12645cf7f08e1a20c7eb8c297c6f11d31c1b50f316a7e7e1e1de6e2e7b7e/tomli-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb0dc4e38e6a1fd579e5d50369aa2e10acfc9cace504579b2faabb478e76941a", size = 149887, upload-time = "2026-03-25T20:21:33.028Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e0/90637574e5e7212c09099c67ad349b04ec4d6020324539297b634a0192b0/tomli-2.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7f2c7f2b9ca6bdeef8f0fa897f8e05085923eb091721675170254cbc5b02897", size = 243704, upload-time = "2026-03-25T20:21:34.51Z" }, + { url = "https://files.pythonhosted.org/packages/10/8f/d3ddb16c5a4befdf31a23307f72828686ab2096f068eaf56631e136c1fdd/tomli-2.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3c6818a1a86dd6dca7ddcaaf76947d5ba31aecc28cb1b67009a5877c9a64f3f", size = 251628, upload-time = "2026-03-25T20:21:36.012Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f1/dbeeb9116715abee2485bf0a12d07a8f31af94d71608c171c45f64c0469d/tomli-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d312ef37c91508b0ab2cee7da26ec0b3ed2f03ce12bd87a588d771ae15dcf82d", size = 247180, upload-time = "2026-03-25T20:21:37.136Z" }, + { url = "https://files.pythonhosted.org/packages/d3/74/16336ffd19ed4da28a70959f92f506233bd7cfc2332b20bdb01591e8b1d1/tomli-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51529d40e3ca50046d7606fa99ce3956a617f9b36380da3b7f0dd3dd28e68cb5", size = 251674, upload-time = "2026-03-25T20:21:38.298Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/f9/229fa3434c590ddf6c0aa9af64d3af4b752540686cace29e6281e3458469/tomli-2.4.1-cp313-cp313-win32.whl", hash = "sha256:2190f2e9dd7508d2a90ded5ed369255980a1bcdd58e52f7fe24b8162bf9fedbd", size = 97976, upload-time = "2026-03-25T20:21:39.316Z" }, + { url = "https://files.pythonhosted.org/packages/6a/1e/71dfd96bcc1c775420cb8befe7a9d35f2e5b1309798f009dca17b7708c1e/tomli-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d65a2fbf9d2f8352685bc1364177ee3923d6baf5e7f43ea4959d7d8bc326a36", size = 108755, upload-time = "2026-03-25T20:21:40.248Z" }, + { url = "https://files.pythonhosted.org/packages/83/7a/d34f422a021d62420b78f5c538e5b102f62bea616d1d75a13f0a88acb04a/tomli-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:4b605484e43cdc43f0954ddae319fb75f04cc10dd80d830540060ee7cd0243cd", size = 95265, upload-time = "2026-03-25T20:21:41.219Z" }, + { url = "https://files.pythonhosted.org/packages/3c/fb/9a5c8d27dbab540869f7c1f8eb0abb3244189ce780ba9cd73f3770662072/tomli-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fd0409a3653af6c147209d267a0e4243f0ae46b011aa978b1080359fddc9b6cf", size = 155726, upload-time = "2026-03-25T20:21:42.23Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/d2f816630cc771ad836af54f5001f47a6f611d2d39535364f148b6a92d6b/tomli-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a120733b01c45e9a0c34aeef92bf0cf1d56cfe81ed9d47d562f9ed591a9828ac", size = 149859, upload-time = "2026-03-25T20:21:43.386Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/66341bdb858ad9bd0ceab5a86f90eddab127cf8b046418009f2125630ecb/tomli-2.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:559db847dc486944896521f68d8190be1c9e719fced785720d2216fe7022b662", size = 244713, upload-time = "2026-03-25T20:21:44.474Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/6d/c5fad00d82b3c7a3ab6189bd4b10e60466f22cfe8a08a9394185c8a8111c/tomli-2.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01f520d4f53ef97964a240a035ec2a869fe1a37dde002b57ebc4417a27ccd853", size = 252084, upload-time = "2026-03-25T20:21:45.62Z" }, + { url = "https://files.pythonhosted.org/packages/00/71/3a69e86f3eafe8c7a59d008d245888051005bd657760e96d5fbfb0b740c2/tomli-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7f94b27a62cfad8496c8d2513e1a222dd446f095fca8987fceef261225538a15", size = 247973, upload-time = "2026-03-25T20:21:46.937Z" }, + { url = "https://files.pythonhosted.org/packages/67/50/361e986652847fec4bd5e4a0208752fbe64689c603c7ae5ea7cb16b1c0ca/tomli-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ede3e6487c5ef5d28634ba3f31f989030ad6af71edfb0055cbbd14189ff240ba", size = 256223, upload-time = "2026-03-25T20:21:48.467Z" }, + { url = "https://files.pythonhosted.org/packages/8c/9a/b4173689a9203472e5467217e0154b00e260621caa227b6fa01feab16998/tomli-2.4.1-cp314-cp314-win32.whl", hash = "sha256:3d48a93ee1c9b79c04bb38772ee1b64dcf18ff43085896ea460ca8dec96f35f6", size = 98973, upload-time = "2026-03-25T20:21:49.526Z" }, + { url = "https://files.pythonhosted.org/packages/14/58/640ac93bf230cd27d002462c9af0d837779f8773bc03dee06b5835208214/tomli-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:88dceee75c2c63af144e456745e10101eb67361050196b0b6af5d717254dddf7", size = 109082, upload-time = "2026-03-25T20:21:50.506Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2f/702d5e05b227401c1068f0d386d79a589bb12bf64c3d2c72ce0631e3bc49/tomli-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:b8c198f8c1805dc42708689ed6864951fd2494f924149d3e4bce7710f8eb5232", size = 96490, upload-time = "2026-03-25T20:21:51.474Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/4b/b877b05c8ba62927d9865dd980e34a755de541eb65fffba52b4cc495d4d2/tomli-2.4.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:d4d8fe59808a54658fcc0160ecfb1b30f9089906c50b23bcb4c69eddc19ec2b4", size = 164263, upload-time = "2026-03-25T20:21:52.543Z" }, + { url = "https://files.pythonhosted.org/packages/24/79/6ab420d37a270b89f7195dec5448f79400d9e9c1826df982f3f8e97b24fd/tomli-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7008df2e7655c495dd12d2a4ad038ff878d4ca4b81fccaf82b714e07eae4402c", size = 160736, upload-time = "2026-03-25T20:21:53.674Z" }, + { url = "https://files.pythonhosted.org/packages/02/e0/3630057d8eb170310785723ed5adcdfb7d50cb7e6455f85ba8a3deed642b/tomli-2.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d8591993e228b0c930c4bb0db464bdad97b3289fb981255d6c9a41aedc84b2d", size = 270717, upload-time = "2026-03-25T20:21:55.129Z" }, + { url = "https://files.pythonhosted.org/packages/7a/b4/1613716072e544d1a7891f548d8f9ec6ce2faf42ca65acae01d76ea06bb0/tomli-2.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:734e20b57ba95624ecf1841e72b53f6e186355e216e5412de414e3c51e5e3c41", size = 278461, upload-time = "2026-03-25T20:21:56.228Z" }, + { url = "https://files.pythonhosted.org/packages/05/38/30f541baf6a3f6df77b3df16b01ba319221389e2da59427e221ef417ac0c/tomli-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8a650c2dbafa08d42e51ba0b62740dae4ecb9338eefa093aa5c78ceb546fcd5c", size = 274855, upload-time = "2026-03-25T20:21:57.653Z" }, + { url = "https://files.pythonhosted.org/packages/77/a3/ec9dd4fd2c38e98de34223b995a3b34813e6bdadf86c75314c928350ed14/tomli-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:504aa796fe0569bb43171066009ead363de03675276d2d121ac1a4572397870f", size = 283144, upload-time = "2026-03-25T20:21:59.089Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/be/605a6261cac79fba2ec0c9827e986e00323a1945700969b8ee0b30d85453/tomli-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:b1d22e6e9387bf4739fbe23bfa80e93f6b0373a7f1b96c6227c32bef95a4d7a8", size = 108683, upload-time = "2026-03-25T20:22:00.214Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/da524626d3b9cc40c168a13da8335fe1c51be12c0a63685cc6db7308daae/tomli-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2c1c351919aca02858f740c6d33adea0c5deea37f9ecca1cc1ef9e884a619d26", size = 121196, upload-time = "2026-03-25T20:22:01.169Z" }, + { url = "https://files.pythonhosted.org/packages/5a/cd/e80b62269fc78fc36c9af5a6b89c835baa8af28ff5ad28c7028d60860320/tomli-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:eab21f45c7f66c13f2a9e0e1535309cee140182a9cdae1e041d02e47291e8396", size = 100393, upload-time = "2026-03-25T20:22:02.137Z" }, + { url = "https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl", hash = "sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe", size = 14583, upload-time = "2026-03-25T20:22:03.012Z" }, ] [[package]] @@ -2050,11 +2043,11 @@ wheels = [ [[package]] name = "traitlets" -version = "5.14.3" +version = "5.15.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621, upload-time = "2024-04-19T11:11:49.746Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/22/40f55b26baeab80c2d7b3f1db0682f8954e4617fee7d90ce634022ef05c6/traitlets-5.15.0.tar.gz", hash = "sha256:4fead733f81cf1c4c938e06f8ca4633896833c9d89eff878159457f4d4392971", size = 163197, upload-time = "2026-05-06T08:05:58.016Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, + { url = "https://files.pythonhosted.org/packages/da/98/a9937a969d018a23badfea0b381f66783649d48e0ea6c41923265c3cbeb3/traitlets-5.15.0-py3-none-any.whl", hash = "sha256:fb36a18867a6803deab09f3c5e0fa81bb7b26a5c9e82501c9933f759166eff40", size = 85877, upload-time = "2026-05-06T08:05:55.853Z" }, ] [[package]] @@ -2080,20 +2073,20 @@ wheels = [ [[package]] name = "tzdata" -version = "2025.3" +version = "2026.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/19/1b9b0e29f30c6d35cb345486df41110984ea67ae69dddbc0e8a100999493/tzdata-2026.2.tar.gz", hash = "sha256:9173fde7d80d9018e02a662e168e5a2d04f87c41ea174b139fbef642eda62d10", size = 198254, upload-time = "2026-04-24T15:22:08.651Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, + { url = "https://files.pythonhosted.org/packages/ce/e4/dccd7f47c4b64213ac01ef921a1337ee6e30e8c6466046018326977efd95/tzdata-2026.2-py2.py3-none-any.whl", hash = "sha256:bbe9af844f658da81a5f95019480da3a89415801f6cc966806612cc7169bffe7", size = 349321, upload-time = "2026-04-24T15:22:05.876Z" }, ] [[package]] name = "urllib3" -version = "2.6.3" +version = "2.7.0" source 
= { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +sdist = { url = "https://files.pythonhosted.org/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602, upload-time = "2026-05-07T16:13:18.596Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, + { url = "https://files.pythonhosted.org/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087, upload-time = "2026-05-07T16:13:17.151Z" }, ] [[package]] @@ -2107,7 +2100,7 @@ wheels = [ [[package]] name = "virtualenv" -version = "21.2.0" +version = "21.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, @@ -2116,16 +2109,16 @@ dependencies = [ { name = "python-discovery" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/aa/92/58199fe10049f9703c2666e809c4f686c54ef0a68b0f6afccf518c0b1eb9/virtualenv-21.2.0.tar.gz", hash = "sha256:1720dc3a62ef5b443092e3f499228599045d7fea4c79199770499df8becf9098", size = 5840618, upload-time = "2026-03-09T17:24:38.013Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/15/ba/1f6e8c957e4932be060dcdc482d339c12e0216351478add3645cdaa53c05/virtualenv-21.3.3.tar.gz", hash = "sha256:f5bda277e553b1c2b3c1a8debfc30496e1288cc93ce6b7b71b3280047e317328", size = 7613784, upload-time = "2026-05-13T18:01:30.19Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/59/7d02447a55b2e55755011a647479041bc92a82e143f96a8195cb33bd0a1c/virtualenv-21.2.0-py3-none-any.whl", hash = "sha256:1bd755b504931164a5a496d217c014d098426cddc79363ad66ac78125f9d908f", size = 5825084, upload-time = "2026-03-09T17:24:35.378Z" }, + { url = "https://files.pythonhosted.org/packages/f4/34/a9dbe051de88a63eb7408ea66630bac38e72f7f6077d4be58737106860d9/virtualenv-21.3.3-py3-none-any.whl", hash = "sha256:7d5987d8369e098e41406efb780a3d4ca79280097293899e351a6407ee153ab3", size = 7594554, upload-time = "2026-05-13T18:01:27.815Z" }, ] [[package]] name = "wcwidth" -version = "0.6.0" +version = "0.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/35/a2/8e3becb46433538a38726c948d3399905a4c7cabd0df578ede5dc51f0ec2/wcwidth-0.6.0.tar.gz", hash = "sha256:cdc4e4262d6ef9a1a57e018384cbeb1208d8abbc64176027e2c2455c81313159", size = 159684, upload-time = "2026-02-06T19:19:40.919Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/ee/afaf0f85a9a18fe47a67f1e4422ed6cf1fe642f0ae0a2f81166231303c52/wcwidth-0.7.0.tar.gz", hash = "sha256:90e3a7ea092341c44b99562e75d09e4d5160fe7a3974c6fb842a101a95e7eed0", size = 182132, upload-time = "2026-05-02T16:04:12.653Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/68/5a/199c59e0a824a3db2b89c5d2dade7ab5f9624dbf6448dc291b46d5ec94d3/wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad", size = 94189, upload-time = "2026-02-06T19:19:39.646Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/52/e465037f5375f43533d1a80b6923955201596a99142ed524d77b571a1418/wcwidth-0.7.0-py3-none-any.whl", hash = "sha256:5d69154c429a82910e241c738cd0e2976fac8a2dd47a1a805f4afed1c0f136f2", size = 110825, upload-time = "2026-05-02T16:04:11.033Z" }, ] From 8c3873a004ff9e3978bcb994fe6a02a52a328217 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 15 May 2026 11:00:08 -0700 Subject: [PATCH 19/23] updating github actions --- .github/workflows/main.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 0a751d29..ac007896 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -15,10 +15,10 @@ jobs: architecture: ['x64'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} architecture: ${{ matrix.architecture }} @@ -50,7 +50,7 @@ jobs: - name: Upload coverage if: ${{ matrix.python-version == '3.14' }} - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} file: coverage.xml From 83e9e61a06f567c9406da13d66a3a187c6ac2c6a Mon Sep 17 00:00:00 2001 From: AI Agent Date: Fri, 15 May 2026 11:40:33 -0700 Subject: [PATCH 20/23] ignore non-deterministic worker stats in test_cache_deeply_nested_a2 Multiprocessing added WORKER * keys to diff.get_stats() output, but those counts vary across runs. Filter them out before comparing to expected_stats so the slow test stays deterministic on 3.14 (the only matrix entry that runs --runslow). 
--- tests/test_cache.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_cache.py b/tests/test_cache.py index b4545ebe..c070c7dd 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -46,7 +46,9 @@ def test_cache_deeply_nested_a2(self, nested_a_t1, nested_a_t2, nested_a_result) "MAX PASS LIMIT REACHED": False, "MAX DIFF LIMIT REACHED": False, } - assert not DeepDiff(expected_stats, stats, use_log_scale=True) + # Worker-prefixed keys come from multiprocessing and are non-deterministic. + filtered_stats = {k: v for k, v in stats.items() if not k.startswith("WORKER ")} + assert not DeepDiff(expected_stats, filtered_stats, use_log_scale=True) assert nested_a_result == diff diff_of_diff = DeepDiff(nested_a_result, diff.to_dict(), ignore_order=False) assert not diff_of_diff From f40f163a75fc143175d5abea912bd98b7855c25b Mon Sep 17 00:00:00 2001 From: AI Agent Date: Fri, 15 May 2026 11:52:16 -0700 Subject: [PATCH 21/23] memoize GlobPathMatcher to remove exponential cliff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wildcard matcher previously made ~280k recursive calls on a pattern like root[**][**][**][**][**]['x'] against a 20-deep target — the ** backtracking explored overlapping (pi, ti) states repeatedly. Add a per-call memo dict keyed on (pi, ti) to both _match_segments and _could_match_descendant, bounding work to O(len(pattern) * len(target)). Same input now uses ~940 calls. Also collapse match_or_is_descendant from an O(len(target)) loop over every prefix into a single _match_segments pass with allow_extra_target=True — equivalent to 'pattern matches some prefix of target'. Switch sentinel comparisons from == to is (the wildcards are module-level singletons; saves the _WildcardToken.__eq__ call on the hot path). Add two regression tests asserting bounded call counts so this can't silently regress. 
--- deepdiff/path.py | 107 +++++++++++++++++++++------------------ tests/test_glob_paths.py | 43 ++++++++++++++++ 2 files changed, 102 insertions(+), 48 deletions(-) diff --git a/deepdiff/path.py b/deepdiff/path.py index e710a101..3854b6bf 100644 --- a/deepdiff/path.py +++ b/deepdiff/path.py @@ -389,9 +389,8 @@ def __init__(self, pattern_path): def match(self, path_string): """Return True if *path_string* matches this pattern exactly.""" - elements = _path_to_elements(path_string, root_element=('root', GETATTR)) - target = elements[1:] - return self._match_segments(self._pattern, target, 0, 0) + target = _path_to_elements(path_string, root_element=('root', GETATTR))[1:] + return self._match_segments(target, 0, 0, {}, allow_extra_target=False) def match_or_is_ancestor(self, path_string): """Return True if *path_string* matches OR is an ancestor of a potential match. @@ -399,79 +398,91 @@ def match_or_is_ancestor(self, path_string): This is needed for ``include_paths``: we must not prune a path that could lead to a matching descendant. """ - elements = _path_to_elements(path_string, root_element=('root', GETATTR)) - target = elements[1:] - return (self._match_segments(self._pattern, target, 0, 0) or - self._could_match_descendant(self._pattern, target, 0, 0)) + target = _path_to_elements(path_string, root_element=('root', GETATTR))[1:] + memo = {} + return (self._match_segments(target, 0, 0, memo, allow_extra_target=False) + or self._could_match_descendant(target, 0, 0, {})) def match_or_is_descendant(self, path_string): """Return True if *path_string* matches OR is a descendant of a matching path. - This checks whether the pattern matches any prefix of *path_string*, - meaning the path is "inside" a matched subtree. + Equivalent to: the pattern matches some prefix of *path_string*. 
""" - elements = _path_to_elements(path_string, root_element=('root', GETATTR)) - target = elements[1:] - # Check exact match first - if self._match_segments(self._pattern, target, 0, 0): - return True - # Check if any prefix of target matches (making this path a descendant) - for length in range(len(target)): - if self._match_segments(self._pattern, target[:length], 0, 0): - return True - return False - - @staticmethod - def _match_segments(pattern, target, pi, ti): - """Recursive segment matcher with backtracking for ``**``.""" - while pi < len(pattern) and ti < len(target): - pat_elem = pattern[pi][0] + target = _path_to_elements(path_string, root_element=('root', GETATTR))[1:] + return self._match_segments(target, 0, 0, {}, allow_extra_target=True) + + def _match_segments(self, target, pi, ti, memo, allow_extra_target): + """Recursive segment matcher with backtracking for ``**``. - if pat_elem == MULTI_WILDCARD: + ``memo`` is a per-top-level-call dict keyed by ``(pi, ti)`` so each + state is computed at most once — turns the worst case from + exponential to ``O(len(pattern) * len(target))``. 
+ """ + key = (pi, ti) + if key in memo: + return memo[key] + pattern = self._pattern + target_len = len(target) + pattern_len = len(pattern) + + while pi < pattern_len and ti < target_len: + pat_elem = pattern[pi][0] + if pat_elem is MULTI_WILDCARD: # ** matches zero or more segments — try every suffix - for k in range(ti, len(target) + 1): - if GlobPathMatcher._match_segments(pattern, target, pi + 1, k): + for k in range(ti, target_len + 1): + if self._match_segments(target, pi + 1, k, memo, allow_extra_target): + memo[key] = True return True + memo[key] = False return False - elif pat_elem == SINGLE_WILDCARD: - # * matches exactly one segment regardless of value/action + elif pat_elem is SINGLE_WILDCARD: pi += 1 ti += 1 else: - tgt_elem = target[ti][0] - if pat_elem != tgt_elem: + if pat_elem != target[ti][0]: + memo[key] = False return False pi += 1 ti += 1 # Consume any trailing ** (they can match zero segments) - while pi < len(pattern) and pattern[pi][0] == MULTI_WILDCARD: + while pi < pattern_len and pattern[pi][0] is MULTI_WILDCARD: pi += 1 - return pi == len(pattern) and ti == len(target) + if allow_extra_target: + result = pi == pattern_len + else: + result = pi == pattern_len and ti == target_len + memo[key] = result + return result - @staticmethod - def _could_match_descendant(pattern, target, pi, ti): + def _could_match_descendant(self, target, pi, ti, memo): """Check if *target* is a prefix that could lead to a match deeper down.""" + key = (pi, ti) + if key in memo: + return memo[key] + pattern = self._pattern if ti == len(target): - # Target exhausted — it's an ancestor if pattern has remaining segments - return pi < len(pattern) - + result = pi < len(pattern) + memo[key] = result + return result if pi >= len(pattern): + memo[key] = False return False pat_elem = pattern[pi][0] - - if pat_elem == MULTI_WILDCARD: - return (GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti) or - GlobPathMatcher._could_match_descendant(pattern, 
target, pi, ti + 1)) - elif pat_elem == SINGLE_WILDCARD: - return GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti + 1) + if pat_elem is MULTI_WILDCARD: + result = (self._could_match_descendant(target, pi + 1, ti, memo) + or self._could_match_descendant(target, pi, ti + 1, memo)) + elif pat_elem is SINGLE_WILDCARD: + result = self._could_match_descendant(target, pi + 1, ti + 1, memo) else: - tgt_elem = target[ti][0] - if pat_elem != tgt_elem: + if pat_elem != target[ti][0]: + memo[key] = False return False - return GlobPathMatcher._could_match_descendant(pattern, target, pi + 1, ti + 1) + result = self._could_match_descendant(target, pi + 1, ti + 1, memo) + memo[key] = result + return result def compile_glob_paths(paths): diff --git a/tests/test_glob_paths.py b/tests/test_glob_paths.py index d97ead2b..6c128083 100644 --- a/tests/test_glob_paths.py +++ b/tests/test_glob_paths.py @@ -717,3 +717,46 @@ def test_mixed_exact_include_and_glob_include(self): assert "root['b']['x']" in changed # root['b']['y'] is NOT covered by either assert "root['b']['y']" not in changed + + +class TestGlobMatcherPerformance: + """Guards against exponential blowup in the glob matcher. + + Memoization caps the recursion at O(len(pattern) * len(target)) states. + Without it, patterns with multiple ``**`` and long targets exploded into + hundreds of thousands of recursive calls. 
+ """ + + def _instrument(self, monkeypatch): + counts = {'ms': 0, 'cmd': 0} + orig_ms = GlobPathMatcher._match_segments + orig_cmd = GlobPathMatcher._could_match_descendant + + def wrap_ms(self, *a, **k): + counts['ms'] += 1 + return orig_ms(self, *a, **k) + + def wrap_cmd(self, *a, **k): + counts['cmd'] += 1 + return orig_cmd(self, *a, **k) + + monkeypatch.setattr(GlobPathMatcher, '_match_segments', wrap_ms) + monkeypatch.setattr(GlobPathMatcher, '_could_match_descendant', wrap_cmd) + return counts + + def test_match_or_is_descendant_bounded(self, monkeypatch): + # Pre-memoization this exact case made ~280k recursive calls. + counts = self._instrument(monkeypatch) + m = GlobPathMatcher('root' + '[**]' * 5 + "['x']") + target = 'root' + ''.join(f'[{i}]' for i in range(20)) + m.match_or_is_descendant(target) + assert counts['ms'] < 5000, f"_match_segments call count regressed: {counts['ms']}" + + def test_match_or_is_ancestor_bounded(self, monkeypatch): + counts = self._instrument(monkeypatch) + m = GlobPathMatcher('root' + '[**]' * 8) + target = 'root' + ''.join(f'[{i}]' for i in range(40)) + "['extra']" + m.match_or_is_ancestor(target) + assert counts['ms'] + counts['cmd'] < 5000, ( + f"call count regressed: ms={counts['ms']}, cmd={counts['cmd']}" + ) From 1fc129fe36c6d337b0c92ab62c52a35144ca3b81 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 15 May 2026 13:12:36 -0700 Subject: [PATCH 22/23] updating docs --- CHANGELOG.md | 14 +++++++++++++ README.md | 34 ++++++++++++++----------------- deepdiff/docstrings/changelog.rst | 13 ++++++++++++ 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab48fc76..46217041 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # DeepDiff Change log +- v9-1-0 + - Added multiprocessing support for DeepDiff: parallel distance computation and parallel subtree diffing with aggregated worker stats, deterministic ordering, and automatic fallback to serial when unsafe (e.g. 
`custom_operators`, `*_obj_callback`, `ignore_order_func`) + - Added wildcard/glob pattern support for `exclude_paths` and `include_paths` thanks to [akshat62](https://github.com/akshat62) + - Reimplemented internal cache for improved performance + - Memoized `GlobPathMatcher` to remove exponential-time matching cliff + - Comprehensive type-hint corrections across `deephash.py`, `helper.py`, `delta.py`, `diff.py`, `distance.py`, `path.py`, and `serialization.py` (also fixed real bugs: misplaced paren in `path._guess_type` call, and `len(other.indexes > 1)` → `len(other.indexes) > 1` in `diff._compare_in_order`) + - Security: Delta dunder-attribute traversal in `check_elem()` now raises immediately instead of going through `_raise_or_log()`, with full-path preflight validation in `_get_elements_and_details()` so the `set_item_added` path cannot silently skip malicious dunder paths + - Fixed nested NamedTuple set/frozenset Delta updates dropping the outer container + - Fixed tuple Deltas using iterable opcodes silently doing nothing for insert/delete-only changes + - Fixed Delta with both moved and added iterable items mutating the Delta's own internal diff data + - Fixed crash during path sorting when removing multiple dictionary items with complex keys + - Packaging: added missing files to sdist and removed obsolete `MANIFEST.in` thanks to [mgorny](https://github.com/mgorny) + - Updated GitHub Actions workflows and dependencies + - v9-0-0 - migration note: - `to_dict()` and `to_json()` now accept a `verbose_level` parameter and always return a usable text-view dict. When the original view is `'tree'`, they default to `verbose_level=2` for full detail. The old `view_override` parameter is removed. To get the previous results, you will need to pass the explicit verbose_level to `to_json` and `to_dict` if you are using the tree view. 
diff --git a/README.md b/README.md index bb98b0b5..3ccdb4a0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 9.0.0 +# DeepDiff v 9.1.0 ![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat) ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) @@ -21,29 +21,25 @@ Tested on Python 3.10+ and PyPy3. -- **[Documentation](https://zepworks.com/deepdiff/9.0.0/)** +- **[Documentation](https://zepworks.com/deepdiff/9.1.0/)** ## What is new? Please check the [ChangeLog](CHANGELOG.md) file for the detailed information. -DeepDiff 9-0-0 -- migration note: - - `to_dict()` and `to_json()` now accept a `verbose_level` parameter and always return a usable text-view dict. When the original view is `'tree'`, they default to `verbose_level=2` for full detail. The old `view_override` parameter is removed. To get the previous results, you will need to pass the explicit verbose_level to `to_json` and `to_dict` if you are using the tree view. -- Dropping support for Python 3.9 -- Support for python 3.14 -- Added support for callable `group_by` thanks to @echan5 -- Added `FlatDeltaDict` TypedDict for `to_flat_dicts` return type -- Fixed colored view display when all list items are removed thanks to @yannrouillard -- Fixed `hasattr()` swallowing `AttributeError` in `__slots__` handling for objects with `__getattr__` thanks to @tpvasconcelos -- Fixed `ignore_order=True` missing int-vs-float type changes -- Fixed Delta producing phantom entries when items both move and change values with `iterable_compare_func` thanks to @devin13cox -- Fixed `_convert_oversized_ints` failing on NamedTuples -- Fixed orjson `TypeError` for integers exceeding 64-bit range -- Fixed parameter bug in `to_flat_dicts` where `include_action_in_path` and `report_type_changes` were not being passed through -- Fixed `ignore_keys` issue in `detailed__dict__` thanks to @vitalis89 -- Fixed logarithmic similarity type hint thanks to @ljames8 -- Added `Fraction` 
numeric support thanks to @akshat62 +DeepDiff 9-1-0 +- Added multiprocessing support for DeepDiff: parallel distance computation and parallel subtree diffing with aggregated worker stats, deterministic ordering, and automatic fallback to serial when unsafe (e.g. `custom_operators`, `*_obj_callback`, `ignore_order_func`) +- Added wildcard/glob pattern support for `exclude_paths` and `include_paths` thanks to @akshat62 +- Reimplemented internal cache for improved performance +- Memoized `GlobPathMatcher` to remove exponential-time matching cliff +- Comprehensive type-hint corrections across `deephash.py`, `helper.py`, `delta.py`, `diff.py`, `distance.py`, `path.py`, and `serialization.py` (also fixed real bugs: misplaced paren in `path._guess_type` call, and `len(other.indexes > 1)` → `len(other.indexes) > 1` in `diff._compare_in_order`) +- Security: Delta dunder-attribute traversal in `check_elem()` now raises immediately instead of going through `_raise_or_log()`, with full-path preflight validation in `_get_elements_and_details()` so the `set_item_added` path cannot silently skip malicious dunder paths +- Fixed nested NamedTuple set/frozenset Delta updates dropping the outer container +- Fixed tuple Deltas using iterable opcodes silently doing nothing for insert/delete-only changes +- Fixed Delta with both moved and added iterable items mutating the Delta's own internal diff data +- Fixed crash during path sorting when removing multiple dictionary items with complex keys +- Packaging: added missing files to sdist and removed obsolete `MANIFEST.in` thanks to @mgorny +- Updated GitHub Actions workflows and dependencies ## Installation diff --git a/deepdiff/docstrings/changelog.rst b/deepdiff/docstrings/changelog.rst index 79972c27..b29bb886 100644 --- a/deepdiff/docstrings/changelog.rst +++ b/deepdiff/docstrings/changelog.rst @@ -5,6 +5,19 @@ Changelog DeepDiff Changelog +- v9-1-0 + - Added multiprocessing support for DeepDiff: parallel distance computation and 
parallel subtree diffing with aggregated worker stats, deterministic ordering, and automatic fallback to serial when unsafe (e.g. ``custom_operators``, ``*_obj_callback``, ``ignore_order_func``) + - Added wildcard/glob pattern support for ``exclude_paths`` and ``include_paths`` thanks to `akshat62 <https://github.com/akshat62>`__ + - Reimplemented internal cache for improved performance + - Memoized ``GlobPathMatcher`` to remove exponential-time matching cliff + - Comprehensive type-hint corrections across ``deephash.py``, ``helper.py``, ``delta.py``, ``diff.py``, ``distance.py``, ``path.py``, and ``serialization.py`` (also fixed real bugs: misplaced paren in ``path._guess_type`` call, and ``len(other.indexes > 1)`` → ``len(other.indexes) > 1`` in ``diff._compare_in_order``) + - Security: Delta dunder-attribute traversal in ``check_elem()`` now raises immediately instead of going through ``_raise_or_log()``, with full-path preflight validation in ``_get_elements_and_details()`` so the ``set_item_added`` path cannot silently skip malicious dunder paths + - Fixed nested NamedTuple set/frozenset Delta updates dropping the outer container + - Fixed tuple Deltas using iterable opcodes silently doing nothing for insert/delete-only changes + - Fixed Delta with both moved and added iterable items mutating the Delta's own internal diff data + - Fixed crash during path sorting when removing multiple dictionary items with complex keys + - Packaging: added missing files to sdist and removed obsolete ``MANIFEST.in`` thanks to `mgorny <https://github.com/mgorny>`__ + - Updated GitHub Actions workflows and dependencies - v9-0-0 - migration note: - `to_dict()` and `to_json()` now accept a `verbose_level` parameter and always return a usable text-view dict. When the original view is `'tree'`, they default to `verbose_level=2` for full detail. The old `view_override` parameter is removed. To get the previous results, you will need to pass the explicit verbose_level to `to_json` and `to_dict` if you are using the tree view.
From f4e58b5b758d6f0dcc9aba2f8bb0be498effa0db Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 15 May 2026 13:14:30 -0700 Subject: [PATCH 23/23] updating authors --- AUTHORS.md | 2 ++ deepdiff/docstrings/authors.rst | 2 ++ 2 files changed, 4 insertions(+) diff --git a/AUTHORS.md b/AUTHORS.md index aea82b37..ff3306eb 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -86,3 +86,5 @@ Authors in order of the timeline of their contributions: - [srini047](https://github.com/srini047) for fixing README typo. - [Nagato-Yuzuru](https://github.com/Nagato-Yuzuru) for colored view tests. - [akshat62](https://github.com/akshat62) for adding Fraction numeric support. +- [akshat62](https://github.com/akshat62) for adding wildcard/glob pattern support for `exclude_paths` and `include_paths`. +- [mgorny](https://github.com/mgorny) for adding missing files to sdist and removing obsolete `MANIFEST.in`. diff --git a/deepdiff/docstrings/authors.rst b/deepdiff/docstrings/authors.rst index d971b3b8..a216c78b 100644 --- a/deepdiff/docstrings/authors.rst +++ b/deepdiff/docstrings/authors.rst @@ -128,6 +128,8 @@ Authors in order of the timeline of their contributions: - `srini047 <https://github.com/srini047>`__ for fixing README typo. - `Nagato-Yuzuru <https://github.com/Nagato-Yuzuru>`__ for colored view tests. - `akshat62 <https://github.com/akshat62>`__ for adding Fraction numeric support. +- `akshat62 <https://github.com/akshat62>`__ for adding wildcard/glob pattern support for ``exclude_paths`` and ``include_paths``. +- `mgorny <https://github.com/mgorny>`__ for adding missing files to sdist and removing obsolete ``MANIFEST.in``. .. _Sep Dehpour (Seperman): http://www.zepworks.com