diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py
index 1ac35797..4c65fada 100644
--- a/xarray/coding/cftimeindex.py
+++ b/xarray/coding/cftimeindex.py
@@ -110,13 +110,13 @@ class CFTimeIndex(pd.Index):
else:
front_str = format_times(self.values[:REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END], display_width, offset=offset, first_row_offset=0, last_row_end=',')
end_str = format_times(self.values[-REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END:], display_width, offset=offset, first_row_offset=offset)
- datastr = '\n'.join([front_str, f'{' ' * offset}...', end_str])
+ datastr = '\n'.join([front_str, f'{" " * offset}...', end_str])
attrs_str = format_attrs(self)
full_repr_str = f'{klass_name}([{datastr}], {attrs_str})'
if len(full_repr_str) > display_width:
if len(attrs_str) >= display_width - offset:
- attrs_str = attrs_str.replace(',', f',\n{' ' * (offset - 2)}')
- full_repr_str = f'{klass_name}([{datastr}],\n{' ' * (offset - 1)}{attrs_str})'
+ attrs_str = attrs_str.replace(',', f',\n{" " * (offset - 2)}')
+ full_repr_str = f'{klass_name}([{datastr}],\n{" " * (offset - 1)}{attrs_str})'
return full_repr_str
def _partial_date_slice(self, resolution, parsed):
diff --git a/xarray/core/datatree_render.py b/xarray/core/datatree_render.py
index f36fccdc..33a04e18 100644
--- a/xarray/core/datatree_render.py
+++ b/xarray/core/datatree_render.py
@@ -8,9 +8,23 @@ type hints.
from __future__ import annotations
from collections import namedtuple
from collections.abc import Iterable, Iterator
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, TypeVar
if TYPE_CHECKING:
from xarray.core.datatree import DataTree
+
+T = TypeVar('T')
+
+def _last_iter(iterable: Iterable[T]) -> Iterator[tuple[T, bool]]:
+ """Iterate and generate a tuple with a flag for the last item."""
+ iterator = iter(iterable)
+ try:
+ last = next(iterator)
+ except StopIteration:
+ return
+ for item in iterator:
+ yield last, False
+ last = item
+ yield last, True
Row = namedtuple('Row', ('pre', 'fill', 'node'))
class AbstractStyle:
@@ -32,7 +46,7 @@ class AbstractStyle:
@property
def empty(self) -> str:
"""Empty string as placeholder."""
- pass
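+        # Same width as the vertical marker so deeper siblings stay aligned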
+ return ' ' * len(self.vertical)
def __repr__(self) -> str:
return f'{self.__class__.__name__}()'
@@ -146,6 +160,30 @@ class RenderDataTree:
self.childiter = childiter
self.maxlevel = maxlevel
+ def __next(self, node: DataTree, continues: tuple[bool, ...]) -> Iterator[Row]:
+ """Iterate over tree with level information."""
+ # Prepare level
+ level = len(continues)
+ if self.maxlevel is not None and level > self.maxlevel:
+ return
+
+ # Prepare prefix
+ if level == 0:
+ pre = ''
+ else:
+ pre = ''.join(self.style.vertical if cont else self.style.empty for cont in continues[:-1])
+ pre += self.style.cont if continues[-1] else self.style.end
+
+        # Yield current node; fill is the prefix used for continuation lines:
+        # verticals where a branch continues, blanks after a last child
+        fill = ''.join(self.style.vertical if cont else self.style.empty for cont in continues)
+        yield Row(pre, fill, node)
+
+ # Recurse for children
+ children = list(node.children.values())
+ if children:
+ children = self.childiter(children)
+ for child, is_last in _last_iter(children):
+ yield from self.__next(child, continues + (not is_last,))
+
def __iter__(self) -> Iterator[Row]:
return self.__next(self.node, tuple())
@@ -155,7 +193,7 @@ class RenderDataTree:
def __repr__(self) -> str:
classname = self.__class__.__name__
args = [repr(self.node), f'style={repr(self.style)}', f'childiter={repr(self.childiter)}']
- return f'{classname}({', '.join(args)})'
+ return f"{classname}({', '.join(args)})"
def by_attr(self, attrname: str='name') -> str:
"""
@@ -189,4 +227,7 @@ class RenderDataTree:
└── sub1C
└── sub1Ca
"""
- pass
\ No newline at end of file
+ lines = []
+ for pre, _, node in self:
+ lines.append(f"{pre}{getattr(node, attrname)}")
+ return '\n'.join(lines)
\ No newline at end of file
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index 6db6fdb5..97c6bf61 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -33,7 +33,48 @@ dask_available = module_available('dask')
def _dask_or_eager_func(name, eager_module=np, dask_module='dask.array'):
"""Create a function that dispatches to dask for dask array inputs."""
- pass
+ def wrapper(*args, **kwargs):
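+        # Dispatch on argument types: any dask array input routes the call
+        # to the same-named function in `dask_module`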
+ if any(is_duck_dask_array(arg) for arg in args):
+ try:
+ module = import_module(dask_module)
+ except ImportError:
+ raise ImportError(f"Cannot use {name} with dask arrays without installing dask.")
+ func = getattr(module, name)
+ else:
+ func = getattr(eager_module, name)
+ return func(*args, **kwargs)
+ return wrapper
+
+def _create_nan_agg_method(name: str, coerce_strings: bool=False, invariant_0d: bool=False):
+    """Create a function that dispatches to dask or numpy based on the inputs,
+    skipping NaNs where requested."""
+    def method(values, axis=None, skipna=None, **kwargs):
+        # 0d-invariant aggregations (e.g. max) return the input unchanged
+        # when reducing over an empty tuple of axes
+        if invariant_0d and axis == ():
+            return values
+
+        # Coerce string arrays to object dtype so the NaN-skipping path works
+        if coerce_strings and values.dtype.kind in {'U', 'S'}:
+            values = values.astype(object)
+
+        if skipna or (skipna is None and values.dtype.kind in {'c', 'f', 'O'}):
+            # NaN-skipping variants: dask.array.nan* or the nputils nan* helpers
+            if is_duck_dask_array(values):
+                func = getattr(import_module('dask.array'), f'nan{name}')
+            else:
+                func = getattr(nputils, f'nan{name}')
+        elif is_duck_dask_array(values):
+            func = getattr(import_module('dask.array'), name)
+        else:
+            func = getattr(np, name)
+        return func(values, axis=axis, **kwargs)
+
+    method.numeric_only = False
+    method.available_min_count = False
+    return method
pandas_isnull = _dask_or_eager_func('isnull', eager_module=pd, dask_module='dask.array')
around.__doc__ = str.replace(around.__doc__ or '', 'array([0.,  2.])', 'array([0., 2.])')
@@ -44,7 +85,20 @@ masked_invalid = _dask_or_eager_func('masked_invalid', eager_module=np.ma, dask_
def as_shared_dtype(scalars_or_arrays, xp=None):
"""Cast a arrays to a shared dtype using xarray's type promotion rules."""
- pass
+ if not scalars_or_arrays:
+ return []
+
+ if xp is None:
+ xp = np
+
+ # Convert all inputs to arrays
+ arrays = [xp.asarray(x) for x in scalars_or_arrays]
+
+ # Get the target dtype using type promotion rules
+ target_dtype = dtypes.result_type(*arrays)
+
+ # Cast all arrays to the target dtype
+ return [xp.asarray(arr, dtype=target_dtype) for arr in arrays]
def lazy_array_equiv(arr1, arr2):
"""Like array_equal, but doesn't actually compare values.
@@ -53,37 +107,134 @@ def lazy_array_equiv(arr1, arr2):
Returns None when equality cannot determined: one or both of arr1, arr2 are numpy arrays;
or their dask tokens are not equal
"""
- pass
+ if arr1 is arr2:
+ return True
+
+ if arr1 is None or arr2 is None:
+ return arr1 is None and arr2 is None
+
+ if not is_duck_array(arr1) or not is_duck_array(arr2):
+ return None
+
+ if arr1.shape != arr2.shape:
+ return False
+
+    if is_duck_dask_array(arr1) and is_duck_dask_array(arr2):
+        from dask.base import tokenize
+        # Equal tokens imply equal graphs, hence equal values; unequal tokens
+        # are inconclusive, so fall through to None
+        if tokenize(arr1) == tokenize(arr2):
+            return True
+
+    return None
def allclose_or_equiv(arr1, arr2, rtol=1e-05, atol=1e-08):
"""Like np.allclose, but also allows values to be NaN in both arrays"""
- pass
+ arr1, arr2 = as_shared_dtype([arr1, arr2])
+
+ if arr1.shape != arr2.shape:
+ return False
+
+    if is_duck_dask_array(arr1) or is_duck_dask_array(arr2):
+        import dask.array as da
+        arr1 = da.asarray(arr1)
+        arr2 = da.asarray(arr2)
+        # equal_nan=True already treats NaNs in matching positions as equal
+        close = da.isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True)
+        return bool(da.all(close).compute())
+
+    # equal_nan=True already treats NaNs in matching positions as equal
+    return bool(np.isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all())
def array_equiv(arr1, arr2):
"""Like np.array_equal, but also allows values to be NaN in both arrays"""
- pass
+ arr1, arr2 = as_shared_dtype([arr1, arr2])
+
+ if arr1.shape != arr2.shape:
+ return False
+
+    if is_duck_dask_array(arr1) or is_duck_dask_array(arr2):
+        import dask.array as da
+        arr1 = da.asarray(arr1)
+        arr2 = da.asarray(arr2)
+        # Positions are equivalent if the values match or both are null
+        both_nan = da.isnull(arr1) & da.isnull(arr2)
+        return bool(da.all(both_nan | (arr1 == arr2)).compute())
+
+    # pandas_isnull handles NaN, NaT and None across dtypes, unlike np.isnan
+    both_nan = pandas_isnull(arr1) & pandas_isnull(arr2)
+    return bool(np.all(both_nan | (arr1 == arr2)))
def array_notnull_equiv(arr1, arr2):
"""Like np.array_equal, but also allows values to be NaN in either or both
arrays
"""
- pass
+ arr1, arr2 = as_shared_dtype([arr1, arr2])
+
+ if arr1.shape != arr2.shape:
+ return False
+
+    if is_duck_dask_array(arr1) or is_duck_dask_array(arr2):
+        import dask.array as da
+        arr1 = da.asarray(arr1)
+        arr2 = da.asarray(arr2)
+        # Ignore positions where either side is null
+        valid = ~(da.isnull(arr1) | da.isnull(arr2))
+        return bool(da.all(~valid | (arr1 == arr2)).compute())
+
+    # pandas_isnull handles NaN, NaT and None across dtypes, unlike np.isnan
+    valid = ~(pandas_isnull(arr1) | pandas_isnull(arr2))
+    return bool(np.all(~valid | (arr1 == arr2)))
def count(data, axis=None):
"""Count the number of non-NA in this array along the given axis or axes"""
- pass
+    if is_duck_dask_array(data):
+        import dask.array as da
+        # da.isnull handles NaN, NaT and None, unlike da.isnan
+        return da.count_nonzero(~da.isnull(data), axis=axis)
+    else:
+        return np.count_nonzero(~pandas_isnull(data), axis=axis)
def where(condition, x, y):
"""Three argument where() with better dtype promotion rules."""
- pass
+ if is_duck_dask_array(condition) or is_duck_dask_array(x) or is_duck_dask_array(y):
+ import dask.array as da
+ return da.where(condition, x, y)
+ else:
+ x, y = as_shared_dtype([x, y])
+ return np.where(condition, x, y)
def concatenate(arrays, axis=0):
"""concatenate() with better dtype promotion rules."""
- pass
+ if not arrays:
+ return np.array([], dtype=object)
+
+ if any(is_duck_dask_array(arr) for arr in arrays):
+ import dask.array as da
+ arrays = [da.asarray(arr) for arr in arrays]
+ return da.concatenate(arrays, axis=axis)
+ else:
+ arrays = as_shared_dtype(arrays)
+ return _concatenate(arrays, axis=axis)
def stack(arrays, axis=0):
"""stack() with better dtype promotion rules."""
- pass
+ if not arrays:
+ return np.array([], dtype=object)
+
+ if any(is_duck_dask_array(arr) for arr in arrays):
+ import dask.array as da
+ arrays = [da.asarray(arr) for arr in arrays]
+ return da.stack(arrays, axis=axis)
+ else:
+ arrays = as_shared_dtype(arrays)
+ return np.stack(arrays, axis=axis)
argmax = _create_nan_agg_method('argmax', coerce_strings=True)
argmin = _create_nan_agg_method('argmin', coerce_strings=True)
max = _create_nan_agg_method('max', coerce_strings=True, invariant_0d=True)
@@ -115,7 +266,19 @@ def _datetime_nanmin(array):
- numpy nanmin() don't work on datetime64 (all versions at the moment of writing)
- dask min() does not work on datetime64 (all versions at the moment of writing)
"""
- pass
+ if is_duck_dask_array(array):
+ import dask.array as da
+ mask = ~da.isnat(array)
+ valid = array[mask]
+ if valid.size == 0:
+ return np.datetime64('NaT')
+ return da.min(valid).compute()
+ else:
+        mask = ~np.isnat(array)
+ valid = array[mask]
+ if valid.size == 0:
+ return np.datetime64('NaT')
+ return valid.min()
def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
"""Convert an array containing datetime-like data to numerical values.
@@ -142,7 +305,22 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
though some calendars would allow for them (e.g. no_leap). This is because there
is no `cftime.timedelta` object.
"""
- pass
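+    # Using the earliest non-NaT value as the default offset keeps the
+    # numeric magnitudes small, reducing the risk of overflow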
+ if offset is None:
+ offset = _datetime_nanmin(array)
+
+ if is_duck_dask_array(array):
+ import dask.array as da
+ result = da.subtract(array, offset)
+ else:
+ result = array - offset
+
+ if datetime_unit is not None:
+ if isinstance(result, np.ndarray):
+ result = np_timedelta64_to_float(result, datetime_unit)
+ else:
+ result = timedelta_to_numeric(result, datetime_unit)
+
+ return result.astype(dtype)
def timedelta_to_numeric(value, datetime_unit='ns', dtype=float):
"""Convert a timedelta-like object to numerical values.
@@ -158,7 +336,17 @@ def timedelta_to_numeric(value, datetime_unit='ns', dtype=float):
The output data type.
"""
- pass
+ if isinstance(value, np.ndarray):
+ if np.issubdtype(value.dtype, np.timedelta64):
+ return np_timedelta64_to_float(value, datetime_unit)
+ else:
+ return value.astype(dtype)
+ elif isinstance(value, pd.Timedelta):
+ return pd_timedelta_to_float(value, datetime_unit)
+ elif isinstance(value, timedelta):
+ return py_timedelta_to_float(value, datetime_unit)
+ else:
+ return value
def np_timedelta64_to_float(array, datetime_unit):
"""Convert numpy.timedelta64 to float.
@@ -168,7 +356,26 @@ def np_timedelta64_to_float(array, datetime_unit):
The array is first converted to microseconds, which is less likely to
cause overflow errors.
"""
- pass
+ # Convert to microseconds first to avoid overflow errors
+ microseconds = array.astype('timedelta64[us]').astype(float)
+
+ # Convert from microseconds to target unit
+ if datetime_unit == 'us':
+ return microseconds
+ elif datetime_unit == 'ns':
+ return microseconds * 1000
+ elif datetime_unit == 'ms':
+ return microseconds / 1000
+ elif datetime_unit == 's':
+ return microseconds / 1_000_000
+ elif datetime_unit == 'm':
+ return microseconds / (60 * 1_000_000)
+ elif datetime_unit == 'h':
+ return microseconds / (3600 * 1_000_000)
+ elif datetime_unit == 'D':
+ return microseconds / (86400 * 1_000_000)
+ else:
+ raise ValueError(f"Unsupported datetime unit: {datetime_unit}")
def pd_timedelta_to_float(value, datetime_unit):
"""Convert pandas.Timedelta to float.
@@ -178,40 +385,159 @@ def pd_timedelta_to_float(value, datetime_unit):
Built on the assumption that pandas timedelta values are in nanoseconds,
which is also the numpy default resolution.
"""
- pass
+ # Convert to nanoseconds first
+ nanoseconds = float(value.value)
+
+ # Convert from nanoseconds to target unit
+ if datetime_unit == 'ns':
+ return nanoseconds
+ elif datetime_unit == 'us':
+ return nanoseconds / 1000
+ elif datetime_unit == 'ms':
+ return nanoseconds / 1_000_000
+ elif datetime_unit == 's':
+ return nanoseconds / 1_000_000_000
+ elif datetime_unit == 'm':
+ return nanoseconds / (60 * 1_000_000_000)
+ elif datetime_unit == 'h':
+ return nanoseconds / (3600 * 1_000_000_000)
+ elif datetime_unit == 'D':
+ return nanoseconds / (86400 * 1_000_000_000)
+ else:
+ raise ValueError(f"Unsupported datetime unit: {datetime_unit}")
def py_timedelta_to_float(array, datetime_unit):
"""Convert a timedelta object to a float, possibly at a loss of resolution."""
- pass
+ # Convert to total seconds first
+ total_seconds = array.total_seconds()
+
+ # Convert from seconds to target unit
+ if datetime_unit == 's':
+ return total_seconds
+ elif datetime_unit == 'ns':
+ return total_seconds * 1_000_000_000
+ elif datetime_unit == 'us':
+ return total_seconds * 1_000_000
+ elif datetime_unit == 'ms':
+ return total_seconds * 1000
+ elif datetime_unit == 'm':
+ return total_seconds / 60
+ elif datetime_unit == 'h':
+ return total_seconds / 3600
+ elif datetime_unit == 'D':
+ return total_seconds / 86400
+ else:
+ raise ValueError(f"Unsupported datetime unit: {datetime_unit}")
def mean(array, axis=None, skipna=None, **kwargs):
"""inhouse mean that can handle np.datetime64 or cftime.datetime
dtypes"""
- pass
+    if array.dtype.kind in {'M', 'm'}:
+        offset = _datetime_nanmin(array)
+        # Take the mean in numeric (nanosecond) space, then cast back to a
+        # timedelta and shift by the offset to recover a datetime result;
+        # adding a raw float to a datetime64 would be a TypeError
+        numeric_mean = _mean(datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs)
+        return numeric_mean.astype('timedelta64[ns]') + offset
+    else:
+        return _mean(array, axis=axis, skipna=skipna, **kwargs)
mean.numeric_only = True
def cumprod(array, axis=None, **kwargs):
"""N-dimensional version of cumprod."""
- pass
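+    # axis=None mirrors numpy semantics: accumulate over the raveled array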
+ if axis is None:
+ array = array.ravel()
+ axis = 0
+
+ if is_duck_dask_array(array):
+ import dask.array as da
+ return da.cumprod(array, axis=axis, **kwargs)
+ else:
+ return cumprod_1d(array, axis=axis, **kwargs)
def cumsum(array, axis=None, **kwargs):
"""N-dimensional version of cumsum."""
- pass
+ if axis is None:
+ array = array.ravel()
+ axis = 0
+
+ if is_duck_dask_array(array):
+ import dask.array as da
+ return da.cumsum(array, axis=axis, **kwargs)
+ else:
+ return cumsum_1d(array, axis=axis, **kwargs)
def first(values, axis, skipna=None):
"""Return the first non-NA elements in this array along the given axis"""
- pass
+    if skipna or (skipna is None and values.dtype.kind not in {'b', 'i', 'u'}):
+        mask = ~pandas_isnull(values)
+        if mask.all():
+            # No NAs anywhere; the plain first value is correct
+            return take(values, 0, axis=axis)
+        # Index of the first valid value along the axis (numpy-based sketch;
+        # argmax returns the position of the first True)
+        first_index = argmax(mask, axis=axis)
+        return np.take_along_axis(values, np.expand_dims(first_index, axis=axis), axis=axis).squeeze(axis=axis)
+    else:
+        # Not skipping NA - just return the first value
+        return take(values, 0, axis=axis)
def last(values, axis, skipna=None):
"""Return the last non-NA elements in this array along the given axis"""
- pass
+    if skipna or (skipna is None and values.dtype.kind not in {'b', 'i', 'u'}):
+        mask = ~pandas_isnull(values)
+        if mask.all():
+            # No NAs anywhere; the plain last value is correct
+            return take(values, -1, axis=axis)
+        # Flip along the reduction axis (not axis 0) to find the last valid
+        # value, then map the index back to the unflipped array
+        last_index = values.shape[axis] - 1 - argmax(np.flip(mask, axis=axis), axis=axis)
+        return np.take_along_axis(values, np.expand_dims(last_index, axis=axis), axis=axis).squeeze(axis=axis)
+    else:
+        # Not skipping NA - just return the last value
+        return take(values, -1, axis=axis)
def least_squares(lhs, rhs, rcond=None, skipna=False):
"""Return the coefficients and residuals of a least-squares fit."""
- pass
+    if skipna:
+        # Drop rows with missing values (sketch assumes a 1-D rhs and an
+        # optional 2-D design matrix lhs)
+        mask = ~pandas_isnull(rhs)
+        if lhs.ndim == 2:
+            mask &= ~pandas_isnull(lhs).any(axis=1)
+        else:
+            mask &= ~pandas_isnull(lhs)
+        lhs = lhs[mask]
+        rhs = rhs[mask]
+
+    if is_duck_dask_array(lhs) or is_duck_dask_array(rhs):
+        import dask.array as da
+        # dask's lstsq takes no rcond argument; like numpy it also returns
+        # rank and singular values, which we discard
+        coeffs, residuals, _, _ = da.linalg.lstsq(da.asarray(lhs), da.asarray(rhs))
+    else:
+        coeffs, residuals, _, _ = np.linalg.lstsq(lhs, rhs, rcond=rcond)
+    return coeffs, residuals
+
+def astype(data, dtype, copy=True):
+    """Cast data array to dtype, properly handling dask arrays."""
+    if hasattr(data, 'astype'):
+        # numpy and dask arrays share the .astype(dtype, copy=...) method;
+        # for dask the cast stays lazy
+        return data.astype(dtype, copy=copy)
+    return np.asarray(data, dtype=dtype)
def _push(array, n: int | None=None, axis: int=-1):
"""
Use either bottleneck or numbagg depending on options & what's available
"""
- pass
\ No newline at end of file
+    if OPTIONS["use_numbagg"] and module_available("numbagg"):
+        import numbagg
+        # Forward-fill NaNs along axis, propagating at most n positions
+        return numbagg.ffill(array, limit=n, axis=axis)
+    elif OPTIONS["use_bottleneck"] and module_available("bottleneck"):
+        import bottleneck as bn
+        return bn.push(array, n=n, axis=axis)
+    else:
+        raise RuntimeError(
+            "ffill & bfill requires bottleneck or numbagg to be enabled."
+        )
\ No newline at end of file
diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
index 181d6266..2d9144d0 100644
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -26,48 +26,129 @@ if TYPE_CHECKING:
from xarray.core.datatree import DataTree
UNITS = ('B', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
+def _mapping_repr(mapping, title, summarizer, expand_option_name=None, col_width=None):
+    """Create a one-line or expanded summary of a mapping object."""
+    if not mapping:
+        return f"{title}:\n{EMPTY_REPR}"
+
+    # When the option says "don't expand", collapse to a one-line count
+    if expand_option_name is not None and not _get_boolean_with_default(expand_option_name):
+        return f"{title}: ({len(mapping)})"
+
+    summary = [summarizer(k, v, col_width=col_width) for k, v in mapping.items()]
+    return f"{title}:\n" + "\n".join(summary)
+
def pretty_print(x, numchars: int):
"""Given an object `x`, call `str(x)` and format the returned string so
that it is numchars long, padding with trailing spaces or truncating with
ellipses as necessary
"""
- pass
+ s = str(x)
+ if len(s) > numchars:
+ return s[:(numchars - 3)] + "..."
+ else:
+ return s.ljust(numchars)
def first_n_items(array, n_desired):
"""Returns the first n_desired items of an array"""
- pass
+    if array is None:
+        return []
+    # Flatten first so multidimensional arrays yield scalar items
+    return np.ravel(array)[:n_desired]
def last_n_items(array, n_desired):
"""Returns the last n_desired items of an array"""
- pass
+    if array is None or n_desired == 0:
+        # Guard n_desired == 0: array[-0:] would return the whole array
+        return []
+    return np.ravel(array)[-n_desired:]
def last_item(array):
"""Returns the last item of an array in a list or an empty list."""
- pass
+    if array is None:
+        return []
+    array = np.asarray(array)
+    if array.size == 0:
+        return []
+    # Flatten so the true last element is returned for any dimensionality
+    return [array.ravel()[-1]]
def calc_max_rows_first(max_rows: int) -> int:
"""Calculate the first rows to maintain the max number of rows."""
- pass
+ if max_rows is None:
+ return None
+ return max(1, (max_rows + 1) // 2)
def calc_max_rows_last(max_rows: int) -> int:
"""Calculate the last rows to maintain the max number of rows."""
- pass
+ if max_rows is None:
+ return None
+ return max(1, max_rows // 2)
def format_timestamp(t):
"""Cast given object to a Timestamp and return a nicely formatted string"""
- pass
+ try:
+ datetime_str = pd.Timestamp(t).isoformat()
+ try:
+ date_str, time_str = datetime_str.split('T')
+ except ValueError:
+ # catch NaT and others that don't split nicely
+ return datetime_str
+ else:
+ if time_str == '00:00:00':
+ return date_str
+ else:
+ return f'{date_str} {time_str}'
+ except OutOfBoundsDatetime:
+ return str(t)
def format_timedelta(t, timedelta_format=None):
"""Cast given object to a Timestamp and return a nicely formatted string"""
- pass
+    # Let pandas do the heavy lifting; its repr is '<d> days <hh:mm:ss[.ffffff]>'
+    timedelta_str = str(pd.Timedelta(t))
+    try:
+        days_str, time_str = timedelta_str.split(' days ')
+    except ValueError:
+        # catch NaT and others that don't split nicely
+        return timedelta_str
+    if timedelta_format == 'date':
+        return days_str + ' days'
+    elif timedelta_format == 'time':
+        return time_str
+    else:
+        return timedelta_str
def format_item(x, timedelta_format=None, quote_strings=True):
"""Returns a succinct summary of an object as a string"""
- pass
+ if isinstance(x, (np.datetime64, datetime)):
+ return format_timestamp(x)
+ elif isinstance(x, (np.timedelta64, timedelta, pd.Timedelta)):
+ return format_timedelta(x, timedelta_format=timedelta_format)
+ elif isinstance(x, (str, bytes)):
+ return repr(x) if quote_strings else str(x)
+    elif isinstance(x, (float, np.floating)):
+        return f'{x:.4g}'
+ else:
+ return str(x)
def format_items(x):
"""Returns a succinct summaries of all items in a sequence as strings"""
- pass
+ if is_duck_array(x):
+ x = to_numpy(x)
+ return [format_item(xi) for xi in x]
def format_array_flat(array, max_width: int):
"""Return a formatted string for as many items in the flattened version of
diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py
index 80ecd157..4c21fc9a 100644
--- a/xarray/core/nputils.py
+++ b/xarray/core/nputils.py
@@ -38,15 +38,53 @@ def inverse_permutation(indices: np.ndarray, N: int | None=None) -> np.ndarray:
Integer indices to take from the original array to create the
permutation.
"""
- pass
+    if N is None:
+        N = len(indices)
+    # Positions not present in `indices` are marked with -1
+    inverse = np.full(N, -1, dtype=np.intp)
+    inverse[indices] = np.arange(len(indices), dtype=np.intp)
+    return inverse
def _is_contiguous(positions):
"""Given a non-empty list, does it consist of contiguous integers?"""
- pass
+ if not positions:
+ return True
+ return np.array_equal(positions, range(min(positions), max(positions) + 1))
def _advanced_indexer_subspaces(key):
"""Indices of the advanced indexes subspaces for mixed indexing and vindex."""
- pass
+    if not isinstance(key, tuple):
+        key = (key,)
+
+    # Positions of the advanced (non-slice) indexers within the key
+    advanced_index_positions = [i for i, k in enumerate(key) if not isinstance(k, slice)]
+
+    if not advanced_index_positions or not _is_contiguous(advanced_index_positions):
+        # Nothing to reorder: plain mixed indexing already matches vindex
+        return (), ()
+
+    # With contiguous advanced indexers, numpy broadcasts them to a single
+    # subspace placed in-position, while vindex semantics put it first
+    non_slices = [k for k in key if not isinstance(k, slice)]
+    ndim = len(np.broadcast(*non_slices).shape)
+    mixed_positions = advanced_index_positions[0] + np.arange(ndim)
+    vindex_positions = np.arange(ndim)
+    return mixed_positions, vindex_positions
class NumpyVIndexAdapter:
"""Object that implements indexing like vindex on a np.ndarray.
@@ -66,6 +104,23 @@ class NumpyVIndexAdapter:
"""Value must have dimensionality matching the key."""
mixed_positions, vindex_positions = _advanced_indexer_subspaces(key)
self._array[key] = np.moveaxis(value, vindex_positions, mixed_positions)
+
+def _create_method(name: str) -> Callable:
+    """Create a method that dispatches to bottleneck or numpy based on OPTIONS."""
+    def method(values, axis=None, **kwargs):
+        if (
+            OPTIONS["use_bottleneck"]
+            and _BOTTLENECK_AVAILABLE
+            # bottleneck only handles plain, native-endian numeric ndarrays
+            and isinstance(values, np.ndarray)
+            and not isinstance(values, np.ma.MaskedArray)
+            and values.dtype.kind in 'uifc'
+            and values.dtype.isnative
+            and not isinstance(axis, tuple)
+            and not kwargs
+        ):
+            try:
+                return getattr(bn, name)(values, axis=axis)
+            except (ValueError, AttributeError):
+                pass
+        return getattr(np, name)(values, axis=axis, **kwargs)
+    return method
nanmin = _create_method('nanmin')
nanmax = _create_method('nanmax')
nanmean = _create_method('nanmean')
diff --git a/xarray/core/options.py b/xarray/core/options.py
index e1da5ab9..be112377 100644
--- a/xarray/core/options.py
+++ b/xarray/core/options.py
@@ -1,7 +1,30 @@
from __future__ import annotations
import warnings
-from typing import TYPE_CHECKING, Literal, TypedDict
+from typing import TYPE_CHECKING, Any, Literal, TypedDict
from xarray.core.utils import FrozenDict
+
+def _positive_integer(value: Any) -> bool:
+    """Validator for positive integers."""
+    # bool is a subclass of int; exclude it explicitly
+    return isinstance(value, int) and not isinstance(value, bool) and value > 0
+
+def _get_boolean_with_default(option: str) -> bool:
+ """Get a boolean option, interpreting 'default' as True."""
+ value = OPTIONS[option]
+ return True if value == 'default' else bool(value)
+
+def _warn_on_setting_enable_cftimeindex(value: bool) -> None:
+ """Warn if enable_cftimeindex is being set."""
+ if not value:
+ warnings.warn(
+ "Setting enable_cftimeindex=False will be deprecated in a future version of xarray.",
+ FutureWarning,
+ stacklevel=3,
+ )
+
+def _set_file_cache_maxsize(value: int) -> None:
+ """Set the file cache maxsize."""
+ from xarray.backends.file_manager import FILE_CACHE
+ FILE_CACHE.maxsize = value
if TYPE_CHECKING:
from matplotlib.colors import Colormap
Options = Literal['arithmetic_join', 'cmap_divergent', 'cmap_sequential', 'display_max_rows', 'display_values_threshold', 'display_style', 'display_width', 'display_expand_attrs', 'display_expand_coords', 'display_expand_data_vars', 'display_expand_data', 'display_expand_groups', 'display_expand_indexes', 'display_default_indexes', 'enable_cftimeindex', 'file_cache_maxsize', 'keep_attrs', 'warn_for_unclosed_files', 'use_bottleneck', 'use_numbagg', 'use_opt_einsum', 'use_flox']
@@ -187,4 +210,4 @@ def get_options():
set_options
"""
- pass
\ No newline at end of file
+ return FrozenDict(OPTIONS)
\ No newline at end of file
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 8db31fd9..e71bb2c8 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -183,6 +183,41 @@ def compat_dict_union(first_dict: Mapping[K, V], second_dict: Mapping[K, V], com
"""
pass
+class FrozenDict(Mapping[K, V]):
+ """Immutable dictionary.
+
+ Implements the Mapping interface. Items cannot be added or removed after
+ initialization.
+ """
+ __slots__ = ('_d',)
+
+ def __init__(self, *args, **kwargs):
+ self._d = dict(*args, **kwargs)
+
+ def __getitem__(self, key: K) -> V:
+ return self._d[key]
+
+ def __iter__(self) -> Iterator[K]:
+ return iter(self._d)
+
+ def __len__(self) -> int:
+ return len(self._d)
+
+ def __contains__(self, key: object) -> bool:
+ return key in self._d
+
+ def __repr__(self) -> str:
+ return f'{type(self).__name__}({self._d!r})'
+
+    # keys(), items() and values() views are provided by the Mapping mixins
+
class Frozen(Mapping[K, V]):
"""Wrapper around an object implementing the mapping interface to make it
immutable. If you really want to modify the mapping, the mutable version is
diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py
index 76b6c423..97bb39fd 100644
--- a/xarray/namedarray/pycompat.py
+++ b/xarray/namedarray/pycompat.py
@@ -57,8 +57,32 @@ _cached_duck_array_modules: dict[ModType, DuckArrayModule] = {}
def array_type(mod: ModType) -> DuckArrayTypes:
"""Quick wrapper to get the array class of the module."""
- pass
+ if mod not in _cached_duck_array_modules:
+ _cached_duck_array_modules[mod] = DuckArrayModule(mod)
+ return _cached_duck_array_modules[mod].type
def mod_version(mod: ModType) -> Version:
"""Quick wrapper to get the version of the module."""
- pass
\ No newline at end of file
+ if mod not in _cached_duck_array_modules:
+ _cached_duck_array_modules[mod] = DuckArrayModule(mod)
+ return _cached_duck_array_modules[mod].version
+
+def to_numpy(array: Any) -> np.ndarray:
+ """Convert an array-like object to a numpy array."""
+ if hasattr(array, '__array_function__'):
+ return np.asarray(array)
+ elif hasattr(array, 'to_numpy'):
+ return array.to_numpy()
+ else:
+ return np.asarray(array)
+
+def to_duck_array(array: Any) -> duckarray:
+ """Convert an array-like object to a duck array."""
+ if is_duck_array(array):
+ return array
+ else:
+ return np.asarray(array)
+
+def is_chunked_array(x: Any) -> bool:
+    """Check if an array is a chunked array (e.g., a dask or cubed array)."""
+    return is_duck_dask_array(x) or (is_duck_array(x) and hasattr(x, 'chunks'))
\ No newline at end of file
diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py
index a00ca4cb..bb8ed6c7 100644
--- a/xarray/namedarray/utils.py
+++ b/xarray/namedarray/utils.py
@@ -43,11 +43,41 @@ def module_available(module: str, minversion: str | None=None) -> bool:
available : bool
Whether the module is installed.
"""
- pass
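+    # Importing the module is the simplest reliable check, though it runs the
+    # module's import-time code; a find_spec-based check would avoid that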
+ try:
+ mod = importlib.import_module(module)
+ except ImportError:
+ return False
+
+ if minversion is not None:
+ try:
+ version = Version(mod.__version__)
+ except (AttributeError, ValueError):
+ return False
+ if version < Version(minversion):
+ return False
+
+ return True
+
+def is_dict_like(value: Any) -> TypeGuard[Mapping]:
+ """Check if a value behaves like a mapping.
+
+ Parameters
+ ----------
+ value : Any
+ Value to check.
+
+ Returns
+ -------
+ bool
+ True if the value behaves like a mapping.
+ """
+ return hasattr(value, 'keys') and hasattr(value, '__getitem__')
def to_0d_object_array(value: object) -> NDArray[np.object_]:
"""Given a value, wrap it in a 0-D numpy.ndarray with dtype=object."""
- pass
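+    # Assign via result[()] so sequence-like values are stored as a single
+    # object instead of being broadcast into the array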
+ result = np.empty((), dtype=object)
+ result[()] = value
+ return result
def drop_missing_dims(supplied_dims: Iterable[_Dim], dims: Iterable[_Dim], missing_dims: ErrorOptionsWithWarn) -> _DimsLike:
"""Depending on the setting of missing_dims, drop any dimensions from supplied_dims that
@@ -59,14 +89,55 @@ def drop_missing_dims(supplied_dims: Iterable[_Dim], dims: Iterable[_Dim], missi
dims : Iterable of Hashable
missing_dims : {"raise", "warn", "ignore"}
"""
- pass
+ dims_set = set(dims)
+ supplied_dims_list = list(supplied_dims)
+ missing = set(supplied_dims_list) - dims_set
+
+ if missing:
+ if missing_dims == "raise":
+ raise ValueError(f"Dimensions {missing} not found in dims {dims_set}")
+ elif missing_dims == "warn":
+ warnings.warn(f"Dimensions {missing} not found in dims {dims_set}")
+
+ # Filter out missing dims
+ supplied_dims_list = [d for d in supplied_dims_list if d in dims_set]
+
+ return supplied_dims_list
def infix_dims(dims_supplied: Iterable[_Dim], dims_all: Iterable[_Dim], missing_dims: ErrorOptionsWithWarn='raise') -> Iterator[_Dim]:
"""
Resolves a supplied list containing an ellipsis representing other items, to
a generator with the 'realized' list of all items
"""
- pass
+    dims_supplied_list = list(dims_supplied)
+    dims_all_list = list(dims_all)
+
+    if dims_supplied_list.count(Ellipsis) > 1:
+        raise ValueError("More than one ellipsis (...) supplied")
+
+    if Ellipsis not in dims_supplied_list:
+        yield from drop_missing_dims(dims_supplied_list, dims_all_list, missing_dims)
+        return
+
+    # Validate the explicitly named dims, honoring missing_dims
+    explicit = drop_missing_dims([d for d in dims_supplied_list if d is not Ellipsis], dims_all_list, missing_dims)
+    explicit_set = set(explicit)
+
+    # Dims not explicitly named replace the ellipsis, in their original order
+    ellipsis_dims = [d for d in dims_all_list if d not in explicit_set]
+
+    for dim in dims_supplied_list:
+        if dim is Ellipsis:
+            yield from ellipsis_dims
+        elif dim in explicit_set:
+            yield dim
class ReprObject:
"""Object that prints as the given value, for use with sentinel values."""
@@ -87,4 +158,58 @@ class ReprObject:
def __dask_tokenize__(self) -> object:
from dask.base import normalize_token
- return normalize_token((type(self), self._value))
\ No newline at end of file
+ return normalize_token((type(self), self._value))
+
+def is_dask_collection(x: Any) -> TypeGuard[DaskCollection]:
+ """Test if an object is a dask collection."""
+ try:
+ from dask.base import is_dask_collection as _is_dask_collection
+ return _is_dask_collection(x)
+ except ImportError:
+ return False
+
+def is_duck_array(value: Any) -> TypeGuard[duckarray]:
+    """Check if value is a duck array."""
+    # Require the core array attributes, not just the protocol hooks, so that
+    # scalars and array-likes without shape/dtype are excluded
+    return (
+        hasattr(value, 'ndim')
+        and hasattr(value, 'shape')
+        and hasattr(value, 'dtype')
+        and (hasattr(value, '__array_function__') or hasattr(value, '__array_namespace__'))
+    )
+
+def is_duck_dask_array(value: Any) -> TypeGuard[DaskArray]:
+ """Check if value is a dask array."""
+ return is_duck_array(value) and is_dask_collection(value)
+
+def either_dict_or_kwargs(pos_kwargs: Mapping[K, V] | None, kw_kwargs: Mapping[str, V], func_name: str | None=None) -> dict[Hashable, V]:
+ """Return a single dictionary combining dict and kwargs.
+
+    If both are provided, their keys must not overlap; a duplicated key
+    raises a TypeError.
+
+ Parameters
+ ----------
+ pos_kwargs : mapping, optional
+ A mapping object to be combined with kw_kwargs.
+ kw_kwargs : mapping
+ A mapping object to be combined with pos_kwargs.
+ func_name : str, optional
+ The name of the function being called. This is used to provide a more
+ informative error message in case of duplicated keys.
+
+ Returns
+ -------
+ dict
+ A dictionary combining the values of pos_kwargs and kw_kwargs.
+ """
+ if pos_kwargs is None:
+ return dict(kw_kwargs)
+
+ if not is_dict_like(pos_kwargs):
+ raise ValueError("the first argument must be a dictionary")
+
+ combined = dict(pos_kwargs)
+ for k, v in kw_kwargs.items():
+ if k in combined:
+ if func_name is None:
+ msg = f"argument {k!r} specified both by position and keyword"
+ else:
+ msg = f"{func_name}() got multiple values for argument {k!r}"
+ raise TypeError(msg)
+ combined[k] = v
+
+ return combined
\ No newline at end of file