Source code for metpy.xarray

# Copyright (c) 2018,2019 MetPy Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Provide accessors to enhance interoperability between xarray and MetPy.

MetPy relies upon the `CF Conventions <http://cfconventions.org/>`_. to provide helpful
attributes and methods on xarray DataArrays and Dataset for working with
coordinate-related metadata. Also included are several attributes and methods for unit
operations.

These accessors will be activated with any import of MetPy. Do not use the
``MetPyDataArrayAccessor`` or ``MetPyDatasetAccessor`` classes directly, instead, utilize the
applicable properties and methods via the ``.metpy`` attribute on an xarray DataArray or
Dataset.

See Also: :doc:`xarray with MetPy Tutorial </tutorials/xarray_tutorial>`.
"""
from __future__ import absolute_import

import functools
import logging
import re
import warnings

import xarray as xr

from ._vendor.xarray import either_dict_or_kwargs, expanded_indexer, is_dict_like
from .units import DimensionalityError, UndefinedUnitError, units

__all__ = []
readable_to_cf_axes = {'time': 'T', 'vertical': 'Z', 'y': 'Y', 'x': 'X'}
cf_to_readable_axes = {readable_to_cf_axes[key]: key for key in readable_to_cf_axes}

# Define the criteria for coordinate matches
coordinate_criteria = {
    'standard_name': {
        'time': 'time',
        'vertical': {'air_pressure', 'height', 'geopotential_height', 'altitude',
                     'model_level_number', 'atmosphere_ln_pressure_coordinate',
                     'atmosphere_sigma_coordinate',
                     'atmosphere_hybrid_sigma_pressure_coordinate',
                     'atmosphere_hybrid_height_coordinate', 'atmosphere_sleve_coordinate',
                     'height_above_geopotential_datum', 'height_above_reference_ellipsoid',
                     'height_above_mean_sea_level'},
        'y': 'projection_y_coordinate',
        'lat': 'latitude',
        'x': 'projection_x_coordinate',
        'lon': 'longitude'
    },
    '_CoordinateAxisType': {
        'time': 'Time',
        'vertical': {'GeoZ', 'Height', 'Pressure'},
        'y': 'GeoY',
        'lat': 'Lat',
        'x': 'GeoX',
        'lon': 'Lon'
    },
    'axis': readable_to_cf_axes,
    'positive': {
        'vertical': {'up', 'down'}
    },
    'units': {
        'vertical': {
            'match': 'dimensionality',
            'units': 'Pa'
        },
        'lat': {
            'match': 'name',
            'units': {'degree_north', 'degree_N', 'degreeN', 'degrees_north', 'degrees_N',
                      'degreesN'}
        },
        'lon': {
            'match': 'name',
            'units': {'degree_east', 'degree_E', 'degreeE', 'degrees_east', 'degrees_E',
                      'degreesE'}
        },
    },
    'regular_expression': {
        'time': r'time[0-9]*',
        'vertical': (r'(bottom_top|sigma|h(ei)?ght|altitude|depth|isobaric|pres|'
                     r'isotherm)[a-z_]*[0-9]*'),
        'y': r'y',
        'lat': r'x?lat[a-z0-9]*',
        'x': r'x',
        'lon': r'x?lon[a-z0-9]*'
    }
}

log = logging.getLogger(__name__)


[docs]@xr.register_dataarray_accessor('metpy') class MetPyDataArrayAccessor(object): r"""Provide custom attributes and methods on xarray DataArrays for MetPy functionality. This accessor provides several convenient attributes and methods through the `.metpy` attribute on a DataArray. For example, MetPy can identify the coordinate corresponding to a particular axis (given sufficent metadata): >>> import xarray as xr >>> temperature = xr.DataArray([[0, 1], [2, 3]], dims=('lat', 'lon'), ... coords={'lat': [40, 41], 'lon': [-105, -104]}, ... attrs={'units': 'degC'}) >>> temperature.metpy.x <xarray.DataArray 'lon' (lon: 2)> array([-105, -104]) Coordinates: * lon (lon) int64 -105 -104 Attributes: _metpy_axis: X """ def __init__(self, data_array): # noqa: D107 # Initialize accessor with a DataArray. (Do not use directly). self._data_array = data_array self._units = self._data_array.attrs.get('units', 'dimensionless') @property def units(self): """Return the units of this DataArray as a `pint.Quantity`.""" if self._units != '%': return units(self._units) else: return units('percent') @property def unit_array(self): """Return the data values of this DataArray as a `pint.Quantity`.""" return self._data_array.values * self.units @unit_array.setter def unit_array(self, values): """Set data values from a `pint.Quantity`.""" self._data_array.values = values.magnitude self._units = self._data_array.attrs['units'] = str(values.units)
[docs] def convert_units(self, units): """Convert the data values to different units in-place.""" self.unit_array = self.unit_array.to(units)
@property def crs(self): """Return the coordinate reference system (CRS) as a CFProjection object.""" if 'crs' in self._data_array.coords: return self._data_array.coords['crs'].item() raise AttributeError('crs attribute is not available.') @property def cartopy_crs(self): """Return the coordinate reference system (CRS) as a cartopy object.""" return self.crs.to_cartopy() @property def cartopy_globe(self): """Return the globe belonging to the coordinate reference system (CRS).""" return self.crs.cartopy_globe def _fixup_coordinate_map(self, coord_map): """Ensure sure we have coordinate variables in map, not coordinate names.""" for axis in coord_map: if coord_map[axis] is not None and not isinstance(coord_map[axis], xr.DataArray): coord_map[axis] = self._data_array[coord_map[axis]]
[docs] def assign_coordinates(self, coordinates): """Assign the given coordinates to the given CF axis types. Parameters ---------- coordinates : dict or None Mapping from axis types ('T', 'Z', 'Y', 'X') to coordinates of this DataArray. Coordinates can either be specified directly or by their name. If ``None``, clears the `_metpy_axis` attribute on all coordinates, which will trigger reparsing of all coordinates on next access. """ if coordinates: # Assign the _metpy_axis attributes according to supplied mapping self._fixup_coordinate_map(coordinates) for axis in coordinates: if coordinates[axis] is not None: coordinates[axis].attrs['_metpy_axis'] = axis else: # Clear _metpy_axis attribute on all coordinates for coord_var in self._data_array.coords.values(): coord_var.attrs.pop('_metpy_axis', None)
def _generate_coordinate_map(self): """Generate a coordinate map via CF conventions and other methods.""" coords = self._data_array.coords.values() # Parse all the coordinates, attempting to identify x, y, vertical, time coord_lists = {'T': [], 'Z': [], 'Y': [], 'X': []} for coord_var in coords: # Identify the coordinate type using check_axis helper axes_to_check = { 'T': ('time',), 'Z': ('vertical',), 'Y': ('y', 'lat'), 'X': ('x', 'lon') } for axis_cf, axes_readable in axes_to_check.items(): if check_axis(coord_var, *axes_readable): coord_lists[axis_cf].append(coord_var) # Resolve any coordinate conflicts axis_conflicts = [axis for axis in coord_lists if len(coord_lists[axis]) > 1] for axis in axis_conflicts: self._resolve_axis_conflict(axis, coord_lists) # Collapse the coord_lists to a coord_map return {axis: (coord_lists[axis][0] if len(coord_lists[axis]) > 0 else None) for axis in coord_lists} def _resolve_axis_conflict(self, axis, coord_lists): """Handle axis conflicts if they arise.""" if axis in ('Y', 'X'): # Horizontal coordinate, can be projection x/y or lon/lat. So, check for # existence of unique projection x/y (preferred over lon/lat) and use that if # it exists uniquely projection_coords = [coord_var for coord_var in coord_lists[axis] if check_axis(coord_var, 'x', 'y')] if len(projection_coords) == 1: coord_lists[axis] = projection_coords return # If one and only one of the possible axes is a dimension, use it dimension_coords = [coord_var for coord_var in coord_lists[axis] if coord_var.name in coord_var.dims] if len(dimension_coords) == 1: coord_lists[axis] = dimension_coords return # Ambiguous axis, raise warning and do not parse warnings.warn('More than one ' + cf_to_readable_axes[axis] + ' coordinate present ' + 'for variable "' + self._data_array.name + '".') coord_lists[axis] = [] def _metpy_axis_search(self, cf_axis): """Search for cached _metpy_axis attribute on the coordinates, otherwise parse.""" # Search for coord with proper _metpy_axis coords = self._data_array.coords.values() for coord_var in coords: if coord_var.attrs.get('_metpy_axis') == cf_axis: return coord_var # Opportunistically parse all coordinates, and assign if not already assigned coord_map = self._generate_coordinate_map() for axis, coord_var in coord_map.items(): if coord_var is not None and not any(coord.attrs.get('_metpy_axis') == axis for coord in coords): coord_var.attrs['_metpy_axis'] = axis # Return parsed result (can be None if none found) return coord_map[cf_axis] def _axis(self, axis): """Return the coordinate variable corresponding to the given individual axis type.""" if axis in readable_to_cf_axes: coord_var = self._metpy_axis_search(readable_to_cf_axes[axis]) if coord_var is not None: return coord_var else: raise AttributeError(axis + ' attribute is not available.') else: raise AttributeError("'" + axis + "' is not an interpretable axis.")
[docs] def coordinates(self, *args): """Return the coordinate variables corresponding to the given axes types. Parameters ---------- args : str Strings describing the axes type(s) to obtain. Currently understood types are 'time', 'vertical', 'y', and 'x'. Notes ----- This method is designed for use with mutliple coordinates; it returns a generator. To access a single coordinate, use the appropriate attribute on the accessor, or use tuple unpacking. """ for arg in args: yield self._axis(arg)
@property def time(self): """Return the time coordinate.""" return self._axis('time') @property def vertical(self): """Return the vertical coordinate.""" return self._axis('vertical') @property def y(self): """Return the y or latitude coordinate.""" return self._axis('y') @property def x(self): """Return the x or longitude coordinate.""" return self._axis('x')
[docs] def coordinates_identical(self, other): """Return whether or not the coordinates of other match this DataArray's.""" # If the number of coordinates do not match, we know they can't match. if len(self._data_array.coords) != len(other.coords): return False # If same length, iterate over all of them and check for coord_name, coord_var in self._data_array.coords.items(): if coord_name not in other.coords or not other[coord_name].identical(coord_var): return False # Otherwise, they match. return True
[docs] def as_timestamp(self): """Return the data as unix timestamp (for easier time derivatives).""" attrs = {key: self._data_array.attrs[key] for key in {'standard_name', 'long_name', 'axis', '_metpy_axis'} & set(self._data_array.attrs)} attrs['units'] = 'seconds' return xr.DataArray(self._data_array.values.astype('datetime64[s]').astype('int'), name=self._data_array.name, coords=self._data_array.coords, dims=self._data_array.dims, attrs=attrs)
[docs] def find_axis_name(self, axis): """Return the name of the axis corresponding to the given identifier. Parameters ---------- axis : str or int Identifier for an axis. Can be the an axis number (integer), dimension coordinate name (string) or a standard axis type (string). """ if isinstance(axis, int): # If an integer, use the corresponding dimension return self._data_array.dims[axis] elif axis not in self._data_array.dims and axis in readable_to_cf_axes: # If not a dimension name itself, but a valid axis type, get the name of the # coordinate corresponding to that axis type return self._axis(axis).name elif axis in self._data_array.dims and axis in self._data_array.coords: # If this is a dimension coordinate name, use it directly return axis else: # Otherwise, not valid raise ValueError('Given axis is not valid. Must be an axis number, a dimension ' 'coordinate name, or a standard axis type.')
class _LocIndexer(object): """Provide the unit-wrapped .loc indexer for data arrays.""" def __init__(self, data_array): self.data_array = data_array def expand(self, key): """Parse key using xarray utils to ensure we have dimension names.""" if not is_dict_like(key): labels = expanded_indexer(key, self.data_array.ndim) key = dict(zip(self.data_array.dims, labels)) return key def __getitem__(self, key): key = _reassign_quantity_indexer(self.data_array, self.expand(key)) return self.data_array.loc[key] def __setitem__(self, key, value): key = _reassign_quantity_indexer(self.data_array, self.expand(key)) self.data_array.loc[key] = value @property def loc(self): """Wrap DataArray.loc with an indexer to handle units and coordinate types.""" return self._LocIndexer(self._data_array)
[docs] def sel(self, indexers=None, method=None, tolerance=None, drop=False, **indexers_kwargs): """Wrap DataArray.sel to handle units and coordinate types.""" indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel') indexers = _reassign_quantity_indexer(self._data_array, indexers) return self._data_array.sel(indexers, method=method, tolerance=tolerance, drop=drop)
[docs]@xr.register_dataset_accessor('metpy') class MetPyDatasetAccessor(object): """Provide custom attributes and methods on XArray Datasets for MetPy functionality. This accessor provides parsing of CF metadata and unit-/coordinate-type-aware selection. >>> import xarray as xr >>> from metpy.testing import get_test_data >>> ds = xr.open_dataset(get_test_data('narr_example.nc', False)).metpy.parse_cf() >>> print(ds['crs'].item()) Projection: lambert_conformal_conic """ def __init__(self, dataset): # noqa: D107 # Initialize accessor with a Dataset. (Do not use directly). self._dataset = dataset
[docs] def parse_cf(self, varname=None, coordinates=None): """Parse Climate and Forecasting (CF) convention metadata. Parameters ---------- varname : str or iterable of str, optional Name of the variable(s) to extract from the dataset while parsing for CF metadata. Defaults to all variables. coordinates : dict, optional Dictionary mapping CF axis types to coordinates of the variable(s). Only specify if you wish to override MetPy's automatic parsing of some axis type(s). Returns ------- `xarray.DataArray` or `xarray.Dataset` Parsed DataArray (if varname is a string) or Dataset """ from .cbook import is_string_like, iterable from .plots.mapping import CFProjection if varname is None: # If no varname is given, parse all variables in the dataset varname = list(self._dataset.data_vars) if iterable(varname) and not is_string_like(varname): # If non-string iterable is given, apply recursively across the varnames subset = xr.merge([self.parse_cf(single_varname, coordinates=coordinates) for single_varname in varname]) subset.attrs = self._dataset.attrs return subset var = self._dataset[varname] if 'grid_mapping' in var.attrs: proj_name = var.attrs['grid_mapping'] try: proj_var = self._dataset.variables[proj_name] except KeyError: log.warning( 'Could not find variable corresponding to the value of ' 'grid_mapping: {}'.format(proj_name)) else: var.coords['crs'] = CFProjection(proj_var.attrs) self._fixup_coords(var) # Trying to guess whether we should be adding a crs to this variable's coordinates # First make sure it's missing CRS but isn't lat/lon itself if not check_axis(var, 'lat', 'lon') and 'crs' not in var.coords: # Look for both lat/lon in the coordinates has_lat = has_lon = False for coord_var in var.coords.values(): has_lat = has_lat or check_axis(coord_var, 'lat') has_lon = has_lon or check_axis(coord_var, 'lon') # If we found them, create a lat/lon projection as the crs coord if has_lat and has_lon: var.coords['crs'] = CFProjection({'grid_mapping_name': 'latitude_longitude'}) log.warning('Found lat/lon values, assuming latitude_longitude ' 'for projection grid_mapping variable') # Assign coordinates if the coordinates argument is given if coordinates is not None: var.metpy.assign_coordinates(coordinates) return var
def _fixup_coords(self, var): """Clean up the units on the coordinate variables.""" for coord_name, data_array in var.coords.items(): if (check_axis(data_array, 'x', 'y') and not check_axis(data_array, 'lon', 'lat')): try: var.coords[coord_name].metpy.convert_units('meters') except DimensionalityError: # Radians! if 'crs' in var.coords: new_data_array = data_array.copy() height = var.coords['crs'].item()['perspective_point_height'] scaled_vals = new_data_array.metpy.unit_array * (height * units.meters) new_data_array.metpy.unit_array = scaled_vals.to('meters') var.coords[coord_name] = new_data_array class _LocIndexer(object): """Provide the unit-wrapped .loc indexer for datasets.""" def __init__(self, dataset): self.dataset = dataset def __getitem__(self, key): parsed_key = _reassign_quantity_indexer(self.dataset, key) return self.dataset.loc[parsed_key] @property def loc(self): """Wrap Dataset.loc with an indexer to handle units and coordinate types.""" return self._LocIndexer(self._dataset)
[docs] def sel(self, indexers=None, method=None, tolerance=None, drop=False, **indexers_kwargs): """Wrap Dataset.sel to handle units.""" indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel') indexers = _reassign_quantity_indexer(self._dataset, indexers) return self._dataset.sel(indexers, method=method, tolerance=tolerance, drop=drop)
def check_axis(var, *axes): """Check if the criteria for any of the given axes are satisfied. Parameters ---------- var : `xarray.DataArray` DataArray belonging to the coordinate to be checked axes : str Axis type(s) to check for. Currently can check for 'time', 'vertical', 'y', 'lat', 'x', and 'lon'. """ for axis in axes: # Check for # - standard name (CF option) # - _CoordinateAxisType (from THREDDS) # - axis (CF option) # - positive (CF standard for non-pressure vertical coordinate) for criterion in ('standard_name', '_CoordinateAxisType', 'axis', 'positive'): if (var.attrs.get(criterion, 'absent') in coordinate_criteria[criterion].get(axis, set())): return True # Check for units, either by dimensionality or name try: if (axis in coordinate_criteria['units'] and ( ( coordinate_criteria['units'][axis]['match'] == 'dimensionality' and (units.get_dimensionality(var.attrs.get('units')) == units.get_dimensionality( coordinate_criteria['units'][axis]['units'])) ) or ( coordinate_criteria['units'][axis]['match'] == 'name' and var.attrs.get('units') in coordinate_criteria['units'][axis]['units'] ))): return True except UndefinedUnitError: pass # Check if name matches regular expression (non-CF failsafe) if re.match(coordinate_criteria['regular_expression'][axis], var.name.lower()): return True # If no match has been made, return False (rather than None) return False def preprocess_xarray(func): """Decorate a function to convert all DataArray arguments to pint.Quantities. This uses the metpy xarray accessors to do the actual conversion. """ @functools.wraps(func) def wrapper(*args, **kwargs): args = tuple(a.metpy.unit_array if isinstance(a, xr.DataArray) else a for a in args) kwargs = {name: (v.metpy.unit_array if isinstance(v, xr.DataArray) else v) for name, v in kwargs.items()} return func(*args, **kwargs) return wrapper def check_matching_coordinates(func): """Decorate a function to make sure all given DataArrays have matching coordinates.""" @functools.wraps(func) def wrapper(*args, **kwargs): data_arrays = ([a for a in args if isinstance(a, xr.DataArray)] + [a for a in kwargs.values() if isinstance(a, xr.DataArray)]) if len(data_arrays) > 1: first = data_arrays[0] for other in data_arrays[1:]: if not first.metpy.coordinates_identical(other): raise ValueError('Input DataArray arguments must be on same coordinates.') return func(*args, **kwargs) return wrapper # If DatetimeAccessor does not have a strftime (xarray <0.12.2), monkey patch one in try: from xarray.core.accessors import DatetimeAccessor if not hasattr(DatetimeAccessor, 'strftime'): def strftime(self, date_format): """Format time as a string.""" import pandas as pd values = self._obj.data values_as_series = pd.Series(values.ravel()) strs = values_as_series.dt.strftime(date_format) return strs.values.reshape(values.shape) DatetimeAccessor.strftime = strftime except ImportError: pass def _reassign_quantity_indexer(data, indexers): """Reassign a units.Quantity indexer to units of relevant coordinate.""" def _to_magnitude(val, unit): try: return val.to(unit).m except AttributeError: return val for coord_name in indexers: # Handle axis types for DataArrays if (isinstance(data, xr.DataArray) and coord_name not in data.dims and coord_name in readable_to_cf_axes): axis = coord_name coord_name = next(data.metpy.coordinates(axis)).name indexers[coord_name] = indexers[axis] del indexers[axis] # Handle slices of quantities if isinstance(indexers[coord_name], slice): start = _to_magnitude(indexers[coord_name].start, data[coord_name].metpy.units) stop = _to_magnitude(indexers[coord_name].stop, data[coord_name].metpy.units) step = _to_magnitude(indexers[coord_name].step, data[coord_name].metpy.units) indexers[coord_name] = slice(start, stop, step) # Handle quantities indexers[coord_name] = _to_magnitude(indexers[coord_name], data[coord_name].metpy.units) return indexers __all__ = ('MetPyDataArrayAccessor', 'MetPyDatasetAccessor')