# Copyright (c) 2013-2015 Siphon Contributors.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Support reading and parsing the dataset.xml documents from the netCDF Subset Service."""
from __future__ import print_function
import logging
import re
import numpy as np
# NOTE(review): basicConfig() configures the root logger at WARNING level as an
# import-time side effect of this module. Library modules normally leave root
# logger configuration to the application -- confirm this is intended.
logging.basicConfig(level=logging.WARNING)
# Module-level logger, named after this module, used for all parse warnings.
log = logging.getLogger(__name__)
def _without_namespace(tagname):
    """Remove the xml namespace from a tag name.

    Parameters
    ----------
    tagname : str
        A tag name, optionally prefixed with a namespace in the
        ``{namespace}tag`` form.

    Returns
    -------
    str
        Everything after the last ``}`` in `tagname`, or `tagname` unchanged
        when it contains no ``}``.

    """
    # rsplit yields the whole string as its only item when no '}' is present,
    # so no separate membership check is needed.
    return tagname.rsplit('}', 1)[-1]
class _Types(object):
    """Convert elements of an NCSS dataset.xml document into python objects.

    Each ``handle_<tag>`` method converts the xml element with that tag name.
    Use :meth:`lookup` to obtain the handler for a given tag name.
    """

    @staticmethod
    def handle_typed_values(val, type_name, value_type):
        """Translate typed values into the appropriate python object.

        Takes an element name, value, and type and returns a list
        with the string value(s) properly converted to a python type.

        TypedValues are handled in ucar.ma2.DataType in netcdfJava
        in the DataType enum. Possibilities are:

            "boolean"
            "byte"
            "char"
            "short"
            "int"
            "long"
            "float"
            "double"
            "Sequence"
            "String"
            "Structure"
            "enum1"
            "enum2"
            "enum4"
            "opaque"
            "object"

        All of these are values written as strings in the xml, so simply
        applying int, float to the values will work in most cases (i.e.
        the TDS encodes them as string values properly).

        Example XML element:

        <attribute name="scale_factor" type="double" value="0.0010000000474974513"/>

        Parameters
        ----------
        val : string
            The string representation of the value attribute of the xml element

        type_name : string
            The string representation of the name attribute of the xml element

        value_type : string
            The string representation of the type attribute of the xml element

        Returns
        -------
        val : list
            A list containing the properly typed python values. If a value
            cannot be converted, or `value_type` is not understood, a warning
            is logged and the value is kept as a string (for a failed boolean
            conversion, as the list of whitespace-separated strings).

        """
        if value_type in ['byte', 'short', 'int', 'long']:
            try:
                # Values may be separated by spaces and/or commas; skip the
                # empty strings produced by consecutive separators.
                val = [int(v) for v in re.split('[ ,]', val) if v]
            except ValueError:
                log.warning('Cannot convert "%s" to int. Keeping type as str.', val)
        elif value_type in ['float', 'double']:
            try:
                val = [float(v) for v in re.split('[ ,]', val) if v]
            except ValueError:
                log.warning('Cannot convert "%s" to float. Keeping type as str.', val)
        elif value_type == 'boolean':
            # special case for boolean type: every whitespace-separated value
            # must be either true or false
            val = val.split()
            if all(item in ('true', 'false') for item in val):
                val = [item == 'true' for item in val]
            else:
                log.warning('Cannot convert values %s to boolean. Keeping type as str.',
                            val)
        elif value_type == 'String':
            # nothing special for String type
            pass
        else:
            # possibilities - Sequence, Structure, enum, opaque, object,
            # and char.
            # Not sure how to handle these as I do not have an example
            # of how they would show up in dataset.xml
            log.warning('%s type %s not understood. Keeping as String.',
                        type_name, value_type)

        # Always hand back a list, even for a single unconverted string.
        if not isinstance(val, list):
            val = [val]

        return val

    def handle_attribute(self, element):  # noqa
        """Convert an ``attribute`` element to a single-item ``{name: value}`` dict.

        The value is converted with :meth:`handle_typed_values` (and is
        therefore a list) when the element has a non-empty ``type`` attribute;
        otherwise the raw ``value`` string is used.
        """
        attribute_type = element.attrib.get('type')
        name = element.attrib['name']
        val = element.attrib['value']
        if attribute_type:
            val = self.handle_typed_values(val, 'attribute', attribute_type)

        return {name: val}

    def handle_values(self, element, value_type=None):  # noqa
        """Convert a ``values`` element to a ``{'values': ...}`` dict.

        When the element has text, the text is converted with
        :meth:`handle_typed_values` if `value_type` is given and split on
        whitespace otherwise. When the element has no text and its attributes
        are exactly ``start``, ``increment`` and ``npts``, the values are
        generated as ``start + i * increment`` for ``i`` in ``range(npts)``.
        In any other case the value is whatever ``element.text`` was (``None``
        or an empty string).
        """
        val = element.text
        if val:
            if value_type:
                val = self.handle_typed_values(val, 'value', value_type)
            else:
                val = val.split()
        elif sorted(element.attrib) == ['increment', 'npts', 'start']:
            start = float(element.attrib['start'])
            inc = float(element.attrib['increment'])
            npts = float(element.attrib['npts'])
            val = (start + np.arange(npts) * inc).tolist()

        return {'values': val}

    @staticmethod
    def handle_projectionBox(element):  # noqa
        """Convert a ``projectionBox`` element to ``{'projectionBox': {tag: float}}``.

        The inner dict is left empty when the element's tag is not exactly
        ``projectionBox``.
        """
        type_name = 'projectionBox'
        pb = {}
        if element.tag == type_name:
            for child in element:
                pb[child.tag] = float(child.text)

        return {type_name: pb}

    @staticmethod
    def handle_axisRef(element):  # noqa
        """Return the ``name`` attribute of an ``axisRef`` element."""
        return element.attrib['name']

    @staticmethod
    def handle_coordTransRef(element):  # noqa
        """Convert a ``coordTransRef`` element to ``{'coordTransRef': name}``."""
        return {'coordTransRef': element.attrib['name']}

    def handle_grid(self, element):
        """Convert a ``grid`` element to a dict.

        The result holds a copy of the element's xml attributes plus an
        ``'attributes'`` key containing the merged results of
        :meth:`handle_attribute` for every child element.
        """
        grid = dict(element.attrib)
        attrs = {}
        for attribute in element:
            attrs.update(self.handle_attribute(attribute))
        grid['attributes'] = attrs
        return grid

    @staticmethod
    def handle_parameter(element):
        """Convert a ``parameter`` element to ``{name: value}``, stripping the value."""
        name = element.attrib['name']
        value = element.attrib['value'].strip()
        return {name: value}

    @staticmethod
    def handle_featureDataset(element):  # noqa
        """Return a copy of the xml attributes of a ``featureDataset`` element."""
        return dict(element.attrib)

    def handle_variable(self, element):
        """Convert a ``variable`` element; handled exactly like a ``grid``."""
        return self.handle_grid(element)

    def lookup(self, handler_name):
        """Return the handler method for an element tag name.

        Parameters
        ----------
        handler_name : str
            The tag name of the element; any xml namespace prefix is ignored.

        Returns
        -------
        callable or None
            The matching ``handle_<tag>`` attribute, or ``None`` (after logging
            a warning) when this object has no such attribute.

        """
        handler_name = 'handle_' + _without_namespace(handler_name)
        handler = getattr(self, handler_name, None)
        if handler is None:
            log.warning('cannot find handler for element %s', handler_name)
        return handler
class NCSSDataset(object):
    """Hold information contained in the dataset.xml NCSS document.

    In general, if a dataset.xml NCSS document is missing the information
    needed to construct an attribute, that attribute will not show up as
    part of the `NCSSDataset` object.

    Note that only gridded ncss datasets may contain the attributes
    `gridsets`, `axes`, `coordinate_transforms`, and `lat_lon_box`.

    Attributes
    ----------
    variables : dict[str, dict]
        A dictionary of variables, mapping each variable name to the
        remaining metadata parsed for that variable
    time_span : dict[str, str]
        A dictionary holding the beginning and ending iso time strings which
        define the temporal bounds of the dataset
    featureDataset : dict[str, str]
        A dictionary containing the type ["grid", "point"] and location ["url"]
        of the dataset
    accept_list : dict[str, list[str]]
        A dictionary holding the types of valid returns of the dataset by
        access method [Grid, GridAsPoint, PointFeatureCollection]
    gridsets : dict[str, dict]
        A dictionary of gridSets contained within the dataset
    axes : dict[str, object]
        A dictionary of coordinate axes
    coordinate_transforms : dict[str, object]
        A dictionary of coordinate transforms
    lat_lon_box : dict[str, float]
        A dictionary holding the north, south, east, and west latitude and
        longitude bounds of the dataset (in degree_east, degree_north)

    """

    def __init__(self, element):
        """Initialize a NCSSDataset object.

        Parameters
        ----------
        element : :class:`~xml.etree.ElementTree.Element`
            An :class:`~xml.etree.ElementTree.Element` representing the top level
            node of an NCSS dataset.xml doc

        """
        self._types = _Types()

        self.gridsets = {}
        self.variables = {}
        self.axes = {}
        self.coordinate_transforms = {}
        self.accept_list = {}
        self.lat_lon_box = None
        self.time_span = None
        self.featureDataset = None

        if element.tag in ('gridDataset', 'capabilities'):
            # Grid documents carry the dataset location on the root element
            # and describe everything else in its children.
            self.featureDataset = {'type': 'grid',
                                   'url': element.attrib['location']}
            for child in element:
                self._parse_element(child)
        else:
            self._parse_element(element)

        # Remove the public attributes that ended up empty, so that missing
        # information does not show up on the object at all. Collect the names
        # first because __dict__ must not change size while being iterated.
        empty = [name for name, value in self.__dict__.items()
                 if not name.startswith('_') and not value]
        for name in empty:
            delattr(self, name)

    def _get_handler(self, handler_name):
        """Return the `_Types` handler for a tag name, or None if there is none."""
        return self._types.lookup(handler_name)

    def _parse_element(self, element):
        """Dispatch `element` to the parser registered for its tag.

        Parsing is best-effort: an element without a registered parser, or one
        whose parser raises `KeyError` (e.g. a required xml attribute is
        missing), only produces a logged warning.
        """
        element_name = element.tag

        parsers = {'gridSet': self._parse_gridset, 'axis': self._parse_axis,
                   'coordTransform': self._parse_coordTransform,
                   'LatLonBox': self._parse_LatLonBox, 'TimeSpan': self._parse_TimeSpan,
                   'AcceptList': self._parse_AcceptList,
                   'featureDataset': self._parse_featureDataset,
                   'variable': self._parse_variable}

        parse = parsers.get(element_name)
        if parse is None:
            log.warning('No parser found for element %s', element_name)
            return

        try:
            parse(element)
        except KeyError as err:
            # Keep tolerating malformed elements, but do not misreport the
            # failure as a missing parser.
            log.warning('Could not parse element %s: missing key %s',
                        element_name, err)

    def _parse_gridset(self, element):
        """Parse a ``gridSet`` element into `gridsets` and `variables`."""
        element_name = element.tag
        gridset_name = element.attrib['name']
        grid_set = {}

        for child in element:
            child_name = child.tag
            if child_name in ('projectionBox', 'coordTransRef'):
                grid_set.update(self._get_handler(child_name)(child))
            elif child_name == 'axisRef':
                grid_set.setdefault(child_name, []).append(
                    self._get_handler(child_name)(child))
            elif child_name == 'grid':
                grid = self._get_handler(child_name)(child)
                grid_name = grid.pop('name')
                grid_set.setdefault(child_name, {})[grid_name] = grid
                # The same dict is also exposed through `variables`.
                self.variables[grid_name] = grid
            else:
                log.warning('Unknown child in %s: %s', element_name, child_name)
                grid_set[child_name] = 'not handled by _parse_gridset'

        self.gridsets[gridset_name] = grid_set

    def _parse_axis(self, element):
        """Parse an ``axis`` element into `axes`, keyed by the axis name."""
        axis_name = element.attrib['name']
        axis = {attr: value for attr, value in element.attrib.items()
                if attr != 'name'}

        if 'shape' in axis:
            axis['shape'] = self._types.handle_typed_values(axis['shape'],
                                                            'shape', 'int')

        attrs = []
        for child in element:
            handler = self._get_handler(child.tag)
            if handler is None:
                # No handler for this child (the lookup logs a warning);
                # skip it instead of failing by calling None.
                continue
            attrs.append(handler(child))

        if attrs:
            axis['attributes'] = attrs

        self.axes[axis_name] = axis

    def _parse_coordTransform(self, element):  # noqa
        """Parse a ``coordTransform`` element into `coordinate_transforms`."""
        name = element.attrib['name']
        coord_trans = {attr: value for attr, value in element.attrib.items()
                       if attr != 'name'}

        params = {}
        for child in element:
            handler = self._get_handler(child.tag)
            if handler is None:
                # No handler for this child (the lookup logs a warning);
                # skip it instead of failing by calling None.
                continue
            params.update(handler(child))

        if params:
            coord_trans['parameters'] = params

        self.coordinate_transforms[name] = coord_trans

    def _parse_LatLonBox(self, element):  # noqa
        """Store the children of a ``LatLonBox`` element as ``{tag: float}``."""
        self.lat_lon_box = {child.tag: float(child.text) for child in element}

    def _parse_TimeSpan(self, element):  # noqa
        """Store the children of a ``TimeSpan`` element as ``{tag: text}``."""
        self.time_span = {child.tag: child.text for child in element}

    def _parse_AcceptList(self, element):  # noqa
        """Parse an ``AcceptList`` element into `accept_list`.

        The tag of the first child decides how the whole list is read: if it
        is ``Grid`` or ``GridAsPoint`` this is a grid ncss and the return types
        are the grandchildren, grouped by request type; otherwise this is a
        PointFeatureCollection ncss and each child is itself a return type.
        """
        grid_req_types = ['Grid', 'GridAsPoint']
        point = None  # decided once, from the first child

        for child in element:
            request_type = child.tag
            if point is None:
                point = request_type not in grid_req_types

            if point:
                # this is a PointFeatureCollection ncss
                self.accept_list.setdefault('PointFeatureCollection',
                                            []).append(child.text)
            else:
                # this is a grid ncss
                for grandchild in child:
                    self.accept_list.setdefault(request_type,
                                                []).append(grandchild.text)

    def _parse_featureDataset(self, element):  # noqa
        """Store the handler's result for a ``featureDataset`` element."""
        handler = self._get_handler(element.tag)
        self.featureDataset = handler(element)

    def _parse_variable(self, element):
        """Parse a ``variable`` element into `variables`, keyed by its name."""
        handler = self._get_handler(element.tag)
        variable = handler(element)
        # Remove the name from the metadata and use it as the key. (pop()
        # returns the removed value, so its result must not replace the dict.)
        name = variable.pop('name')
        self.variables[name] = variable