Source code for siphon.http_util

# Copyright (c) 2013-2015 Siphon Contributors.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Utility code to support making requests using HTTP."""

from collections import OrderedDict
from datetime import datetime, timedelta, tzinfo
from io import BytesIO
from itertools import chain
import posixpath
try:
    from urllib.parse import urlencode, urljoin  # noqa
except ImportError:
    from urllib import urlencode
    from urlparse import urljoin  # noqa

import requests

from . import __version__

HTTPError = requests.HTTPError


[docs]class BadQueryError(Exception): """Exception raised when a query fails."""
# A UTC class. Taken from standard library docs.
[docs]class UTC(tzinfo): """Represent UTC timezone.""" ZERO = timedelta(0)
[docs] def utcoffset(self, dt): # pylint:disable=unused-argument """Get the offset from UTC.""" return self.ZERO
[docs] def tzname(self, dt): # pylint:disable=unused-argument """Get the name of the timezone.""" return r'UTC'
[docs] def dst(self, dt): # pylint:disable=unused-argument """Get whether the timezone uses Daylight Savings Time.""" return self.ZERO
utc = UTC()
[docs]class HTTPSessionManager(object): """Manage the creation of sessions for HTTP access.""" def __init__(self): """Initialize ``HTTPSessionManager``.""" self.user_agent = 'Siphon ({})'.format(__version__) self.options = {}
[docs] def set_session_options(self, **kwargs): """Set options for created session instances. Takes keyword arguments and sets them as attributes on the returned :class:`requests.Session` instance. See Also -------- create_session """ self.options = kwargs
[docs] def create_session(self): """Create a new HTTP session with our user-agent set. Returns ------- session : requests.Session The created session See Also -------- urlopen, set_session_options """ ret = requests.Session() ret.headers['User-Agent'] = self.user_agent for k, v in self.options.items(): setattr(ret, k, v) return ret
[docs] def urlopen(self, url, **kwargs): """GET a file-like object for a URL using HTTP. This is a thin wrapper around :meth:`requests.Session.get` that returns a file-like object wrapped around the resulting content. Parameters ---------- url : str The URL to request kwargs : arbitrary keyword arguments Additional keyword arguments to pass to :meth:`requests.Session.get`. Returns ------- fobj : file-like object A file-like interface to the content in the response See Also -------- :meth:`requests.Session.get` """ return BytesIO(self.create_session().get(url, **kwargs).content)
session_manager = HTTPSessionManager()
[docs]def parse_iso_date(s): """Parse a string containing an ISO-8601 formatted date. Parameters ---------- s : str The string to be parsed Returns ------- dt : datetime.datetime The results of parsing the string """ return datetime.strptime(s, '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=utc)
[docs]class DataQuery(object): """Represent a query for data from a THREDDS server. This object provides a clear API to formulate a query for data, including a spatial query, a time query, and possibly some variables or other parameters. These objects provide a dictionary-like interface, (:meth:`items` and :meth:`__iter__`) sufficient to be passed to functions expecting a dictionary representing a URL query. Instances of this object can also be turned into a string, which will yield a properly escaped string for a URL. """ def __init__(self): """Construct an empty :class:`DataQuery`.""" self.var = set() self.time_query = OrderedDict() self.spatial_query = OrderedDict() self.extra_params = OrderedDict()
[docs] def variables(self, *var_names): """Specify one or more variables for the query. This function ensures that variable names are not repeated. This modifies the query in-place, but returns `self` so that multiple queries can be chained together on one line. Parameters ---------- var_names : one or more strings One or more names of variables to request. Use 'all' to request all. Returns ------- self : DataQuery Returns self for chaining calls """ self.var.update(set(var_names)) return self
[docs] def add_query_parameter(self, **kwargs): """Add arbitrary query element (name=value) to the request. This modifies the query in-place, but returns `self` so that multiple queries can be chained together on one line. Parameters ---------- kwargs : one or more strings passed as keyword arguments Names and values of parameters to add to the query Returns ------- self : DataQuery Returns self for chaining calls """ self.extra_params.update(kwargs) return self
[docs] def lonlat_box(self, west, east, south, north): """Add a latitude/longitude bounding box to the query. This adds a request for a spatial bounding box, bounded by ('north', 'south') for latitude and ('east', 'west') for the longitude. This modifies the query in-place, but returns `self` so that multiple queries can be chained together on one line. This replaces any existing spatial queries that have been set. Parameters ---------- west: float The bounding longitude to the west, in degrees east of the prime meridian east : float The bounding longitude to the east, in degrees east of the prime meridian south : float The bounding latitude to the south, in degrees north of the equator north : float The bounding latitude to the north, in degrees north of the equator Returns ------- self : DataQuery Returns self for chaining calls """ self._set_query(self.spatial_query, west=west, east=east, south=south, north=north) return self
[docs] def lonlat_point(self, lon, lat): """Add a latitude/longitude point to the query. This adds a request for a (`lon`, `lat`) point. This modifies the query in-place, but returns `self` so that multiple queries can be chained together on one line. This replaces any existing spatial queries that have been set. Parameters ---------- lon: float The longitude to request lat : float The latitude to request Returns ------- self : DataQuery Returns self for chaining calls """ self._set_query(self.spatial_query, longitude=lon, latitude=lat) return self
# Helper for resetting a dict @staticmethod def _set_query(query, **kwargs): query.clear() query.update(kwargs)
[docs] def all_times(self): """Add a request for all times to the query. This adds a request for all times (`temporal=all`). This modifies the query in-place, but returns `self` so that multiple queries can be chained together on one line. This replaces any existing temporal queries that have been set. Returns ------- self : DataQuery Returns self for chaining calls """ self._set_query(self.time_query, temporal='all') return self
[docs] def time(self, time): """Add a request for a specific time to the query. This modifies the query in-place, but returns `self` so that multiple queries can be chained together on one line. This replaces any existing temporal queries that have been set. Parameters ---------- time : datetime.datetime The time to request Returns ------- self : DataQuery Returns self for chaining calls """ self._set_query(self.time_query, time=self._format_time(time)) return self
[docs] def time_range(self, start, end): """Add a request for a time range to the query. This modifies the query in-place, but returns `self` so that multiple queries can be chained together on one line. This replaces any existing temporal queries that have been set. Parameters ---------- start : datetime.datetime The start of the requested time range end : datetime.datetime The end of the requested time range Returns ------- self : DataQuery Returns self for chaining calls """ self._set_query(self.time_query, time_start=self._format_time(start), time_end=self._format_time(end)) return self
# Helper for formatting times appropriately @staticmethod def _format_time(dt): return dt.isoformat() def __iter__(self): """Return an iterator of the various items (name=value pairs) that compose the query. Returns ------- items : iterator Sequence of tuples of name, value representing the query. """ return chain([('var', self.var)], self.time_query.items(), self.spatial_query.items(), self.extra_params.items())
[docs] def items(self): """Return the various name=value pairs that compose the query. Returns ------- items : iterator Sequence of tuples of name, value representing the query. """ return iter(self)
def __str__(self): """Format query as a urlencoded string.""" return urlencode(self, doseq=True) def __repr__(self): """Format query as a urlencoded string.""" return str(self)
[docs]class HTTPEndPoint(object): """An object representing an endpoint on a server that is accessed using HTTP. This provides a simple way to point to a URL, formulate appropriate queries and validate them, parse metadata as appropriate, and parse returns from requests. """ def __init__(self, url): """Create an HTTPEndPoint instance. Parameters ---------- url : str The base URL for the endpoint """ self._base = url self._session = session_manager.create_session() self._get_metadata()
[docs] def get_query(self, query): """Make a GET request, including a query, to the endpoint. The path of the request is to the base URL assigned to the endpoint. Parameters ---------- query : DataQuery The query to pass when making the request Returns ------- resp : requests.Response The server's response to the request See Also -------- get_path, get """ url = self._base[:-1] if self._base[-1] == '/' else self._base return self.get(url, query)
[docs] def url_path(self, path): """Assemble the full url to a path. Given a path relative to the base URL, assemble the full URL. Parameters ---------- path : str The path, relative to the endpoint Returns ------- url : str The full URL to `path` See Also -------- get_path """ return posixpath.join(self._base, path)
[docs] def get_path(self, path, query=None): """Make a GET request, optionally including a query, to a relative path. The path of the request includes a path on top of the base URL assigned to the endpoint. Parameters ---------- path : str The path to request, relative to the endpoint query : DataQuery, optional The query to pass when making the request Returns ------- resp : requests.Response The server's response to the request See Also -------- get_query, get, url_path """ return self.get(self.url_path(path), query)
[docs] def get(self, path, params=None): """Make a GET request, optionally including a parameters, to a path. The path of the request is the full URL. Parameters ---------- path : str The URL to request params : DataQuery, optional The query to pass when making the request Returns ------- resp : requests.Response The server's response to the request Raises ------ HTTPError If the server returns anything other than a 200 (OK) code See Also -------- get_query, get """ resp = self._session.get(path, params=params) if resp.status_code != 200: if resp.headers.get('Content-Type', '').startswith('text/html'): text = resp.reason else: text = resp.text raise requests.HTTPError('Error accessing {0}\n' 'Server Error ({1:d}: {2})'.format(resp.request.url, resp.status_code, text)) return resp
def _get_metadata(self): """Get the metadata associated with the endpoint. It is intended that this be implemented by subclasses as necessary. """
[docs] def validate_query(self, query): """Validate a query. Determines whether a query is well-formed. This includes checking for all required parameters, as well as checking parameters for valid values. The default implementation does nothing. It is presumed that subclasses implement this to do more detailed checking as appropriate. Parameters ---------- query : DataQuery (or subclass) Returns ------- valid : bool Whether `query` is valid. """ return len(str(query)) > 0 # Ensure not empty
[docs] def query(self): """Create a new query object. Returns a new :class:`DataQuery` instance appropriate for this endpoint. The default implementation returns a :class:`DataQuery` instance. Subclasses can override to return a subclass specific to this endpoint. Returns ------- valid : bool Whether `query` is valid. """ return DataQuery()