# Copyright (c) 2019 MetPy Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Pull out station metadata."""
from collections import namedtuple
import numpy as np
import pandas as pd
from ..cbook import get_test_data
from ..package_tools import Exporter
from ..units import units
# Exporter bound to this module's globals; it is used further down as a context manager
# (`with exporter:`) around the creation of `station_info`.
exporter = Exporter(globals())
# Lightweight record type holding the metadata kept for a single station.
Station = namedtuple(
    'Station',
    ('id', 'synop_id', 'name', 'state', 'country',
     'longitude', 'latitude', 'altitude', 'source'),
)
def to_dec_deg(dms):
    """Convert a ``'<degrees> <minutes><hemisphere>'`` string to decimal degrees.

    Parameters
    ----------
    dms : str
        Whitespace-separated degrees and minutes, with the minutes immediately followed by
        a one-letter hemisphere indicator (e.g. ``'39 30N'`` or ``'104 45W'``). An empty
        string is accepted.

    Returns
    -------
    float
        Degrees plus minutes / 60; positive when the indicator is ``'N'`` or ``'E'`` and
        negative for any other indicator. An empty `dms` gives ``0.``.

    """
    if not dms:
        return 0.

    deg, minutes = dms.split()
    # The last character is the hemisphere letter; everything before it is the minutes
    # value, so single-digit minutes (e.g. '6E') parse as well as two-digit ones.
    side = minutes[-1]
    float_deg = int(deg) + int(minutes[:-1]) / 60.
    return float_deg if side in ('N', 'E') else -float_deg
def _read_station_table(input_file=None):
    """Read in the GEMPAK station table.

    Yields tuple of station ID and `Station` for each entry.

    Parameters
    ----------
    input_file : str, optional
        Path of the fixed-width station table to read. Defaults to the ``'sfstns.tbl'``
        file located through `get_test_data`.

    """
    if input_file is None:
        input_file = get_test_data('sfstns.tbl', as_file_obj=False)
    with open(input_file) as station_file:
        for line in station_file:
            # A blank line carries no fields; the integer conversions below would fail on it.
            if not line.strip():
                continue
            stid = line[:9].strip()
            synop_id = int(line[9:16].strip())
            name = line[16:49].strip()
            state = line[49:52].strip()
            country = line[52:55].strip()
            # Latitude and longitude are parsed as integers and divided by 100.
            lat = int(line[55:61].strip()) / 100.
            lon = int(line[61:68].strip()) / 100.
            alt = int(line[68:74].strip())
            # `Station` has a `source` field with no default, so the originating file is
            # recorded on every entry.
            yield stid, Station(stid, synop_id=synop_id, name=name.title(), latitude=lat,
                                longitude=lon, altitude=alt, country=country, state=state,
                                source=input_file)
def _read_master_text_file(input_file=None):
    """Read in the master text file.

    Yields tuple of station ID and `Station` for each entry.

    Parameters
    ----------
    input_file : str, optional
        Path of the fixed-width master station file to read. Defaults to the
        ``'master.txt'`` file located through `get_test_data`.

    """
    if input_file is None:
        input_file = get_test_data('master.txt', as_file_obj=False)
    with open(input_file) as station_file:
        for line in station_file:
            state = line[:3].strip()
            name = line[3:20].strip().replace('_', ' ')
            stid = line[20:25].strip()
            synop_id = line[32:38].strip()
            lat = to_dec_deg(line[39:46].strip())
            lon = to_dec_deg(line[47:55].strip())
            # A blank altitude column is treated as 0.
            alt_part = line[55:60].strip()
            alt = int(alt_part) if alt_part else 0

            # Give `country` a value on every iteration so an entry with no station ID
            # neither raises on an unbound name nor inherits the previous line's country.
            country = ''
            if stid:
                if stid[0] in ('P', 'K'):
                    # IDs starting with 'P' or 'K' are labelled 'US'; the leading column
                    # stays in `state`.
                    country = 'US'
                else:
                    # For all other IDs the leading column is used as the country and
                    # the state is set to the '--' marker.
                    country = state
                    state = '--'
            yield stid, Station(stid, synop_id=synop_id, name=name.title(), latitude=lat,
                                longitude=lon, altitude=alt, country=country, state=state,
                                source=input_file)
def _read_station_text_file(input_file=None):
    """Read the station text file.

    Yields tuple of station ID and `Station` for each entry.

    Parameters
    ----------
    input_file : str, optional
        Path of the fixed-width station text file to read. Defaults to the
        ``'stations.txt'`` file located through `get_test_data`.

    """
    if input_file is None:
        input_file = get_test_data('stations.txt', as_file_obj=False)
    with open(input_file) as station_file:
        for line in station_file:
            # Lines starting with '!' are skipped.
            if line.startswith('!'):
                continue
            # Lines with an empty latitude column, or with the literal 'LAT' there,
            # carry no station entry and are skipped as well.
            lat = line[39:45].strip()
            if not lat or lat == 'LAT':
                continue
            lat = to_dec_deg(lat)
            state = line[:3].strip()
            name = line[3:20].strip().replace('_', ' ')
            stid = line[20:25].strip()
            synop_id = line[32:38].strip()
            lon = to_dec_deg(line[47:55].strip())
            # A blank altitude column is treated as 0, matching `_read_master_text_file`.
            alt_part = line[55:60].strip()
            alt = int(alt_part) if alt_part else 0
            country = line[81:83].strip()
            yield stid, Station(stid, synop_id=synop_id, name=name.title(), latitude=lat,
                                longitude=lon, altitude=alt, country=country, state=state,
                                source=input_file)
def _read_airports_file(input_file=None):
"""Read the airports file.

Loads the CSV at `input_file` (by default the ``'airport-codes.csv'`` file located through
`get_test_data`) with `pandas.read_csv` and returns a `pandas.DataFrame` assembled from
its ``ident``, ``latitude_deg``, ``longitude_deg``, ``elevation_ft`` and ``iso_region``
columns.

"""
# NOTE(review): this block has lost its indentation, and the `str.split(` call and the dict
# literal passed to `pd.DataFrame` below are never closed in this copy. The missing
# arguments cannot be recovered from what is visible here, so restore them from the
# authoritative source before relying on this function.
if input_file is None:
input_file = get_test_data('airport-codes.csv', as_file_obj=False)
df = pd.read_csv(input_file)
# Every row gets the same constant `synop_id` (99999) and the same `source` value.
station_map = pd.DataFrame({'id': df.ident.values, 'synop_id': 99999,
'latitude': df.latitude_deg.values,
'longitude': df.longitude_deg.values,
# Elevation is tagged as feet and converted to metres; `.m` presumably yields the bare
# magnitude -- confirm against the `units` registry.
'altitude': units.Quantity(
df.elevation_ft.values, 'ft').to('m').m,
'country': df.iso_region.str.split('-', n=1,
'source': input_file
return station_map
class StationLookup:
    """Look up station information from multiple sources.

    The station files are parsed on the first lookup rather than when the instance is
    created, so constructing an instance does not read any file.
    """

    def __init__(self):
        """Initialize with no tables loaded; they are read on first use."""
        self._sources = None

    def _tables(self):
        """Return the list of lookup tables, reading the station files if needed."""
        if self._sources is None:
            # Order matters: `__getitem__` returns the entry from the first table that
            # contains the ID.
            # NOTE(review): `_read_airports_file` appears to return a DataFrame, and
            # `dict()` of a DataFrame is keyed by column label rather than by station
            # ID -- confirm that airport identifiers can actually be found here.
            self._sources = [dict(_read_station_table()), dict(_read_master_text_file()),
                             dict(_read_station_text_file()), dict(_read_airports_file())]
        return self._sources

    def __getitem__(self, stid):
        """Lookup station information from the ID.

        Parameters
        ----------
        stid : str
            Station identifier to search for.

        Returns
        -------
        The entry stored for `stid` in the first table that contains it.

        Raises
        ------
        KeyError
            If none of the tables contains `stid`.

        """
        for table in self._tables():
            if stid in table:
                return table[stid]
        raise KeyError(f'No station information for {stid}')
# Shared module-level lookup instance, created inside the `exporter` context.
with exporter:
    station_info = StationLookup()
def add_station_lat_lon(df, stn_var):
    """Lookup station information to add the station latitude and longitude to the DataFrame.

    This function will add two columns to the DataFrame ('latitude' and 'longitude') after
    looking up all unique station identifiers available in the DataFrame. The DataFrame is
    modified in place and also returned.

    Parameters
    ----------
    df : `pandas.DataFrame`
        The DataFrame that contains the station observations
    stn_var : str
        The string of the variable name that represents the station in the DataFrame. Common
        examples are 'station', 'stid', and 'station_id'

    Returns
    -------
    `pandas.DataFrame` that contains original Dataframe now with the latitude and longitude
    values for each location found in `station_info`. Rows whose station is not found hold
    NaN in both columns.

    """
    # Start from NaN so that rows that never get a match (unknown stations, or missing
    # station values) end up as NaN rather than None, and the columns stay numeric.
    df['latitude'] = np.nan
    df['longitude'] = np.nan
    for stn in df[stn_var].unique():
        # Only the lookup itself is expected to raise KeyError for an unknown station.
        try:
            info = station_info[stn]
        except KeyError:
            continue
        rows = df[stn_var] == stn
        df.loc[rows, 'latitude'] = info.latitude
        df.loc[rows, 'longitude'] = info.longitude
    return df