Source code for prereise.gather.winddata.rap.noaa_api

import datetime

import requests

from prereise.gather.request_util import TransientError, retry


class NoaaApi:
    """API client for downloading rap-130 data from NOAA.

    :param dict box: geographic area, keyed by exactly *north*, *south*,
        *east* and *west*
    :raises TypeError: if box None or not a dict
    :raises ValueError: if box is missing keys or contains unknown keys
    """

    base_url = "https://www.ncei.noaa.gov/thredds/ncss/model-rap130/"
    fallback_url = "https://www.ncei.noaa.gov/thredds/ncss/model-rap130-old/"
    var_u = "u-component_of_wind_height_above_ground"
    var_v = "v-component_of_wind_height_above_ground"

    def __init__(self, box):
        self.box = box
        self._check_box()
        self._set_params()

    def _check_box(self):
        """Validate the bounding box given to the constructor.

        :raises TypeError: if box is not a dict (this includes None)
        :raises ValueError: if the keys are not exactly north, south, east, west
        """
        if not isinstance(self.box, dict):
            raise TypeError("box must be a non-empty dict")
        valid_keys = {"north", "south", "east", "west"}
        if set(self.box) != valid_keys:
            # Sort so the message is stable regardless of set iteration order.
            raise ValueError(f"Keys must be one of: {','.join(sorted(valid_keys))}")

    def _set_params(self):
        """Set default query parameters that will be sent with each request"""
        self.params = [
            ("var", NoaaApi.var_u),
            ("var", NoaaApi.var_v),
            ("disableProjSubset", "on"),
            ("horizStride", "1"),
            ("addLatLon", "true"),
            ("accept", "netCDF"),
        ] + list(self.box.items())

    def get_path_list(self, start, end):
        """Enable calculating the final result size prior to download. Used
        for initializing data frames to the correct size.

        :param datetime start: the start date
        :param datetime end: the end date
        :return: (*list*) -- a list of url paths that span the date range
        """
        return list(self.iter_hours(start, end))

    def iter_hours(self, start, end):
        """Iterate over the hours of every calendar day in the given range,
        yielding a path segment matching the structure of NOAA's server.

        All 24 hourly paths are produced for each day from the day of ``start``
        through the day of ``end`` inclusive; the time-of-day of either bound
        does not trim the hours of its day. Nothing is yielded if ``start`` is
        later than ``end``.

        :param datetime start: the start date
        :param datetime end: the end date
        :return: (*Generator[str]*) -- path part of the url pertaining to time range
        """
        if start > end:
            return

        # Step on calendar dates: comparing full datetimes while advancing by
        # whole days would skip the final day whenever start's time-of-day is
        # later than end's.
        day = start.date() if isinstance(start, datetime.datetime) else start
        last_day = end.date() if isinstance(end, datetime.datetime) else end
        one_day = datetime.timedelta(days=1)

        while day <= last_day:
            ts = day.strftime("%Y%m%d")
            prefix = f"{ts[:6]}/{ts}/rap_130_{ts}"
            for hour in range(24):
                # e.g. 201601/20160101/rap_130_20160101_0000_000.grb2
                yield f"{prefix}_{hour:02d}00_000.grb2"
            day += one_day

    def build_url(self, time_slice, fallback=False):
        """Build the url for the given time slice

        :param str time_slice: url path segment specifying the time range
        :param bool fallback: whether to use the fallback url for older data
        :return: (*str*) -- the url to download
        """
        url = NoaaApi.fallback_url if fallback else NoaaApi.base_url
        return url + time_slice

    def get_hourly_data(self, start, end):
        """Iterate responses over the given time range

        Each time slice is requested from the primary url first; on a 404 the
        fallback url is tried. The response is yielded in either case, so
        callers should inspect its status code.

        :param datetime start: the start date
        :param datetime end: the end date
        :return: (*Generator[requests.Response]*) -- yield the next http response
        """
        retry_limit = 3

        @retry(max_attempts=retry_limit, allowed_exceptions=TransientError)
        def download(time_slice, fallback=False):
            url = self.build_url(time_slice, fallback)
            # NOTE(review): no timeout is passed, so a stalled connection can
            # block indefinitely -- consider adding one once a safe value for
            # this server is known.
            resp = requests.get(url, params=self.params)
            # retry_count is read off the decorated function; presumably it is
            # maintained by the retry decorator -- verify against request_util.
            # Once the limit is reached the 500 response is returned as-is
            # instead of raising again.
            if resp.status_code == 500 and download.retry_count < retry_limit:
                msg = f"Server error for url={resp.url}, retry_count={download.retry_count}"
                raise TransientError(msg)
            return resp

        for time_slice in self.iter_hours(start, end):
            response = download(time_slice)
            if response.status_code == 404:
                print("Got 404 response, trying fallback url.")
                response = download(time_slice, fallback=True)
                if response.status_code == 404:
                    print(
                        "Content not found for the given range - it may be"
                        " available via tape archive, please contact NOAA for"
                        " support"
                    )
            yield response