Source code for prereise.gather.winddata.rap.noaa_api

import datetime

import requests

from prereise.gather.request_util import TransientError, retry


class NoaaApi:
    """API client for downloading rap-130 data from NOAA.

    The bounding box is validated on construction and merged into the query
    parameters that are sent with every request.

    :param dict box: geographic area, keyed by exactly ``north``, ``south``,
        ``east`` and ``west``
    :raises TypeError: if box None or not a dict
    :raises ValueError: if box is missing keys or contains unknown keys
    """

    base_url = "https://www.ncei.noaa.gov/thredds/ncss/model-rap130/"
    fallback_url = "https://www.ncei.noaa.gov/thredds/ncss/model-rap130-old/"
    var_u = "u-component_of_wind_height_above_ground"
    var_v = "v-component_of_wind_height_above_ground"

    def __init__(self, box):
        self.box = box
        self._check_box()
        self._set_params()

    def _check_box(self):
        """Validate the type and the key set of the bounding box.

        :raises TypeError: if box is None or not a dict
        :raises ValueError: if the keys are not exactly north/south/east/west
        """
        # isinstance() already rejects None, so no separate None test is needed.
        if not isinstance(self.box, dict):
            raise TypeError("box must be a non-empty dict")
        valid_keys = {"north", "south", "east", "west"}
        if set(self.box) != valid_keys:
            # Sort so the message is deterministic; set iteration order for
            # strings varies between interpreter runs.
            raise ValueError(f"Keys must be one of: {','.join(sorted(valid_keys))}")

    def _set_params(self):
        """Set default query parameters that will be sent with each request"""
        # A list of pairs (rather than a dict) is used because "var" is
        # repeated, once per wind component.
        self.params = [
            ("var", NoaaApi.var_u),
            ("var", NoaaApi.var_v),
            ("disableProjSubset", "on"),
            ("horizStride", "1"),
            ("addLatLon", "true"),
            ("accept", "netCDF"),
        ] + list(self.box.items())

    def get_path_list(self, start, end):
        """Enable calculating the final result size prior to download. Used for
        initializing data frames to the correct size.

        :param datetime start: the start date
        :param datetime end: the end date
        :return: (*list*) -- a list of url paths that span the date range
        """
        return list(self.iter_hours(start, end))

    def iter_hours(self, start, end):
        """Iterate over the hours in the given range, yielding a path segment
        matching the structure of NOAA's server

        Every day from ``start`` through ``end`` (inclusive, advancing one day
        at a time from ``start``) contributes 24 segments, one per hour.

        :param datetime start: the start date
        :param datetime end: the end date
        :return: (*Generator[str]*) -- path part of the url pertaining to time range
        """
        one_day = datetime.timedelta(days=1)
        day = start
        while day <= end:
            stamp = day.strftime("%Y%m%d")
            # <YYYYMM>/<YYYYMMDD>/rap_130_<YYYYMMDD>
            prefix = f"{stamp[:6]}/{stamp}/rap_130_{stamp}"
            for hour in range(24):
                # e.g. ..._0000_000.grb2 through ..._2300_000.grb2
                yield f"{prefix}_{hour:02d}00_000.grb2"
            day += one_day

    def build_url(self, time_slice, fallback=False):
        """Build the url for the given time slice

        :param str time_slice: url path segment specifying the time range
        :param bool fallback: whether to use the fallback url for older data
        :return: (*str*) -- the url to download
        """
        url = NoaaApi.fallback_url if fallback else NoaaApi.base_url
        return url + time_slice

    def get_hourly_data(self, start, end):
        """Iterate responses over the given time range

        Each time slice is requested from the primary url first; on a 404 the
        fallback url is tried. The last response obtained is yielded whatever
        its status code, so callers should check it.

        :param datetime start: the start date
        :param datetime end: the end date
        :return: (*Generator[requests.Response]*) -- yield the next http response
        """

        retry_limit = 3

        # allowed_exceptions must be a real tuple: "(TransientError)" is just
        # the class wrapped in parentheses.
        @retry(max_attempts=retry_limit, allowed_exceptions=(TransientError,))
        def download(time_slice, fallback=False):
            url = self.build_url(time_slice, fallback)
            # NOTE(review): no timeout is passed, so a stalled connection can
            # block indefinitely -- confirm whether that is intended.
            resp = requests.get(url, params=self.params)
            # retry_count is presumably maintained on the wrapped function by
            # the retry decorator -- confirm in request_util. Once the limit
            # is reached the 500 response is returned instead of raising.
            if resp.status_code == 500 and download.retry_count < retry_limit:
                msg = f"Server error for url={resp.url}, retry_count={download.retry_count}"
                raise TransientError(msg)
            return resp

        for time_slice in self.iter_hours(start, end):
            response = download(time_slice)
            if response.status_code == 404:
                print("Got 404 response, trying fallback url.")
                response = download(time_slice, fallback=True)
                if response.status_code == 404:
                    print(
                        "Content not found for the given range - it may be"
                        " available via tape archive, please contact NOAA for"
                        " support"
                    )
            yield response
Source code for prereise.gather.winddata.rap.noaa_api

Navigation

Related Topics

Useful Links

Code