# Source code for prereise.gather.hydrodata.eia.interpolate_capacity_factors

import datetime
import os

import numpy as np
import pandas as pd


def get_profile(
    plant_id, start=pd.Timestamp(2016, 1, 1), end=pd.Timestamp(2016, 12, 31, 23)
):
    """Creates hydro profile from monthly capacity factors reported by EIA
    `here <https://www.eia.gov/electricity/annual/html/epa_04_08_b.html>`_.

    The capacity factors are read from ``../data/usa_hydro_capacity_factors.csv``
    (relative to this module), placed on an hourly grid spanning the first to the
    last timestamp of that file, and filled in by time-weighted interpolation.

    :param list plant_id: id of the hydro plants.
    :param pandas.Timestamp/numpy.datetime64/datetime.datetime start: start date.
        Must be strictly earlier than *end* and coincide with a point of the
        hourly grid built from the data file.
    :param pandas.Timestamp/numpy.datetime64/datetime.datetime end: end date.
        Must coincide with a point of the hourly grid built from the data file.
    :return: (*pandas.DataFrame*) -- data frame with UTC timestamp as indices and
        plant id as column names. Values are the capacity factor. Note that a
        unique capacity factor is given for each month and for the entire US.
        Therefore, each plant will have the same profile.
    :raises TypeError: if plant_id is not a list and/or dates are str.
    :raises ValueError: if dates are invalid.
    """
    if not isinstance(plant_id, list):
        raise TypeError("plant_id must be a list")

    for d in (start, end):
        if not isinstance(d, (pd.Timestamp, np.datetime64, datetime.datetime)):
            raise TypeError(
                "dates must be a pandas.Timestamp, a numpy.datetime64 or "
                "a datetime.datetime object"
            )

    # Normalize the three accepted date types to pandas.Timestamp so that the
    # comparison, the index membership tests and the slice below all behave the
    # same way regardless of what the caller passed in.
    start = pd.Timestamp(start)
    end = pd.Timestamp(end)

    # Checked before touching the disk: a reversed (or empty) range is invalid
    # no matter what the data file contains.
    if start >= end:
        raise ValueError("Start date must be earlier than end date")

    filepath = os.path.join(
        os.path.dirname(__file__), "..", "data", "usa_hydro_capacity_factors.csv"
    )
    scaler = pd.read_csv(
        filepath,
        header=None,
        index_col=0,
        names=["timestamp", "cf"],
    )
    scaler.index = pd.to_datetime(scaler.index)

    # Expand the reported values onto an hourly grid; hours without a reported
    # value become NaN and are filled by the interpolation further down.
    # An offset object is used instead of a string alias because the "H" alias
    # is deprecated in recent pandas releases.
    scaler = scaler.reindex(
        pd.date_range(
            start=scaler.index[0], end=scaler.index[-1], freq=pd.offsets.Hour()
        )
    )

    # NOTE(review): the bounds quoted in these messages are hard-coded;
    # presumably they mirror the range of the shipped data file -- confirm.
    if start not in scaler.index:
        raise ValueError("Start date must be within [2015-01-15, 2017-12-15]")
    if end not in scaler.index:
        raise ValueError("End date must be within [2015-01-15, 2017-12-15]")

    # Time-weighted linear interpolation between reported values, then keep
    # only the requested window (label slicing is inclusive of both ends).
    scaler = scaler.interpolate(method="time").loc[start:end]

    # Every plant gets the same series, one column per requested plant id.
    return pd.DataFrame({i: scaler.cf for i in plant_id}, index=scaler.index)