Source code for prereise.gather.demanddata.nrel_efs.map_states
from collections import defaultdict

import pandas as pd
from powersimdata.input.grid import Grid
from powersimdata.network.model import ModelImmutables
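
# Zone mappings from the usa_tamu grid model: state abbreviation -> load zone IDs
# (abv2id), state abbreviation -> state name (abv2state), load zone ID -> state
# abbreviation (id2abv), load zone ID -> time zone (id2timezone), and
# interconnection name -> load zone IDs (interconnect2id).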
mi = ModelImmutables("usa_tamu")
abv2id = mi.zones["abv2id"]
abv2state = mi.zones["abv2state"]
id2abv = mi.zones["id2abv"]
id2timezone = mi.zones["id2timezone"]
interconnect2id = mi.zones["interconnect2id"]


def decompose_demand_profile_by_state_to_loadzone(
    df, profile_type, regions=None, save=None
):
    """Transforms the sectoral demand data so that it is separated by load zone rather
    than by state.

    :param pandas.DataFrame df: DataFrame of the sectoral demand data, where the rows
        are time steps (in local time) and the columns are the states. This input is
        intended to be the output of :py:func:`combine_efs_demand` or the components
        that are output from :py:func:`partition_flexibility_by_sector`.
    :param str profile_type: A string that identifies the type of profile that is
        provided. Can be one of *'demand'* or *'demand_flexibility'*.
    :param iterable regions: The combination of interconnection names and state
        abbreviations that dictate the zone IDs to be included. Can choose any of:
        *'Eastern'*, *'Western'*, *'Texas'*, any state abbreviation in the contiguous
        United States, or *'All'*. Defaults to None.
    :param str save: Saves a .csv if a string representing a valid file path and file
        name is provided. Defaults to None, indicating that a .csv file should not be
        saved.
    :return: (*pandas.DataFrame*) -- Sectoral demand, split by load zone ID.
    :raises TypeError: if df is not a pandas.DataFrame, if profile_type is not a
        string, if regions is not an iterable, if the components of regions are not
        strings, or if save is not input as a string.
    :raises ValueError: if df does not have the proper timestamps or the correct
        number of states, if profile_type is not valid, or if the components of
        regions are not valid.
    """
    # Account for the immutable default parameter
    if regions is None:
        regions = {"All"}
    # Check the data types of the inputs
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be input as a pandas.DataFrame.")
    if not isinstance(profile_type, str):
        raise TypeError("profile_type must be input as a str.")
    if not isinstance(regions, (set, list)):
        raise TypeError(
            "Combination of interconnections and state abbreviations must be input as "
            + "a set or list."
        )
    # Check the demand DataFrame timestamps and column headers
    if not df.index.equals(
        pd.date_range("2016-01-01", "2017-01-01", freq="H", inclusive="left")
    ):
        raise ValueError("This data does not have the proper timestamps.")
    if set(df.columns) != set(abv2state) - {"AK", "HI"}:
        raise ValueError("This data does not include all 48 states.")
    # Check the value of profile_type
    if profile_type not in {"demand", "demand_flexibility"}:
        raise ValueError(f"{profile_type} is not a valid selection for profile_type.")
    # Check that the components of regions are str
    if not all(isinstance(x, str) for x in regions):
        raise TypeError(
            "Individual interconnections and state abbreviations must be input as a "
            + "str."
        )
    # Reformat components of regions
    regions = {x.upper() if len(x) == 2 else x.capitalize() for x in regions}
    if "All" in regions:
        regions = {"Eastern", "Western", "Texas"}
    # Check that the components of regions are valid
    possible_regions = ({"Eastern", "Western", "Texas"} | set(abv2state)) - {"AK", "HI"}
    if not regions.issubset(possible_regions):
        invalid_regions = regions - possible_regions
        raise ValueError(f'Invalid regions: {", ".join(invalid_regions)}')
    # Grab the grid information
    grid = Grid(["USA"])
    # Find Pd for each load zone and determine the fraction of Pd per load zone by state
    pd_by_lz = grid.bus.groupby("zone_id")["Pd"].sum()
    pd_state_total = defaultdict(float)
    for i, s in id2abv.items():
        pd_state_total[s] += pd_by_lz[i]
    pd_frac = {i: pd_by_lz[i] / pd_state_total[id2abv[i]] for i in id2abv}
    # Split states into load zones
    df_lz = pd.DataFrame(index=df.index, columns=list(id2abv))
    for i in df_lz.columns:
        df_lz[i] = df[id2abv[i]] * pd_frac[i]
    # Convert from local hours to UTC time
    df_lz = shift_local_time_by_loadzone_to_utc(df_lz)
    # Determine the loadzones to be included in the profile, as specified by regions
    loadzones = sorted(
        set().union(
            *[abv2id[r] if len(r) == 2 else interconnect2id[r] for r in regions]
        )
    )
    # Keep the appropriate loadzones
    df_lz = df_lz[loadzones]
    # Change the column headers if the profile_type is "demand_flexibility"
    if profile_type == "demand_flexibility":
        df_lz.columns = [f"zone.{x}" for x in loadzones]
    # Save the demand data, if desired
    if save is not None:
        if not isinstance(save, str):
            raise TypeError("The file path and file name must be input as a str.")
        else:
            df_lz.to_csv(save)
    # Return the demand that is separated by load zone ID
    return df_lz
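

# Illustrative sketch, not part of the original module: a hypothetical helper showing
# one way to call decompose_demand_profile_by_state_to_loadzone. It builds an all-ones
# state-level profile in place of the output of combine_efs_demand and decomposes it
# into the Texas load zones. Running it requires the grid data behind Grid(["USA"]).
def _example_decompose_texas_demand():
    index = pd.date_range("2016-01-01", "2017-01-01", freq="H", inclusive="left")
    states = sorted(set(abv2state) - {"AK", "HI"})
    dummy_demand = pd.DataFrame(1.0, index=index, columns=states)
    return decompose_demand_profile_by_state_to_loadzone(
        dummy_demand, profile_type="demand", regions={"Texas"}
    )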


def shift_local_time_by_loadzone_to_utc(df):
    """Maps the local time for each load zone to the corresponding UTC time.

    :param pandas.DataFrame df: DataFrame of the demand data, where the rows are time
        steps (in local time) and the columns are load zone IDs.
    :return: (*pandas.DataFrame*) -- Demand, shifted to account for UTC time.
    :raises TypeError: if df is not a pandas.DataFrame.
    :raises ValueError: if df does not have the proper timestamps or does not include
        all of the load zones.
    """
    # Check that a DataFrame is input
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be input as a pandas.DataFrame.")
    # Check the demand DataFrame dimensions and headers
    if not df.index.equals(
        pd.date_range("2016-01-01", "2017-01-01", freq="H", inclusive="left")
    ):
        raise ValueError("This data does not have the proper timestamps.")
    if set(df.columns) != set(id2abv):
        raise ValueError("This data does not include all load zones.")
    # Shift values according to UTC time correction
    df_tz = df.copy()
    for i in df_tz.columns:
        # Number of hours by which this zone's local time lags UTC, parsed from the
        # last character of its time zone label
        tz_val = int(id2timezone[i][-1])
        df_tz[i] = df_tz[i].shift(tz_val)
        # Populate the first tz_val hours with data from December 30 (the same day of
        # the week as January 1) at the same time
        df_tz.iloc[0:tz_val, df_tz.columns.get_loc(i)] = df.iloc[
            (8736 - tz_val) : 8736
        ][i].values
    # Rename index
    df_tz.index.name = "UTC Time"
    # Return the demand that has been shifted to meet UTC time
    return df_tz
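

# Illustrative sketch, not part of the original module: build a dummy hourly profile
# indexed by load zone ID and shift it to UTC. The all-ones values are placeholders;
# only the timestamp/column checks and the time shift are exercised here.
if __name__ == "__main__":
    _index = pd.date_range("2016-01-01", "2017-01-01", freq="H", inclusive="left")
    _dummy_zone_demand = pd.DataFrame(1.0, index=_index, columns=list(id2abv))
    print(shift_local_time_by_loadzone_to_utc(_dummy_zone_demand).head())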