import numpy as np
import pandas as pd
import PySAM.Pvwattsv7 as PVWatts
import PySAM.PySSC as pssc  # noqa: N813
from tqdm import tqdm
from prereise.gather.solardata.helpers import get_plant_id_unique_location
from prereise.gather.solardata.nsrdb.nrel_api import NrelApi
from prereise.gather.solardata.pv_tracking import (
    get_pv_tracking_data,
    get_pv_tracking_ratio_state,
)
# Baseline PVWatts system parameters shared by all plants; per-plant values
# ("system_capacity", "dc_ac_ratio", "array_type", and optionally "tilt")
# are merged on top of these before each simulation.
default_pv_parameters = {
    "adjust:constant": 0,
    "azimuth": 180,
    "gcr": 0.4,
    "inv_eff": 94,
    "losses": 14,
    "tilt": 30,
}
def generate_timestamps_without_leap_day(year):
    """For a given year, return timestamps for each non-leap-day hour, and the timestamp
    of the beginning of the leap day (if there is one).

    :param int/str year: year to generate timestamps for.
    :return: (*tuple*) --
        pandas.DatetimeIndex: for each non-leap-day-hour of the given year.
        pandas.Timestamp/None: timestamp for the first hour of the leap day (if any).
    """
    # SAM only takes 365 days, so for a leap year: leave out the leap day.
    try:
        # pd.Timestamp raises ValueError for non-leap years (Feb 29 doesn't exist).
        # Hour offset of the start of the leap day within the full (366-day) year.
        leap_day = (pd.Timestamp(f"{year}-02-29-00").dayofyear - 1) * 24
        # Build 365 days of hourly timestamps in a known non-leap year (2015),
        # then re-label them with the target year: dates on/after Mar 1 shift
        # past the missing Feb 29, so the result covers Jan 1 through Dec 31 of
        # the target year with the leap day omitted. Generating directly in the
        # leap year would instead include Feb 29 and stop at Dec 30.
        sam_dates = pd.date_range(start="2015-01-01-00", freq="H", periods=365 * 24)
        sam_dates = sam_dates.map(lambda t: t.replace(year=int(year)))
    except ValueError:
        # Non-leap year: all 8760 hours belong in the result as-is.
        leap_day = None
        sam_dates = pd.date_range(start=f"{year}-01-01-00", freq="H", periods=365 * 24)
    return sam_dates, leap_day
def calculate_power(solar_data, pv_dict):
    """Use PVWatts to translate weather data into power.

    :param dict solar_data: weather data as returned by :meth:`Psm3Data.to_dict`.
    :param dict pv_dict: solar plant attributes (PVWatts v7 input parameters).
    :return: (*numpy.array*) -- hourly power output.
    """
    # Convert the plain dict to an SSC table and run a PVWatts v7 simulation.
    pv_dat = pssc.dict_to_ssc_table(pv_dict, "pvwattsv7")
    pv = PVWatts.wrap(pv_dat)
    pv.SolarResource.assign({"solar_resource_data": solar_data})
    pv.execute()
    return np.array(pv.Outputs.gen)
def retrieve_data_blended(
    email,
    api_key,
    grid=None,
    solar_plant=None,
    interconnect_to_state_abvs=None,
    year="2016",
    rate_limit=0.5,
    cache_dir=None,
):
    """Retrieves irradiance data from NSRDB and calculate the power output using
    the System Adviser Model (SAM). Either a Grid object needs to be passed to ``grid``,
    or (a data frame needs to be passed to ``solar_plant`` and a dictionary needs to be
    passed to ``interconnect_to_state_abvs``).

    :param str email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
    :param str api_key: API key.
    :param powersimdata.input.grid.Grid grid: grid instance.
    :param pandas.DataFrame solar_plant: plant data frame.
    :param dict/pandas.Series interconnect_to_state_abvs: mapping of interconnection
        name to state abbreviations, used to look up average parameters by interconnect
        when average parameters by state are not available.
    :param int/str year: year.
    :param int/float rate_limit: minimum seconds to wait between requests to NREL.
    :param str cache_dir: directory to cache downloaded data. If None, don't cache.
    :return: (*pandas.DataFrame*) -- data frame indexed by timestamp, with plant ids as
        columns. Values are power output for a 1MW generator.
    :raises TypeError: if neither or both of ``grid`` and the
        (``solar_plant``, ``interconnect_to_state_abvs``) pair are provided.
    :raises ValueError: if ``solar_plant`` lacks required columns.
    """
    xor_err_msg = (
        "Either grid xor (solar_plant and interconnect_to_state_abvs) must be defined"
    )
    if grid is None:
        if solar_plant is None or interconnect_to_state_abvs is None:
            raise TypeError(xor_err_msg)
        if not {"state_abv", "interconnect"} <= set(solar_plant.columns):
            raise ValueError("solar_plant needs 'state_abv' and 'interconnect' columns")
        # Create mappings from other inputs
        zone_id_to_state_abv = {
            i: group["state_abv"].unique()[0]
            for i, group in solar_plant.groupby("zone_id")
        }
        zone_id_to_interconnect = {
            i: group["interconnect"].unique()[0]
            for i, group in solar_plant.groupby("zone_id")
        }
    else:
        if solar_plant is not None or interconnect_to_state_abvs is not None:
            raise TypeError(xor_err_msg)
        solar_plant = grid.plant.query("type == 'solar'").copy()
        # Use existing mappings found in the Grid object
        interconnect_to_state_abvs = grid.model_immutables.zones["interconnect2abv"]
        zone_id_to_state_abv = grid.model_immutables.zones["id2abv"]
        zone_id_to_interconnect = {
            z: grid.model_immutables.zones["abv2interconnect"][zone_id_to_state_abv[z]]
            for z in solar_plant["zone_id"].unique()
        }
    real_dates = pd.date_range(
        start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H"
    )
    sam_dates, leap_day = generate_timestamps_without_leap_day(year)
    # PV tracking ratios
    # By state and by interconnect when EIA data do not have any solar PV in the state
    pv_info = get_pv_tracking_data()
    zone_id = solar_plant.zone_id.unique()
    frac = {}
    for zone in zone_id:
        state = zone_id_to_state_abv[zone]
        frac[zone] = get_pv_tracking_ratio_state(pv_info, [state])
        if frac[zone] is None:
            # No EIA data for this state; fall back to the interconnect average.
            interconnect = zone_id_to_interconnect[zone]
            states_in_interconnect = list(interconnect_to_state_abvs[interconnect])
            frac[zone] = get_pv_tracking_ratio_state(pv_info, states_in_interconnect)
    # Inverter Loading Ratio
    ilr = 1.25
    api = NrelApi(email, api_key, rate_limit)
    # Identify unique location: one NSRDB request per (lon, lat), shared by all
    # plants at that location.
    coord = get_plant_id_unique_location(solar_plant)
    data = {}
    for key, plants in tqdm(coord.items(), total=len(coord)):
        lat, lon = key[1], key[0]
        solar_data = api.get_psm3_at(
            lat,
            lon,
            attributes="dhi,dni,wind_speed,air_temperature",
            year=year,
            leap_day=False,
            dates=sam_dates,
            cache_dir=cache_dir,
        ).to_dict()
        for i, plant_id in enumerate(plants):
            if i == 0:
                # Calculate power for the first plant at each location
                first_plant_id = plant_id
                tracking_ratios = frac[solar_plant.loc[plant_id].zone_id]
                power = 0
                # Blend fixed-tilt (0), single-axis (2), and dual-axis (4)
                # tracking outputs, weighted by the zone's tracking ratios.
                for j, axis in enumerate([0, 2, 4]):
                    plant_pv_dict = {
                        "system_capacity": ilr,
                        "dc_ac_ratio": ilr,
                        "array_type": axis,
                    }
                    pv_dict = {**default_pv_parameters, **plant_pv_dict}
                    power += tracking_ratios[j] * calculate_power(solar_data, pv_dict)
                if leap_day is not None:
                    # Fill the missing leap day by repeating the previous day.
                    power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
            else:
                # For every other plant, look up power from first plant at the location
                power = data[first_plant_id]
            data[plant_id] = power
    return pd.DataFrame(data, index=real_dates).sort_index(axis="columns")
def retrieve_data_individual(
    email, api_key, solar_plant, year="2016", rate_limit=0.5, cache_dir=None
):
    """Retrieves irradiance data from NSRDB and calculate the power output using
    the System Adviser Model (SAM), using plant-level tracking-type, tilt and
    capacity information rather than zone-level averages.

    :param str email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
    :param str api_key: API key.
    :param pandas.DataFrame solar_plant: plant data frame, plus additional boolean
        columns 'Single-Axis Tracking?', 'Dual-Axis Tracking?', 'Fixed Tilt?', and float
        columns 'Tilt Angle', 'Nameplate Capacity (MW)', and 'DC Net Capacity (MW)'.
    :param int/str year: year.
    :param int/float rate_limit: minimum seconds to wait between requests to NREL.
    :param str cache_dir: directory to cache downloaded data. If None, don't cache.
    :return: (*pandas.DataFrame*) -- data frame indexed by timestamp, with plant ids as
        columns. Values are power output for a 1MW generator.
    :raises ValueError: if any plant does not have exactly one tracking type set.
    """
    # Verify that each solar plant has exactly one tracking type equal to True
    array_type_mapping = {
        "Fixed Tilt?": 0,
        "Single-Axis Tracking?": 2,
        "Dual-Axis Tracking?": 4,
    }
    if not all(solar_plant[list(array_type_mapping.keys())].sum(axis=1) == 1):
        raise ValueError("Indeterminate tracking information for one or more plants")
    # Select the appropriate 'array type' to pass to SAM
    plant_array_types = (
        solar_plant[list(array_type_mapping.keys())]
        .astype(bool)
        .apply(lambda x: array_type_mapping[x.idxmax()], axis=1)
    )
    real_dates = pd.date_range(
        start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H"
    )
    sam_dates, leap_day = generate_timestamps_without_leap_day(year)
    api = NrelApi(email, api_key, rate_limit)
    # One NSRDB request per unique (lon, lat), shared by all plants there.
    coord = get_plant_id_unique_location(solar_plant)
    data = {}
    for key, plants in tqdm(coord.items(), total=len(coord)):
        lat, lon = key[1], key[0]
        solar_data = api.get_psm3_at(
            lat,
            lon,
            attributes="dhi,dni,wind_speed,air_temperature",
            year=year,
            leap_day=False,
            dates=sam_dates,
            cache_dir=cache_dir,
        ).to_dict()
        for plant_id in plants:
            series = solar_plant.loc[plant_id]
            # Plant-specific inverter loading ratio (DC/AC).
            ilr = series["DC Net Capacity (MW)"] / series["Nameplate Capacity (MW)"]
            plant_pv_dict = {
                "system_capacity": ilr,
                "dc_ac_ratio": ilr,
                "array_type": plant_array_types.loc[plant_id],
            }
            if plant_pv_dict["array_type"] == 0:
                # Fixed-tilt plants use their reported tilt angle.
                plant_pv_dict["tilt"] = series["Tilt Angle"]
            pv_dict = {**default_pv_parameters, **plant_pv_dict}
            power = calculate_power(solar_data, pv_dict)
            if leap_day is not None:
                # Fill the missing leap day by repeating the previous day.
                power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
            data[plant_id] = power
    return pd.DataFrame(data, index=real_dates).sort_index(axis="columns")