Source code for prereise.gather.solardata.ga_wind.ga_wind

import dateutil
import h5pyd
import numpy as np
import pandas as pd
from pyproj import Transformer
from tqdm import tqdm

from prereise.gather.solardata.ga_wind.helpers import ll2ij, proj_string
from prereise.gather.solardata.helpers import get_plant_id_unique_location


def retrieve_data(
    solar_plant, hs_api_key, start_date="2007-01-01", end_date="2014-01-01"
):
    """Retrieves irradiance data from Gridded Atmospheric Wind Integration National
    dataset.

    For every unique plant location, the hourly *'GHI'* series of the nearest grid
    cell is downloaded for the requested period and divided by its maximum over
    that period. The resulting profile is then duplicated for each plant sharing
    the location.

    :param pandas.DataFrame solar_plant: plant data frame.
    :param str hs_api_key: API key.
    :param str start_date: start date.
    :param str end_date: end date.
    :return: (*pandas.DataFrame*) -- data frame with *'Pout'*, *'plant_id'*,
        *'ts'* and *'ts_id'* as columns. Values are power output for a 1MW
        generator.
    :raises OSError: if the file cannot be opened through either endpoint.
    """
    columns = ["Pout", "plant_id", "ts", "ts_id"]

    # Identify unique location
    coord = get_plant_id_unique_location(solar_plant)

    # Build query
    hs_endpoint = "https://developer.nrel.gov/api/hsds"
    hs_endpoint_fallback = "https://developer.nrel.gov/api/hsds/"
    connection = {"username": None, "password": None, "api_key": hs_api_key}

    try:
        f = h5pyd.File("/nrel/wtk-us.h5", "r", endpoint=hs_endpoint, **connection)
    except OSError:
        # Retry once with the trailing-slash variant of the endpoint; a failure
        # here is deliberately left to propagate to the caller.
        f = h5pyd.File(
            "/nrel/wtk-us.h5", "r", endpoint=hs_endpoint_fallback, **connection
        )

    frames = []
    try:
        # Get coordinates of nearest location
        lat_origin, lon_origin = f["coordinates"][0][0]
        transformer = Transformer.from_pipeline(proj_string)
        ij = {
            key: ll2ij(transformer, lon_origin, lat_origin, *key)
            for key in coord.keys()
        }

        # Extract time series
        dt = pd.DataFrame({"datetime": f["datetime"][:]})
        dt["datetime"] = dt["datetime"].apply(dateutil.parser.parse)
        dt_range = dt.loc[(dt.datetime >= start_date) & (dt.datetime < end_date)]

        for key, val in tqdm(ij.items(), total=len(ij)):
            ghi = f["GHI"][
                min(dt_range.index) : max(dt_range.index) + 1, val[0], val[1]
            ]
            data_loc = pd.DataFrame({"Pout": ghi})
            # Normalize by the peak of the period.
            # NOTE(review): presumably an all-zero series cannot occur here; it
            # would divide by zero -- confirm.
            data_loc["Pout"] /= max(ghi)
            data_loc["ts_id"] = range(1, len(ghi) + 1)
            # The end date is exclusive, hence the dropped last timestamp.
            data_loc["ts"] = pd.date_range(
                start=start_date, end=end_date, freq="H"
            )[:-1]

            # One copy of the location profile per plant at that location.
            # Frames are collected and concatenated once after the loop:
            # DataFrame.append no longer exists in pandas >= 2.0 and re-copied
            # the whole accumulated frame on every call.
            frames.extend(data_loc.assign(plant_id=i) for i in coord[key])
    finally:
        # Everything needed has been read into memory at this point.
        f.close()

    if frames:
        data = pd.concat(frames, ignore_index=True, sort=False).reindex(
            columns=columns
        )
    else:
        data = pd.DataFrame({name: [] for name in columns})

    data = data.astype({"plant_id": np.int32, "ts_id": np.int32})
    data = data.sort_values(by=["ts_id", "plant_id"]).reset_index(drop=True)

    return data