Source code for prereise.gather.flexibilitydata.doe.doe_data

import os
import tarfile
import urllib.request
import zipfile

import numpy as np
import pandas as pd


def cleanup_directory(root):
    """Recursively cleanup a folder by deleting meaningless or empty files

    Two kinds of files are deleted: hidden files (name starting with ``.``) and
    zero-byte files whose name ends in ``csv``. Directories are never deleted,
    they are only descended into, whatever their name.

    :param str root: the root directory containing raw de-compressed DOE
        flexibility data
    """
    for name in os.listdir(root):
        path = os.path.join(root, name)

        if os.path.isdir(path):
            # os.remove cannot delete a directory, so a hidden (or oddly named)
            # folder is walked instead of aborting the whole cleanup
            cleanup_directory(path)
            continue

        is_hidden = name.startswith(".")
        is_empty_csv = name.endswith("csv") and os.path.getsize(path) == 0
        if is_hidden or is_empty_csv:
            os.remove(path)
def download_doe(download_path="data"):
    """Download demand flexibility filters from OEDI, extract and cleanup

    The zip archive is downloaded into ``download_path``, unpacked and deleted.
    Every tar archive then found directly inside ``download_path`` is unpacked
    in place and deleted, and finally the directory tree is cleaned up with
    :func:`cleanup_directory`.

    :param str download_path: the directory where the original DOE data will
        be stored
    """
    # create data directory
    os.makedirs(download_path, exist_ok=True)

    # download zip data
    oedi_filter_link = (
        "https://data.openei.org/files/180/2006weatherentireusdrfilters.tar.zip"
    )
    zip_path = os.path.join(download_path, "filter.zip")
    urllib.request.urlretrieve(oedi_filter_link, zip_path)

    # extract, then drop the zip so it is not mistaken for data later on
    with zipfile.ZipFile(zip_path, "r") as fh:
        fh.extractall(download_path)
    os.remove(zip_path)

    # The archive comes from the network: use the restrictive "data" extraction
    # filter when this Python version provides it (older versions have no
    # ``filter`` argument, so nothing is passed there).
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    # further extract the tarballs contained in the zip
    for name in os.listdir(download_path):
        archive = os.path.join(download_path, name)
        # folders or unrelated files already present in download_path (e.g. the
        # output of an earlier run) are left alone instead of crashing tarfile
        if not (os.path.isfile(archive) and tarfile.is_tarfile(archive)):
            continue
        with tarfile.open(archive, "r:gz") as fh:
            fh.extractall(download_path, **extract_kwargs)
        os.remove(archive)

    # cleanup
    cleanup_directory(download_path)
def _read_flexibility(path):
    """Load the ``Flexibility`` column of one csv file, indexed by timestamp."""
    flex = pd.read_csv(path, index_col=0)["Flexibility"]
    # parse the index so that series of different lengths align on real
    # timestamps instead of on raw csv labels
    flex.index = pd.to_datetime(flex.index)
    return flex


def _sum_on_axis(series_list, axis):
    """Sum series onto ``axis``; timestamps missing from a series count as 0."""
    total = pd.Series(np.zeros(len(axis)), index=axis)
    for flex in series_list:
        if len(flex) == len(axis):
            # same length: add position by position
            total = total + flex.values
        else:
            # different length: align on timestamps, then drop any timestamp
            # that is not part of the target axis
            total = total.add(flex, fill_value=0).reindex(axis)
    return total


def aggregate_doe(root, out_path):
    """Aggregate sector flexibilities by summing up the percentage flexibility
    from all sectors and store to output csv file

    Each sub-directory of ``root`` becomes one column of the output, holding
    the sum of the ``Flexibility`` column of every csv file in that
    sub-directory. The output is indexed by a ``time`` column, taken from the
    longest series that was read, and values are rounded to 4 decimals. A
    sub-directory without any file yields a column of zeros; entries of
    ``root`` that are not directories are ignored.

    :param str root: the root directory containing raw de-compressed DOE
        flexibility data
    :param str out_path: the output file where the aggregated data will be
        stored
    """
    all_folders = [
        name for name in os.listdir(root) if os.path.isdir(os.path.join(root, name))
    ]

    # one summed series per folder, each on the longest time axis of its files
    folder_totals = {}
    for folder in all_folders:
        folder_path = os.path.join(root, folder)
        series_list = [
            _read_flexibility(os.path.join(folder_path, csv_name))
            for csv_name in os.listdir(folder_path)
        ]
        if not series_list:
            # nothing to sum, e.g. every csv of this folder was empty and removed
            continue
        folder_axis = max(series_list, key=len).index
        folder_totals[folder] = _sum_on_axis(series_list, folder_axis)

    # common time axis of the output: the longest one seen in any folder
    if folder_totals:
        time_axis = max(folder_totals.values(), key=len).index
    else:
        time_axis = pd.DatetimeIndex([])

    # initialize output container, then fill in one column per folder
    eia_flex = pd.DataFrame(0.0, index=time_axis.rename("time"), columns=all_folders)
    for folder, total in folder_totals.items():
        eia_flex[folder] = _sum_on_axis([total], time_axis).values

    eia_flex = eia_flex.round(4)
    eia_flex.to_csv(out_path)