# Source code for prereise.gather.flexibilitydata.doe.doe_data

import os
import tarfile
import urllib.request
import zipfile

import numpy as np
import pandas as pd


def cleanup_directory(root):
    """Recursively cleanup a folder by deleting meaningless or empty files

    Hidden files (name starting with ".") and zero-byte files whose name ends
    in "csv" are deleted. Every sub-directory, hidden or not, is cleaned the
    same way; directories themselves are never deleted.

    :param str root: the root directory containing raw de-compressed DOE flexibility data
    """

    for name in os.listdir(root):
        fp = os.path.join(root, name)
        if os.path.isdir(fp):
            # os.remove cannot delete a directory, so a hidden folder must be
            # descended into rather than removed
            cleanup_directory(fp)
        elif name.startswith(".") or (
            name.endswith("csv") and os.path.getsize(fp) == 0
        ):
            os.remove(fp)


def download_doe(download_path="data"):
    """Download demand flexibility filters from OEDI, extract and cleanup

    The zip archive is downloaded into ``download_path``, unpacked and deleted.
    Each tar archive that came out of the zip is then unpacked into
    ``download_path`` and deleted, and finally the directory is cleaned with
    :func:`cleanup_directory`. Files that were already present in
    ``download_path`` before the call are not unpacked or deleted by the
    extraction steps.

    :param str download_path: the directory where the original DOE data will be stored
    """

    # create data directory
    os.makedirs(download_path, exist_ok=True)

    # download zip data
    oedi_filter_link = (
        "https://data.openei.org/files/180/2006weatherentireusdrfilters.tar.zip"
    )
    zip_path = os.path.join(download_path, "filter.zip")

    try:
        urllib.request.urlretrieve(oedi_filter_link, zip_path)

        # extract, remembering which entries the archive produced so that only
        # those are processed further
        with zipfile.ZipFile(zip_path, "r") as fh:
            extracted = fh.namelist()
            fh.extractall(download_path)
    finally:
        # the zip is only a temporary artifact; drop it even if a step failed
        if os.path.exists(zip_path):
            os.remove(zip_path)

    # further extract the tarballs delivered by the zip, then delete them
    for member in extracted:
        member_path = os.path.join(download_path, member)
        if not (os.path.isfile(member_path) and tarfile.is_tarfile(member_path)):
            continue
        # NOTE(review): extractall trusts member paths inside a downloaded
        # archive; consider passing an extraction filter (e.g. filter="data")
        # once the minimum supported Python version provides it.
        with tarfile.open(member_path, "r:*") as fh:
            fh.extractall(download_path)
        os.remove(member_path)

    # cleanup
    cleanup_directory(download_path)


def aggregate_doe(root, out_path):
    """Aggregate sector flexibilities by summing up the percentage flexibility from all
    sectors and store to output csv file

    Every sub-directory of ``root`` becomes one column of the output. The
    "Flexibility" column of each file in a sub-directory is added to that
    column. The first file read defines the time axis (and therefore the number
    of rows); files with the same number of rows are added position by
    position, files with a different number of rows are aligned on their
    timestamps with missing values counted as zero. The result is rounded to
    four decimals before it is written.

    :param str root: the root directory containing raw de-compressed DOE flexibility data
    :param str out_path: the output file where the aggregated data will be stored
    """

    # only sub-directories hold data; sorting makes the column order and the
    # choice of reference file reproducible across platforms
    all_folders = sorted(
        name for name in os.listdir(root) if os.path.isdir(os.path.join(root, name))
    )

    # output container, created once the first file reveals the time axis
    eia_flex = None

    for folder in all_folders:
        folder_path = os.path.join(root, folder)

        for name in sorted(os.listdir(folder_path)):
            fn = os.path.join(folder_path, name)
            if not os.path.isfile(fn):
                continue

            file_flex = pd.read_csv(fn, index_col=0)
            # read_csv leaves the timestamps as strings; they must be real
            # datetimes to line up with the output index
            file_flex.index = pd.to_datetime(file_flex.index)
            flexibility = file_flex["Flexibility"]

            if eia_flex is None:
                eia_flex = pd.DataFrame(
                    np.zeros((len(flexibility), len(all_folders))),
                    index=flexibility.index.rename("time"),
                    columns=all_folders,
                )

            if len(flexibility) == len(eia_flex):
                eia_flex[folder] = eia_flex[folder] + flexibility.values
            else:
                summed = eia_flex[folder].add(flexibility, fill_value=0)
                # timestamps outside the output time axis are discarded
                eia_flex[folder] = summed.reindex(eia_flex.index)

    if eia_flex is None:
        # nothing to aggregate: still write a well-formed (empty) table
        eia_flex = pd.DataFrame(
            index=pd.DatetimeIndex([], name="time"), columns=all_folders
        )

    eia_flex = eia_flex.round(4)
    eia_flex.to_csv(out_path)
Source code for prereise.gather.flexibilitydata.doe.doe_data

Navigation

Related Topics

Useful Links

Code