Source code for powersimdata.data_access.csv_store

import functools

import pandas as pd


def verify_hash(func):
    """Decorator which verifies the sha1sum of the file before writing it on
    the server. Meant for methods that return an updated scenario or execute
    list.

    :param callable func: method to wrap; it is called as
        ``func(self, *args, **kwargs)`` and its return value is committed.
    :return: (*callable*) -- wrapped method, which returns whatever ``func``
        returned.
    """

    @functools.wraps(func)
    def _checked(self, *args, **kwargs):
        # The checksum has to be taken before the wrapped method runs: it
        # describes the file as it was prior to the update and is handed to
        # ``commit`` together with the new table.
        fingerprint = self.data_access.checksum(self._FILE_NAME)
        updated = func(self, *args, **kwargs)
        self.commit(updated, fingerprint)
        return updated

    return _checked
def _parse_csv(file_object): """Read file from disk into data frame :param str, path object or file-like object file_object: a reference to the csv file :return: (*pandas.DataFrame*) -- the specified file as a data frame. """ table = pd.read_csv(file_object) table.set_index("id", inplace=True) table.fillna("", inplace=True) return table.astype(str)
class CsvStore:
    """Base class for common functionality used to manage scenario and execute
    list stored as csv files on the server

    The methods read ``self._FILE_NAME``, which is not defined in this class;
    presumably each subclass sets it to the name of the csv file it manages.

    :param powersimdata.data_access.data_access.DataAccess: data access object
    """

    def __init__(self, data_access):
        """Constructor"""
        self.data_access = data_access

    def get_table(self):
        """Attempt to download the file from server and blob storage, falling
        back to local copy if one exists, and return the combined result.

        Two files are read: ``_FILE_NAME`` and ``_FILE_NAME + ".2"``. When the
        same index value appears in both, the row from ``_FILE_NAME`` is kept.

        :return: (*pandas.DataFrame*) -- the specified table as a data frame.
        """
        filename = self._FILE_NAME
        orig = self._get_table(filename)
        blob = self._get_table(filename + ".2")
        df = pd.concat([orig, blob])
        # ``duplicated`` flags every occurrence after the first, so the rows
        # coming from ``orig`` take precedence over those from ``blob``.
        return df[~df.index.duplicated()]

    def _get_table(self, filename):
        """Best-effort read of a single csv file through the data access
        object.

        :param str filename: name of the file to fetch and parse.
        :return: (*pandas.DataFrame*) -- the parsed table, or an empty data
            frame if the file could not be opened or parsed.
        """
        # Refreshing the copy is optional: if it fails, whatever
        # ``data_access.get`` can still provide is used. Only ``Exception`` is
        # caught so that KeyboardInterrupt / SystemExit are not swallowed.
        try:
            self.data_access.copy_from(filename)
        except Exception:
            pass

        try:
            with self.data_access.get(filename) as (f, _):
                return _parse_csv(f)
        except Exception:
            # Nothing readable: report an empty table instead of failing.
            return pd.DataFrame()

    def commit(self, table, checksum):
        """Save to local directory and upload if needed

        The table is written as csv to the file object yielded by
        ``data_access.push``.

        :param pandas.DataFrame table: the data frame to save
        :param str checksum: the checksum prior to download
        """
        with self.data_access.push(self._FILE_NAME, checksum) as f:
            table.to_csv(f)