# Source code for prereise.gather.demanddata.bldg_electrification.helper
import io
import os
import zipfile
import geopandas as gpd
import pandas as pd
import requests
def read_shapefile(url):
    """Read shape files for overlay

    The zip archive is downloaded, extracted into the relative folder ``tmp/``
    (the extracted files are left in place) and its ``.shp`` member is loaded
    and reprojected to EPSG:4269.

    :param str url: directory in blob storage that contains the shape file in
        zip format
    :return: (*geopandas.GeoDataFrame*) -- geo data frame of the shape file
    :raises requests.HTTPError: if the download answers with an HTTP error status.
    :raises ValueError: if the archive does not contain exactly one ``.shp`` file.
    """
    local_path = "tmp/"

    r = requests.get(url)
    # Stop on 4xx/5xx here; otherwise the error page body is handed to ZipFile
    # and the failure shows up as an unrelated BadZipFile.
    r.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(path=local_path)  # extract to folder
        # Pick the .shp member by its extension instead of relying on the
        # sorted position of exactly four sidecar files.
        shp_members = [
            member for member in z.namelist() if member.lower().endswith(".shp")
        ]

    if len(shp_members) != 1:
        raise ValueError(
            f"expected exactly one .shp file in the archive, found {shp_members}"
        )

    shapefile_df = gpd.read_file(os.path.join(local_path, shp_members[0])).to_crs(
        "EPSG:4269"
    )
    return shapefile_df
def zone_shp_overlay(zone_name_shp, zone_shp, pumas_shp):
    """Select pumas within a zonal load area

    :param str zone_name_shp: name of the zone in ba_area.shp
    :param geopandas.GeoDataFrame zone_shp: geo data frame of zone(BA) shape file
    :param geopandas.GeoDataFrame pumas_shp: geo data frame of pumas shape file.
        The columns "puma", "GEOID10" and "geometry" are read; an "area" column
        is written to this frame in place.
    :return: (*pandas.DataFrame*) -- puma data of all pumas within the zone,
        including fraction within the zone
    :raises IndexError: if a puma of the overlay has no matching "puma" entry
        in *pumas_shp*.
    """
    zone_shp = zone_shp[zone_shp["BA"] == zone_name_shp].copy()

    # NOTE: this writes to the caller's frame; kept because callers may rely on it.
    pumas_shp["area"] = pumas_shp["geometry"].to_crs({"proj": "cea"}).area

    puma_zone = gpd.overlay(pumas_shp, zone_shp.to_crs("EPSG:4269"))
    puma_zone["area"] = puma_zone["geometry"].to_crs({"proj": "cea"}).area
    puma_zone["puma"] = "puma_" + puma_zone["GEOID10"]

    # Full area per puma, first occurrence wins. Built once so the fraction is
    # a single lookup per row instead of re-filtering pumas_shp for every row.
    full_area = (
        pumas_shp[["puma", "area"]]
        .dropna(subset=["puma"])
        .drop_duplicates(subset="puma")
        .set_index("puma")["area"]
    )
    unmatched = ~puma_zone["puma"].isin(full_area.index)
    if unmatched.any():
        # IndexError is what the former per-row lookup raised in this situation.
        raise IndexError(
            "no area found in pumas_shp for: "
            f"{list(puma_zone.loc[unmatched, 'puma'].unique())}"
        )
    puma_zone["area_frac"] = puma_zone["area"] / puma_zone["puma"].map(full_area)

    puma_data_zone = pd.DataFrame(
        {"puma": puma_zone["puma"], "frac_in_zone": puma_zone["area_frac"]}
    )

    data_dir = os.path.join(os.path.dirname(__file__), "data")
    puma_data = pd.read_csv(os.path.join(data_dir, "puma_data.csv"), index_col="puma")
    puma_hp = pd.read_csv(
        os.path.join(data_dir, "puma_hp_data.csv"), index_col="puma"
    )

    puma_data_zone = puma_data_zone.join(puma_data, on="puma")
    # "state" is dropped from the second table before joining it on.
    puma_data_zone = puma_data_zone.join(puma_hp.drop(columns=["state"]), on="puma")
    puma_data_zone = puma_data_zone.set_index("puma")
    return puma_data_zone
def state_shp_overlay(state, state_shp, zone_shp):
    """Select load zones within a state

    :param str state: abbrev. of state; the literal "United States" is matched
        against the "NAME" column instead of "STUSPS"
    :param geopandas.GeoDataFrame state_shp: geo data frame of state shape file
    :param geopandas.GeoDataFrame zone_shp: geo data frame of zone(BA) shape file.
        An "area" column is written to this frame in place.
    :return: (*geopandas.GeoDataFrame*) -- state boundaries and load zones within it.
        "area_frac" is the share of each zone's area inside the state; values of
        at least 0.99 are set to 1 and rows at or below 0.00001 are removed.
    :raises IndexError: if a zone of the overlay has no matching "BA" entry in
        *zone_shp*.
    """
    if state == "United States":
        state_shape = state_shp[state_shp["NAME"] == state].copy()
    else:
        state_shape = state_shp[state_shp["STUSPS"] == state].copy()

    # NOTE: this writes to the caller's frame; kept because callers may rely on it.
    zone_shp["area"] = zone_shp["geometry"].to_crs({"proj": "cea"}).area

    zone_state = gpd.overlay(zone_shp, state_shape.to_crs("EPSG:4269"))
    zone_state["area"] = zone_state["geometry"].to_crs({"proj": "cea"}).area

    # Full area per zone, first occurrence wins. Built once so the fraction is
    # a single lookup per row instead of re-filtering zone_shp for every row.
    full_area = (
        zone_shp[["BA", "area"]]
        .dropna(subset=["BA"])
        .drop_duplicates(subset="BA")
        .set_index("BA")["area"]
    )
    unmatched = ~zone_state["BA"].isin(full_area.index)
    if unmatched.any():
        # IndexError is what the former per-row lookup raised in this situation.
        raise IndexError(
            "no area found in zone_shp for: "
            f"{list(zone_state.loc[unmatched, 'BA'].unique())}"
        )
    zone_state["area_frac"] = zone_state["area"] / zone_state["BA"].map(full_area)

    # Treat zones that are almost entirely inside the state as fully inside.
    zone_state.loc[zone_state["area_frac"] >= 0.99, "area_frac"] = 1
    # Discard slivers; surviving rows keep their original index labels.
    zone_state = zone_state[~(zone_state["area_frac"] <= 0.00001)]
    return zone_state