import geopandas
import pandas as pd
from shapely import wkt
import cdsapi
import xarray as xr
from shapes.models import Shape
from datalayers.datasources.base_layer import LayerValueType, LayerTimeResolution
from datalayers.datasources.base_layer import BaseLayer
class Era5T2m(BaseLayer):
def __init__(self):
super().__init__()
self.time_col = LayerTimeResolution.DAY
self.value_type = LayerValueType.VALUE
def download(self):
self._create_data_dir_if_not_exists()
c = cdsapi.Client()
c.retrieve(
"reanalysis-era5-single-levels",
{
"product_type": "reanalysis",
"format": "netcdf",
"variable": "2m_temperature",
"year": "2023",
"month": [
"01",
"02",
"03",
"04",
"05",
"06",
"07",
"08",
"09",
"10",
"11",
"12",
],
"day": [
"01",
"02",
"03",
"04",
"05",
"06",
"07",
"08",
"09",
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"17",
"18",
"19",
"20",
"21",
"22",
"23",
"24",
"25",
"26",
"27",
"28",
"29",
"30",
"31",
],
"time": [
"00:00",
"01:00",
"02:00",
"03:00",
"04:00",
"05:00",
"06:00",
"07:00",
"08:00",
"09:00",
"10:00",
"11:00",
"12:00",
"13:00",
"14:00",
"15:00",
"16:00",
"17:00",
"18:00",
"19:00",
"20:00",
"21:00",
"22:00",
"23:00",
],
"area": [
11.17,
-3.26,
4.74,
1.2,
],
},
self.get_data_path() / "2023 2m_temperature.nz",
)
def process(self, shapes=None, save_output=False, param_dir=None):
# get shapes
if shapes is None:
shapes = Shape.objects.all()
# netCFD
ds = xr.open_mfdataset(self.get_data_path() / "2023 2m_temperature.nz")
# t2m is provided in Kelvin [K], transform to °C by subtracting 273.15
# as specified in the Copernicus documentation
ds["t2m"] -= 273.15
ds["t2m"].attrs["units"] = "°C"
# get points inside netCFD
df = ds.isel(time=0).to_dataframe().reset_index()
gdf_df_points = geopandas.GeoDataFrame(
df,
geometry=geopandas.points_from_xy(df.longitude, df.latitude),
crs="EPSG:4326",
)
for shape in shapes:
shape_geom = wkt.loads(shape.geometry.wkt)
shape_points = gdf_df_points[gdf_df_points.within(shape_geom)]
if len(shape_points) == 0:
self.layer.warning("No points inside %s", {"shape_id": shape.id})
continue
# extract row for each point in the netCFD that is inside the
# current shape
datetime_index = ds.time.values
point_series = []
for i, row in shape_points.iterrows():
lat = row["latitude"]
lng = row["longitude"]
point_series.append(ds.t2m.sel(longitude=lng, latitude=lat).values)
# combine to data frame
datetime_index = pd.to_datetime(datetime_index)
df = pd.DataFrame(index=datetime_index)
i = 1
for s in point_series:
df[f"p{i}"] = s
i += 1
# mean per time step
df["mean"] = df.max(axis=1)
# group to mean per day
daily_df = df.groupby(pd.Grouper(freq="D")).max()
for i, row in daily_df.iterrows():
self.rows.append(
{
"date": i,
"value": row["mean"],
"shape_id": shape.id,
}
)
self.save()