Processing: ERA5 Sea Ice Profiles#
In this notebook, we’ll download and process ERA5 reanalysis data for the Arctic region, preparing atmospheric profiles suitable for pyRadtran simulations. This involves setting up a Dask cluster to handle the large ERA5 dataset efficiently.
Warning
This notebook downloads and processes a large ERA5 dataset. Expect 30–60 minutes for the download and 10+ minutes for the resampling step. A machine with sufficient RAM (32 GB+) and multiple CPU cores is recommended.
Overview:
We download ERA5 data of the Arctic from 1980 to 2020 and prepare it for use in pyRadtran simulations.
The raw data is stored in
data/era5_arctic_subset.nc.The resampled data has dimensions of
season,decade,sea_ice_cover, andlevel.The results are visualized in
sea_ice_era5_plotting.ipynband used for advanced simulations inera5_seasonal_sea_ice_profiles.ipynb.
Setting Up the Dask Cluster#
We access ERA5 data from Google Cloud Storage via the ARCO-ERA5 dataset and use a local Dask cluster for parallel processing.
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import gcsfs
from dask.distributed import Client, LocalCluster
# Access ERA5 data from Google Cloud Storage
gcs = gcsfs.GCSFileSystem(token='anon')
path = 'gs://gcp-public-data-arco-era5/ar/1959-2022-6h-128x64_equiangular_with_poles_conservative.zarr/'
full_era5 = xr.open_zarr(gcs.get_mapper(path), chunks='auto')
cluster = LocalCluster(
n_workers=30,
threads_per_worker=1,
memory_limit="auto",
local_directory="/tmp/dask",
dashboard_address=":8787",
)
client = Client(cluster)
Downloading Arctic Subset#
Select 1980–2020 data north of 60°N and load temperature, specific humidity, and sea ice cover into memory.
ds_arctic = full_era5.sel(time=slice('1980-01-01', '2020-01-02')).sel(latitude=slice(60, 90))
sic = ds_arctic.sea_ice_cover.load()
ds_arctic_subset = ds_arctic[['temperature', 'specific_humidity', 'sea_ice_cover']]
ds_arctic_subset = ds_arctic_subset.load()
Resampling by Season, Decade, and Sea Ice Cover#
Group the data by season, decade (yearly bins), and sea ice cover fraction, computing mean temperature and specific humidity profiles for each combination. This is computationally expensive (~10 min).
ds_arctic_subset.to_netcdf('data/era5_arctic_subset.nc')
ds_arctic_subset = xr.open_dataset("data/era5_arctic_subset.nc", chunks='auto')
ds_arctic_subset
ds_arctic_season = ds_arctic_subset.groupby('time.season')
import pandas as pd
seasons = ['DJF', 'MAM', 'JJA', 'SON']
decades = pd.date_range(start='1980-01-01', end='2020-01-01', freq='1YS')
sea_ice_covers = np.arange(0, 1.1, 0.1)
levels = ds_arctic_subset['level']
decade_bins = pd.cut(decades, bins=decades)[1:]
decade_mids = [decade.mid for decade in decade_bins]
sea_ice_cover_bins = pd.cut(sea_ice_covers, bins=sea_ice_covers)[1:]
sea_ice_cover_mids = [sea_ice_cover.mid for sea_ice_cover in sea_ice_cover_bins]
ds_out = xr.Dataset(
coords={
'season': seasons,
'decade': decade_mids,
'sea_ice_cover': sea_ice_cover_mids,
'level': levels,
},
data_vars={
't': (['season', 'decade', 'sea_ice_cover', 'level'], np.zeros((len(seasons), len(decade_mids), len(sea_ice_cover_mids), len(levels)))),
'q': (['season', 'decade', 'sea_ice_cover', 'level'], np.zeros((len(seasons), len(decade_mids), len(sea_ice_cover_mids), len(levels)))),
}
)
for season, ds in ds_arctic_season:
ds_decade = ds.groupby_bins('time', bins=decades)
for decade, ds in ds_decade:
str_decade = decade.mid
ds_gp_sic = ds.groupby_bins('sea_ice_cover', bins=sea_ice_covers).mean()
ds_out['t'].loc[{'season': season, 'decade': str_decade}] = ds_gp_sic['temperature'].values.T
ds_out['q'].loc[{'season': season, 'decade': str_decade}] = ds_gp_sic['specific_humidity'].values.T
print(f"Processed season {season}, decade {str_decade}, mean temperature {ds_gp_sic['temperature'].mean().values:.2f}, mean specific humidity {ds_gp_sic['specific_humidity'].mean().values:.5f}")
# Add units
ds_out['t'].attrs['units'] = 'K'
ds_out['q'].attrs['units'] = 'kg/kg'
ds_to_rs = ds_out.copy()
ds_to_rs = ds_to_rs.rename({
'level': 'pressure_level',
'decade': 'valid_time'
})
ds_to_rs['pressure_level'].attrs['units'] = 'hPa'
ds_to_rs.to_netcdf('data/era5_sea_ice_to_libradtran.nc')
# from cmocean import cm
# for i, decade in enumerate(ds_out.decade[::2]):
# fig, ax = plt.subplots(2, 4, figsize=(12, 7))
# from cycler import cycler
# for a in ax.flatten():
# a.set_yscale('log')
# a.set_prop_cycle(cycler(color=cm.ice(ds_out.sea_ice_cover)))
# a.set_ylim(1000, 100)
# ds_out.isel(season=0, decade=i).t.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[0, 0])
# ds_out.isel(season=1, decade=i).t.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[0, 1])
# ds_out.isel(season=2, decade=i).t.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[0, 2])
# ds_out.isel(season=3, decade=i).t.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[0, 3])
# ds_out.isel(season=0, decade=i).q.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[1, 0])
# ds_out.isel(season=1, decade=i).q.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[1, 1])
# ds_out.isel(season=2, decade=i).q.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[1, 2])
# ds_out.isel(season=3, decade=i).q.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[1, 3])
# plt.yscale('log')
# plt.ylim(1000, 100)
ds_trend = ds_out - ds_out.mean(dim='decade')
ds_out['theta'] = ds_out['t'] * (1000 / ds_out['level'])**0.286 # dry potential temperature
Trend Analysis#
Compute decadal trends in potential temperature and specific humidity, and visualize them by season and sea ice cover.
fig = plt.figure(figsize=(12, 10))
da_to_plot = ds_out.polyfit(dim='decade', deg=1).isel(degree=0).theta_polyfit_coefficients * (1e9 * 3600 * 24 * 365 * 10)
da_to_plot.plot.contourf(y='level', ylim=(1000, 100), yscale='log', col='season', robust=False, levels=np.arange(-1, 1, .1), cmap='RdBu_r')
plt.show()
Detailed Diagnostics#
Explore the data further: surface-level trends, DJF-specific patterns, and spatial sea ice trends.
ds_out.isel(season=0, level=-1).theta.plot(hue='sea_ice_cover')
da_to_plot.sel(season='DJF').plot.contourf(
ylim=(1000, 500), yscale='log', y='level', add_labels=False, levels=20, xlim=(0.7, 1)
)
fig = plt.figure(figsize=(12, 10))
da_to_plot = ds_out.polyfit(dim='decade', deg=1).isel(degree=0).q_polyfit_coefficients * (1e9 * 3600 * 24 * 365 * 10) * 1000
da_to_plot.plot.contourf(y='level', ylim=(1000, 100), yscale='log', col='season', robust=False, cmap=cm.diff_r, levels=30)
plt.show()
da_to_plot.mean('season').plot(ylim=(1000, 100), yscale='log', y='level')
ds_out.t.isel(season=0).plot.contourf(
levels=np.arange(245, 270, 2.5),
cmap=cm.thermal,
col='decade', col_wrap=4, yscale='log', y='level', add_labels=False, ylim=(1000, 100))
plt.ylim(1000, 700)
ds_trend.q.isel(season=0).plot(col='decade', col_wrap=4, yscale='log', y='level', add_labels=False, ylim=(1000, 100))
ds_trend.t.isel(season=0).plot(col='decade', col_wrap=4, yscale='log', y='level', add_labels=False, ylim=(1000, 100))
# set all yscales to logscale
ds_out.q.isel(season=0, sea_ice_cover=0, level=-1).plot()
from cartopy import crs as ccrs
import matplotlib.pyplot as plt
fig, ax = plt.subplots(
subplot_kw={'projection': ccrs.NorthPolarStereo()}
)
ds_arctic_subset_sic.polyfit('time', deg=1).isel(degree=0)['polyfit_coefficients'].plot(x='longitude', transform=ccrs.PlateCarree(), ax=ax)
ax.coastlines()