Processing: ERA5 Sea Ice Profiles#

In this notebook, we’ll download and process ERA5 reanalysis data for the Arctic region, preparing atmospheric profiles suitable for pyRadtran simulations. This involves setting up a Dask cluster to handle the large ERA5 dataset efficiently.

Warning

This notebook downloads and processes a large ERA5 dataset. Expect 30–60 minutes for the download and 10+ minutes for the resampling step. A machine with sufficient RAM (32 GB+) and multiple CPU cores is recommended.

Overview:

  • We download ERA5 data of the Arctic from 1980 to 2020 and prepare it for use in pyRadtran simulations.

  • The raw data is stored in data/era5_arctic_subset.nc.

  • The resampled data has dimensions of season, decade, sea_ice_cover, and level.

  • The results are visualized in sea_ice_era5_plotting.ipynb and used for advanced simulations in era5_seasonal_sea_ice_profiles.ipynb.

Setting Up the Dask Cluster#

We access ERA5 data from Google Cloud Storage via the ARCO-ERA5 dataset and use a local Dask cluster for parallel processing.

import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import gcsfs
from dask.distributed import Client, LocalCluster

# Access ERA5 data from Google Cloud Storage
gcs = gcsfs.GCSFileSystem(token='anon')
path = 'gs://gcp-public-data-arco-era5/ar/1959-2022-6h-128x64_equiangular_with_poles_conservative.zarr/'
full_era5 = xr.open_zarr(gcs.get_mapper(path), chunks='auto')

cluster = LocalCluster(
    n_workers=30,
    threads_per_worker=1,
    memory_limit="auto",
    local_directory="/tmp/dask",
    dashboard_address=":8787",
)

client = Client(cluster)

Downloading Arctic Subset#

Select 1980–2020 data north of 60°N and load temperature, specific humidity, and sea ice cover into memory.

ds_arctic = full_era5.sel(time=slice('1980-01-01', '2020-01-02')).sel(latitude=slice(60, 90))
sic = ds_arctic.sea_ice_cover.load()
ds_arctic_subset = ds_arctic[['temperature', 'specific_humidity', 'sea_ice_cover']]

ds_arctic_subset = ds_arctic_subset.load()

Resampling by Season, Decade, and Sea Ice Cover#

Group the data by season, decade (yearly bins), and sea ice cover fraction, computing mean temperature and specific humidity profiles for each combination. This is computationally expensive (~10 min).

ds_arctic_subset.to_netcdf('data/era5_arctic_subset.nc')
ds_arctic_subset = xr.open_dataset("data/era5_arctic_subset.nc", chunks='auto')

ds_arctic_subset
ds_arctic_season = ds_arctic_subset.groupby('time.season')
import pandas as pd

seasons = ['DJF', 'MAM', 'JJA', 'SON']
decades = pd.date_range(start='1980-01-01', end='2020-01-01', freq='1YS')
sea_ice_covers = np.arange(0, 1.1, 0.1)
levels = ds_arctic_subset['level']

decade_bins = pd.cut(decades, bins=decades)[1:]
decade_mids = [decade.mid for decade in decade_bins]

sea_ice_cover_bins = pd.cut(sea_ice_covers, bins=sea_ice_covers)[1:]
sea_ice_cover_mids = [sea_ice_cover.mid for sea_ice_cover in sea_ice_cover_bins]

ds_out = xr.Dataset(
    coords={
        'season': seasons,
        'decade': decade_mids,
        'sea_ice_cover': sea_ice_cover_mids,
        'level': levels,
    },
    data_vars={
        't': (['season', 'decade', 'sea_ice_cover', 'level'], np.zeros((len(seasons), len(decade_mids), len(sea_ice_cover_mids), len(levels)))),
        'q': (['season', 'decade', 'sea_ice_cover', 'level'], np.zeros((len(seasons), len(decade_mids), len(sea_ice_cover_mids), len(levels)))),
    }
)

for season, ds in ds_arctic_season:

    ds_decade = ds.groupby_bins('time', bins=decades)
    for decade, ds in ds_decade:

        str_decade = decade.mid
        ds_gp_sic = ds.groupby_bins('sea_ice_cover', bins=sea_ice_covers).mean()
        ds_out['t'].loc[{'season': season, 'decade': str_decade}] = ds_gp_sic['temperature'].values.T
        ds_out['q'].loc[{'season': season, 'decade': str_decade}] = ds_gp_sic['specific_humidity'].values.T
        print(f"Processed season {season}, decade {str_decade}, mean temperature {ds_gp_sic['temperature'].mean().values:.2f}, mean specific humidity {ds_gp_sic['specific_humidity'].mean().values:.5f}")

# Add units
ds_out['t'].attrs['units'] = 'K'
ds_out['q'].attrs['units'] = 'kg/kg'

ds_to_rs = ds_out.copy()

ds_to_rs = ds_to_rs.rename({
    'level': 'pressure_level',
    'decade': 'valid_time'
})

ds_to_rs['pressure_level'].attrs['units'] = 'hPa'

ds_to_rs.to_netcdf('data/era5_sea_ice_to_libradtran.nc')
# from cmocean import cm


# for i, decade in enumerate(ds_out.decade[::2]):
    


#     fig, ax = plt.subplots(2, 4, figsize=(12, 7))
#     from cycler import cycler

#     for a in ax.flatten():
#         a.set_yscale('log')
#         a.set_prop_cycle(cycler(color=cm.ice(ds_out.sea_ice_cover)))
#         a.set_ylim(1000, 100)

#     ds_out.isel(season=0, decade=i).t.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[0, 0])
#     ds_out.isel(season=1, decade=i).t.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[0, 1])
#     ds_out.isel(season=2, decade=i).t.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[0, 2])
#     ds_out.isel(season=3, decade=i).t.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[0, 3])

#     ds_out.isel(season=0, decade=i).q.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[1, 0])
#     ds_out.isel(season=1, decade=i).q.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[1, 1])
#     ds_out.isel(season=2, decade=i).q.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[1, 2])
#     ds_out.isel(season=3, decade=i).q.plot(hue='sea_ice_cover', add_legend=False, y='level', ax=ax[1, 3])


#     plt.yscale('log')
#     plt.ylim(1000, 100)
ds_trend = ds_out - ds_out.mean(dim='decade') 
ds_out['theta'] = ds_out['t'] * (1000 / ds_out['level'])**0.286 # dry potential temperature

Trend Analysis#

Compute decadal trends in potential temperature and specific humidity, and visualize them by season and sea ice cover.

fig = plt.figure(figsize=(12, 10))

da_to_plot = ds_out.polyfit(dim='decade', deg=1).isel(degree=0).theta_polyfit_coefficients * (1e9 * 3600 * 24 * 365 * 10)
da_to_plot.plot.contourf(y='level', ylim=(1000, 100), yscale='log', col='season', robust=False, levels=np.arange(-1, 1, .1), cmap='RdBu_r')
plt.show()

Detailed Diagnostics#

Explore the data further: surface-level trends, DJF-specific patterns, and spatial sea ice trends.

ds_out.isel(season=0, level=-1).theta.plot(hue='sea_ice_cover')
da_to_plot.sel(season='DJF').plot.contourf(
    ylim=(1000, 500), yscale='log', y='level', add_labels=False, levels=20, xlim=(0.7, 1)
)
fig = plt.figure(figsize=(12, 10))

da_to_plot = ds_out.polyfit(dim='decade', deg=1).isel(degree=0).q_polyfit_coefficients * (1e9 * 3600 * 24 * 365 * 10) * 1000
da_to_plot.plot.contourf(y='level', ylim=(1000, 100), yscale='log', col='season', robust=False, cmap=cm.diff_r, levels=30)
plt.show()
da_to_plot.mean('season').plot(ylim=(1000, 100), yscale='log', y='level')
ds_out.t.isel(season=0).plot.contourf(
    levels=np.arange(245, 270, 2.5),    
    cmap=cm.thermal,
    col='decade', col_wrap=4, yscale='log', y='level', add_labels=False, ylim=(1000, 100))

plt.ylim(1000, 700)
ds_trend.q.isel(season=0).plot(col='decade', col_wrap=4, yscale='log', y='level', add_labels=False, ylim=(1000, 100))
ds_trend.t.isel(season=0).plot(col='decade', col_wrap=4, yscale='log', y='level', add_labels=False, ylim=(1000, 100))
# set all yscales to logscale 
ds_out.q.isel(season=0, sea_ice_cover=0, level=-1).plot()
from cartopy import crs as ccrs
import matplotlib.pyplot as plt 

fig, ax = plt.subplots(
    subplot_kw={'projection': ccrs.NorthPolarStereo()}
)

ds_arctic_subset_sic.polyfit('time', deg=1).isel(degree=0)['polyfit_coefficients'].plot(x='longitude', transform=ccrs.PlateCarree(), ax=ax)
ax.coastlines()