Advanced: Realistic Clouds from Observations (Solar Spectral)

Advanced: Realistic Clouds from Observations (Solar Spectral)#

In this notebook, we’ll apply the same cloud-construction approach as the thermal notebook, but for spectral solar simulations. This allows us to study the wavelength-dependent effect of observed cloud properties on downwelling solar radiation.

Note

Campaign data required. This notebook uses data from the HALO-(AC)³ field experiment that is not publicly available. You can adapt the workflow to your own observational data by replacing the file paths.

# Import PyRadtran components
from pyradtran.config import PathsConfig, SimulationDefaults, SimulationConfig, ExecutionConfig, OutputConfig, load_config
from pyradtran.core import Simulation, generate_input_content
from pyradtran.io import parse_uvspec_output
import pyradtran
from dataclasses import asdict
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from pathlib import Path
import os
import yaml
import pandas as pd
import logging

# Configure logging for pyradtran
logging.getLogger('pyradtran').setLevel(logging.CRITICAL)

# Build configuration programmatically from master config
cfg = load_config()
cfg.simulation_defaults.rte_solver = "disort"
cfg.simulation_defaults.mol_abs_param = "reptran per_nm"
cfg.simulation_defaults.source = "solar"
cfg.simulation_defaults.wavelength_nm = [1024, 2400]
cfg.simulation_defaults.integrate_wavelength = False
cfg.simulation_defaults.h2o_source = "radiosonde"
cfg.simulation_defaults.albedo_value = 0.3
cfg.simulation_defaults.surface_temperature_k = 273.15
cfg.simulation_defaults.ozone_du = 300.0
cfg.simulation_defaults.output_altitudes_km = [0]
cfg.simulation_defaults.output_columns = ["zout", "lambda", "sza", "edir", "eglo", "edn", "eup", "enet", "albedo"]
cfg.execution.max_workers = 16
cfg.execution.cleanup_temp_files = False
cfg.execution.timeout_seconds = 60
cfg.output.filename_prefix = "radiosonde_solar_spectral"
config_path = Path("config/radiosonde_solar_spectral.yaml")
cfg.to_yaml(config_path)
ds = xr.open_dataset('/projekt_agmwend/data/HALO-AC3/02_Flights/HALO-AC3_20220404_HALO_RF13/VELOX/VELOX_327kveL/Processed/to_publish/final/HALO-AC3_HALO_VELOX_BT3_10740nm_20220404_RF13_v3.0.nc').sel(time=slice('2022-04-04T13:41:00', '2022-04-04T14:00:00')).load()

ds.BT_Center.plot(figsize=(20, 3))
ds_specMACS = xr.open_zarr('/projekt_agmwend/data/HALO-AC3/02_Flights/HALO-AC3_20220404_HALO_RF13/specMACS/specMACS_RF13.zarr/').sel(time=slice('2022-04-04T13:41:00', '2022-04-04T14:00:00')).load()
ds_specMACS.radiance.isel(wavelength=0).plot(x='time', figsize=(20, 3))
# Static plot for a representative time step
time_idx = 240

fig, ax = plt.subplots(1, 2, figsize=(7, 3))
bins = np.linspace(-25, 0, 100)
ds.BT_2D.isel(time=time_idx).plot(cmap='viridis', vmin=-25, vmax=0, ax=ax[1], cbar_kwargs={'label': 'Brightness Temperature (K)'})
ds.BT_2D.isel(time=time_idx).plot.hist(ax=ax[0], bins=bins, color='gray', alpha=0.7, histtype='step', lw=2)
ax[0].set_xlabel('Brightness Temperature (°C)')
ax[0].set_ylabel('Count')
plt.tight_layout()
plt.show()
ds_sel = ds.rename(
    {'lat' : 'latitude', 'lon': 'longitude', 'alt': 'altitude'}
)
ds_sel['altitude_halo'] = ds_sel['altitude'] / 1000 
ds_sel = ds_sel.drop_vars('altitude')
ds_sel['altitude'] = ('altitude', np.arange(0, 15, 0.1))
#s_sel['altitude'] = ds_sel['altitude'] / 1000  # Convert altitude to km
ds_specMACS.wavelength
# Run a clear-sky solar spectral simulation for the dataset
print("Running batch solar spectral simulation (clear-sky)...")
config_path = Path('config/radiosonde_solar_spectral.yaml')

ds_sim_cloud_free = ds_sel.pyradtran.run(
    config_path=config_path,
    return_dataset=True,
    save_to_file=True,
    show_progress=False,
)

print("\nSimulation complete!")
ds_sim_cloud_free

Simulation Results#

Spectral radiative transfer calculations completed using DISORT solver with multi-level output altitude grid.

# Generate cloud layers using the utility functions
from pyradtran.clouds import CloudLayer, CloudGenerator, CloudFileWriter

# Create simple parametric clouds
simple_clouds = CloudGenerator.from_simple_parameters(
    z_base_km=2.0,
    z_top_km=4.0,
    lwc_g_m3=0.1,
    r_eff_um=10.0,
    n_layers=5
)

# Write custom cloud file
custom_wc_file = Path('work/custom_water_cloud.dat')
CloudFileWriter.write_water_cloud_file(
    cloud_layers=simple_clouds,
    output_path=custom_wc_file,
    altitude_resolution_km=0.1
)
lidar = xr.open_dataset('/projekt_agmwend/home_rad/Joshua/HALO-AC3_unified_data/unified_bsrgl.nc').sel(time=ds.time).load()
lidar = lidar.where(lidar.backscatter_ratio > 100)
radar = xr.open_dataset('/projekt_agmwend/home_rad/Joshua/HALO-AC3_unified_data/unified_radar.nc').sel(time=ds.time).load()

fig, (ax, ax2) = plt.subplots(1, 2, figsize=(12, 3), gridspec_kw={'width_ratios': [1, .2]})

im_lidar = lidar.backscatter_ratio.where(lidar.backscatter_ratio > 100).plot(y='altitude', robust=True, add_colorbar=False, ax=ax)
im_radar = radar.dBZg.plot(y='height', robust=True, cmap='gray_r', alpha=.7, add_colorbar=False, ax=ax)
ax.set_ylim(0, 2000)

def cth_and_layer_thickness(ds, ds_lidar=lidar):
    """ Calculate cloud top height and layer thickness from lidar data.
    """
    # The lidar data is already filtered for backscatter_ratio > 10
    # We can find the cloud top and base by finding the max and min altitude where the data is not NaN
    
    # Ensure the lidar data is aligned with the ds time, using nearest neighbor interpolation
    ds_lidar_sel = ds_lidar.sel(time=ds.time, method='nearest')
    
    # Find cloud top height (highest altitude with significant backscatter)
    cth = ds_lidar_sel.altitude.where(ds_lidar_sel.backscatter_ratio.notnull()).max(dim='altitude')
    
    # Find cloud base height (lowest altitude with significant backscatter)
    cbh = ds_lidar_sel.altitude.where(ds_lidar_sel.backscatter_ratio.notnull()).min(dim='altitude')

    # if cth is smaller than 150 m, set it to NaN
    cth = cth.where(cth > 150, np.nan)
    cbh = cbh.where(cbh > 150, np.nan)

    
    # Calculate thickness
    thickness = cth - cbh
    # 1d cloudmask
    cloudmask = xr.where((ds_lidar_sel.backscatter_ratio > 100) & (ds_lidar_sel.altitude > 150), 1, 0)

    out_ds = xr.Dataset({
        'cth': cth,
        'thickness': thickness,
        'cloudmask': cloudmask.mean(dim='altitude')  # Mean cloud mask over altitude
    }, coords={
        'time': ds.time,
    })
    return out_ds

cth_ds = cth_and_layer_thickness(ds)
cth_ds = cth_ds.dropna('time')  # Drop times with NaN values
cth = cth_ds.cth
thickness = cth_ds.thickness
cloudmask = cth_ds.cloudmask

cth_interp = cth.rolling(time=5, center=True).mean().interpolate_na('time')
thickness_interp = thickness.rolling(time=5, center=True).mean().interpolate_na('time')
### assign thickness < 100 to 100
thickness_interp = thickness_interp.where(thickness_interp > 100, 100)
thickness_interp = thickness_interp.sel(time=cloudmask > 0)
cth_interp = cth_interp.sel(time=cloudmask > 0)
cth_interp = cth_interp.fillna(500)  # Fill NaNs with 0 for plotting

# print nans
print(f'{cth_interp.isnull().sum().values} NaNs in CTH; {thickness_interp.isnull().sum().values} NaNs in thickness')


cth_interp.plot(ax=ax, label='Cloud Top Height', color='blue')
(cth_interp - thickness_interp).plot(ax=ax, label='Cloud Base Height', color='orange')

lidar_bins = np.linspace(np.nanquantile(lidar.backscatter_ratio, 0.01), np.nanquantile(lidar.backscatter_ratio, 0.99), 50)

#lidar.backscatter_ratio.plot.hist(bins=lidar_bins, color='gray', alpha=0.7, histtype='step', lw=2)
# plot the cloud mask

_= ax2.hist(cth, bins=25)
ds_filtered = ds.sel(time=cth_ds.time)
ds_filtered
# Run cloudy-sky solar spectral simulations for each filtered timestep
print("Running batch solar spectral simulation (cloudy)...")
config_path = Path('config/radiosonde_solar_spectral.yaml')
logging.getLogger('pyradtran').setLevel(logging.ERROR)
from tqdm import tqdm

ds_sim_cloudy_list = []

for i in tqdm(range(len(ds_filtered.time))):
    # Select the current time slice
    ds_to_sim = ds_filtered.isel(time=[i]).copy().rename(
        {'lat': 'latitude', 'lon': 'longitude'}
    )
    alt = ds_to_sim.alt / 1e3  # Use mean altitude for the simulation
    ds_to_sim['altitude'] = ('altitude', alt.values)
    cth_i = cth_interp.isel(time=i).values / 1000  # Convert to km
    thickness_i = thickness_interp.isel(time=i).values / 1000  # Convert to km

    # Create simple parametric clouds
    simple_clouds = CloudGenerator.from_simple_parameters(
        z_base_km=cth_i - thickness_i,
        z_top_km=cth_i,
        lwc_g_m3=0.1,
        r_eff_um=10.0,
        n_layers=2
    )

    # Write custom cloud file
    custom_wc_file = Path('work/custom_water_cloud.dat')
    CloudFileWriter.write_water_cloud_file(
        cloud_layers=simple_clouds,
        output_path=custom_wc_file,
        altitude_resolution_km=0.01
    )

    ds_sim_cloudy = ds_to_sim.pyradtran.run(
        config_path=config_path,
        return_dataset=True,
        save_to_file=False,
        parameter_overrides={
            'output_quantity' : 'brightness',
            'wc_file 1D' : 'custom_water_cloud.dat',
        },
        show_progress=False,
    )

    ds_sim_cloudy['cloud_top_height'] = cth_i
    ds_sim_cloudy['cloud_thickness'] = thickness_i

    ds_sim_cloudy_list.append(ds_sim_cloudy)

print("\nSimulation complete!")
ds_sim_cloudy
ds_sim_cloudy = xr.concat(ds_sim_cloudy_list, dim='time')
ds_sim_cloudy
# Determine brightness temperature threshold for cloud presence
ds.BT_2D.plot.hist(bins=100)

from scipy.signal import find_peaks

# Compute the median brightness temperature across all pixels and time steps
median = float(ds.BT_2D.quantile(0.5, ['x', 'y', 'time']).values)
plt.axvline(median, color='red', linestyle='--', label='Median BT')
print(f"Median brightness temperature: {median:.2f} K")

# Use the median as a threshold to determine cloud presence
ds_cloudy = ds_filtered.where(ds_filtered.BT_2D < median)
fig, (ax_hists, ax, ax_scatter) = plt.subplots(1, 3, figsize=(18, 3), gridspec_kw={'width_ratios': [.3, 1, .3]})
window = 50  # Define a window size for the scatter plot
y_max = 270
y_min = 250

ds_sim_cloudy.eup.plot(marker='o', color='blue', label='Simulated Upwelling BT', ax=ax)
(ds_cloudy.isel(x=slice(635//2-window, 635//2+window), y=slice(507//2-window, 507//2+window)).BT_2D.mean(['x', 'y']) + 273.15).plot(label='Measured Cloudy BT', color='red', linestyle='--', marker='x', ax=ax)
ax.set_xlabel('Time (UTC)')
ax.set_ylabel('Brightness Temperature 10.7µm (K)')
ax.set_ylim(y_min, y_max)


X = ds_sim_cloudy.eup.values
Y = (ds_cloudy.isel(x=slice(635//2-window, 635//2+window), y=slice(507//2-window, 507//2+window)).BT_2D.mean(['x', 'y']) + 273.15).values
nan_mask = ~np.isnan(X) & ~np.isnan(Y)
X = X[nan_mask]
Y = Y[nan_mask]
bins = np.arange(y_min, y_max, 1)
h = ax_scatter.hist2d(X, Y, label='Simulated Upwelling BT', bins=bins, cmin=1, cmap='viridis')
ax_scatter.set_xlabel('Simulated Upwelling BT (K)')
ax_scatter.set_ylabel('Measured Cloudy BT (K)')
# add a colorbar to the scatter plot
cbar = plt.colorbar(h[3], ax=ax_scatter, label='Counts')

from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
rmse = np.sqrt(mean_squared_error(X, Y))
from scipy.stats import linregress
slope, intercept, r_value, p_value, std_err = linregress(X, Y)
r2 = r_value**2

mae = mean_absolute_error(X, Y)
bias = np.mean(X - Y)

stats_text = f'RMSE: {rmse:.2f}\nR²: {r2:.2f}\nMAE: {mae:.2f}\nBias: {bias:.2f} K'
ax_scatter.text(0.6, 0.3, stats_text, transform=ax_scatter.transAxes, fontsize=10, verticalalignment='top')
ax_scatter.set_xlim(y_min, y_max)
ax_scatter.set_ylim(y_min, y_max)
ax_scatter.plot([y_min, y_max], [y_min, y_max], color='black', linestyle='--', label='1:1 Line', alpha=0.5)

ax_scatter.legend(loc='upper left')

ax_hists.hist(ds_sim_cloudy.eup.values, bins=bins, color='blue', alpha=0.5, histtype='stepfilled', lw=2, label='Simulated Upwelling BT')
ax_hists.hist(ds_sim_cloudy.eup.values, bins=bins, color='blue', histtype='step', lw=2)

ax_hists.hist((ds_cloudy.BT_2D.mean(['x', 'y']) + 273.15).values, bins=bins, color='red', alpha=0.5, histtype='stepfilled', lw=2, label='Measured Cloudy BT')
ax_hists.hist((ds_cloudy.BT_2D.mean(['x', 'y']) + 273.15).values, bins=bins, color='red', histtype='step', lw=2)

for a in (ax, ax_scatter, ax_hists):
    a.spines[['top', 'right']].set_visible(False)
    a.grid(True, linestyle='--', alpha=0.5)
    a.legend()
    a.set_title('')