Advanced: Realistic Clouds from Observations (Thermal)#

In this notebook, we’ll construct realistic cloud profiles from lidar and radar observations during the HALO-(AC)³ campaign, and use them for thermal radiative transfer simulations. This demonstrates how to work with observational data to create physically consistent cloud input files.

Note

Campaign data required. This notebook uses data from the HALO-(AC)³ field experiment that is not publicly available. You can adapt the workflow to your own observational data by replacing the file paths.

# Import PyRadtran components
from pyradtran.config import PathsConfig, SimulationDefaults, SimulationConfig, ExecutionConfig, OutputConfig, load_config
from pyradtran.core import Simulation, generate_input_content
from pyradtran.io import parse_uvspec_output
import pyradtran
from dataclasses import asdict
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from pathlib import Path
import os
import yaml
import pandas as pd
import logging

# Configure logging for pyradtran
logging.getLogger('pyradtran').setLevel(logging.CRITICAL)

# Build configuration programmatically from master config
cfg = load_config()
cfg.simulation_defaults.rte_solver = "disort"
cfg.simulation_defaults.mol_abs_param = "reptran medium"
cfg.simulation_defaults.source = "thermal"
cfg.simulation_defaults.wavelength_nm = [10350, 11130]
cfg.simulation_defaults.integrate_wavelength = True
cfg.simulation_defaults.h2o_source = "radiosonde"
cfg.simulation_defaults.albedo_value = 0.3
cfg.simulation_defaults.surface_temperature_k = 273.15
cfg.simulation_defaults.ozone_du = 300.0
cfg.simulation_defaults.output_altitudes_km = [0]
cfg.simulation_defaults.output_columns = ["zout", "lambda", "eup", "edn", "edir", "eglo", "enet"]
cfg.execution.max_workers = 16
cfg.execution.cleanup_temp_files = False
cfg.execution.timeout_seconds = 60
cfg.output.filename_prefix = "radiosonde_thermal"
config_path = Path("config/radiosonde_thermal.yaml")
cfg.to_yaml(config_path)

Load Observation Data#

Load VELOX thermal infrared imagery from the HALO-(AC)³ flight RF13 on 2022-04-04.

ds = xr.open_dataset('/projekt_agmwend/data/HALO-AC3/02_Flights/HALO-AC3_20220404_HALO_RF13/VELOX/VELOX_327kveL/Processed/to_publish/final/HALO-AC3_HALO_VELOX_BT3_10740nm_20220404_RF13_v3.0.nc').sel(time=slice('2022-04-04T13:41:00', '2022-04-04T14:00:00')).load()

ds.BT_Center.plot(figsize=(20, 3))
# Static plot for a representative time step
time_idx = 240

fig, ax = plt.subplots(1, 2, figsize=(7, 3))
bins = np.linspace(-25, 0, 100)
ds.BT_2D.isel(time=time_idx).plot(cmap='viridis', vmin=-25, vmax=0, ax=ax[1], cbar_kwargs={'label': 'Brightness Temperature (K)'})
ds.BT_2D.isel(time=time_idx).plot.hist(ax=ax[0], bins=bins, color='gray', alpha=0.7, histtype='step', lw=2)
ax[0].set_xlabel('Brightness Temperature (°C)')
ax[0].set_ylabel('Count')
plt.tight_layout()
plt.show()

Prepare Dataset for Simulation#

Rename coordinates and set up the altitude grid for the radiative transfer simulation.

ds_sel = ds.rename(
    {'lat' : 'latitude', 'lon': 'longitude', 'alt': 'altitude'}
)
ds_sel['altitude_halo'] = ds_sel['altitude'] / 1000 
ds_sel = ds_sel.drop_vars('altitude')
ds_sel['altitude'] = ('altitude', np.arange(0, 15, 0.1))
#s_sel['altitude'] = ds_sel['altitude'] / 1000  # Convert altitude to km

Clear-Sky Thermal Simulation#

Run the broadband thermal radiative transfer simulation for clear-sky conditions along the flight track.

# Run a clear-sky thermal simulation for the dataset
print("Running batch thermal simulation (clear-sky)...")
config_path = Path('config/radiosonde_thermal.yaml')

ds_sim_cloud_free = ds_sel.pyradtran.run(
    config_path=config_path,
    return_dataset=True,
    save_to_file=True,
    parameter_overrides={
        'output_quantity' : 'brightness',
    },
    show_progress=False,
)

print("\nSimulation complete!")
ds_sim_cloud_free

Simulation Results#

Spectral radiative transfer calculations completed using DISORT solver with multi-level output altitude grid.

Construct Cloud Profiles from Lidar and Radar#

Use lidar backscatter and radar reflectivity observations to derive cloud top height and layer thickness, then generate parametric cloud input files for libRadtran.

# Generate cloud layers using the utility functions
from pyradtran.clouds import CloudLayer, CloudGenerator, CloudFileWriter

# Create simple parametric clouds
simple_clouds = CloudGenerator.from_simple_parameters(
    z_base_km=2.0,
    z_top_km=4.0,
    lwc_g_m3=0.1,
    r_eff_um=10.0,
    n_layers=5
)

# Write custom cloud file
custom_wc_file = Path('work/custom_water_cloud.dat')
CloudFileWriter.write_water_cloud_file(
    cloud_layers=simple_clouds,
    output_path=custom_wc_file,
    altitude_resolution_km=0.1
)
lidar = xr.open_dataset('/projekt_agmwend/home_rad/Joshua/HALO-AC3_unified_data/unified_bsrgl.nc').sel(time=ds.time).load()
lidar = lidar.where(lidar.backscatter_ratio > 100)
radar = xr.open_dataset('/projekt_agmwend/home_rad/Joshua/HALO-AC3_unified_data/unified_radar.nc').sel(time=ds.time).load()

fig, (ax, ax2) = plt.subplots(1, 2, figsize=(12, 3), gridspec_kw={'width_ratios': [1, .2]})

im_lidar = lidar.backscatter_ratio.where(lidar.backscatter_ratio > 100).plot(y='altitude', robust=True, add_colorbar=False, ax=ax)
im_radar = radar.dBZg.plot(y='height', robust=True, cmap='gray_r', alpha=.7, add_colorbar=False, ax=ax)
ax.set_ylim(0, 2000)

def cth_and_layer_thickness(ds, ds_lidar=lidar):
    """ Calculate cloud top height and layer thickness from lidar data.
    """
    # The lidar data is already filtered for backscatter_ratio > 10
    # We can find the cloud top and base by finding the max and min altitude where the data is not NaN
    
    # Ensure the lidar data is aligned with the ds time, using nearest neighbor interpolation
    ds_lidar_sel = ds_lidar.sel(time=ds.time, method='nearest')
    
    # Find cloud top height (highest altitude with significant backscatter)
    cth = ds_lidar_sel.altitude.where(ds_lidar_sel.backscatter_ratio.notnull()).max(dim='altitude')
    
    # Find cloud base height (lowest altitude with significant backscatter)
    cbh = ds_lidar_sel.altitude.where(ds_lidar_sel.backscatter_ratio.notnull()).min(dim='altitude')

    # if cth is smaller than 150 m, set it to NaN
    cth = cth.where(cth > 150, np.nan)
    cbh = cbh.where(cbh > 150, np.nan)

    
    # Calculate thickness
    thickness = cth - cbh
    # 1d cloudmask
    cloudmask = xr.where((ds_lidar_sel.backscatter_ratio > 100) & (ds_lidar_sel.altitude > 150), 1, 0)

    out_ds = xr.Dataset({
        'cth': cth,
        'thickness': thickness,
        'cloudmask': cloudmask.mean(dim='altitude')  # Mean cloud mask over altitude
    }, coords={
        'time': ds.time,
    })
    return out_ds

cth_ds = cth_and_layer_thickness(ds)
cth_ds = cth_ds.dropna('time')  # Drop times with NaN values
cth = cth_ds.cth
thickness = cth_ds.thickness
cloudmask = cth_ds.cloudmask

cth_interp = cth.rolling(time=5, center=True).mean().interpolate_na('time')
thickness_interp = thickness.rolling(time=5, center=True).mean().interpolate_na('time')
### assign thickness < 100 to 100
thickness_interp = thickness_interp.where(thickness_interp > 100, 100)
thickness_interp = thickness_interp.sel(time=cloudmask > 0)
cth_interp = cth_interp.sel(time=cloudmask > 0)
cth_interp = cth_interp.fillna(500)  # Fill NaNs with 0 for plotting

# print nans
print(f'{cth_interp.isnull().sum().values} NaNs in CTH; {thickness_interp.isnull().sum().values} NaNs in thickness')


cth_interp.plot(ax=ax, label='Cloud Top Height', color='blue')
(cth_interp - thickness_interp).plot(ax=ax, label='Cloud Base Height', color='orange')

lidar_bins = np.linspace(np.nanquantile(lidar.backscatter_ratio, 0.01), np.nanquantile(lidar.backscatter_ratio, 0.99), 50)

#lidar.backscatter_ratio.plot.hist(bins=lidar_bins, color='gray', alpha=0.7, histtype='step', lw=2)
# plot the cloud mask

_= ax2.hist(cth, bins=25)
ds_filtered = ds.sel(time=cth_ds.time)
ds_filtered

Cloudy-Sky Simulation#

Loop over each cloudy timestep, generate a cloud file from the observed cloud geometry, and run a thermal radiative transfer simulation.

# Run cloudy-sky thermal simulations for each filtered timestep
print("Running batch thermal simulation (cloudy)...")
config_path = Path('config/radiosonde_thermal.yaml')
logging.getLogger('pyradtran').setLevel(logging.ERROR)
from tqdm import tqdm

ds_sim_cloudy_list = []

for i in tqdm(range(len(ds_filtered.time))):
    # Select the current time slice
    ds_to_sim = ds_filtered.isel(time=[i]).copy().rename(
        {'lat': 'latitude', 'lon': 'longitude'}
    )
    alt = ds_to_sim.alt / 1e3  # Use mean altitude for the simulation
    ds_to_sim['altitude'] = ('altitude', alt.values)
    cth_i = cth_interp.isel(time=i).values / 1000  # Convert to km
    thickness_i = thickness_interp.isel(time=i).values / 1000  # Convert to km

    # Create simple parametric clouds
    simple_clouds = CloudGenerator.from_simple_parameters(
        z_base_km=cth_i - thickness_i,
        z_top_km=cth_i,
        lwc_g_m3=0.1,
        r_eff_um=10.0,
        n_layers=2
    )

    # Write custom cloud file
    custom_wc_file = Path('work/custom_water_cloud.dat')
    CloudFileWriter.write_water_cloud_file(
        cloud_layers=simple_clouds,
        output_path=custom_wc_file,
        altitude_resolution_km=0.01
    )

    ds_sim_cloudy = ds_to_sim.pyradtran.run(
        config_path=config_path,
        return_dataset=True,
        save_to_file=False,
        parameter_overrides={
            'output_quantity' : 'brightness',
            'wc_file 1D' : 'custom_water_cloud.dat',
        },
        show_progress=False,
    )

    ds_sim_cloudy['cloud_top_height'] = cth_i
    ds_sim_cloudy['cloud_thickness'] = thickness_i

    ds_sim_cloudy_list.append(ds_sim_cloudy)

print("\nSimulation complete!")
ds_sim_cloudy = xr.concat(ds_sim_cloudy_list, dim='time')
ds_sim_cloudy

Determine Cloud Presence Threshold#

Use the median brightness temperature to separate cloudy from clear-sky scenes in the VELOX imagery.

# Determine brightness temperature threshold for cloud presence
ds.BT_2D.plot.hist(bins=100)

from scipy.signal import find_peaks

# Compute the median brightness temperature across all pixels and time steps
median = float(ds.BT_2D.quantile(0.5, ['x', 'y', 'time']).values)
plt.axvline(median, color='red', linestyle='--', label='Median BT')
print(f"Median brightness temperature: {median:.2f} K")

# Use the median as a threshold to determine cloud presence
ds_cloudy = ds_filtered.where(ds_filtered.BT_2D < median)

Validation: Simulated vs. Measured Brightness Temperature#

Compare the simulated upwelling brightness temperatures against VELOX measurements to assess the quality of the cloud parameterization.

fig, (ax_hists, ax, ax_scatter) = plt.subplots(1, 3, figsize=(18, 3), gridspec_kw={'width_ratios': [.3, 1, .3]})
window = 50  # Define a window size for the scatter plot
y_max = 270
y_min = 250

ds_sim_cloudy.eup.plot(marker='o', color='blue', label='Simulated Upwelling BT', ax=ax)
(ds_cloudy.isel(x=slice(635//2-window, 635//2+window), y=slice(507//2-window, 507//2+window)).BT_2D.mean(['x', 'y']) + 273.15).plot(label='Measured Cloudy BT', color='red', linestyle='--', marker='x', ax=ax)
ax.set_xlabel('Time (UTC)')
ax.set_ylabel('Brightness Temperature 10.7µm (K)')
ax.set_ylim(y_min, y_max)


X = ds_sim_cloudy.eup.values
Y = (ds_cloudy.isel(x=slice(635//2-window, 635//2+window), y=slice(507//2-window, 507//2+window)).BT_2D.mean(['x', 'y']) + 273.15).values
nan_mask = ~np.isnan(X) & ~np.isnan(Y)
X = X[nan_mask]
Y = Y[nan_mask]
bins = np.arange(y_min, y_max, 1)
h = ax_scatter.hist2d(X, Y, label='Simulated Upwelling BT', bins=bins, cmin=1, cmap='viridis')
ax_scatter.set_xlabel('Simulated Upwelling BT (K)')
ax_scatter.set_ylabel('Measured Cloudy BT (K)')
# add a colorbar to the scatter plot
cbar = plt.colorbar(h[3], ax=ax_scatter, label='Counts')

from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
rmse = np.sqrt(mean_squared_error(X, Y))
from scipy.stats import linregress
slope, intercept, r_value, p_value, std_err = linregress(X, Y)
r2 = r_value**2

mae = mean_absolute_error(X, Y)
bias = np.mean(X - Y)

stats_text = f'RMSE: {rmse:.2f}\nR²: {r2:.2f}\nMAE: {mae:.2f}\nBias: {bias:.2f} K'
ax_scatter.text(0.6, 0.3, stats_text, transform=ax_scatter.transAxes, fontsize=10, verticalalignment='top')
ax_scatter.set_xlim(y_min, y_max)
ax_scatter.set_ylim(y_min, y_max)
ax_scatter.plot([y_min, y_max], [y_min, y_max], color='black', linestyle='--', label='1:1 Line', alpha=0.5)

ax_scatter.legend(loc='upper left')

ax_hists.hist(ds_sim_cloudy.eup.values, bins=bins, color='blue', alpha=0.5, histtype='stepfilled', lw=2, label='Simulated Upwelling BT')
ax_hists.hist(ds_sim_cloudy.eup.values, bins=bins, color='blue', histtype='step', lw=2)

ax_hists.hist((ds_cloudy.BT_2D.mean(['x', 'y']) + 273.15).values, bins=bins, color='red', alpha=0.5, histtype='stepfilled', lw=2, label='Measured Cloudy BT')
ax_hists.hist((ds_cloudy.BT_2D.mean(['x', 'y']) + 273.15).values, bins=bins, color='red', histtype='step', lw=2)

for a in (ax, ax_scatter, ax_hists):
    a.spines[['top', 'right']].set_visible(False)
    a.grid(True, linestyle='--', alpha=0.5)
    a.legend()
    a.set_title('')