Source code for climada.hazard.forecast

"""
This file is part of CLIMADA.

Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.

CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.

CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.

---

Define Forecast variant of Hazard.
"""

import logging
import pathlib
from typing import Any, Dict, Literal, Optional

import numpy as np
import xarray as xr
from packaging.version import Version
from scipy import sparse

import climada.util.constants as u_const
from climada.hazard.base import Hazard
from climada.hazard.xarray import HazardXarrayReader
from climada.util.checker import size
from climada.util.forecast import ForecastMixin, reduce_unique_selection

LOGGER = logging.getLogger(__name__)

XARRAY_TIMEDELTA_BUG_BEGIN = Version("2025.04.0")
XARRAY_TIMEDELTA_BUG_END = Version("2025.07.0")



[docs]
def xarray_has_timedelta_bug() -> bool:
    """Return True if xarray contains the timedelta bug

    See https://docs.xarray.dev/en/stable/whats-new.html#id80
    """
    return (Version(xr.__version__) >= XARRAY_TIMEDELTA_BUG_BEGIN) and (
        Version(xr.__version__) < XARRAY_TIMEDELTA_BUG_END
    )




[docs]
class HazardForecast(ForecastMixin, Hazard):
    """A hazard object with forecast information"""


[docs]
    def __init__(
        self,
        lead_time: np.ndarray | None = None,
        member: np.ndarray | None = None,
        **hazard_kwargs,
    ):
        """
        Initialize a HazardForecast object.

        Parameters
        ----------
        lead_time : np.ndarray of np.timedelta64 or None, optional
            Forecast lead times. Default is empty array.
        member : np.ndarray or None, optional
            Ensemble member identifiers as integers. Default is empty array.
        **hazard_kwargs
            keyword arguments to pass to :py:class:`~climada.hazard.base.Hazard` See
            py:meth:`~climada.hazard.base.Hazard.__init__` for details.
        """
        super().__init__(lead_time=lead_time, member=member, **hazard_kwargs)
        self._check_sizes()



[docs]
    @classmethod
    def from_hazard(cls, hazard: Hazard, lead_time: np.ndarray, member: np.ndarray):
        """
        Create a HazardForecast object from a Hazard object.

        Parameters
        ----------
        hazard : Hazard
            Hazard object to convert into a HazardForecast.
        lead_time : np.ndarray of np.timedelta64 or None, optional
            Forecast lead times. Default is empty array.
        member : np.ndarray or None, optional
            Ensemble member identifiers as integers. Default is empty array.

        Returns
        -------
        HazardForecast
            A HazardForecast object with the same attributes as the input hazard,
            but with lead_time and member attributes set from instance of HazardForecast.
        """
        return cls(
            lead_time=lead_time,
            member=member,
            haz_type=hazard.haz_type,
            pool=hazard.pool,
            units=hazard.units,
            centroids=hazard.centroids,
            event_id=hazard.event_id,
            frequency=hazard.frequency,
            frequency_unit=hazard.frequency_unit,
            orig=hazard.orig,
            event_name=hazard.event_name,
            date=hazard.date,
            intensity=hazard.intensity,
            fraction=hazard.fraction,
        )


    def _check_sizes(self):
        """Check sizes of forecast data vs. hazard data.

        Raises
        ------
        ValueError
            If the sizes of the forecast data do not match the
            :py:attr:`~climada.hazard.base.Hazard.event_id`
        """
        num_entries = len(self.event_id)
        size(exp_len=num_entries, var=self.member, var_name="Forecast.member")
        size(exp_len=num_entries, var=self.lead_time, var_name="Forecast.lead_time")


[docs]
    def min(self, dim: Literal["member", "lead_time"] | None = None):
        """Reduce the intensity and fraction to the minimum value.

        Parameters
        ----------
        dim : "member", "lead_time", or None
            Dimension to reduce over. If None, reduce over all data.

        Returns
        -------
        HazardForecast
            A HazardForecast object with the min intensity and fraction.

        Warning
        -------
        This reduces :py:attr:`~climada.hazard.base.Hazard.intensity`
        and :py:attr:`~climada.hazard.base.Hazard.fraction` independently, possibly
        leading to an inconsistent result: Reduced intensity and fraction values of a
        single centroid might then originate from different events!
        """
        if dim is not None:
            rdim = self._reduce_iter_dim(dim)
            return reduce_unique_selection(self, attr=rdim, reduce_method="min")

        red_intensity = self.intensity.min(axis=0).tocsr()
        red_fraction = self.fraction.min(axis=0).tocsr()
        return HazardForecast(
            haz_type=self.haz_type,
            pool=self.pool,
            units=self.units,
            centroids=self.centroids,
            frequency_unit=self.frequency_unit,
            intensity=red_intensity,
            fraction=red_fraction,
            **self._reduce_attrs("min"),
        )



[docs]
    def max(self, dim: Literal["member", "lead_time"] | None = None):
        """Reduce the intensity and fraction to the maximum value.

        Parameters
        ----------
        dim : "member", "lead_time", or None
            Dimension to reduce over. If None, reduce over all data.

        Returns
        -------
        HazardForecast
            A HazardForecast object with the max intensity and fraction.

        Warning
        -------
        This reduces :py:attr:`~climada.hazard.base.Hazard.intensity`
        and :py:attr:`~climada.hazard.base.Hazard.fraction` independently, possibly
        leading to an inconsistent result: Reduced intensity and fraction values of a
        single centroid might then originate from different events!
        """
        if dim is not None:
            rdim = self._reduce_iter_dim(dim)
            return reduce_unique_selection(self, attr=rdim, reduce_method="max")

        red_intensity = self.intensity.max(axis=0).tocsr()
        red_fraction = self.fraction.max(axis=0).tocsr()
        return HazardForecast(
            haz_type=self.haz_type,
            pool=self.pool,
            units=self.units,
            centroids=self.centroids,
            frequency_unit=self.frequency_unit,
            intensity=red_intensity,
            fraction=red_fraction,
            **self._reduce_attrs("max"),
        )



[docs]
    def mean(self, dim: Literal["member", "lead_time"] | None = None):
        """Reduce the intensity and fraction to the mean value.

        Parameters
        ----------
        dim : "member", "lead_time", or None
            Dimension to reduce over. If None, reduce over all data.

        Returns
        -------
        HazardForecast
            A HazardForecast object with the mean intensity and fraction.

        Warning
        -------
        This reduces :py:attr:`~climada.hazard.base.Hazard.intensity`
        and :py:attr:`~climada.hazard.base.Hazard.fraction` independently, possibly
        leading to an inconsistent result: Reduced intensity and fraction values of a
        single centroid might then originate from different events!
        """
        if dim is not None:
            rdim = self._reduce_iter_dim(dim)
            return reduce_unique_selection(self, attr=rdim, reduce_method="mean")

        red_intensity = sparse.csr_matrix(self.intensity.mean(axis=0))
        red_fraction = sparse.csr_matrix(self.fraction.mean(axis=0))
        return HazardForecast(
            haz_type=self.haz_type,
            pool=self.pool,
            units=self.units,
            centroids=self.centroids,
            frequency_unit=self.frequency_unit,
            intensity=red_intensity,
            fraction=red_fraction,
            **self._reduce_attrs("mean"),
        )


    # TODO: Do not densify the entire matrix but compute quantiles column-wise!
    def _quantile(
        self,
        q: float,
        dim: Literal["member", "lead_time"] | None = None,
        event_name: str | None = None,
    ):
        """Reduce intensity and fraction to the quantile value."""
        if dim is not None:
            rdim = self._reduce_iter_dim(dim)
            return reduce_unique_selection(
                self,
                attr=rdim,
                reduce_method="quantile",
                q=q,
            )

        red_intensity = sparse.csr_matrix(
            np.quantile(self.intensity.toarray(), q, axis=0)
        )
        red_fraction = sparse.csr_matrix(
            np.quantile(self.fraction.toarray(), q, axis=0)
        )
        if event_name is None:
            event_name = f"quantile_{q}"
        return HazardForecast(
            haz_type=self.haz_type,
            pool=self.pool,
            units=self.units,
            centroids=self.centroids,
            frequency_unit=self.frequency_unit,
            intensity=red_intensity,
            fraction=red_fraction,
            **self._reduce_attrs(event_name),
        )


[docs]
    def quantile(self, q: float, dim: Literal["member", "lead_time"] | None = None):
        """Reduce intensity and fraction to the quantile value.

        The quantile value is computed by taking the quantile of the impact matrix
        along the event dimension axis (axis=0) and then taking the quantile of the
        resulting array.

        Parameters
        ----------
        q : float
            The quantile to compute, between 0 and 1.
        dim : "member", "lead_time", or None
            The dimension to reduce when computing the quantile. If ``None`` (default),
            this computes the centroid-wise quantile over the entire matrix.

        Returns
        -------
        HazardForecast
            A HazardForecast object with the quantile intensity and fraction.

        Warning
        -------
        This reduces :py:attr:`~climada.hazard.base.Hazard.intensity`
        and :py:attr:`~climada.hazard.base.Hazard.fraction` independently, possibly
        leading to an inconsistent result: Reduced intensity and fraction values of a
        single centroid might then originate from different events!
        """
        return self._quantile(q=q, dim=dim)



[docs]
    def median(self, dim: Literal["member", "lead_time"] | None = None):
        """Reduce the intensity and fraction to the median value.

        The median value is computed by taking the median of the impact matrix along the
        event dimension axis (axis=0) and then taking the median of the resulting array.

        Parameters
        ----------
        dim : "member", "lead_time", or None
            The dimension to reduce when computing the median. If ``None`` (default),
            this computes the centroid-wise median over the entire matrix.

        Returns
        -------
        HazardForecast
            A HazardForecast object with the median intensity and fraction.

        Warning
        -------
        This reduces :py:attr:`~climada.hazard.base.Hazard.intensity`
        and :py:attr:`~climada.hazard.base.Hazard.fraction` independently, possibly
        leading to an inconsistent result: Reduced intensity and fraction values of a
        single centroid might then originate from different events!
        """
        return self._quantile(q=0.5, event_name="median", dim=dim)



[docs]
    @classmethod
    def concat(cls, haz_list: list):
        """Concatenate multiple HazardForecast instances and return a new object"""
        if len(haz_list) == 0:
            return cls()
        hazard = Hazard.concat(haz_list)
        lead_time = np.concatenate(tuple(haz.lead_time for haz in haz_list))
        member = np.concatenate(tuple(haz.member for haz in haz_list))
        return cls.from_hazard(hazard, lead_time=lead_time, member=member)



[docs]
    def select(
        self,
        event_names=None,
        event_id=None,
        date=None,
        orig=None,
        reg_id=None,
        extent=None,
        reset_frequency=False,
        member=None,
        lead_time=None,
    ):
        """Select entries based on the parameters and return a new instance.

        The selection will contain the intersection of all given parameters.

        Parameters
        ----------
        member : Sequence of ints
            Ensemble members to select
        lead_time : Sequence of numpy.timedelta64
            Lead times to select

        See Also
        --------
        :py:meth:`~climada.hazard.base.Hazard.select`
        """
        if member is not None or lead_time is not None:
            mask_member = (
                self.idx_member(member)
                if member is not None
                else np.full_like(self.member, True, dtype=bool)
            )
            mask_lead_time = (
                self.idx_lead_time(lead_time)
                if lead_time is not None
                else np.full_like(self.lead_time, True, dtype=bool)
            )
            event_id_from_forecast_mask = np.asarray(self.event_id)[
                (mask_member & mask_lead_time)
            ]
            event_id = (
                np.intersect1d(event_id, event_id_from_forecast_mask)
                if event_id is not None
                else event_id_from_forecast_mask
            )
            if event_id.size == 0:
                raise ValueError(
                    "Empty selection for 'member', 'lead_time', and/or 'event_id'"
                )

        return super().select(
            event_names=event_names,
            event_id=event_id,
            date=date,
            orig=orig,
            reg_id=reg_id,
            extent=extent,
            reset_frequency=reset_frequency,
        )



[docs]
    @classmethod
    def from_xarray_raster(
        cls,
        data: xr.Dataset | pathlib.Path | str,
        hazard_type: str,
        intensity_unit: str,
        *,
        intensity: Optional[str] = None,
        coordinate_vars: Optional[Dict[str, str]] = None,
        data_vars: Optional[Dict[str, str]] = None,
        crs: str = u_const.DEF_CRS,
        rechunk: bool = False,
        open_dataset_kws: dict[str, Any] | None = None,
    ):
        """Read forecast hazard data from an xarray Dataset

        This extends the parent
        :py:meth:`~climada.hazard.io.HazardIO.from_xarray_raster` to handle forecast
        dimensions (lead_time and member). For forecast data, the "event" dimension is
        constructed from the Cartesian product of lead_time and member dimensions, so
        you don't need to specify an "event" coordinate.

        Parameters
        ----------
        data : xarray.Dataset or Path or str
            The filepath to read the data from or the already opened dataset
        hazard_type : str
            The type identifier of the hazard
        intensity_unit : str
            The physical units of the intensity
        intensity : str, optional
            Identifier of the DataArray containing the hazard intensity data
        coordinate_vars : dict(str, str), optional
            Mapping from default coordinate names to coordinate names in the data.
            For HazardForecast, should include:
            - ``"lead_time"``: name of the lead time coordinate (required)
            - ``"member"``: name of the ensemble member coordinate (required)
            - ``"longitude"``: name of longitude coordinate (default: "longitude")
            - ``"latitude"``: name of latitude coordinate (default: "latitude")

            Note: The "event" coordinate is automatically constructed from lead_time
            and member, so it should not be specified.
        data_vars : dict(str, str), optional
            Mapping from default variable names to variable names used in the data
            to read. See :py:meth:`~climada.hazard.io.HazardIO.from_xarray_raster` for
            details.
        crs : str, optional
            Coordinate reference system identifier. Defaults to "EPSG:4326".
        rechunk : bool, optional
            Rechunk the dataset before flattening. Defaults to ``False``. See
            :py:meth:`~climada.hazard.io.HazardIO.from_xarray_raster` for details.
        open_dataset_kws : dict, optional
            Keyword arguments passed to xarray.open_dataset if data is a file path

        Returns
        -------
        HazardForecast
            A forecast hazard object with lead_time and member attributes populated

        See Also
        --------
        :py:meth:`climada.hazard.io.HazardIO.from_xarray_raster`
            Parent method documentation for standard hazard loading
        """
        if xarray_has_timedelta_bug():
            LOGGER.warning(
                "xarray version %s contains a bug that prevents proper timedelta "
                "parsing. Consider updating to version %s or later, or downgrading to "
                "before version %s",
                xr.__version__,
                XARRAY_TIMEDELTA_BUG_END,
                XARRAY_TIMEDELTA_BUG_BEGIN,
            )

        # Open dataset if needed
        if isinstance(data, (pathlib.Path, str)):
            open_dataset_kws = open_dataset_kws or {}
            open_dataset_kws = {"chunks": "auto"} | open_dataset_kws
            dset = xr.open_dataset(data, **open_dataset_kws)
        else:
            dset = data

        if intensity is None:
            data_var_names = list(dset.data_vars.keys())
            if len(data_var_names) == 0:
                raise ValueError("Dataset has no data variables")
            intensity = str(data_var_names[0])
            LOGGER.info(
                "No intensity variable specified. Assuming intensity variable is '%s'",
                intensity,
            )

        # Extract forecast coordinates
        coordinate_vars = coordinate_vars or {}
        for key in ["lead_time", "member"]:
            if key not in coordinate_vars:
                raise ValueError(
                    f"coordinate_vars must include '{key}' key. "
                    f"Available coordinates: {list(dset.coords.keys())}"
                )
        leadtime_var = coordinate_vars["lead_time"]
        member_var = coordinate_vars["member"]

        dset = dset.assign_coords(
            event=(
                (leadtime_var, member_var),
                np.zeros((len(dset[leadtime_var]), len(dset[member_var]))),
            )
        )

        dset_squeezed = dset.squeeze()

        # Prepare coordinate_vars for parent call
        parent_coord_vars = {
            k: v for k, v in coordinate_vars.items() if k not in ["member", "lead_time"]
        }
        parent_coord_vars["event"] = "event"

        reader = HazardXarrayReader(
            data=dset_squeezed,
            coordinate_vars=parent_coord_vars,
            intensity=intensity,
            crs=crs,
            data_vars=data_vars,
            rechunk=rechunk,
        )

        kwargs = reader.get_hazard_kwargs() | {
            "haz_type": hazard_type,
            "units": intensity_unit,
            "lead_time": reader.data_stacked[leadtime_var].to_numpy(),
            "member": reader.data_stacked[member_var].to_numpy(),
        }

        # Generate from lead_time/member
        kwargs["event_name"] = [
            f"lt_{lt / np.timedelta64(1, 'h'):.0f}h_m_{m}"
            for lt, m in zip(kwargs["lead_time"], kwargs["member"])
        ]
        kwargs["date"] = np.zeros_like(kwargs["date"], dtype=int)

        # Convert to HazardForecast with forecast attributes
        return cls(**Hazard._check_and_cast_attrs(kwargs))