Source code for climada.engine.uncertainty.unc_impact

"""
This file is part of CLIMADA.

Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.

CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License as published by the Free
Software Foundation, version 3.

CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.

---

Define Uncertainty Impact class
"""

# Public names exported by this module.
__all__ = ['UncImpact']

import logging
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from climada.engine import Impact
from climada.engine.uncertainty.base import Uncertainty, UncVar
from climada.util import log_level
from climada.util import plot as u_plot

# Module-level logger, named after this module's import path.
LOGGER = logging.getLogger(__name__)


class UncImpact(Uncertainty):
    """
    Impact uncertainty analysis class

    This is the base class to perform uncertainty analysis on the outputs of a
    climada.engine.impact.Impact() object.

    Attributes
    ----------
    rp : list(int)
        List of the chosen return periods.
    calc_eai_exp : bool
        Compute eai_exp or not
    calc_at_event : bool
        Compute at_event or not
    unc_vars : dict(UncVar)
        Dictionary of the required uncertainty variables. Keys are
        ['exp', 'impf', 'haz'] and values are the corresponding UncVar.
    samples_df : pandas.DataFrame
        Values of the sampled uncertainty parameters. It has n_samples rows
        and one column per uncertainty parameter.
    sampling_method : str
        Name of the sampling method from SAlib.
        https://salib.readthedocs.io/en/latest/api.html#
    n_samples : int
        Effective number of samples (number of rows of samples_df)
    param_labels : list
        Name of all the uncertainty parameters
    distr_dict : dict
        Common flattened dictionary of all the distr_dict list in unc_vars.
        It represents the distribution of all the uncertainty parameters.
    problem_sa : dict
        The description of the uncertainty variables and their
        distribution as used in SALib.
        https://salib.readthedocs.io/en/latest/basics.html
    metrics : dict
        Dictionary of the value of the CLIMADA metrics for each sample
        (of the uncertainty parameters) defined in samples_df.
        Keys are metrics names ['aai_agg', 'freq_curve', 'eai_exp',
        'at_event'] (plus 'tot_value' once calc_distribution has run) and
        values are pd.DataFrame of dict(pd.DataFrame),
        with one row for one sample.
    sensitivity: dict
        Sensitivity indices for each metric.
        Keys are metrics names ['aai_agg', 'freq_curve', 'eai_exp',
        'at_event'] and values are the sensitivity indices dictionary
        as returned by SALib.

    """

    def __init__(self, exp_unc, impf_unc, haz_unc):
        """Initialize UncImpact

        Parameters
        ----------
        exp_unc : climada.engine.uncertainty.UncVar or climada.entity.Exposure
            Exposure uncertainty variable or Exposure
        impf_unc : climada.engine.uncertainty.UncVar or climada.entity.ImpactFuncSet
            Impactfunction uncertainty variable or Impact function
        haz_unc : climada.engine.uncertainty.UncVar or climada.hazard.Hazard
            Hazard uncertainty variable or Hazard

        """

        # Every input is passed through UncVar.var_to_uncvar so that the rest
        # of the class only has to deal with UncVar objects.
        unc_vars = {
            'exp': UncVar.var_to_uncvar(exp_unc),
            'impf': UncVar.var_to_uncvar(impf_unc),
            'haz': UncVar.var_to_uncvar(haz_unc),
        }

        # Empty placeholders; they are filled by calc_distribution().
        metrics = {
            'aai_agg': pd.DataFrame([]),
            'freq_curve': pd.DataFrame([]),
            'eai_exp': pd.DataFrame([]),
            'at_event': pd.DataFrame([]),
        }

        super().__init__(unc_vars=unc_vars, metrics=metrics)

    def calc_distribution(self,
                          rp=None,
                          calc_eai_exp=False,
                          calc_at_event=False,
                          pool=None
                          ):
        """
        Computes the impact for each of the parameters set defined in
        uncertainty.samples.

        By default, the aggregated average annual impact
        (impact.aai_agg) and the excess impact at return periods rp
        (impact.calc_freq_curve(self.rp).impact) is computed.
        Optionally, eai_exp and at_event is computed (this may require
        a larger amount of memory if n_samples and/or the number of centroids
        is large).

        This sets the attributes self.rp, self.calc_eai_exp,
        self.calc_at_event, self.metrics.

        Parameters
        ----------
        rp : list(int), optional
            Return periods in years to be computed.
            The default is [5, 10, 20, 50, 100, 250].
        calc_eai_exp : boolean, optional
            Toggle computation of the impact at each centroid location.
            The default is False.
        calc_at_event : boolean, optional
            Toggle computation of the impact for each event.
            The default is False.
        pool : pathos.pools.ProcessPool, optional
            Pool of CPUs for parallel computations.
            The default is None.

        Raises
        ------
        ValueError:
            If no sampling parameters defined, the distribution cannot
            be computed.

        """

        if self.samples_df.empty:
            raise ValueError("No sample was found. Please create one first "
                             "using UncImpact.make_sample(N)")

        if rp is None:
            rp = [5, 10, 20, 50, 100, 250]

        self.rp = rp
        self.calc_eai_exp = calc_eai_exp
        self.calc_at_event = calc_at_event

        # Time one impact computation on the first sample; the result is
        # discarded and only the duration is used for the estimate below.
        start = time.time()
        first_sample = next(self.samples_df.iloc[0:1].iterrows())
        self._map_impact_calc(first_sample)
        elapsed_time = time.time() - start
        est_com_time = self.est_comp_time(elapsed_time, pool)
        LOGGER.info("\n\nEstimated computation time: %.2f s\n", est_com_time)

        if pool:
            # Logged before entering log_level(...) below, which presumably
            # silences INFO messages of 'climada' loggers -- TODO confirm.
            LOGGER.info('Using %s CPUs.', pool.ncpus)

        # Compute the impact metrics for every sample
        with log_level(level='ERROR', name_prefix='climada'):
            if pool:
                # At least 1: a chunk size of 0 is not a valid value when
                # there are fewer samples than CPUs.
                chunksize = max(1, min(self.n_samples // pool.ncpus, 100))
                imp_metrics = pool.map(self._map_impact_calc,
                                       self.samples_df.iterrows(),
                                       chunksize=chunksize)
            else:
                imp_metrics = map(self._map_impact_calc,
                                  self.samples_df.iterrows())

            # The builtin map is lazy: the serial computation actually runs
            # here, so the unpacking must stay inside the log_level context.
            [aai_agg_list, freq_curve_list,
             eai_exp_list, at_event_list,
             tot_value_list] = list(zip(*imp_metrics))

        # Assign computed impact distribution data to self
        self.metrics['aai_agg'] = pd.DataFrame(aai_agg_list,
                                               columns=['aai_agg'])
        self.metrics['freq_curve'] = pd.DataFrame(
            freq_curve_list,
            columns=['rp' + str(n) for n in rp])
        self.metrics['eai_exp'] = pd.DataFrame(eai_exp_list)
        self.metrics['at_event'] = pd.DataFrame(at_event_list)
        self.metrics['tot_value'] = pd.DataFrame(tot_value_list,
                                                 columns=['tot_value'])
        self.check()

    def _map_impact_calc(self, sample_iterrows):
        """
        Compute the impact metrics for one parameter sample.

        This is the function mapped (serially or through a pool) over
        samples_df.iterrows() by calc_distribution.

        Parameters
        ----------
        sample_iterrows : tuple(index, pd.Series)
            One item as yielded by pd.DataFrame.iterrows(): the row index
            and the row holding the sampled parameter values.

        Returns
        -------
         : list
            impact metrics for this sample: [aai_agg, freq_curve,
            eai_exp (np.array([]) if self.calc_eai_exp=False), at_event
            (np.array([]) if self.calc_at_event=False), tot_value].

        """

        # [1] selects the row; [0] is the index returned by iterrows()
        sample = sample_iterrows[1]
        exp_samples = sample[self.unc_vars['exp'].labels].to_dict()
        haz_samples = sample[self.unc_vars['haz'].labels].to_dict()
        impf_samples = sample[self.unc_vars['impf'].labels].to_dict()

        exp = self.unc_vars['exp'].uncvar_func(**exp_samples)
        haz = self.unc_vars['haz'].uncvar_func(**haz_samples)
        impf = self.unc_vars['impf'].uncvar_func(**impf_samples)

        imp = Impact()
        imp.calc(exposures=exp, impact_funcs=impf, hazard=haz)

        # Extract from climada.impact the chosen metrics
        freq_curve = imp.calc_freq_curve(self.rp).impact
        eai_exp = imp.eai_exp if self.calc_eai_exp else np.array([])
        at_event = imp.at_event if self.calc_at_event else np.array([])

        return [imp.aai_agg, freq_curve, eai_exp, at_event, imp.tot_value]

    def plot_rp_distribution(self, figsize=(8, 6)):
        """
        Plot the distribution of return period values.

        Parameters
        ----------
        figsize: tuple(int or float, int or float), optional
            The figsize argument of matplotlib.pyplot.subplots()
            The default is (8, 6)

        Raises
        ------
        ValueError
            If no metric distribution was computed the plot cannot be made.

        Returns
        -------
        ax: matplotlib.pyplot.axes
            The axis handle of the plot.

        """

        # __init__ always fills self.metrics with (empty) data frames, so the
        # frame itself has to be checked, not only the dictionary.
        df_values = self.metrics.get('freq_curve') if self.metrics else None
        if df_values is None or df_values.empty:
            raise ValueError("No uncertainty data present for these metrics. "+
                    "Please run an uncertainty analysis first.")

        _fig, ax = plt.subplots(figsize=figsize)

        min_l, max_l = df_values.min().min(), df_values.max().max()

        # One normalized histogram per return period, stacked vertically with
        # a spacing of 2 so that each (height <= 1) has its own baseline.
        for n, (_name, values) in enumerate(df_values.items()):
            count, division = np.histogram(values, bins=10)
            count = count / count.max()
            # bin centers
            losses = (division[:-1] + division[1:]) / 2
            ax.plot([min_l, max_l], [2*n, 2*n], color='k', alpha=0.5)
            ax.fill_between(losses, count + 2*n, 2*n)

        n_rp = len(df_values.columns)
        ax.set_xlim(min_l, max_l)
        ax.set_ylim(0, 2*n_rp)
        ax.set_xlabel('impact')
        ax.set_ylabel('return period [years]')
        ax.set_yticks(np.arange(0, 2*n_rp, 2))
        ax.set_yticklabels(df_values.columns)

        return ax

    def plot_sensitivity_map(self, exp, salib_si='S1', figsize=(8, 6)):
        """
        Plot a map of the largest sensitivity index in each exposure point

        Parameters
        ----------
        exp : climada.exposure
            The exposure from which to take the coordinates
        salib_si : str, optional
            The name of the sensitivity index to plot.
            The default is 'S1'.
        figsize: tuple(int or float, int or float), optional
            The figsize argument of matplotlib.pyplot.subplots()
            The default is (8, 6)

        Raises
        ------
        ValueError
            If no sensitivity data is found for eai_exp, or if the
            sensitivity index salib_si is not part of it.

        Returns
        -------
        ax: matplotlib.pyplot.axes
            The axis handle of the plot.

        """

        no_si_msg = ("No sensitivity indices found for"
                     " impact.eai_exp. Please compute sensitivity first using"
                     " UncImpact.calc_sensitivity(calc_eai_exp=True)"
                     )

        try:
            si_eai = self.sensitivity['eai_exp']
        except KeyError as verr:
            raise ValueError(no_si_msg) from verr

        # For each entry, position of the parameter with the largest index
        try:
            eai_max_si_idx = [
                np.argmax(si_dict[salib_si])
                for si_dict in si_eai.values()
                ]
        except KeyError as verr:
            raise ValueError(
                "Sensitivity index '" + str(salib_si) + "' not found in the"
                " sensitivity indices of impact.eai_exp."
                ) from verr

        if not eai_max_si_idx:
            raise ValueError(no_si_msg)

        plot_val = np.array([eai_max_si_idx]).astype(float)
        coord = np.array([exp.gdf.latitude, exp.gdf.longitude]).transpose()
        ax = u_plot.geo_scatter_categorical(
                plot_val, coord,
                var_name='Largest sensitivity index ' + salib_si,
                title='Sensitivity map',
                cat_name=self.param_labels,
                figsize=figsize
                )

        return ax