"""
This file is part of CLIMADA.
Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.
CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.
CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.
---
Define Uncertainty class.
"""
__all__ = [
"UncOutput",
"UncCostBenefitOutput",
"UncImpactOutput",
"UncDeltaImpactOutput",
]
import datetime as dt
import logging
from itertools import zip_longest
from pathlib import Path
import h5py
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import colormaps as cm
import climada.util.hdf5_handler as u_hdf5
from climada import CONFIG
from climada.util import plot as u_plot
from climada.util.value_representation import convert_monetary_value as u_cmv
from climada.util.value_representation import value_to_monetary_unit as u_vtm
LOGGER = logging.getLogger(__name__)

# Metrics that are multi-dimensional
METRICS_2D = ["eai_exp", "at_event"]

# Default output directory for saved uncertainty data (from climada config)
DATA_DIR = CONFIG.engine.uncertainty.local_data.user_data.dir()

FIG_W, FIG_H = 8, 5  # default figsize width/height multiplicators per column/row

MAP_CMAP = "Dark2"  # Default color map for the sensitivity map

# Table of recommended pairing between salib sampling and sensitivity methods
# NEEDS TO BE UPDATED REGULARLY!! https://salib.readthedocs.io/en/latest/api.html
# Currently, we do not support the 'delta' method due to Singular matrix issues,
SALIB_COMPATIBILITY = {
    #'delta': ['fast_sampler', 'ff', 'finite_diff', 'latin', 'morris', 'saltelli'],
    "dgsm": ["finite_diff"],
    "fast": ["fast_sampler"],
    "ff": ["ff"],
    "hdmr": ["fast_sampler", "ff", "finite_diff", "latin", "morris", "saltelli"],
    "morris": ["morris"],
    "pawn": ["fast_sampler", "ff", "finite_diff", "latin", "morris", "saltelli"],
    "rbd_fast": ["fast_sampler", "ff", "finite_diff", "latin", "morris", "saltelli"],
    "sobol": ["saltelli", "sobol"],
}
[docs]
class UncOutput:
    """
    Class to store and plot uncertainty and sensitivity analysis output data

    This is the base class to store uncertainty and sensitivity outputs of an
    analysis done on climada.engine.impact.Impact() or
    climada.engine.costbenefit.CostBenefit() object.

    Attributes
    ----------
    samples_df : pandas.DataFrame
        Values of the sampled uncertainty parameters. It has n_samples rows
        and one column per uncertainty parameter.
    sampling_method : str
        Name of the sampling method from SAlib.
        https://salib.readthedocs.io/en/latest/api.html#
    n_samples : int
        Effective number of samples (number of rows of samples_df)
    param_labels : list
        Name of all the uncertainty parameters
    distr_dict : dict
        Common flattened dictionary of all the distr_dict of all input variables.
        It represents the distribution of all the uncertainty parameters.
    problem_sa : dict
        The description of the uncertainty variables and their
        distribution as used in SALib.
        https://salib.readthedocs.io/en/latest/basics.html.
    """

    # Names of the scalar configuration attributes that accompany the
    # dataframes (cf. the pandas `_metadata` convention).
    # NOTE(review): not referenced within this file — presumably consumed
    # elsewhere (e.g. by serialization helpers); confirm before removing.
    _metadata = [
        "sampling_method",
        "sampling_kwargs",
        "sensitivity_method",
        "sensitivity_kwargs",
    ]
[docs]
def __init__(self, samples_df, unit=None):
"""
Initialize Uncertainty Data object.
Parameters
----------
samples_df : pandas.DataFrame
input parameters samples
unit : str, optional
value unit
"""
# Data
self.samples_df = samples_df
self.unit = unit
[docs]
def order_samples(self, by_parameters):
"""
Function to sort the samples dataframe.
Note: the unc_output.samples_df is ordered inplace.
Parameters
----------
by_parameters : list[string]
List of the uncertainty parameters to sort by (ordering in list is kept)
Returns
-------
None.
"""
self.samples_df.sort_values(by=by_parameters, inplace=True, axis=0)
[docs]
def get_samples_df(self):
return getattr(self, "samples_df")
[docs]
def get_unc_df(self, metric_name):
return getattr(self, f"{metric_name}_unc_df")
[docs]
def set_unc_df(self, metric_name, unc_df):
setattr(self, f"{metric_name}_unc_df", unc_df)
[docs]
def get_sens_df(self, metric_name):
return getattr(self, f"{metric_name}_sens_df")
[docs]
def set_sens_df(self, metric_name, sens_df):
setattr(self, f"{metric_name}_sens_df", sens_df)
[docs]
def check_salib(self, sensitivity_method):
    """
    Checks whether the chosen sensitivity method and the sampling
    method used to generate self.samples_df
    respect the pairing recommendation by the SALib package.

    https://salib.readthedocs.io/en/latest/api.html

    Parameters
    ----------
    sensitivity_method : str
        Name of the sensitivity analysis method.

    Returns
    -------
    bool
        True if sampling and sensitivity methods respect the recommended
        pairing.
    """
    if self.sampling_method not in SALIB_COMPATIBILITY[sensitivity_method]:
        # Only warn (do not raise): the combination may still run, it is
        # just not recommended by SALib.
        LOGGER.warning(
            "The chosen combination of sensitivity method (%s)"
            " and sampling method (%s) does not correspond to the"
            " recommendation of the salib package."  # BUGFIX: typo 'pacakge'
            "\n https://salib.readthedocs.io/en/latest/api.html",
            self.sampling_method,
            sensitivity_method,
        )
        return False
    return True
@property
def sampling_method(self):
"""
Returns the sampling method used to generate self.samples_df
Returns
-------
str :
Sampling method name
"""
return self.samples_df.attrs["sampling_method"]
@property
def sampling_kwargs(self):
"""
Returns the kwargs of the sampling method that generate self.samples_df
Returns
-------
dict
Dictionary of arguments for SALib sampling method
"""
return self.samples_df.attrs["sampling_kwargs"]
@property
def n_samples(self):
"""
The effective number of samples
Returns
-------
int :
effective number of samples
"""
return self.samples_df.shape[0]
@property
def param_labels(self):
"""
Labels of all uncertainty input parameters.
Returns
-------
list of str :
Labels of all uncertainty input parameters.
"""
return list(self.samples_df)
@property
def problem_sa(self):
"""
The description of the uncertainty variables and their
distribution as used in SALib.
https://salib.readthedocs.io/en/latest/basics.html
Returns
-------
dict :
Salib problem dictionary.
"""
return {
"num_vars": len(self.param_labels),
"names": self.param_labels,
"bounds": [[0, 1]] * len(self.param_labels),
}
@property
def uncertainty_metrics(self):
"""
Retrieve all uncertainty output metrics names
Returns
-------
unc_metric_list : [str]
List of names of attributes containing metrics uncertainty values,
without the trailing '_unc_df'
"""
unc_metric_list = []
for attr_name, attr_value in self.__dict__.items():
if isinstance(attr_value, pd.DataFrame):
if not attr_value.empty and attr_name.endswith("_unc_df"):
unc_metric_list.append(attr_name[:-7])
return unc_metric_list
@property
def sensitivity_metrics(self):
"""
Retrieve all sensitivity output metrics names
Returns
-------
sens_metric_list : [str]
List of names of attributes containing metrics sensitivity values,
without the trailing '_sens_df'
"""
sens_metric_list = []
for attr_name, attr_value in self.__dict__.items():
if isinstance(attr_value, pd.DataFrame):
if not attr_value.empty and attr_name.endswith("_sens_df"):
sens_metric_list.append(attr_name[:-8])
return sens_metric_list
[docs]
def get_uncertainty(self, metric_list=None):
"""
Returns uncertainty dataframe with values for each sample
Parameters
----------
metric_list : [str], optional
List of uncertainty metrics to consider.
The default returns all uncertainty metrics at once.
Returns
-------
pandas.DataFrame
Joint dataframe of all uncertainty values for all metrics in
the metric_list.
See Also
--------
uncertainty_metrics: list of all available uncertainty metrics
"""
if metric_list is None:
metric_list = self.uncertainty_metrics
try:
unc_df = pd.concat(
[self.get_unc_df(metric) for metric in metric_list], axis=1
)
except AttributeError:
return pd.DataFrame([])
return unc_df
[docs]
def get_sensitivity(self, salib_si, metric_list=None):
"""
Returns sensitivity index
E.g. For the sensitivity analysis method 'sobol', the choices
are ['S1', 'ST'], for 'delta' the choices are ['delta', 'S1'].
For more information see the SAlib documentation:
https://salib.readthedocs.io/en/latest/basics.html
Parameters
----------
salib_si : str
Sensitivity index
metric_list :[str], optional
List of sensitivity metrics to consider.
The default returns all sensitivity indices at once.
Returns
-------
pandas.DataFrame
Joint dataframe of the sensitvity indices for all metrics in
the metric_list
See Also
--------
sensitvity_metrics: list of all available sensitivity metrics
"""
df_all = pd.DataFrame([])
df_meta = pd.DataFrame([])
if metric_list is None:
metric_list = self.sensitivity_metrics
for metric in metric_list:
submetric_df = self.get_sens_df(metric)
if not submetric_df.empty:
submetric_df = submetric_df[submetric_df["si"] == salib_si]
df_all = pd.concat(
[df_all, submetric_df.select_dtypes("number")], axis=1
)
if df_meta.empty:
df_meta = submetric_df.drop(
submetric_df.select_dtypes("number").columns, axis=1
)
return pd.concat([df_meta, df_all], axis=1).reset_index(drop=True)
[docs]
def get_largest_si(self, salib_si, metric_list=None, threshold=0.01):
    """
    Get largest si per metric

    Parameters
    ----------
    salib_si : str
        The name of the sensitivity index to plot.
    metric_list : list of strings, optional
        List of metrics to plot the sensitivity.
        Default is None.
    threshold : float
        The minimum value a sensitivity index must have to be considered
        as the largest. The default is 0.01.

    Returns
    -------
    max_si_df : pandas.dataframe
        Dataframe with the largest si and its value per metric
    """
    if metric_list is None:
        metric_list = self.sensitivity_metrics
    si_df = self.get_sensitivity(salib_si, metric_list)
    # get max index
    si_df_num = si_df.select_dtypes("number")
    si_df_num[si_df_num < threshold] = 0  # remove noise when all si are 0
    max_si_idx = si_df_num.idxmax().to_numpy()
    max_si_val = si_df_num.max().to_numpy()
    # get parameter of max index
    # Shift indices by one so that NaN (no valid maximum, e.g. an all-NaN
    # column) maps to 0, which points at the "None" sentinel prepended to
    # the parameter arrays below.
    max_si_idx = np.nan_to_num(max_si_idx + 1).astype(int)  # Set np.nan values to 0
    param = np.concatenate((["None"], si_df["param"]))
    param_max_si = [param[idx] for idx in max_si_idx]
    param2 = np.concatenate((["None"], si_df["param2"]))
    param2_max_si = [param2[idx] for idx in max_si_idx]
    max_si_df = pd.DataFrame(
        {
            "metric": si_df_num.columns,
            "param": param_max_si,
            "param2": param2_max_si,
            "si": max_si_val,
        }
    )
    return max_si_df
[docs]
def plot_sample(self, figsize=None):
    """
    Plot the sample distributions of the uncertainty input parameters

    For each uncertainty input variable, the sample distributions is shown
    in a separate axes.

    Parameters
    ----------
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is derived from the total number of plots (nplots) as:

        >>> nrows, ncols = int(np.ceil(nplots / 3)), min(nplots, 3)
        >>> figsize = (ncols * FIG_W, nrows * FIG_H)

    Raises
    ------
    ValueError
        If no sample was computed the plot cannot be made.

    Returns
    -------
    axes: matplotlib.pyplot.axes
        The axis handle of the plot.
    """
    fontsize = 18
    if self.samples_df.empty:
        # BUGFIX: the concatenated message parts lacked a separating space
        # ("present.Please").
        raise ValueError("No uncertainty sample present. Please make a sample first.")
    nplots = len(self.param_labels)
    nrows, ncols = int(np.ceil(nplots / 3)), min(nplots, 3)
    if not figsize:
        figsize = (ncols * FIG_W, nrows * FIG_H)
    _fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    # BUGFIX: with a single uncertainty parameter plt.subplots returns a
    # bare Axes object which has no .flatten(); wrap it first.
    flat_axes = np.atleast_1d(axes).flatten()
    for ax, label in zip_longest(flat_axes, self.param_labels, fillvalue=None):
        if label is None:
            # More axes than parameters: drop the unused axes.
            ax.remove()
            continue
        self.samples_df[label].hist(ax=ax, bins=100)
        ax.set_xlabel(label)
        ax.set_ylabel("Sample count")
        ax.tick_params(labelsize=fontsize)
        for item in [ax.title, ax.xaxis.label, ax.yaxis.label]:
            item.set_fontsize(fontsize)
    plt.tight_layout()
    return axes
[docs]
def plot_uncertainty(
    self,
    metric_list=None,
    orig_list=None,
    figsize=None,
    log=False,
    axes=None,
    calc_delta=False,
):
    """
    Plot the uncertainty distribution

    For each risk metric, a separate axes is used to plot the uncertainty
    distribution of the output values obtained over the sampled
    input parameters.

    Parameters
    ----------
    metric_list : list[str], optional
        List of metrics to plot the distribution.
        The default is None.
    orig_list : list[float], optional
        List of the original (without uncertainty) values for each
        sub-metric of the metrics in metric_list. The ordering is identical.
        The default is None.
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is derived from the total number of plots (nplots) as:

        >>> nrows, ncols = int(np.ceil(nplots / 2)), min(nplots, 2)
        >>> figsize = (ncols * FIG_W, nrows * FIG_H)
    log : boolean, optional
        Use log10 scale for x axis. Default is False.
    axes : matplotlib.pyplot.axes, optional
        Axes handles to use for the plot. The default is None.
    calc_delta : boolean, optional
        Adapt x axis label for CalcDeltaImpact unc_output. Default is False.

    Raises
    ------
    ValueError
        If no metric distribution was computed the plot cannot be made.

    Returns
    -------
    axes : matplotlib.pyplot.axes
        The axes handle of the plot.

    See Also
    --------
    uncertainty_metrics : list of all available uncertainty metrics
    """
    # BUGFIX (docs): the docstring previously stated the layout formula
    # with 3 columns, while the code lays out 2 columns per row.
    fontsize = 18  # default label fontsize
    if not self.uncertainty_metrics:
        raise ValueError(
            "No uncertainty data present for these metrics. "
            "Please run an uncertainty analysis first."
        )
    if orig_list is None:
        orig_list = []
    if metric_list is None:
        # By default plot only scalar (1D) metrics.
        metric_list = [
            metric for metric in self.uncertainty_metrics if metric not in METRICS_2D
        ]
    unc_df = self.get_uncertainty(metric_list)
    if unc_df.empty:
        raise AttributeError(
            f"The listed metrics {metric_list} either "
            "do not exist or have no uncertainty values."
        )
    if log:
        unc_df_plt = unc_df.apply(np.log10).copy()
        unc_df_plt = unc_df_plt.replace([np.inf, -np.inf], np.nan)
    else:
        unc_df_plt = unc_df.copy()
    cols = unc_df_plt.columns
    nplots = len(cols)
    nrows, ncols = int(np.ceil(nplots / 2)), min(nplots, 2)
    if axes is None:
        if not figsize:
            figsize = (ncols * FIG_W, nrows * FIG_H)
        _fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    if nplots > 1:
        flat_axes = axes.flatten()
    else:
        flat_axes = np.array([axes])
    for ax, col, orig_val in zip_longest(flat_axes, cols, orig_list, fillvalue=None):
        if col is None:
            # More axes than metrics: drop the unused axes.
            if ax is not None:
                ax.remove()
            continue
        # Rescale values to a common monetary magnitude (e.g. millions).
        # BUGFIX: this conversion was previously computed twice (a second,
        # identical u_vtm call followed the no-data guard), together with
        # an unreachable 'data.empty' branch; both removed.
        data, m_unit = u_vtm(unc_df_plt[col])
        data = pd.Series(data)
        # Check if the column data is empty or contains only NaNs
        if data.empty or data.isna().all() or data.dropna().shape[0] < 2:
            print(f"No data to plot for '{col}'.")
            if ax is not None:
                ax.text(
                    0.5,
                    0.5,
                    "No data to plot",
                    fontsize=18,
                    horizontalalignment="center",
                    verticalalignment="center",
                    transform=ax.transAxes,
                )
                ax.set_xlabel(col)
                ax.set_ylabel("density of samples")
                ax.tick_params(labelsize=fontsize)
                for item in [ax.title, ax.xaxis.label, ax.yaxis.label]:
                    item.set_fontsize(fontsize)
            continue
        data.hist(
            ax=ax,
            bins=30,
            density=True,
            histtype="bar",
            color="lightsteelblue",
            edgecolor="black",
        )
        try:
            data.plot.kde(ax=ax, color="darkblue", linewidth=4, label="")
        except np.linalg.LinAlgError:
            # KDE fails on degenerate (e.g. zero-variance) data;
            # the histogram alone is sufficient then.
            pass
        avg, std = data.mean(), data.std()
        _, ymax = ax.get_ylim()
        if log:
            # NOTE(review): in log mode the data is already log10-scaled,
            # so this applies log10 a second time to the mean — confirm
            # intended behavior before changing.
            avg_plot = np.log10(avg)
        else:
            avg_plot = avg
        ax.axvline(
            avg_plot,
            color="darkorange",
            linestyle="dashed",
            linewidth=2,
            label="avg=%.2f%s" % (avg, m_unit),
        )
        if orig_val is not None:
            if log:
                orig_plot = np.log10(orig_val)
            else:
                orig_plot = orig_val
            [orig_plot] = u_cmv(orig_plot, m_unit)
            ax.axvline(
                orig_plot,
                color="green",
                linestyle="dotted",
                linewidth=2,
                label="orig=%.2f%s" % (orig_plot, m_unit),
            )
        if log:
            std_m, std_p = np.log10(avg - std), np.log10(avg + std)
        else:
            std_m, std_p = avg - std, avg + std
        # Horizontal bar marking +- one standard deviation.
        ax.plot(
            [std_m, std_p],
            [0.3 * ymax, 0.3 * ymax],
            color="black",
            label="std=%.2f%s" % (std, m_unit),
        )
        xlabel = col + " [" + m_unit + " " + self.unit + "] "
        if calc_delta:
            # Modify the xlabel when calc_delta is True
            xlabel = col + " change [%]"
        if log:
            ax.set_xlabel(xlabel + " (log10 scale)")
        else:
            ax.set_xlabel(xlabel)
        ax.set_ylabel("density of samples")
        ax.legend(fontsize=fontsize - 2)
        ax.tick_params(labelsize=fontsize)
        for item in [ax.title, ax.xaxis.label, ax.yaxis.label]:
            item.set_fontsize(fontsize)
        ax.set_xlim([data.min(), data.max()])
    plt.tight_layout()
    return axes
[docs]
def plot_rp_uncertainty(
    self, orig_list=None, figsize=(16, 6), axes=None, calc_delta=False
):
    """
    Plot the distribution of return period uncertainty

    Parameters
    ----------
    orig_list : list[float], optional
        List of the original (without uncertainty) values for each
        sub-metric of the metrics in metric_list. The ordering is identical.
        The default is None.
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is (16, 6)
    axes : matplotlib.pyplot.axes, optional
        Axes handles to use for the plot. The default is None.
    calc_delta : boolean, optional
        Adapt axis labels for CalcDeltaImpact unc_output. Default is False.

    Raises
    ------
    ValueError
        If no metric distribution was computed the plot cannot be made.

    Returns
    -------
    axes : matplotlib.pyplot.axes
        The axis handle of the plot.
    """
    try:
        unc_df = self.freq_curve_unc_df
    except AttributeError:
        unc_df = None
    if unc_df is None or unc_df.empty:
        raise ValueError(
            "No return period uncertainty data present "
            "Please run an uncertainty analysis with the desired "
            "return period specified."
        )
    add_orig = True
    if orig_list is None:
        add_orig = False
    if axes is None:
        _fig, axes = plt.subplots(figsize=figsize, nrows=1, ncols=2)
    # One common monetary scaling for both subplots, derived from the
    # global min/max of all return-period columns.
    [min_l, max_l], m_unit = u_vtm(
        [unc_df.min().min(), unc_df.max().max()], n_sig_dig=4
    )
    # Plotting for the first axes: one histogram "ridge" per return
    # period, stacked vertically at offsets of 2*n.
    ax = axes[0]
    prop_cycle = plt.rcParams["axes.prop_cycle"]
    colors = prop_cycle.by_key()["color"]
    for n, (_name, values) in enumerate(unc_df.items()):
        if values.isna().all() or len(values.dropna()) < 2:
            print(f"Skipping plot for '{_name}': insufficient data.")
            continue
        values = u_cmv(values, m_unit, n_sig_dig=4)
        count, division = np.histogram(values, bins=100)
        # Normalize so each ridge has maximum height 1.
        count = count / count.max()
        # Bin centers as x-coordinates.
        losses = [
            (bin_i + bin_f) / 2
            for (bin_i, bin_f) in zip(division[:-1], division[1:])
        ]
        ax.plot([min_l, max_l], [2 * n, 2 * n], color="k", alpha=0.5)
        ax.fill_between(losses, count + 2 * n, 2 * n)
        if add_orig:
            [orig_val] = u_cmv(orig_list[n], m_unit, n_sig_dig=4)
            ax.plot(
                [orig_val, orig_val],
                [2 * n, 2 * (n + 1)],
                color=colors[n],
                linestyle="dotted",
                linewidth=2,
                label="orig=%.2f%s" % (orig_val, m_unit),
            )
    ax.set_xlim(min_l, max_l)
    ax.set_ylim(0, 2 * unc_df.shape[1])
    ax.set_yticks(np.arange(0, 2 * unc_df.shape[1], 2))
    # Column names look like 'rp10'; strip the 'rp' prefix for tick labels.
    ax.set_yticklabels([s[2:] for s in unc_df.columns])
    ax.legend(loc="lower right")
    # Set x-axis label for the first axes
    if calc_delta:
        ax.set_xlabel("Impact change [%]")
    else:
        ax.set_xlabel("Impact [%s %s]" % (m_unit, self.unit))
    ax.set_ylabel("Return period [years]")
    # Plotting for the second axes: 5%/50%/95% quantile band of the
    # exceedance frequency curve across all samples.
    ax = axes[1]
    high = u_cmv(
        self.get_unc_df("freq_curve").quantile(0.95).values, m_unit, n_sig_dig=4
    )
    middle = u_cmv(
        self.get_unc_df("freq_curve").quantile(0.5).values, m_unit, n_sig_dig=4
    )
    low = u_cmv(
        self.get_unc_df("freq_curve").quantile(0.05).values, m_unit, n_sig_dig=4
    )
    x = [float(rp[2:]) for rp in unc_df.columns]
    ax.plot(
        x, high, linestyle="--", color="blue", alpha=0.5, label="0.95 percentile"
    )
    ax.plot(x, middle, label="0.5 percentile")
    ax.plot(
        x,
        low,
        linestyle="dashdot",
        color="blue",
        alpha=0.5,
        label="0.05 percentile",
    )
    ax.fill_between(x, low, high, alpha=0.2)
    if add_orig:
        ax.plot(
            x,
            u_cmv(orig_list, m_unit, n_sig_dig=4),
            color="green",
            linestyle="dotted",
            label="orig",
        )
    ax.set_xlabel("Return period [year]")
    # Set y-axis label for the second axes
    if calc_delta:
        ax.set_ylabel("Impact change [%]")
    else:
        ax.set_ylabel("Impact [" + m_unit + " " + self.unit + "]")
    ax.legend()
    return axes
[docs]
def plot_sensitivity(
    self,
    salib_si="S1",
    salib_si_conf="S1_conf",
    metric_list=None,
    figsize=None,
    axes=None,
    **kwargs,
):
    """
    Bar plot of a first order sensitivity index

    For each metric, the sensitivity indices are plotted in a
    separate axes.

    This requires that a senstivity analysis was already
    performed.

    E.g. For the sensitivity analysis method 'sobol', the choices
    are ['S1', 'ST'], for 'delta' the choices are ['delta', 'S1'].

    Note that not all sensitivity indices have a confidence interval.

    For more information see the SAlib documentation:
    https://salib.readthedocs.io/en/latest/basics.html

    Parameters
    ----------
    salib_si : string, optional
        The first order (one value per metric output) sensitivity index
        to plot. The default is S1.
    salib_si_conf : string, optional
        The confidence value for the first order sensitivity index
        to plot. The default is S1_conf.
    metric_list : list of strings, optional
        List of metrics to plot the sensitivity. If a metric is not found
        it is ignored.
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is derived from the total number of plots (nplots) as:

        >>> nrows, ncols = int(np.ceil(nplots / 2)), min(nplots, 2)
        >>> figsize = (ncols * FIG_W, nrows * FIG_H)
    axes : matplotlib.pyplot.axes, optional
        Axes handles to use for the plot. The default is None.
    kwargs :
        Keyword arguments passed on to
        pandas.DataFrame.plot(kind='bar')

    Raises
    ------
    ValueError :
        If no sensitivity is available the plot cannot be made.

    Returns
    -------
    axes : matplotlib.pyplot.axes
        The axes handle of the plot.

    See Also
    --------
    sensitvity_metrics :
        list of all available sensitivity metrics
    """
    if not self.sensitivity_metrics:
        raise ValueError(
            "No sensitivity present. " "Please run a sensitivity analysis first."
        )
    if metric_list is None:
        # By default plot only scalar (1D) metrics.
        metric_list = [
            metric for metric in self.sensitivity_metrics if metric not in METRICS_2D
        ]
    nplots = len(metric_list)
    nrows, ncols = int(np.ceil(nplots / 2)), min(nplots, 2)
    if axes is None:
        if not figsize:
            figsize = (ncols * FIG_W, nrows * FIG_H)
        _fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    if nplots > 1:
        flat_axes = axes.flatten()
    else:
        flat_axes = np.array([axes])
    for ax, metric in zip(flat_axes, metric_list):
        df_S = self.get_sensitivity(salib_si, [metric]).select_dtypes("number")
        if not df_S.columns[df_S.isnull().all()].empty:
            LOGGER.warning(
                "All-NaN columns encountered: %s",
                list(df_S.columns[df_S.isnull().all()]),
            )
            df_S = df_S.loc[:, df_S.notnull().any()]
        if df_S.empty:
            ax.set_xlabel("Input parameter")
            ax.remove()
            continue
        df_S_conf = self.get_sensitivity(salib_si_conf, [metric]).select_dtypes(
            "number"
        )
        df_S_conf = df_S_conf.loc[:, df_S.columns]
        # BUGFIX: missing 'else' — the bars were previously drawn twice when
        # no confidence data was available, and the empty dataframe was still
        # passed as yerr. Plot exactly once, with error bars only if present.
        if df_S_conf.empty:
            df_S.plot(ax=ax, kind="bar", **kwargs)
        else:
            df_S.plot(ax=ax, kind="bar", yerr=df_S_conf, **kwargs)
        ax.set_xticklabels(self.param_labels, rotation=0)
        ax.set_xlabel("Input parameter")
        ax.set_ylabel(salib_si)
    plt.tight_layout()
    return axes
[docs]
def plot_sensitivity_second_order(
    self,
    salib_si="S2",
    salib_si_conf="S2_conf",
    metric_list=None,
    figsize=None,
    axes=None,
    **kwargs,
):
    """
    Plot second order sensitivity indices as matrix.

    For each metric, the sensitivity indices are plotted in a
    separate axes.

    E.g. For the sensitivity analysis method 'sobol', the choices
    are ['S2', 'S2_conf'].

    Note that not all sensitivity indices have a confidence interval.

    For more information see the SAlib documentation:
    https://salib.readthedocs.io/en/latest/basics.html

    Parameters
    ----------
    salib_si : string, optional
        The second order sensitivity index to plot.
        The default is S2.
    salib_si_conf : string, optional
        The confidence value for the sensitivity index salib_si
        to plot.
        The default is S2_conf.
    metric_list : list of strings, optional
        List of metrics to plot the sensitivity. If a metric is not found
        it is ignored. Default is all 1D metrics.
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is derived from the total number of plots (nplots) as:

        >>> nrows, ncols = int(np.ceil(nplots / 3)), min(nplots, 3)
        >>> figsize = (ncols * 5, nrows * 5)
    axes : matplotlib.pyplot.axes, optional
        Axes handles to use for the plot. The default is None.
    kwargs :
        Keyword arguments passed on to matplotlib.pyplot.imshow()

    Raises
    ------
    ValueError :
        If no sensitivity is available the plot cannot be made.

    Returns
    -------
    axes: matplotlib.pyplot.axes
        The axes handle of the plot.

    See Also
    --------
    sensitvity_metrics :
        list of all available sensitivity metrics
    """
    if not self.sensitivity_metrics:
        raise ValueError(
            "No sensitivity present for this metrics. "
            "Please run a sensitivity analysis first."
        )
    if metric_list is None:
        metric_list = [
            metric
            for metric in self.sensitivity_metrics
            if metric not in METRICS_2D
        ]
    if "cmap" not in kwargs.keys():
        kwargs["cmap"] = "summer"
    # all the lowest level metrics (e.g. rp10) directly or as
    # submetrics of the metrics in metrics_list
    df_S = self.get_sensitivity(salib_si, metric_list).select_dtypes("number")
    df_S_conf = self.get_sensitivity(salib_si_conf, metric_list).select_dtypes(
        "number"
    )
    nplots = len(df_S.columns)
    nrows, ncols = int(np.ceil(nplots / 3)), min(nplots, 3)
    if axes is None:
        if not figsize:
            figsize = (ncols * 5, nrows * 5)
        _fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    # plt.subplots returns a bare Axes (not an array) for a single plot.
    if nplots > 1:
        flat_axes = axes.flatten()
    else:
        flat_axes = np.array([axes])
    for ax, submetric in zip(flat_axes, df_S.columns):
        # The flattened index values are reshaped to a square matrix
        # (n_params x n_params); only the upper triangle carries data.
        # Make matrix symmetric
        s2_matrix = np.triu(
            np.reshape(df_S[submetric].to_numpy(), (len(self.param_labels), -1))
        )
        s2_matrix = s2_matrix + s2_matrix.T - np.diag(np.diag(s2_matrix))
        ax.imshow(s2_matrix, **kwargs)
        s2_conf_matrix = np.triu(
            np.reshape(
                df_S_conf[submetric].to_numpy(), (len(self.param_labels), -1)
            )
        )
        s2_conf_matrix = (
            s2_conf_matrix + s2_conf_matrix.T - np.diag(np.diag(s2_conf_matrix))
        )
        # Annotate each cell with "value ± confidence" (NaN cells as 'nan').
        for i in range(len(s2_matrix)):
            for j in range(len(s2_matrix)):
                if np.isnan(s2_matrix[i, j]):
                    ax.text(
                        j,
                        i,
                        np.nan,
                        ha="center",
                        va="center",
                        color="k",
                        fontsize="medium",
                    )
                else:
                    ax.text(
                        j,
                        i,
                        str(round(s2_matrix[i, j], 2))
                        + "\n\u00B1"  # \u00B1 = +-
                        + str(round(s2_conf_matrix[i, j], 2)),
                        ha="center",
                        va="center",
                        color="k",
                        fontsize="medium",
                    )
        ax.set_title(salib_si + " - " + submetric, fontsize=18)
        labels = self.param_labels
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, fontsize=16)
        ax.set_yticklabels(labels, fontsize=16)
    plt.tight_layout()
    return axes
[docs]
def plot_sensitivity_map(self, salib_si="S1", **kwargs):
    """
    Plot a map of the largest sensitivity index in each exposure point

    Requires the uncertainty distribution for eai_exp.

    Parameters
    ----------
    salib_si : str, optional
        The name of the sensitivity index to plot.
        The default is 'S1'.
    kwargs :
        Keyword arguments passed on to
        climada.util.plot.geo_scatter_categorical

    Raises
    ------
    ValueError :
        If no sensitivity data is found, raise error.

    Returns
    -------
    ax: matplotlib.pyplot.axes
        The axis handle of the plot.

    See Also
    --------
    climada.util.plot.geo_scatter_categorical :
        geographical plot for categorical variable
    """
    # Largest sensitivity index per exposure point; the sentinel "None"
    # marks points without a valid maximum (cf. get_largest_si).
    eai_max_si_df = self.get_largest_si(salib_si, metric_list=["eai_exp"])
    plot_val = eai_max_si_df["param"]
    # coord_df is set by the UncImpactOutput / UncDeltaImpactOutput
    # subclasses, hence the pylint suppression on the base class.
    coord = np.array(
        [self.coord_df["latitude"], self.coord_df["longitude"]]
    ).transpose()  # pylint: disable=no-member
    if "var_name" not in kwargs:
        kwargs["var_name"] = "Input parameter with largest " + salib_si
    if "title" not in kwargs:
        kwargs["title"] = ""
    if "figsize" not in kwargs:
        kwargs["figsize"] = (8, 6)
    if "cmap" not in kwargs:
        labels = np.unique(plot_val)
        n = np.where(labels == "None")[0]
        if len(n) > 0:
            n = n[0]
            # Gray out the "None" category so it does not compete visually
            # with the real input parameters.
            cmap = mpl.colors.ListedColormap(
                cm.get_cmap(MAP_CMAP).colors[: len(labels)]
            )
            colors = list(cmap.colors)
            colors[n] = tuple(np.repeat(0.93, 3))
            cmap.colors = tuple(colors)
            kwargs["cmap"] = cmap
    ax = u_plot.geo_scatter_categorical(plot_val, coord, **kwargs)
    return ax
[docs]
def to_hdf5(self, filename=None):
    """
    Save output to .hdf5

    Parameters
    ----------
    filename : str or pathlib.Path, optional
        The filename with absolute or relative path.
        The default name is "unc_output + datetime.now() + .hdf5" and
        the default path is taken from climada.config

    Returns
    -------
    save_path : pathlib.Path
        Path to the saved file
    """
    if filename is None:
        filename = "unc_output" + dt.datetime.now().strftime("%Y-%m-%d-%H%M%S")
        filename = Path(DATA_DIR) / Path(filename)
    save_path = Path(filename).with_suffix(".hdf5")
    LOGGER.info("Writing %s", save_path)
    # BUGFIX: use a context manager so the store is closed even if a
    # write fails (previously the handle leaked on error).
    with pd.HDFStore(save_path, mode="w") as store:
        for var_name, var_val in self.__dict__.items():
            if isinstance(var_val, pd.DataFrame):
                store.put(var_name, var_val, format="fixed", complevel=9)
        # Persist the samples_df attrs (sampling method/kwargs) alongside
        # the dataframe, as pandas does not store attrs itself.
        store.get_storer("/samples_df").attrs.metadata = self.samples_df.attrs
    # Scalar metadata (unit, sensitivity settings) is written with h5py
    # into the same file.
    str_dt = h5py.special_dtype(vlen=str)
    with h5py.File(save_path, "a") as fh:
        if self.unit:
            fh["impact_unit"] = [self.unit]
        if getattr(self, "sensitivity_method", None):
            fh["sensitivity_method"] = [self.sensitivity_method]
        if getattr(self, "sensitivity_kwargs", None):
            grp = fh.create_group("sensitivity_kwargs")
            for key, value in dict(self.sensitivity_kwargs).items():
                ds = grp.create_dataset(key, (1,), dtype=str_dt)
                ds[0] = str(value)
    return save_path
[docs]
@staticmethod
def from_hdf5(filename):
    """
    Load a uncertainty and uncertainty output data from .hdf5 file

    Parameters
    ----------
    filename : str or pathlib.Path
        The filename with absolute or relative path.

    Returns
    -------
    unc_output: climada.engine.uncertainty.unc_output.UncOutput
        Uncertainty and sensitivity data loaded from .hdf5 file.
        None if the file does not exist.
    """
    filename = Path(filename).with_suffix(".hdf5")
    if not filename.exists():
        # BUGFIX: include the path in the log message (previously the
        # message gave no hint which file was missing).
        LOGGER.info("File not found: %s", filename)
        return None
    unc_data = UncOutput(pd.DataFrame())
    LOGGER.info("Reading %s", filename)
    # BUGFIX: use a context manager so the store is closed even if a
    # read fails (previously the handle leaked on error).
    with pd.HDFStore(filename, mode="r") as store:
        for var_name in store.keys():
            # Store keys are prefixed with '/'; strip it for attribute names.
            setattr(unc_data, var_name[1:], store.get(var_name))
        unc_data.samples_df.attrs = store.get_storer("/samples_df").attrs.metadata
    # Scalar metadata (unit, sensitivity settings) is read back with h5py.
    with h5py.File(filename, "r") as fh:
        if "impact_unit" in list(fh.keys()):
            unc_data.unit = fh.get("impact_unit")[0].decode("UTF-8")
        if "sensitivity_method" in list(fh.keys()):
            unc_data.sensitivity_method = fh.get("sensitivity_method")[0].decode(
                "UTF-8"
            )
        if "sensitivity_kwargs" in list(fh.keys()):
            grp = fh["sensitivity_kwargs"]
            sens_kwargs = {
                key: u_hdf5.to_string(grp.get(key)[0]) for key in grp.keys()
            }
            unc_data.sensitivity_kwargs = tuple(sens_kwargs.items())
    return unc_data
[docs]
class UncImpactOutput(UncOutput):
    """Extension of UncOutput specific for CalcImpact, returned by the
    uncertainty() method.
    """

    def __init__(
        self,
        samples_df,
        unit,
        aai_agg_unc_df,
        freq_curve_unc_df,
        eai_exp_unc_df,
        at_event_unc_df,
        coord_df,
    ):
        """Constructor

        Uncertainty output values from impact.calc for each sample

        Parameters
        ----------
        samples_df : pandas.DataFrame
            input parameters samples
        unit : str
            value unit
        aai_agg_unc_df : pandas.DataFrame
            Each row contains the value of aai_aag for one sample (row of
            samples_df)
        freq_curve_unc_df : pandas.DataFrame
            Each row contains the values of the impact exceedence frequency
            curve for one sample (row of samples_df)
        eai_exp_unc_df : pandas.DataFrame
            Each row contains the values of eai_exp for one sample (row of
            samples_df)
        at_event_unc_df : pandas.DataFrame
            Each row contains the values of at_event for one sample (row of
            samples_df)
        coord_df : pandas.DataFrame
            Coordinates of the exposure
        """
        super().__init__(samples_df, unit)
        # Register each metric's uncertainty dataframe and initialize its
        # sensitivity dataframe to None (filled by a later sensitivity
        # analysis). Insertion order is kept identical to the original
        # attribute-by-attribute assignments, as downstream code iterates
        # over __dict__.
        for metric, unc_df in (
            ("aai_agg", aai_agg_unc_df),
            ("freq_curve", freq_curve_unc_df),
            ("eai_exp", eai_exp_unc_df),
            ("at_event", at_event_unc_df),
        ):
            setattr(self, f"{metric}_unc_df", unc_df)
            setattr(self, f"{metric}_sens_df", None)
        self.coord_df = coord_df
[docs]
class UncDeltaImpactOutput(UncOutput):
    """Extension of UncOutput specific for CalcDeltaImpact, returned by the uncertainty() method."""

    def __init__(
        self,
        samples_df,
        unit,
        aai_agg_unc_df,
        freq_curve_unc_df,
        eai_exp_unc_df,
        at_event_initial_unc_df,
        at_event_final_unc_df,
        coord_df,
    ):
        """Constructor

        Uncertainty output values from impact.calc for each sample

        Parameters
        ----------
        samples_df : pandas.DataFrame
            input parameters samples
        unit : str
            value unit
        aai_agg_unc_df : pandas.DataFrame
            Each row contains the value of aai_aag for one sample (row of
            samples_df)
        freq_curve_unc_df : pandas.DataFrame
            Each row contains the values of the impact exceedence frequency
            curve for one sample (row of samples_df)
        eai_exp_unc_df : pandas.DataFrame
            Each row contains the values of eai_exp for one sample (row of
            samples_df)
        at_event_initial_unc_df : pandas.DataFrame
            Each row contains the values of at_event for one sample (row of
            samples_df)
        at_event_final_unc_df : pandas.DataFrame
            Each row contains the values of at_event for one sample (row of
            samples_df)
        coord_df : pandas.DataFrame
            Coordinates of the exposure
        """
        super().__init__(samples_df, unit)
        # Register each metric's uncertainty dataframe and initialize its
        # sensitivity dataframe to None. Insertion order matches the
        # original attribute-by-attribute assignments, as downstream code
        # iterates over __dict__.
        for metric, unc_df in (
            ("aai_agg", aai_agg_unc_df),
            ("freq_curve", freq_curve_unc_df),
            ("eai_exp", eai_exp_unc_df),
            ("at_event_initial", at_event_initial_unc_df),
            ("at_event_final", at_event_final_unc_df),
        ):
            setattr(self, f"{metric}_unc_df", unc_df)
            setattr(self, f"{metric}_sens_df", None)
        self.coord_df = coord_df
[docs]
class UncCostBenefitOutput(UncOutput):
    """Extension of UncOutput specific for CalcCostBenefit, returned by the uncertainty() method."""

    def __init__(
        self,
        samples_df,
        unit,
        imp_meas_present_unc_df,
        imp_meas_future_unc_df,
        tot_climate_risk_unc_df,
        benefit_unc_df,
        cost_ben_ratio_unc_df,
        cost_benefit_kwargs,
    ):
        """Constructor

        Uncertainty output values from cost_benefit.calc for each sample

        Parameters
        ----------
        samples_df : pandas.DataFrame
            input parameters samples
        unit : str
            value unit
        imp_meas_present_unc_df : pandas.DataFrame
            Each row contains the values of imp_meas_present for one sample
            (row of samples_df)
        imp_meas_future_unc_df : pandas.DataFrame
            Each row contains the values of imp_meas_future for one sample
            (row of samples_df)
        tot_climate_risk_unc_df : pandas.DataFrame
            Each row contains the values of tot_cliamte_risk for one sample
            (row of samples_df)
        benefit_unc_df : pandas.DataFrame
            Each row contains the values of benefit for one sample
            (row of samples_df)
        cost_ben_ratio_unc_df : pandas.DataFrame
            Each row contains the values of cost_ben_ratio for one sample
            (row of samples_df)
        cost_benefit_kwargs : pandas.DataFrame
            Each row contains the value of cost_benefit for one sample
            (row of samples_df)
        """
        super().__init__(samples_df, unit)
        # Register each metric's uncertainty dataframe and initialize its
        # sensitivity dataframe to None. Insertion order matches the
        # original attribute-by-attribute assignments, as downstream code
        # iterates over __dict__.
        for metric, unc_df in (
            ("imp_meas_present", imp_meas_present_unc_df),
            ("imp_meas_future", imp_meas_future_unc_df),
            ("tot_climate_risk", tot_climate_risk_unc_df),
            ("benefit", benefit_unc_df),
            ("cost_ben_ratio", cost_ben_ratio_unc_df),
        ):
            setattr(self, f"{metric}_unc_df", unc_df)
            setattr(self, f"{metric}_sens_df", None)
        self.cost_benefit_kwargs = cost_benefit_kwargs