"""
This file is part of CLIMADA.
Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.
CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.
CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.
---
Define Uncertainty class.
"""
__all__ = [
"UncOutput",
"UncCostBenefitOutput",
"UncImpactOutput",
"UncDeltaImpactOutput",
]
import datetime as dt
import logging
from itertools import zip_longest
from pathlib import Path
import h5py
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import colormaps as cm
import climada.util.hdf5_handler as u_hdf5
from climada import CONFIG
from climada.util import plot as u_plot
from climada.util.value_representation import convert_monetary_value as u_cmv
from climada.util.value_representation import value_to_monetary_unit as u_vtm
LOGGER = logging.getLogger(__name__)

# Metrics that are multi-dimensional
METRICS_2D = ["eai_exp", "at_event"]

# Default output directory for saved uncertainty data (from climada config)
DATA_DIR = CONFIG.engine.uncertainty.local_data.user_data.dir()

FIG_W, FIG_H = 8, 5  # default figsize width/height multiplicators per column/row

MAP_CMAP = "Dark2"  # Default color map for the sensitivity map

# Table of recommended pairing between salib sampling and sensitivity methods
# NEEDS TO BE UPDATED REGULARLY!! https://salib.readthedocs.io/en/latest/api.html
# Currently, we do not support the 'delta' method due to Singular matrix issues,
SALIB_COMPATIBILITY = {
    #'delta': ['fast_sampler', 'ff', 'finite_diff', 'latin', 'morris', 'saltelli'],
    "dgsm": ["finite_diff"],
    "fast": ["fast_sampler"],
    "ff": ["ff"],
    "hdmr": ["fast_sampler", "ff", "finite_diff", "latin", "morris", "saltelli"],
    "morris": ["morris"],
    "pawn": ["fast_sampler", "ff", "finite_diff", "latin", "morris", "saltelli"],
    "rbd_fast": ["fast_sampler", "ff", "finite_diff", "latin", "morris", "saltelli"],
    "sobol": ["saltelli", "sobol"],
}
[docs]
class UncOutput:
    """
    Class to store and plot uncertainty and sensitivity analysis output data

    This is the base class to store uncertainty and sensitivity outputs of an
    analysis done on climada.engine.impact.Impact() or
    climada.engine.costbenefit.CostBenefit() object.

    Attributes
    ----------
    samples_df : pandas.DataFrame
        Values of the sampled uncertainty parameters. It has n_samples rows
        and one column per uncertainty parameter.
    sampling_method : str
        Name of the sampling method from SAlib.
        https://salib.readthedocs.io/en/latest/api.html#
    n_samples : int
        Effective number of samples (number of rows of samples_df)
    param_labels : list
        Name of all the uncertainty parameters
    distr_dict : dict
        Common flattened dictionary of all the distr_dict of all input variables.
        It represents the distribution of all the uncertainty parameters.
    problem_sa : dict
        The description of the uncertainty variables and their
        distribution as used in SALib.
        https://salib.readthedocs.io/en/latest/basics.html.
    """

    # Names of the scalar configuration attributes that accompany the
    # dataframes (cf. the pandas `_metadata` convention).
    # NOTE(review): not referenced within this file — presumably consumed
    # elsewhere (e.g. by serialization helpers); confirm before removing.
    _metadata = [
        "sampling_method",
        "sampling_kwargs",
        "sensitivity_method",
        "sensitivity_kwargs",
    ]
[docs]
def __init__(self, samples_df, unit=None):
"""
Initialize Uncertainty Data object.
Parameters
----------
samples_df : pandas.DataFrame
input parameters samples
unit : str, optional
value unit
"""
# Data
self.samples_df = samples_df
self.unit = unit
[docs]
def order_samples(self, by_parameters):
"""
Function to sort the samples dataframe.
Note: the unc_output.samples_df is ordered inplace.
Parameters
----------
by_parameters : list[string]
List of the uncertainty parameters to sort by (ordering in list is kept)
Returns
-------
None.
"""
self.samples_df.sort_values(by=by_parameters, inplace=True, axis=0)
[docs]
def get_samples_df(self):
return getattr(self, "samples_df")
[docs]
def get_unc_df(self, metric_name):
return getattr(self, f"{metric_name}_unc_df")
[docs]
def set_unc_df(self, metric_name, unc_df):
setattr(self, f"{metric_name}_unc_df", unc_df)
[docs]
def get_sens_df(self, metric_name):
return getattr(self, f"{metric_name}_sens_df")
[docs]
def set_sens_df(self, metric_name, sens_df):
setattr(self, f"{metric_name}_sens_df", sens_df)
[docs]
def check_salib(self, sensitivity_method):
    """
    Checks whether the chosen sensitivity method and the sampling
    method used to generate self.samples_df
    respect the pairing recommendation by the SALib package.

    https://salib.readthedocs.io/en/latest/api.html

    Parameters
    ----------
    sensitivity_method : str
        Name of the sensitivity analysis method.

    Returns
    -------
    bool
        True if sampling and sensitivity methods respect the recommended
        pairing.
    """
    if self.sampling_method not in SALIB_COMPATIBILITY[sensitivity_method]:
        # Only warn (do not raise): the combination may still run, it is
        # just not recommended by SALib.
        LOGGER.warning(
            "The chosen combination of sensitivity method (%s)"
            " and sampling method (%s) does not correspond to the"
            " recommendation of the salib package."  # BUGFIX: typo 'pacakge'
            "\n https://salib.readthedocs.io/en/latest/api.html",
            self.sampling_method,
            sensitivity_method,
        )
        return False
    return True
@property
def sampling_method(self):
"""
Returns the sampling method used to generate self.samples_df
Returns
-------
str :
Sampling method name
"""
return self.samples_df.attrs["sampling_method"]
@property
def sampling_kwargs(self):
"""
Returns the kwargs of the sampling method that generate self.samples_df
Returns
-------
dict
Dictionary of arguments for SALib sampling method
"""
return self.samples_df.attrs["sampling_kwargs"]
@property
def n_samples(self):
"""
The effective number of samples
Returns
-------
int :
effective number of samples
"""
return self.samples_df.shape[0]
@property
def param_labels(self):
"""
Labels of all uncertainty input parameters.
Returns
-------
list of str :
Labels of all uncertainty input parameters.
"""
return list(self.samples_df)
@property
def problem_sa(self):
"""
The description of the uncertainty variables and their
distribution as used in SALib.
https://salib.readthedocs.io/en/latest/basics.html
Returns
-------
dict :
Salib problem dictionary.
"""
return {
"num_vars": len(self.param_labels),
"names": self.param_labels,
"bounds": [[0, 1]] * len(self.param_labels),
}
@property
def uncertainty_metrics(self):
"""
Retrieve all uncertainty output metrics names
Returns
-------
unc_metric_list : [str]
List of names of attributes containing metrics uncertainty values,
without the trailing '_unc_df'
"""
unc_metric_list = []
for attr_name, attr_value in self.__dict__.items():
if isinstance(attr_value, pd.DataFrame):
if not attr_value.empty and attr_name.endswith("_unc_df"):
unc_metric_list.append(attr_name[:-7])
return unc_metric_list
@property
def sensitivity_metrics(self):
"""
Retrieve all sensitivity output metrics names
Returns
-------
sens_metric_list : [str]
List of names of attributes containing metrics sensitivity values,
without the trailing '_sens_df'
"""
sens_metric_list = []
for attr_name, attr_value in self.__dict__.items():
if isinstance(attr_value, pd.DataFrame):
if not attr_value.empty and attr_name.endswith("_sens_df"):
sens_metric_list.append(attr_name[:-8])
return sens_metric_list
[docs]
def get_uncertainty(self, metric_list=None):
"""
Returns uncertainty dataframe with values for each sample
Parameters
----------
metric_list : [str], optional
List of uncertainty metrics to consider.
The default returns all uncertainty metrics at once.
Returns
-------
pandas.DataFrame
Joint dataframe of all uncertainty values for all metrics in
the metric_list.
See Also
--------
uncertainty_metrics: list of all available uncertainty metrics
"""
if metric_list is None:
metric_list = self.uncertainty_metrics
try:
unc_df = pd.concat(
[self.get_unc_df(metric) for metric in metric_list], axis=1
)
except AttributeError:
return pd.DataFrame([])
return unc_df
[docs]
def get_sensitivity(self, salib_si, metric_list=None):
"""
Returns sensitivity index
E.g. For the sensitivity analysis method 'sobol', the choices
are ['S1', 'ST'], for 'delta' the choices are ['delta', 'S1'].
For more information see the SAlib documentation:
https://salib.readthedocs.io/en/latest/basics.html
Parameters
----------
salib_si : str
Sensitivity index
metric_list :[str], optional
List of sensitivity metrics to consider.
The default returns all sensitivity indices at once.
Returns
-------
pandas.DataFrame
Joint dataframe of the sensitvity indices for all metrics in
the metric_list
See Also
--------
sensitvity_metrics: list of all available sensitivity metrics
"""
df_all = pd.DataFrame([])
df_meta = pd.DataFrame([])
if metric_list is None:
metric_list = self.sensitivity_metrics
for metric in metric_list:
submetric_df = self.get_sens_df(metric)
if not submetric_df.empty:
submetric_df = submetric_df[submetric_df["si"] == salib_si]
df_all = pd.concat(
[df_all, submetric_df.select_dtypes("number")], axis=1
)
if df_meta.empty:
df_meta = submetric_df.drop(
submetric_df.select_dtypes("number").columns, axis=1
)
return pd.concat([df_meta, df_all], axis=1).reset_index(drop=True)
[docs]
def get_largest_si(self, salib_si, metric_list=None, threshold=0.01):
    """
    Get largest si per metric

    Parameters
    ----------
    salib_si : str
        The name of the sensitivity index to plot.
    metric_list : list of strings, optional
        List of metrics to plot the sensitivity.
        Default is None.
    threshold : float
        The minimum value a sensitivity index must have to be considered
        as the largest. The default is 0.01.

    Returns
    -------
    max_si_df : pandas.dataframe
        Dataframe with the largest si and its value per metric
    """
    if metric_list is None:
        metric_list = self.sensitivity_metrics
    si_df = self.get_sensitivity(salib_si, metric_list)
    # get max index
    si_df_num = si_df.select_dtypes("number")
    si_df_num[si_df_num < threshold] = 0  # remove noise when all si are 0
    max_si_idx = si_df_num.idxmax().to_numpy()
    max_si_val = si_df_num.max().to_numpy()
    # get parameter of max index
    # Shift indices by one so that NaN (no valid maximum, e.g. an all-NaN
    # column) maps to 0, which points at the "None" sentinel prepended to
    # the parameter arrays below.
    max_si_idx = np.nan_to_num(max_si_idx + 1).astype(int)  # Set np.nan values to 0
    param = np.concatenate((["None"], si_df["param"]))
    param_max_si = [param[idx] for idx in max_si_idx]
    param2 = np.concatenate((["None"], si_df["param2"]))
    param2_max_si = [param2[idx] for idx in max_si_idx]
    max_si_df = pd.DataFrame(
        {
            "metric": si_df_num.columns,
            "param": param_max_si,
            "param2": param2_max_si,
            "si": max_si_val,
        }
    )
    return max_si_df
[docs]
def plot_sample(self, figsize=None):
    """
    Plot the sample distributions of the uncertainty input parameters

    For each uncertainty input variable, the sample distributions is shown
    in a separate axes.

    Parameters
    ----------
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is derived from the total number of plots (nplots) as:

        >>> nrows, ncols = int(np.ceil(nplots / 3)), min(nplots, 3)
        >>> figsize = (ncols * FIG_W, nrows * FIG_H)

    Raises
    ------
    ValueError
        If no sample was computed the plot cannot be made.

    Returns
    -------
    axes: matplotlib.pyplot.axes
        The axis handle of the plot.
    """
    fontsize = 18
    if self.samples_df.empty:
        # BUGFIX: the concatenated message parts lacked a separating space
        # ("present.Please").
        raise ValueError("No uncertainty sample present. Please make a sample first.")
    nplots = len(self.param_labels)
    nrows, ncols = int(np.ceil(nplots / 3)), min(nplots, 3)
    if not figsize:
        figsize = (ncols * FIG_W, nrows * FIG_H)
    _fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    # BUGFIX: with a single uncertainty parameter plt.subplots returns a
    # bare Axes object which has no .flatten(); wrap it first.
    flat_axes = np.atleast_1d(axes).flatten()
    for ax, label in zip_longest(flat_axes, self.param_labels, fillvalue=None):
        if label is None:
            # More axes than parameters: drop the unused axes.
            ax.remove()
            continue
        self.samples_df[label].hist(ax=ax, bins=100)
        ax.set_xlabel(label)
        ax.set_ylabel("Sample count")
        ax.tick_params(labelsize=fontsize)
        for item in [ax.title, ax.xaxis.label, ax.yaxis.label]:
            item.set_fontsize(fontsize)
    plt.tight_layout()
    return axes
[docs]
def plot_uncertainty(
    self,
    metric_list=None,
    orig_list=None,
    figsize=None,
    log=False,
    axes=None,
    calc_delta=False,
):
    """
    Plot the uncertainty distribution

    For each risk metric, a separate axes is used to plot the uncertainty
    distribution of the output values obtained over the sampled
    input parameters.

    Parameters
    ----------
    metric_list : list[str], optional
        List of metrics to plot the distribution.
        The default is None.
    orig_list : list[float], optional
        List of the original (without uncertainty) values for each
        sub-metric of the metrics in metric_list. The ordering is identical.
        The default is None.
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is derived from the total number of plots (nplots) as:

        >>> nrows, ncols = int(np.ceil(nplots / 2)), min(nplots, 2)
        >>> figsize = (ncols * FIG_W, nrows * FIG_H)
    log : boolean, optional
        Use log10 scale for x axis. Default is False.
    axes : matplotlib.pyplot.axes, optional
        Axes handles to use for the plot. The default is None.
    calc_delta : boolean, optional
        Adapt x axis label for CalcDeltaImpact unc_output. Default is False.

    Raises
    ------
    ValueError
        If no metric distribution was computed the plot cannot be made.

    Returns
    -------
    axes : matplotlib.pyplot.axes
        The axes handle of the plot.

    See Also
    --------
    uncertainty_metrics : list of all available uncertainty metrics
    """
    # BUGFIX (docs): the docstring previously stated the layout formula
    # with 3 columns, while the code lays out 2 columns per row.
    fontsize = 18  # default label fontsize
    if not self.uncertainty_metrics:
        raise ValueError(
            "No uncertainty data present for these metrics. "
            "Please run an uncertainty analysis first."
        )
    if orig_list is None:
        orig_list = []
    if metric_list is None:
        # By default plot only scalar (1D) metrics.
        metric_list = [
            metric for metric in self.uncertainty_metrics if metric not in METRICS_2D
        ]
    unc_df = self.get_uncertainty(metric_list)
    if unc_df.empty:
        raise AttributeError(
            f"The listed metrics {metric_list} either "
            "do not exist or have no uncertainty values."
        )
    if log:
        unc_df_plt = unc_df.apply(np.log10).copy()
        unc_df_plt = unc_df_plt.replace([np.inf, -np.inf], np.nan)
    else:
        unc_df_plt = unc_df.copy()
    cols = unc_df_plt.columns
    nplots = len(cols)
    nrows, ncols = int(np.ceil(nplots / 2)), min(nplots, 2)
    if axes is None:
        if not figsize:
            figsize = (ncols * FIG_W, nrows * FIG_H)
        _fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    if nplots > 1:
        flat_axes = axes.flatten()
    else:
        flat_axes = np.array([axes])
    for ax, col, orig_val in zip_longest(flat_axes, cols, orig_list, fillvalue=None):
        if col is None:
            # More axes than metrics: drop the unused axes.
            if ax is not None:
                ax.remove()
            continue
        # Rescale values to a common monetary magnitude (e.g. millions).
        # BUGFIX: this conversion was previously computed twice (a second,
        # identical u_vtm call followed the no-data guard), together with
        # an unreachable 'data.empty' branch; both removed.
        data, m_unit = u_vtm(unc_df_plt[col])
        data = pd.Series(data)
        # Check if the column data is empty or contains only NaNs
        if data.empty or data.isna().all() or data.dropna().shape[0] < 2:
            print(f"No data to plot for '{col}'.")
            if ax is not None:
                ax.text(
                    0.5,
                    0.5,
                    "No data to plot",
                    fontsize=18,
                    horizontalalignment="center",
                    verticalalignment="center",
                    transform=ax.transAxes,
                )
                ax.set_xlabel(col)
                ax.set_ylabel("density of samples")
                ax.tick_params(labelsize=fontsize)
                for item in [ax.title, ax.xaxis.label, ax.yaxis.label]:
                    item.set_fontsize(fontsize)
            continue
        data.hist(
            ax=ax,
            bins=30,
            density=True,
            histtype="bar",
            color="lightsteelblue",
            edgecolor="black",
        )
        try:
            data.plot.kde(ax=ax, color="darkblue", linewidth=4, label="")
        except np.linalg.LinAlgError:
            # KDE fails on degenerate (e.g. zero-variance) data;
            # the histogram alone is sufficient then.
            pass
        avg, std = data.mean(), data.std()
        _, ymax = ax.get_ylim()
        if log:
            # NOTE(review): in log mode the data is already log10-scaled,
            # so this applies log10 a second time to the mean — confirm
            # intended behavior before changing.
            avg_plot = np.log10(avg)
        else:
            avg_plot = avg
        ax.axvline(
            avg_plot,
            color="darkorange",
            linestyle="dashed",
            linewidth=2,
            label="avg=%.2f%s" % (avg, m_unit),
        )
        if orig_val is not None:
            if log:
                orig_plot = np.log10(orig_val)
            else:
                orig_plot = orig_val
            [orig_plot] = u_cmv(orig_plot, m_unit)
            ax.axvline(
                orig_plot,
                color="green",
                linestyle="dotted",
                linewidth=2,
                label="orig=%.2f%s" % (orig_plot, m_unit),
            )
        if log:
            std_m, std_p = np.log10(avg - std), np.log10(avg + std)
        else:
            std_m, std_p = avg - std, avg + std
        # Horizontal bar marking +- one standard deviation.
        ax.plot(
            [std_m, std_p],
            [0.3 * ymax, 0.3 * ymax],
            color="black",
            label="std=%.2f%s" % (std, m_unit),
        )
        xlabel = col + " [" + m_unit + " " + self.unit + "] "
        if calc_delta:
            # Modify the xlabel when calc_delta is True
            xlabel = col + " change [%]"
        if log:
            ax.set_xlabel(xlabel + " (log10 scale)")
        else:
            ax.set_xlabel(xlabel)
        ax.set_ylabel("density of samples")
        ax.legend(fontsize=fontsize - 2)
        ax.tick_params(labelsize=fontsize)
        for item in [ax.title, ax.xaxis.label, ax.yaxis.label]:
            item.set_fontsize(fontsize)
        ax.set_xlim([data.min(), data.max()])
    plt.tight_layout()
    return axes
[docs]
def plot_rp_uncertainty(
    self, orig_list=None, figsize=(16, 6), axes=None, calc_delta=False
):
    """
    Plot the distribution of return period uncertainty

    Parameters
    ----------
    orig_list : list[float], optional
        List of the original (without uncertainty) values for each
        sub-metric of the metrics in metric_list. The ordering is identical.
        The default is None.
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is (16, 6)
    axes : matplotlib.pyplot.axes, optional
        Axes handles to use for the plot. The default is None.
    calc_delta : boolean, optional
        Adapt axis labels for CalcDeltaImpact unc_output. Default is False.

    Raises
    ------
    ValueError
        If no metric distribution was computed the plot cannot be made.

    Returns
    -------
    axes : matplotlib.pyplot.axes
        The axis handle of the plot.
    """
    try:
        unc_df = self.freq_curve_unc_df
    except AttributeError:
        unc_df = None
    if unc_df is None or unc_df.empty:
        raise ValueError(
            "No return period uncertainty data present "
            "Please run an uncertainty analysis with the desired "
            "return period specified."
        )
    add_orig = True
    if orig_list is None:
        add_orig = False
    if axes is None:
        _fig, axes = plt.subplots(figsize=figsize, nrows=1, ncols=2)
    # One common monetary scaling for both subplots, derived from the
    # global min/max of all return-period columns.
    [min_l, max_l], m_unit = u_vtm(
        [unc_df.min().min(), unc_df.max().max()], n_sig_dig=4
    )
    # Plotting for the first axes: one histogram "ridge" per return
    # period, stacked vertically at offsets of 2*n.
    ax = axes[0]
    prop_cycle = plt.rcParams["axes.prop_cycle"]
    colors = prop_cycle.by_key()["color"]
    for n, (_name, values) in enumerate(unc_df.items()):
        if values.isna().all() or len(values.dropna()) < 2:
            print(f"Skipping plot for '{_name}': insufficient data.")
            continue
        values = u_cmv(values, m_unit, n_sig_dig=4)
        count, division = np.histogram(values, bins=100)
        # Normalize so each ridge has maximum height 1.
        count = count / count.max()
        # Bin centers as x-coordinates.
        losses = [
            (bin_i + bin_f) / 2
            for (bin_i, bin_f) in zip(division[:-1], division[1:])
        ]
        ax.plot([min_l, max_l], [2 * n, 2 * n], color="k", alpha=0.5)
        ax.fill_between(losses, count + 2 * n, 2 * n)
        if add_orig:
            [orig_val] = u_cmv(orig_list[n], m_unit, n_sig_dig=4)
            ax.plot(
                [orig_val, orig_val],
                [2 * n, 2 * (n + 1)],
                color=colors[n],
                linestyle="dotted",
                linewidth=2,
                label="orig=%.2f%s" % (orig_val, m_unit),
            )
    ax.set_xlim(min_l, max_l)
    ax.set_ylim(0, 2 * unc_df.shape[1])
    ax.set_yticks(np.arange(0, 2 * unc_df.shape[1], 2))
    # Column names look like 'rp10'; strip the 'rp' prefix for tick labels.
    ax.set_yticklabels([s[2:] for s in unc_df.columns])
    ax.legend(loc="lower right")
    # Set x-axis label for the first axes
    if calc_delta:
        ax.set_xlabel("Impact change [%]")
    else:
        ax.set_xlabel("Impact [%s %s]" % (m_unit, self.unit))
    ax.set_ylabel("Return period [years]")
    # Plotting for the second axes: 5%/50%/95% quantile band of the
    # exceedance frequency curve across all samples.
    ax = axes[1]
    high = u_cmv(
        self.get_unc_df("freq_curve").quantile(0.95).values, m_unit, n_sig_dig=4
    )
    middle = u_cmv(
        self.get_unc_df("freq_curve").quantile(0.5).values, m_unit, n_sig_dig=4
    )
    low = u_cmv(
        self.get_unc_df("freq_curve").quantile(0.05).values, m_unit, n_sig_dig=4
    )
    x = [float(rp[2:]) for rp in unc_df.columns]
    ax.plot(
        x, high, linestyle="--", color="blue", alpha=0.5, label="0.95 percentile"
    )
    ax.plot(x, middle, label="0.5 percentile")
    ax.plot(
        x,
        low,
        linestyle="dashdot",
        color="blue",
        alpha=0.5,
        label="0.05 percentile",
    )
    ax.fill_between(x, low, high, alpha=0.2)
    if add_orig:
        ax.plot(
            x,
            u_cmv(orig_list, m_unit, n_sig_dig=4),
            color="green",
            linestyle="dotted",
            label="orig",
        )
    ax.set_xlabel("Return period [year]")
    # Set y-axis label for the second axes
    if calc_delta:
        ax.set_ylabel("Impact change [%]")
    else:
        ax.set_ylabel("Impact [" + m_unit + " " + self.unit + "]")
    ax.legend()
    return axes
[docs]
def plot_sensitivity(
    self,
    salib_si="S1",
    salib_si_conf="S1_conf",
    metric_list=None,
    figsize=None,
    axes=None,
    **kwargs,
):
    """
    Bar plot of a first order sensitivity index

    For each metric, the sensitivity indices are plotted in a
    separate axes.

    This requires that a senstivity analysis was already
    performed.

    E.g. For the sensitivity analysis method 'sobol', the choices
    are ['S1', 'ST'], for 'delta' the choices are ['delta', 'S1'].

    Note that not all sensitivity indices have a confidence interval.

    For more information see the SAlib documentation:
    https://salib.readthedocs.io/en/latest/basics.html

    Parameters
    ----------
    salib_si : string, optional
        The first order (one value per metric output) sensitivity index
        to plot. The default is S1.
    salib_si_conf : string, optional
        The confidence value for the first order sensitivity index
        to plot. The default is S1_conf.
    metric_list : list of strings, optional
        List of metrics to plot the sensitivity. If a metric is not found
        it is ignored.
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is derived from the total number of plots (nplots) as:

        >>> nrows, ncols = int(np.ceil(nplots / 2)), min(nplots, 2)
        >>> figsize = (ncols * FIG_W, nrows * FIG_H)
    axes : matplotlib.pyplot.axes, optional
        Axes handles to use for the plot. The default is None.
    kwargs :
        Keyword arguments passed on to
        pandas.DataFrame.plot(kind='bar')

    Raises
    ------
    ValueError :
        If no sensitivity is available the plot cannot be made.

    Returns
    -------
    axes : matplotlib.pyplot.axes
        The axes handle of the plot.

    See Also
    --------
    sensitvity_metrics :
        list of all available sensitivity metrics
    """
    if not self.sensitivity_metrics:
        raise ValueError(
            "No sensitivity present. " "Please run a sensitivity analysis first."
        )
    if metric_list is None:
        # By default plot only scalar (1D) metrics.
        metric_list = [
            metric for metric in self.sensitivity_metrics if metric not in METRICS_2D
        ]
    nplots = len(metric_list)
    nrows, ncols = int(np.ceil(nplots / 2)), min(nplots, 2)
    if axes is None:
        if not figsize:
            figsize = (ncols * FIG_W, nrows * FIG_H)
        _fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    if nplots > 1:
        flat_axes = axes.flatten()
    else:
        flat_axes = np.array([axes])
    for ax, metric in zip(flat_axes, metric_list):
        df_S = self.get_sensitivity(salib_si, [metric]).select_dtypes("number")
        if not df_S.columns[df_S.isnull().all()].empty:
            LOGGER.warning(
                "All-NaN columns encountered: %s",
                list(df_S.columns[df_S.isnull().all()]),
            )
            df_S = df_S.loc[:, df_S.notnull().any()]
        if df_S.empty:
            ax.set_xlabel("Input parameter")
            ax.remove()
            continue
        df_S_conf = self.get_sensitivity(salib_si_conf, [metric]).select_dtypes(
            "number"
        )
        df_S_conf = df_S_conf.loc[:, df_S.columns]
        # BUGFIX: missing 'else' — the bars were previously drawn twice when
        # no confidence data was available, and the empty dataframe was still
        # passed as yerr. Plot exactly once, with error bars only if present.
        if df_S_conf.empty:
            df_S.plot(ax=ax, kind="bar", **kwargs)
        else:
            df_S.plot(ax=ax, kind="bar", yerr=df_S_conf, **kwargs)
        ax.set_xticklabels(self.param_labels, rotation=0)
        ax.set_xlabel("Input parameter")
        ax.set_ylabel(salib_si)
    plt.tight_layout()
    return axes
[docs]
def plot_sensitivity_second_order(
    self,
    salib_si="S2",
    salib_si_conf="S2_conf",
    metric_list=None,
    figsize=None,
    axes=None,
    **kwargs,
):
    """
    Plot second order sensitivity indices as matrix.

    For each metric, the sensitivity indices are plotted in a
    separate axes.

    E.g. For the sensitivity analysis method 'sobol', the choices
    are ['S2', 'S2_conf'].

    Note that not all sensitivity indices have a confidence interval.

    For more information see the SAlib documentation:
    https://salib.readthedocs.io/en/latest/basics.html

    Parameters
    ----------
    salib_si : string, optional
        The second order sensitivity index to plot.
        The default is S2.
    salib_si_conf : string, optional
        The confidence value for the sensitivity index salib_si
        to plot.
        The default is S2_conf.
    metric_list : list of strings, optional
        List of metrics to plot the sensitivity. If a metric is not found
        it is ignored. Default is all 1D metrics.
    figsize : tuple(int or float, int or float), optional
        The figsize argument of matplotlib.pyplot.subplots()
        The default is derived from the total number of plots (nplots) as:

        >>> nrows, ncols = int(np.ceil(nplots / 3)), min(nplots, 3)
        >>> figsize = (ncols * 5, nrows * 5)
    axes : matplotlib.pyplot.axes, optional
        Axes handles to use for the plot. The default is None.
    kwargs :
        Keyword arguments passed on to matplotlib.pyplot.imshow()

    Raises
    ------
    ValueError :
        If no sensitivity is available the plot cannot be made.

    Returns
    -------
    axes: matplotlib.pyplot.axes
        The axes handle of the plot.

    See Also
    --------
    sensitvity_metrics :
        list of all available sensitivity metrics
    """
    if not self.sensitivity_metrics:
        raise ValueError(
            "No sensitivity present for this metrics. "
            "Please run a sensitivity analysis first."
        )
    if metric_list is None:
        metric_list = [
            metric
            for metric in self.sensitivity_metrics
            if metric not in METRICS_2D
        ]
    if "cmap" not in kwargs.keys():
        kwargs["cmap"] = "summer"
    # all the lowest level metrics (e.g. rp10) directly or as
    # submetrics of the metrics in metrics_list
    df_S = self.get_sensitivity(salib_si, metric_list).select_dtypes("number")
    df_S_conf = self.get_sensitivity(salib_si_conf, metric_list).select_dtypes(
        "number"
    )
    nplots = len(df_S.columns)
    nrows, ncols = int(np.ceil(nplots / 3)), min(nplots, 3)
    if axes is None:
        if not figsize:
            figsize = (ncols * 5, nrows * 5)
        _fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    # plt.subplots returns a bare Axes (not an array) for a single plot.
    if nplots > 1:
        flat_axes = axes.flatten()
    else:
        flat_axes = np.array([axes])
    for ax, submetric in zip(flat_axes, df_S.columns):
        # The flattened index values are reshaped to a square matrix
        # (n_params x n_params); only the upper triangle carries data.
        # Make matrix symmetric
        s2_matrix = np.triu(
            np.reshape(df_S[submetric].to_numpy(), (len(self.param_labels), -1))
        )
        s2_matrix = s2_matrix + s2_matrix.T - np.diag(np.diag(s2_matrix))
        ax.imshow(s2_matrix, **kwargs)
        s2_conf_matrix = np.triu(
            np.reshape(
                df_S_conf[submetric].to_numpy(), (len(self.param_labels), -1)
            )
        )
        s2_conf_matrix = (
            s2_conf_matrix + s2_conf_matrix.T - np.diag(np.diag(s2_conf_matrix))
        )
        # Annotate each cell with "value ± confidence" (NaN cells as 'nan').
        for i in range(len(s2_matrix)):
            for j in range(len(s2_matrix)):
                if np.isnan(s2_matrix[i, j]):
                    ax.text(
                        j,
                        i,
                        np.nan,
                        ha="center",
                        va="center",
                        color="k",
                        fontsize="medium",
                    )
                else:
                    ax.text(
                        j,
                        i,
                        str(round(s2_matrix[i, j], 2))
                        + "\n\u00B1"  # \u00B1 = +-
                        + str(round(s2_conf_matrix[i, j], 2)),
                        ha="center",
                        va="center",
                        color="k",
                        fontsize="medium",
                    )
        ax.set_title(salib_si + " - " + submetric, fontsize=18)
        labels = self.param_labels
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, fontsize=16)
        ax.set_yticklabels(labels, fontsize=16)
    plt.tight_layout()
    return axes
[docs]
def plot_sensitivity_map(self, salib_si="S1", **kwargs):
    """
    Plot a map of the largest sensitivity index in each exposure point

    Requires the uncertainty distribution for eai_exp.

    Parameters
    ----------
    salib_si : str, optional
        The name of the sensitivity index to plot.
        The default is 'S1'.
    kwargs :
        Keyword arguments passed on to
        climada.util.plot.geo_scatter_categorical

    Raises
    ------
    ValueError :
        If no sensitivity data is found, raise error.

    Returns
    -------
    ax: matplotlib.pyplot.axes
        The axis handle of the plot.

    See Also
    --------
    climada.util.plot.geo_scatter_categorical :
        geographical plot for categorical variable
    """
    # Largest sensitivity index per exposure point; the sentinel "None"
    # marks points without a valid maximum (cf. get_largest_si).
    eai_max_si_df = self.get_largest_si(salib_si, metric_list=["eai_exp"])
    plot_val = eai_max_si_df["param"]
    # coord_df is set by the UncImpactOutput / UncDeltaImpactOutput
    # subclasses, hence the pylint suppression on the base class.
    coord = np.array(
        [self.coord_df["latitude"], self.coord_df["longitude"]]
    ).transpose()  # pylint: disable=no-member
    if "var_name" not in kwargs:
        kwargs["var_name"] = "Input parameter with largest " + salib_si
    if "title" not in kwargs:
        kwargs["title"] = ""
    if "figsize" not in kwargs:
        kwargs["figsize"] = (8, 6)
    if "cmap" not in kwargs:
        labels = np.unique(plot_val)
        n = np.where(labels == "None")[0]
        if len(n) > 0:
            n = n[0]
            # Gray out the "None" category so it does not compete visually
            # with the real input parameters.
            cmap = mpl.colors.ListedColormap(
                cm.get_cmap(MAP_CMAP).colors[: len(labels)]
            )
            colors = list(cmap.colors)
            colors[n] = tuple(np.repeat(0.93, 3))
            cmap.colors = tuple(colors)
            kwargs["cmap"] = cmap
    ax = u_plot.geo_scatter_categorical(plot_val, coord, **kwargs)
    return ax
[docs]
def to_hdf5(self, filename=None):
    """
    Save output to .hdf5

    Parameters
    ----------
    filename : str or pathlib.Path, optional
        The filename with absolute or relative path.
        The default name is "unc_output + datetime.now() + .hdf5" and
        the default path is taken from climada.config

    Returns
    -------
    save_path : pathlib.Path
        Path to the saved file
    """
    if filename is None:
        filename = "unc_output" + dt.datetime.now().strftime("%Y-%m-%d-%H%M%S")
        filename = Path(DATA_DIR) / Path(filename)
    save_path = Path(filename).with_suffix(".hdf5")
    LOGGER.info("Writing %s", save_path)
    # BUGFIX: use a context manager so the store is closed even if a
    # write fails (previously the handle leaked on error).
    with pd.HDFStore(save_path, mode="w") as store:
        for var_name, var_val in self.__dict__.items():
            if isinstance(var_val, pd.DataFrame):
                store.put(var_name, var_val, format="fixed", complevel=9)
        # Persist the samples_df attrs (sampling method/kwargs) alongside
        # the dataframe, as pandas does not store attrs itself.
        store.get_storer("/samples_df").attrs.metadata = self.samples_df.attrs
    # Scalar metadata (unit, sensitivity settings) is written with h5py
    # into the same file.
    str_dt = h5py.special_dtype(vlen=str)
    with h5py.File(save_path, "a") as fh:
        if self.unit:
            fh["impact_unit"] = [self.unit]
        if getattr(self, "sensitivity_method", None):
            fh["sensitivity_method"] = [self.sensitivity_method]
        if getattr(self, "sensitivity_kwargs", None):
            grp = fh.create_group("sensitivity_kwargs")
            for key, value in dict(self.sensitivity_kwargs).items():
                ds = grp.create_dataset(key, (1,), dtype=str_dt)
                ds[0] = str(value)
    return save_path
[docs]
@staticmethod
def from_hdf5(filename):
    """
    Load a uncertainty and uncertainty output data from .hdf5 file

    Parameters
    ----------
    filename : str or pathlib.Path
        The filename with absolute or relative path.

    Returns
    -------
    unc_output: climada.engine.uncertainty.unc_output.UncOutput
        Uncertainty and sensitivity data loaded from .hdf5 file.
        None if the file does not exist.
    """
    filename = Path(filename).with_suffix(".hdf5")
    if not filename.exists():
        # BUGFIX: include the path in the log message (previously the
        # message gave no hint which file was missing).
        LOGGER.info("File not found: %s", filename)
        return None
    unc_data = UncOutput(pd.DataFrame())
    LOGGER.info("Reading %s", filename)
    # BUGFIX: use a context manager so the store is closed even if a
    # read fails (previously the handle leaked on error).
    with pd.HDFStore(filename, mode="r") as store:
        for var_name in store.keys():
            # Store keys are prefixed with '/'; strip it for attribute names.
            setattr(unc_data, var_name[1:], store.get(var_name))
        unc_data.samples_df.attrs = store.get_storer("/samples_df").attrs.metadata
    # Scalar metadata (unit, sensitivity settings) is read back with h5py.
    with h5py.File(filename, "r") as fh:
        if "impact_unit" in list(fh.keys()):
            unc_data.unit = fh.get("impact_unit")[0].decode("UTF-8")
        if "sensitivity_method" in list(fh.keys()):
            unc_data.sensitivity_method = fh.get("sensitivity_method")[0].decode(
                "UTF-8"
            )
        if "sensitivity_kwargs" in list(fh.keys()):
            grp = fh["sensitivity_kwargs"]
            sens_kwargs = {
                key: u_hdf5.to_string(grp.get(key)[0]) for key in grp.keys()
            }
            unc_data.sensitivity_kwargs = tuple(sens_kwargs.items())
    return unc_data
[docs]
class UncImpactOutput(UncOutput):
    """Extension of UncOutput specific for CalcImpact, returned by the
    uncertainty() method.
    """

    def __init__(
        self,
        samples_df,
        unit,
        aai_agg_unc_df,
        freq_curve_unc_df,
        eai_exp_unc_df,
        at_event_unc_df,
        coord_df,
    ):
        """Constructor

        Uncertainty output values from impact.calc for each sample

        Parameters
        ----------
        samples_df : pandas.DataFrame
            input parameters samples
        unit : str
            value unit
        aai_agg_unc_df : pandas.DataFrame
            Each row contains the value of aai_aag for one sample (row of
            samples_df)
        freq_curve_unc_df : pandas.DataFrame
            Each row contains the values of the impact exceedence frequency
            curve for one sample (row of samples_df)
        eai_exp_unc_df : pandas.DataFrame
            Each row contains the values of eai_exp for one sample (row of
            samples_df)
        at_event_unc_df : pandas.DataFrame
            Each row contains the values of at_event for one sample (row of
            samples_df)
        coord_df : pandas.DataFrame
            Coordinates of the exposure
        """
        super().__init__(samples_df, unit)
        # Register each metric's uncertainty dataframe and initialize its
        # sensitivity dataframe to None (filled by a later sensitivity
        # analysis). Insertion order is kept identical to the original
        # attribute-by-attribute assignments, as downstream code iterates
        # over __dict__.
        for metric, unc_df in (
            ("aai_agg", aai_agg_unc_df),
            ("freq_curve", freq_curve_unc_df),
            ("eai_exp", eai_exp_unc_df),
            ("at_event", at_event_unc_df),
        ):
            setattr(self, f"{metric}_unc_df", unc_df)
            setattr(self, f"{metric}_sens_df", None)
        self.coord_df = coord_df
[docs]
class UncDeltaImpactOutput(UncOutput):
    """Extension of UncOutput specific for CalcDeltaImpact, returned by the uncertainty() method."""

    def __init__(
        self,
        samples_df,
        unit,
        aai_agg_unc_df,
        freq_curve_unc_df,
        eai_exp_unc_df,
        at_event_initial_unc_df,
        at_event_final_unc_df,
        coord_df,
    ):
        """Constructor

        Uncertainty output values from impact.calc for each sample

        Parameters
        ----------
        samples_df : pandas.DataFrame
            input parameters samples
        unit : str
            value unit
        aai_agg_unc_df : pandas.DataFrame
            Each row contains the value of aai_aag for one sample (row of
            samples_df)
        freq_curve_unc_df : pandas.DataFrame
            Each row contains the values of the impact exceedence frequency
            curve for one sample (row of samples_df)
        eai_exp_unc_df : pandas.DataFrame
            Each row contains the values of eai_exp for one sample (row of
            samples_df)
        at_event_initial_unc_df : pandas.DataFrame
            Each row contains the values of at_event for one sample (row of
            samples_df)
        at_event_final_unc_df : pandas.DataFrame
            Each row contains the values of at_event for one sample (row of
            samples_df)
        coord_df : pandas.DataFrame
            Coordinates of the exposure
        """
        super().__init__(samples_df, unit)
        # Register each metric's uncertainty dataframe and initialize its
        # sensitivity dataframe to None. Insertion order matches the
        # original attribute-by-attribute assignments, as downstream code
        # iterates over __dict__.
        for metric, unc_df in (
            ("aai_agg", aai_agg_unc_df),
            ("freq_curve", freq_curve_unc_df),
            ("eai_exp", eai_exp_unc_df),
            ("at_event_initial", at_event_initial_unc_df),
            ("at_event_final", at_event_final_unc_df),
        ):
            setattr(self, f"{metric}_unc_df", unc_df)
            setattr(self, f"{metric}_sens_df", None)
        self.coord_df = coord_df
[docs]
class UncCostBenefitOutput(UncOutput):
    """Extension of UncOutput specific for CalcCostBenefit, returned by the uncertainty() method."""

    def __init__(
        self,
        samples_df,
        unit,
        imp_meas_present_unc_df,
        imp_meas_future_unc_df,
        tot_climate_risk_unc_df,
        benefit_unc_df,
        cost_ben_ratio_unc_df,
        cost_benefit_kwargs,
    ):
        """Constructor

        Uncertainty output values from cost_benefit.calc for each sample

        Parameters
        ----------
        samples_df : pandas.DataFrame
            input parameters samples
        unit : str
            value unit
        imp_meas_present_unc_df : pandas.DataFrame
            Each row contains the values of imp_meas_present for one sample
            (row of samples_df)
        imp_meas_future_unc_df : pandas.DataFrame
            Each row contains the values of imp_meas_future for one sample
            (row of samples_df)
        tot_climate_risk_unc_df : pandas.DataFrame
            Each row contains the values of tot_cliamte_risk for one sample
            (row of samples_df)
        benefit_unc_df : pandas.DataFrame
            Each row contains the values of benefit for one sample
            (row of samples_df)
        cost_ben_ratio_unc_df : pandas.DataFrame
            Each row contains the values of cost_ben_ratio for one sample
            (row of samples_df)
        cost_benefit_kwargs : pandas.DataFrame
            Each row contains the value of cost_benefit for one sample
            (row of samples_df)
        """
        super().__init__(samples_df, unit)
        # Register each metric's uncertainty dataframe and initialize its
        # sensitivity dataframe to None. Insertion order matches the
        # original attribute-by-attribute assignments, as downstream code
        # iterates over __dict__.
        for metric, unc_df in (
            ("imp_meas_present", imp_meas_present_unc_df),
            ("imp_meas_future", imp_meas_future_unc_df),
            ("tot_climate_risk", tot_climate_risk_unc_df),
            ("benefit", benefit_unc_df),
            ("cost_ben_ratio", cost_ben_ratio_unc_df),
        ):
            setattr(self, f"{metric}_unc_df", unc_df)
            setattr(self, f"{metric}_sens_df", None)
        self.cost_benefit_kwargs = cost_benefit_kwargs