Source code for climada.engine.calibration_opt

"""
This file is part of CLIMADA.

Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.

CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.

CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.

---

Impact function calibration functionalities:
    Optimization and manual calibration
"""

import datetime as dt
import copy
import itertools
import logging
import numpy as np
import pandas as pd
from scipy import interpolate
from scipy.optimize import minimize

from climada.engine import ImpactCalc
from climada.entity import ImpactFuncSet, ImpfTropCyclone, impact_funcs
from climada.engine.impact_data import emdat_impact_yearlysum  #, emdat_impact_event

LOGGER = logging.getLogger(__name__)



[docs] def calib_instance(hazard, exposure, impact_func, df_out=pd.DataFrame(), yearly_impact=False, return_cost='False'): """calculate one impact instance for the calibration algorithm and write to given DataFrame Parameters ---------- hazard : Hazard exposure : Exposure impact_func : ImpactFunc df_out : Dataframe, optional Output DataFrame with headers of columns defined and optionally with first row (index=0) defined with values. If columns "impact", "event_id", or "year" are not included, they are created here. Data like reported impacts or impact function parameters can be given here; values are preserved. yearly_impact : boolean, optional if set True, impact is returned per year, not per event return_cost : str, optional if not 'False' but any of 'R2', 'logR2', cost is returned instead of df_out Returns ------- df_out: DataFrame DataFrame with modelled impact written to rows for each year or event. """ ifs = ImpactFuncSet([impact_func]) impacts = ImpactCalc(exposures=exposure, impfset=ifs, hazard=hazard)\ .impact(assign_centroids=False) if yearly_impact: # impact per year iys = impacts.impact_per_year(all_years=True) # Loop over whole year range: if df_out.empty | df_out.index.shape[0] == 1: for cnt_, year in enumerate(np.sort(list((iys.keys())))): if cnt_ > 0: df_out.loc[cnt_] = df_out.loc[0] # copy info from first row if year in iys: df_out.loc[cnt_, 'impact_CLIMADA'] = iys[year] else: df_out.loc[cnt_, 'impact_CLIMADA'] = 0.0 df_out.loc[cnt_, 'year'] = year else: years_in_common = df_out.loc[df_out['year'].isin(np.sort(list((iys.keys())))), 'year'] for cnt_, year in years_in_common.iteritems(): df_out.loc[df_out['year'] == year, 'impact_CLIMADA'] = iys[year] else: # impact per event if df_out.empty | df_out.index.shape[0] == 1: for cnt_, impact in enumerate(impacts.at_event): if cnt_ > 0: df_out.loc[cnt_] = df_out.loc[0] # copy info from first row df_out.loc[cnt_, 'impact_CLIMADA'] = impact df_out.loc[cnt_, 'event_id'] = int(impacts.event_id[cnt_]) df_out.loc[cnt_, 'event_name'] = impacts.event_name[cnt_] df_out.loc[cnt_, 'year'] = \ dt.datetime.fromordinal(impacts.date[cnt_]).year df_out.loc[cnt_, 'date'] = impacts.date[cnt_] elif df_out.index.shape[0] == impacts.at_event.shape[0]: for cnt_, (impact, ind) in enumerate(zip(impacts.at_event, df_out.index)): df_out.loc[ind, 'impact_CLIMADA'] = impact df_out.loc[ind, 'event_id'] = int(impacts.event_id[cnt_]) df_out.loc[ind, 'event_name'] = impacts.event_name[cnt_] df_out.loc[ind, 'year'] = \ dt.datetime.fromordinal(impacts.date[cnt_]).year df_out.loc[ind, 'date'] = impacts.date[cnt_] else: raise ValueError('adding simulated impacts to reported impacts not' ' yet implemented. use yearly_impact=True or run' ' without init_impact_data.') if return_cost != 'False': df_out = calib_cost_calc(df_out, return_cost) return df_out
[docs] def init_impf(impf_name_or_instance, param_dict, df_out=pd.DataFrame(index=[0])): """create an ImpactFunc based on the parameters in param_dict using the method specified in impf_parameterisation_name and document it in df_out. Parameters ---------- impf_name_or_instance : str or ImpactFunc method of impact function parameterisation e.g. 'emanuel' or an instance of ImpactFunc param_dict : dict, optional dict of parameter_names and values e.g. {'v_thresh': 25.7, 'v_half': 70, 'scale': 1} or {'mdd_shift': 1.05, 'mdd_scale': 0.8, 'paa_shift': 1, paa_scale': 1} Returns ------- imp_fun : ImpactFunc The Impact function based on the parameterisation df_out : DataFrame Output DataFrame with headers of columns defined and with first row (index=0) defined with values. The impact function parameters from param_dict are represented here. """ impact_func_final = None if isinstance(impf_name_or_instance, str): if impf_name_or_instance == 'emanuel': impact_func_final = ImpfTropCyclone.from_emanuel_usa(**param_dict) impact_func_final.haz_type = 'TC' impact_func_final.id = 1 df_out['impact_function'] = impf_name_or_instance elif isinstance(impf_name_or_instance, impact_funcs.ImpactFunc): impact_func_final = change_impf(impf_name_or_instance, param_dict) df_out['impact_function'] = ('given_' + impact_func_final.haz_type + str(impact_func_final.id)) for key, val in param_dict.items(): df_out[key] = val return impact_func_final, df_out
[docs] def change_impf(impf_instance, param_dict): """apply a shifting or a scaling defined in param_dict to the impact function in impf_istance and return it as a new ImpactFunc object. Parameters ---------- impf_instance : ImpactFunc an instance of ImpactFunc param_dict : dict dict of parameter_names and values (interpreted as factors, 1 = neutral) e.g. {'mdd_shift': 1.05, 'mdd_scale': 0.8, 'paa_shift': 1, paa_scale': 1} Returns ------- ImpactFunc : The Impact function based on the parameterisation """ ImpactFunc_new = copy.deepcopy(impf_instance) # create higher resolution impact functions (intensity, mdd ,paa) paa_func = interpolate.interp1d(ImpactFunc_new.intensity, ImpactFunc_new.paa, fill_value='extrapolate') mdd_func = interpolate.interp1d(ImpactFunc_new.intensity, ImpactFunc_new.mdd, fill_value='extrapolate') temp_dict = dict() temp_dict['paa_intensity_ext'] = np.linspace(ImpactFunc_new.intensity.min(), ImpactFunc_new.intensity.max(), (ImpactFunc_new.intensity.shape[0] + 1) * 10 + 1) temp_dict['mdd_intensity_ext'] = np.linspace(ImpactFunc_new.intensity.min(), ImpactFunc_new.intensity.max(), (ImpactFunc_new.intensity.shape[0] + 1) * 10 + 1) temp_dict['paa_ext'] = paa_func(temp_dict['paa_intensity_ext']) temp_dict['mdd_ext'] = mdd_func(temp_dict['mdd_intensity_ext']) # apply changes given in param_dict for key, val in param_dict.items(): field_key, action = key.split('_') if action == 'shift': shift_absolut = ( ImpactFunc_new.intensity[np.nonzero(getattr(ImpactFunc_new, field_key))[0][0]] * (val - 1)) temp_dict[field_key + '_intensity_ext'] = \ temp_dict[field_key + '_intensity_ext'] + shift_absolut elif action == 'scale': temp_dict[field_key + '_ext'] = \ np.clip(temp_dict[field_key + '_ext'] * val, a_min=0, a_max=1) else: raise AttributeError('keys in param_dict not recognized. Use only:' 'paa_shift, paa_scale, mdd_shift, mdd_scale') # map changed, high resolution impact functions back to initial resolution ImpactFunc_new.intensity = np.linspace(ImpactFunc_new.intensity.min(), ImpactFunc_new.intensity.max(), (ImpactFunc_new.intensity.shape[0] + 1) * 10 + 1) paa_func_new = interpolate.interp1d(temp_dict['paa_intensity_ext'], temp_dict['paa_ext'], fill_value='extrapolate') mdd_func_new = interpolate.interp1d(temp_dict['mdd_intensity_ext'], temp_dict['mdd_ext'], fill_value='extrapolate') ImpactFunc_new.paa = paa_func_new(ImpactFunc_new.intensity) ImpactFunc_new.mdd = mdd_func_new(ImpactFunc_new.intensity) return ImpactFunc_new
[docs] def init_impact_data(hazard_type, region_ids, year_range, source_file, reference_year, impact_data_source='emdat', yearly_impact=True): """creates a dataframe containing the recorded impact data for one hazard type and one area (countries, country or local split) Parameters ---------- hazard_type : str default = 'TC', type of hazard 'WS','FL' etc. region_ids : str name the region_ids or country names year_range : list list containting start and end year. e.g. [1980, 2017] source_file : str reference_year : int impacts will be scaled to this year impact_data_source : str, optional default 'emdat', others maybe possible yearly_impact : bool, optional if set True, impact is returned per year, not per event Returns ------- df_out : pd.DataFrame Dataframe with recorded impact written to rows for each year or event. """ if impact_data_source == 'emdat': if yearly_impact: em_data = emdat_impact_yearlysum(source_file, countries=region_ids, hazard=hazard_type, year_range=year_range, reference_year=reference_year) else: raise ValueError('init_impact_data not yet implemented for yearly_impact = False.') #em_data = emdat_impact_event(source_file) else: raise ValueError('init_impact_data not yet implemented for other impact_data_sources ' 'than emdat.') return em_data
[docs] def calib_cost_calc(df_out, cost_function): """calculate the cost function of the modelled impact impact_CLIMADA and the reported impact impact_scaled in df_out Parameters ---------- df_out : pd.Dataframe DataFrame as created in calib_instance cost_function : str chooses the cost function e.g. 'R2' or 'logR2' Returns ------- cost : float The results of the cost function when comparing modelled and reported impact """ if cost_function == 'R2': cost = np.sum((pd.to_numeric(df_out['impact_scaled']) - pd.to_numeric(df_out['impact_CLIMADA']))**2) elif cost_function == 'logR2': impact1 = pd.to_numeric(df_out['impact_scaled']) impact1[impact1 <= 0] = 1 impact2 = pd.to_numeric(df_out['impact_CLIMADA']) impact2[impact2 <= 0] = 1 cost = np.sum((np.log(impact1) - np.log(impact2))**2) else: raise ValueError('This cost function is not implemented.') return cost
[docs] def calib_all(hazard, exposure, impf_name_or_instance, param_full_dict, impact_data_source, year_range, yearly_impact=True): """portrait the difference between modelled and reported impacts for all impact functions described in param_full_dict and impf_name_or_instance Parameters ---------- hazard : list or Hazard exposure : list or Exposures list or instance of exposure of full countries impf_name_or_instance: string or ImpactFunc the name of a parameterisation or an instance of class ImpactFunc e.g. 'emanuel' param_full_dict : dict a dict containing keys used for f_name_or_instance and values which are iterable (lists) e.g. {'v_thresh' : [25.7, 20], 'v_half': [70], 'scale': [1, 0.8]} impact_data_source : dict or pd.Dataframe with name of impact data source and file location or dataframe year_range : list yearly_impact : bool, optional Returns ------- df_result : pd.DataFrame df with modelled impact written to rows for each year or event. """ df_result = None # init return variable # prepare hazard and exposure region_ids = list(np.unique(exposure.region_id)) hazard_type = hazard.haz_type exposure.assign_centroids(hazard) # prepare impact data if isinstance(impact_data_source, pd.DataFrame): df_impact_data = impact_data_source else: if list(impact_data_source.keys()) == ['emdat']: df_impact_data = init_impact_data(hazard_type, region_ids, year_range, impact_data_source['emdat'], year_range[-1]) else: raise ValueError('other impact data sources not yet implemented.') params_generator = (dict(zip(param_full_dict, x)) for x in itertools.product(*param_full_dict.values())) for param_dict in params_generator: print(param_dict) df_out = copy.deepcopy(df_impact_data) impact_func_final, df_out = init_impf(impf_name_or_instance, param_dict, df_out) df_out = calib_instance(hazard, exposure, impact_func_final, df_out, yearly_impact) if df_result is None: df_result = copy.deepcopy(df_out) else: df_result = df_result.append(df_out, input) return df_result
[docs] def calib_optimize(hazard, exposure, impf_name_or_instance, param_dict, impact_data_source, year_range, yearly_impact=True, cost_fucntion='R2', show_details=False): """portrait the difference between modelled and reported impacts for all impact functions described in param_full_dict and impf_name_or_instance Parameters ---------- hazard: list or Hazard exposure: list or Exposures list or instance of exposure of full countries impf_name_or_instance: string or ImpactFunc the name of a parameterisation or an instance of class ImpactFunc e.g. 'emanuel' param_dict : dict a dict containing keys used for impf_name_or_instance and one set of values e.g. {'v_thresh': 25.7, 'v_half': 70, 'scale': 1} impact_data_source : dict or pd. dataframe with name of impact data source and file location or dataframe year_range : list yearly_impact : bool, optional cost_function : str, optional the argument for function calib_cost_calc, default 'R2' show_details : bool, optional if True, return a tuple with the parameters AND the details of the optimization like success, status, number of iterations etc Returns ------- param_dict_result : dict or tuple the parameters with the best calibration results (or a tuple with (1) the parameters and (2) the optimization output) """ param_dict_result = param_dict # prepare hazard and exposure region_ids = list(np.unique(exposure.region_id)) hazard_type = hazard.haz_type exposure.assign_centroids(hazard) # prepare impact data if isinstance(impact_data_source, pd.DataFrame): df_impact_data = impact_data_source else: if list(impact_data_source.keys()) == ['emdat']: df_impact_data = init_impact_data(hazard_type, region_ids, year_range, impact_data_source['emdat'], year_range[-1]) else: raise ValueError('other impact data sources not yet implemented.') # definie specific function to def specific_calib(values): param_dict_temp = dict(zip(param_dict.keys(), values)) print(param_dict_temp) return calib_instance(hazard, exposure, init_impf(impf_name_or_instance, param_dict_temp)[0], df_impact_data, yearly_impact=yearly_impact, return_cost=cost_fucntion) # define constraints if impf_name_or_instance == 'emanuel': cons = [{'type': 'ineq', 'fun': lambda x: -x[0] + x[1]}, {'type': 'ineq', 'fun': lambda x: -x[2] + 0.9999}, {'type': 'ineq', 'fun': lambda x: x[2]}] else: cons = [{'type': 'ineq', 'fun': lambda x: -x[0] + 2}, {'type': 'ineq', 'fun': lambda x: x[0]}, {'type': 'ineq', 'fun': lambda x: -x[1] + 2}, {'type': 'ineq', 'fun': lambda x: x[1]}] values = list(param_dict.values()) res = minimize(specific_calib, values, # bounds=bounds, # bounds=((0.0, np.inf), (0.0, np.inf), (0.0, 1.0)), constraints=cons, # method='SLSQP', method='trust-constr', options={'xtol': 1e-5, 'disp': True, 'maxiter': 500}) param_dict_result = dict(zip(param_dict.keys(), res.x)) if res.success: LOGGER.info('Optimization successfully finished.') else: LOGGER.info('Opimization did not finish successfully. Check you input' ' or consult the detailed returns (with argument' 'show_details=True) for further information.') if show_details: return param_dict_result, res return param_dict_result
# if __name__ == "__main__": # # # ## tryout calib_all # hazard = TropCyclone.from_hdf5('C:/Users/ThomasRoosli/tc_NA_hazard.hdf5') # exposure = LitPop.from_hdf5('C:/Users/ThomasRoosli/DOM_LitPop.hdf5') # impf_name_or_instance = 'emanuel' # param_full_dict = {'v_thresh': [25.7, 20], 'v_half': [70], 'scale': [1, 0.8]} # # impact_data_source = {'emdat':('D:/Documents_DATA/EM-DAT/' # '20181031_disaster_list_all_non-technological/' # 'ThomasRoosli_2018-10-31.csv')} # year_range = [2004, 2017] # yearly_impact = True # df_result = calib_all(hazard,exposure,impf_name_or_instance,param_full_dict, # impact_data_source, year_range, yearly_impact) # # # ## tryout calib_optimize # hazard = TropCyclone.from_hdf5('C:/Users/ThomasRoosli/tc_NA_hazard.hdf5') # exposure = LitPop.from_hdf5('C:/Users/ThomasRoosli/DOM_LitPop.hdf5') # impf_name_or_instance = 'emanuel' # param_dict = {'v_thresh': 25.7, 'v_half': 70, 'scale': 0.6} # year_range = [2004, 2017] # cost_function = 'R2' # show_details = True # yearly_impact = True # impact_data_source = {'emdat':('D:/Documents_DATA/EM-DAT/' # '20181031_disaster_list_all_non-technological/' # 'ThomasRoosli_2018-10-31.csv')} # param_result,result = calib_optimize(hazard,exposure,impf_name_or_instance,param_dict, # impact_data_source, year_range, yearly_impact=yearly_impact, # cost_fucntion=cost_function,show_details= show_details) # #