Source code for climada.util.yearsets

"""
This file is part of CLIMADA.
Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.
CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.
CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.
---
Define functions to handle impact_yearsets
"""

import copy
import logging
import numpy as np
from numpy.random import default_rng

import climada.util.dates_times as u_dt

LOGGER = logging.getLogger(__name__)

def impact_yearset(imp, sampled_years, lam=None, correction_fac=True, seed=None):
    """Create a yearset of impacts (yimp) containing a probabilistic impact for each year
    in the sampled_years list by sampling events from the impact received as input with a
    Poisson distribution centered around lam per year (lam = sum(imp.frequency)).
    In contrast to the expected annual impact (eai), yimp contains impact values that
    differ among years. When correction_fac is True, the yimp is scaled such
    that the average over all years is equal to the eai.

    Parameters
    ----------
    imp : climada.engine.Impact()
        impact object containing impacts per event
    sampled_years : list
        A list of years that shall be covered by the resulting yimp.
    seed : Any, optional
        seed for the default bit generator
        default: None

    Optional parameters
        lam : float
            The applied Poisson distribution is centered around lam events per year.
            If no lambda value is given, the default lam = sum(imp.frequency) is used.
        correction_fac : boolean
            If True a correction factor is applied to the resulting yimp. It is
            scaled in such a way that the expected annual impact (eai) of the yimp
            equals the eai of the input impact.

    Returns
    -------
    yimp : climada.engine.Impact()
        yearset of impacts containing annual impacts for all sampled_years
    sampling_vect : 2D array
        The sampling vector specifies how to sample the yimp, it consists of one
        sub-array per sampled_year, which contains the event_ids of the events used to
        calculate the annual impacts.
        Can be used to re-create the exact same yimp.
    """

    n_sampled_years = len(sampled_years)

    # create sampling vector
    if not lam:
        lam = np.sum(imp.frequency)
    events_per_year = sample_from_poisson(n_sampled_years, lam, seed=seed)
    sampling_vect = sample_events(events_per_year, imp.frequency, seed=seed)

    # compute impact per sampled_year
    imp_per_year = compute_imp_per_year(imp, sampling_vect)

    # copy imp object as basis for the yimp object
    yimp = copy.deepcopy(imp)

    # save imp_per_year in yimp
    if correction_fac:  # adjust for sampling error
        yimp.at_event = imp_per_year / calculate_correction_fac(imp_per_year, imp)
    else:
        yimp.at_event = imp_per_year

    # save calculations in yimp
    yimp.event_id = np.arange(1, n_sampled_years + 1)
    yimp.date = u_dt.str_to_date([str(date) + '-01-01' for date in sampled_years])
    yimp.frequency = np.ones(n_sampled_years) * sum(
        len(row) for row in sampling_vect) / n_sampled_years

    return yimp, sampling_vect
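A minimal usage sketch, not part of the module: the impact object below is a stand-in that exposes only the frequency and at_event attributes this function actually reads; in practice a full climada.engine.Impact is passed.

import numpy as np
from types import SimpleNamespace

from climada.util import yearsets

# stand-in impact: 20 events with frequency 0.1/year each, i.e. roughly 2 events per year
imp = SimpleNamespace(frequency=np.full(20, 0.1),
                      at_event=np.linspace(1e6, 1e8, 20))
yimp, sampling_vect = yearsets.impact_yearset(imp, sampled_years=list(range(2001, 2011)),
                                              correction_fac=False, seed=42)
# yimp.at_event holds one impact value per sampled year; without the correction factor
# their mean only approximates sum(imp.frequency * imp.at_event) up to sampling error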
def impact_yearset_from_sampling_vect(imp, sampled_years, sampling_vect, correction_fac=True):
    """Create a yearset of impacts (yimp) containing a probabilistic impact for each year
    in the sampled_years list by sampling events from the impact received as input following
    the sampling vector provided.
    In contrast to the expected annual impact (eai), yimp contains impact values that
    differ among years. When correction_fac is True, the yimp is scaled such
    that the average over all years is equal to the eai.

    Parameters
    ----------
    imp : climada.engine.Impact()
        impact object containing impacts per event
    sampled_years : list
        A list of years that shall be covered by the resulting yimp.
    sampling_vect : 2D array
        The sampling vector specifies how to sample the yimp, it consists of one
        sub-array per sampled_year, which contains the event_ids of the events used to
        calculate the annual impacts.
        It needs to be obtained in a first call,
        i.e. [yimp, sampling_vect] = climada_yearsets.impact_yearset(...)
        and can then be provided in this function to obtain the exact same sampling
        (also for a different imp object).

    Optional parameter
        correction_fac : boolean
            If True a correction factor is applied to the resulting yimp. It is
            scaled in such a way that the expected annual impact (eai) of the yimp
            equals the eai of the input impact.

    Returns
    -------
    yimp : climada.engine.Impact()
        yearset of impacts containing annual impacts for all sampled_years
    """

    # compute impact per sampled_year
    imp_per_year = compute_imp_per_year(imp, sampling_vect)

    # copy imp object as basis for the yimp object
    yimp = copy.deepcopy(imp)

    if correction_fac:  # adjust for sampling error
        imp_per_year = imp_per_year / calculate_correction_fac(imp_per_year, imp)

    # save calculations in yimp
    yimp.at_event = imp_per_year
    n_sampled_years = len(sampled_years)
    yimp.event_id = np.arange(1, n_sampled_years + 1)
    yimp.date = u_dt.str_to_date([str(date) + '-01-01' for date in sampled_years])
    yimp.frequency = np.ones(n_sampled_years) * sum(
        len(row) for row in sampling_vect) / n_sampled_years

    return yimp
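To reproduce a given sampling, for instance on a different impact object, the sampling vector can be passed along directly. A small sketch with hand-written event indices and a stand-in impact object (illustrative values only):

import numpy as np
from types import SimpleNamespace

from climada.util import yearsets

imp = SimpleNamespace(frequency=np.full(5, 0.4),
                      at_event=np.array([1., 2., 3., 4., 5.]))
sampling_vect = [np.array([0, 2]), np.array([4]), np.array([], dtype=int)]
yimp = yearsets.impact_yearset_from_sampling_vect(imp, sampled_years=[2001, 2002, 2003],
                                                  sampling_vect=sampling_vect,
                                                  correction_fac=False)
# yimp.at_event -> array([4., 5., 0.]): the per-event impacts of each year's events, summed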
def sample_from_poisson(n_sampled_years, lam, seed=None):
    """Sample the number of events for n_sampled_years

    Parameters
    ----------
    n_sampled_years : int
        The target number of years the impact yearset shall contain.
    lam : float
        the applied Poisson distribution is centered around lambda events per year
    seed : int, optional
        seed for numpy.random, will be set if not None
        default: None

    Returns
    -------
    events_per_year : np.ndarray
        Number of events per sampled year
    """
    if seed is not None:
        np.random.seed(seed)

    return np.round(np.random.poisson(lam=lam, size=n_sampled_years)).astype('int')
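A quick sketch of this step in isolation; the lam and seed values are arbitrary:

from climada.util import yearsets

events_per_year = yearsets.sample_from_poisson(n_sampled_years=5, lam=2.5, seed=1)
# an integer array of length 5; calling again with the same seed returns the same draw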
def sample_events(events_per_year, freqs_orig, seed=None):
    """Sample events uniformly from an array (indices_orig) without replacement
    (if sum(events_per_year) > n_input_events the input events are repeated
    (tot_n_events/n_input_events) times, while ensuring that the same event does not
    occur more than once per sampled year).

    Parameters
    ----------
    events_per_year : np.ndarray
        Number of events per sampled year
    freqs_orig : np.ndarray
        Frequency of each input event
    seed : Any, optional
        seed for the default bit generator. Default: None

    Returns
    -------
    sampling_vect : 2D array
        The sampling vector specifies how to sample the yimp, it consists of one
        sub-array per sampled_year, which contains the event_ids of the events used to
        calculate the annual impacts.
    """

    sampling_vect = []

    indices_orig = np.arange(len(freqs_orig))

    freqs = freqs_orig
    indices = indices_orig

    # sample events for each sampled year
    for amount_events in events_per_year:
        # if there are not enough input events, choice with no replace will fail
        if amount_events > len(freqs_orig):
            raise ValueError(f"cannot sample {amount_events} distinct events for a single year"
                             f" when there are only {len(freqs_orig)} input events")

        # add the original indices and frequencies to the pool if there are fewer events
        # in the pool than needed to fill the year one is sampling for
        # or if the pool is empty (not covered in case amount_events is 0)
        if len(np.unique(indices)) < amount_events or len(indices) == 0:
            indices = np.append(indices, indices_orig)
            freqs = np.append(freqs, freqs_orig)

        # ensure that each event only occurs once per sampled year
        unique_events = np.unique(indices, return_index=True)[0]
        probab_dis = freqs[np.unique(indices, return_index=True)[1]] / (
            np.sum(freqs[np.unique(indices, return_index=True)[1]]))

        # sample events
        rng = default_rng(seed)
        selected_events = rng.choice(unique_events, size=amount_events, replace=False,
                                     p=probab_dis).astype('int')

        # determine used events to remove them from sampling pool
        idx_to_remove = [np.where(indices == event)[0][0] for event in selected_events]
        indices = np.delete(indices, idx_to_remove)
        freqs = np.delete(freqs, idx_to_remove)

        # save sampled events in sampling vector
        sampling_vect.append(selected_events)

    return sampling_vect
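A toy sketch of the pool-based sampling (arbitrary frequencies and event counts); each returned sub-array contains the event indices drawn for one year:

import numpy as np

from climada.util import yearsets

freqs = np.array([0.5, 0.3, 0.2])          # three input events
events_per_year = np.array([2, 1, 3, 0])   # events to draw for each of four years
sampling_vect = yearsets.sample_events(events_per_year, freqs, seed=7)
# a list of four integer arrays of lengths 2, 1, 3 and 0; within a year no index repeats,
# and the pool of indices is refilled from the original events once it runs low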
def compute_imp_per_year(imp, sampling_vect):
    """Sample annual impacts from the given event impacts according to the sampling vector

    Parameters
    ----------
    imp : climada.engine.Impact()
        impact object containing impacts per event
    sampling_vect : 2D array
        The sampling vector specifies how to sample the yimp, it consists of one
        sub-array per sampled_year, which contains the event_ids of the events used to
        calculate the annual impacts.

    Returns
    -------
    imp_per_year : np.ndarray
        Sampled impact per year (length = sampled_years)
    """
    imp_per_year = [np.sum(imp.at_event[list(sampled_events)]) for sampled_events in
                    sampling_vect]

    return np.array(imp_per_year)
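A tiny sketch (stand-in impact object, hypothetical values) of how annual impacts are summed from the per-event impacts:

import numpy as np
from types import SimpleNamespace

from climada.util import yearsets

imp = SimpleNamespace(at_event=np.array([10., 20., 50.]))
yearsets.compute_imp_per_year(imp, [np.array([0, 1]), np.array([2])])
# -> array([30., 50.]): events 0 and 1 fall in the first year, event 2 in the second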
def calculate_correction_fac(imp_per_year, imp):
    """Calculate a correction factor that can be used to scale the yimp in such
    a way that the expected annual impact (eai) of the yimp amounts to the eai
    of the input imp

    Parameters
    ----------
    imp_per_year : np.ndarray
        sampled yimp
    imp : climada.engine.Impact()
        impact object containing impacts per event

    Returns
    -------
    correction_factor : float
        The correction factor is calculated as imp_eai/yimp_eai
    """
    yimp_eai = np.sum(imp_per_year) / len(imp_per_year)
    imp_eai = np.sum(imp.frequency * imp.at_event)
    correction_factor = imp_eai / yimp_eai
    LOGGER.info("The correction factor amounts to %s", (correction_factor - 1) * 100)

    # if correction_factor > 0.1:
    #     tex = raw_input("Do you want to exclude small events?")

    return correction_factor
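A worked toy example of the factor (arbitrary numbers), using the same stand-in impact style as above:

import numpy as np
from types import SimpleNamespace

from climada.util import yearsets

imp = SimpleNamespace(frequency=np.array([0.5, 0.5]), at_event=np.array([20., 80.]))
imp_per_year = np.array([30., 50.])        # sampled annual impacts
fac = yearsets.calculate_correction_fac(imp_per_year, imp)
# imp_eai = 0.5*20 + 0.5*80 = 50; yimp_eai = (30 + 50)/2 = 40; fac = 50/40 = 1.25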