Source code for climada.entity.exposures.gpw_import

"""
This file is part of CLIMADA.

Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.

CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.

CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.

---

Import data from Global Population of the World (GPW) datasets
"""
import logging
import subprocess

from osgeo import gdal
import pandas as pd
from scipy import ndimage as nd
import numpy as np

from climada.util.constants import SYSTEM_DIR
from climada.entity.exposures import litpop as LitPop
logging.root.setLevel(logging.DEBUG)
LOGGER = logging.getLogger(__name__)

FILENAME_GPW = 'gpw_v4_population_count_rev%02i_%04i_30_sec.tif'
GPW_VERSIONS = [11, 10, 12, 13]
# FILENAME_GPW1 = '_30_sec.tif'
YEARS_AVAILABLE = np.array([2000, 2005, 2010, 2015, 2020])
BUFFER_VAL = -340282306073709652508363335590014353408
# Hard coded value which is used for NANs in original GPW data



def _gpw_bbox_cutter(gpw_data, bbox, resolution, arr1_shape=[17400, 43200]):
    """Crops the imported GPW data to the bounding box to reduce memory foot
        print after it has been resized to desired resolution.

    Optional parameters:
        gpw_data (array): Imported GPW data in gridded format
        bbox (array 4x1): Bounding box to which the data is cropped.
        resolution (int): The resolution in arcsec to which the data has
            been resized.

    Returns:
        gpw_data (array): Cropped GPW data
    """

    """gpw data is 17400 rows x 43200 cols in dimension (from 85 N to 60 S in
    latitude, full longitudinal range). Hence, the bounding box can easily be
    converted to the according indices in the gpw data"""
    steps_p_res = 3600 / resolution
    zoom = 30 / resolution
    col_min, row_min, col_max, row_max =\
        LitPop._litpop_coords_in_glb_grid(bbox, resolution)

    # accomodate to fact that not the whole grid is present in the v.10 dataset:
    if arr1_shape[0] == 17400:
        row_min, row_max = int(row_min - 5 * steps_p_res), \
            int(row_max - 5 * steps_p_res)

    rows_gpw = arr1_shape[0]
    cols_gpw = arr1_shape[1]
    if col_max < (cols_gpw / zoom) - 1:
        col_max = col_max + 1
    if row_max < (rows_gpw / zoom) - 1:
        row_max = row_max + 1
    gpw_data = gpw_data[:, col_min:col_max]

    if row_min >= 0 and row_min < (rows_gpw / zoom) and row_max >= 0 \
       and row_max < (rows_gpw / zoom):
        gpw_data = gpw_data[row_min:row_max, :]
    elif row_min < 0 and row_max >= 0 and row_max < (rows_gpw / zoom):
        np.concatenate(np.zeros((abs(row_min), gpw_data.shape[1])),
                       gpw_data[0:row_max, :])
    elif row_min < 0 and row_max < 0:
        gpw_data = np.zeros((row_max - row_min, col_max - col_min))
    elif row_min < 0 and row_max >= (rows_gpw / zoom):
        np.concatenate(np.zeros((abs(row_min), gpw_data.shape[1])), gpw_data,
                       np.zeros((row_max - (rows_gpw / zoom) + 1, gpw_data.shape[1])))
    elif row_min >= (rows_gpw / zoom):
        gpw_data = np.zeros((row_max - row_min, col_max - col_min))
    return gpw_data

[docs]def check_bounding_box(coord_list): """Check if a bounding box is valid. Parameters: coord_list (4x1 array): bounding box to be checked. OUTPUT: isCorrectType (boolean): True if bounding box is valid, false otehrwise """ is_correct_type = True if coord_list.size != 4: is_correct_type = False return is_correct_type min_lat, min_lon, max_lat, max_lon = (coord_list[0], coord_list[1], coord_list[2], coord_list[3]) assert max_lat < min_lat, "Maximum latitude cannot be smaller than minimum latitude." assert max_lon < min_lon, "Maximum longitude cannot be smaller than minimum longitude." assert min_lat < -90, "Minimum latitude cannot be smaller than -90." assert min_lon < -180, "Minimum longitude cannot be smaller than -180." assert max_lat > 90, "Maximum latitude cannot be larger than 90." assert max_lon > 180, "Maximum longitude cannot be larger than 180." return is_correct_type
[docs]def get_box_gpw(**parameters): """Reads data from GPW GeoTiff file and cuts out the data along a chosen bounding box. Parameters ---------- gpw_path : pathlib.Path Absolute path where files are stored. Default: SYSTEM_DIR resolution : int The resolution in arcsec in which the data output is created. country_cut_mode : int Defines how the country is cut out: If 0, the country is only cut out with a bounding box. If 1, the country is cut out along it's borders Default: 0. #TODO: Unimplemented cut_bbox : array-like, shape (1,4) Bounding box (ESRI type) to be cut out. The layout of the bounding box corresponds to the bounding box of the ESRI shape files and is as follows: [minimum longitude, minimum latitude, maximum longitude, maxmimum latitude] If country_cut_mode = 1, the cut_bbox is overwritten/ignored. return_coords : int Determines whether latitude and longitude are delievered along with gpw data (0) or only gpw_data is returned. Default: 0. add_one : boolean Determine whether the integer one is added to all cells to eliminate zero pixels. Default: 0. #TODO: Unimplemented reference_year : int reference year, available years are: 2000, 2005, 2010, 2015 (default), 2020 Returns ------- tile_temp : pandas.arrays.SparseArray GPW data lon : list List with longitudinal infomation on the GPW data. Same dimensionality as tile_temp (only returned if return_coords is 1). lat : list list with latitudinal infomation on the GPW data. Same dimensionality as tile_temp (only returned if return_coords is 1). """ resolution = parameters.get('resolution', 30) cut_bbox = parameters.get('cut_bbox') # country_cut_mode = parameters.get('country_cut_mode', 0) return_coords = parameters.get('return_coords', 0) reference_year = parameters.get('reference_year', 2015) year = YEARS_AVAILABLE[np.abs(YEARS_AVAILABLE - reference_year).argmin()] if year != reference_year: LOGGER.info('Reference year: %i. Using nearest available year for GWP population data: %i', reference_year, year) if (cut_bbox is None) & (return_coords == 0): # If we don't have any bbox by now and we need one, we just use the global cut_bbox = np.array((-180, -90, 180, 90)) zoom_factor = 30 / resolution # Orignal resolution is arc-seconds file_exists = False for ver in GPW_VERSIONS: gpw_path = parameters.get('gpw_path', SYSTEM_DIR) fpath = gpw_path.joinpath(FILENAME_GPW % (ver, year)) if fpath.is_file(): file_exists = True LOGGER.info('GPW Version v4.%2i', ver) break try: if not file_exists: if SYSTEM_DIR.joinpath('GPW_help.pdf').is_file(): subprocess.Popen([str(SYSTEM_DIR.joinpath('GPW_help.pdf'))], shell=True) raise FileExistsError(f'The file {fpath} could not ' + 'be found. Please download the file ' + 'first or choose a different folder. ' + 'Instructions on how to download the ' + 'file has been openend in your PDF ' + 'viewer.') else: raise FileExistsError(f'The file {fpath} could not ' + 'be found. Please download the file ' + 'first or choose a different folder. ' + 'The data can be downloaded from ' + 'http://sedac.ciesin.columbia.edu/' + 'data/collection/gpw-v4/sets/browse') LOGGER.debug('Importing %s', str(fpath)) gpw_file = gdal.Open(str(fpath)) band1 = gpw_file.GetRasterBand(1) arr1 = band1.ReadAsArray() del band1, gpw_file arr1[arr1 < 0] = 0 if arr1.shape != (17400, 43200) and arr1.shape != (21600, 43200): LOGGER.warning('GPW data dimensions mismatch. Actual dimensions: %s x %s', arr1.shape[0], arr1.shape[1]) LOGGER.warning('Expected dimensions: 17400x43200 or 21600x43200.') if zoom_factor != 1: total_population = arr1.sum() tile_temp = nd.zoom(arr1, zoom_factor, order=1) # normalize interpolated gridded population count to keep total population stable: tile_temp = tile_temp * (total_population / tile_temp.sum()) else: tile_temp = arr1 if tile_temp.ndim == 2: if cut_bbox is not None: tile_temp = _gpw_bbox_cutter(tile_temp, cut_bbox, resolution, arr1_shape=arr1.shape) else: LOGGER.error('Error: Matrix has an invalid number of dimensions \ (more than 2). Could not continue operation.') raise TypeError tile_temp = pd.arrays.SparseArray( tile_temp.reshape((tile_temp.size,), order='F'), fill_value=0) del arr1 if return_coords == 1: lon = tuple((cut_bbox[0], 1 / (3600 / resolution))) lat = tuple((cut_bbox[1], 1 / (3600 / resolution))) return tile_temp, lon, lat return tile_temp except: LOGGER.error('Importing the GPW population density file failed.') raise