Source code for rcatool.stats.event_duration

"""
Event Duration Analysis (EDA) of Precipitation
----------------------------------------------

Author: Petter Lind
Created: Fall 2020
Updates:

"""

import numpy as np


[docs] def eda(data, keepdims=False, axis=0, thr=0.1, duration_bins=None, event_statistic='amount', statistic_bins=None, dry_events=False, dry_bins=None): """ Calculate event duration statistics Parameters ---------- data: array 2D or 1D array of data. All data points are collectively used in the asop calculation unless 'keepdims' is True. Then calculation is performed along zeroth axis (expected time dimension). keepdims: boolean If data is 2d (time in third dimesion) and keepdims is set to True, calculation is applied to the dimension defined by axis argument (default 0) and returns a 2d array of asop components. If set to False (default) all values are collectively assembled before calculation. axis: int The axis over which to apply the calculation if keepdims is set to True. Default is 0. event_statistic: str The statistic to calculate for each event; choices are 'amount', 'mean int' or 'max int'. duration_bins: list/array Defines the bin edges for event durations, including the rightmost edge, allowing for non-uniform bin widths. statistic_bins: list/array Defines the bin edges for event statistic (amount/mean/max), including the rightmost edge, allowing for non-uniform bin widths. thr: float Value of threshold to identify start/end of events. Default 0.1. dry_events: bool If set to True, duration of dry intervals will be calculated. 'dry_bins' must then be provided. Returns ------- eda_arr: array data array with frequency of event statistic (amount, mean, max) per duration bin. """ def eda_calc(pdata, inthr, dur_bins, stat, stat_bins, calc_dry, dbins): # Flatten data to one dimension if isinstance(pdata, np.ma.MaskedArray): data1d = pdata.compressed() elif isinstance(pdata, (list, tuple)): data1d = np.array(pdata) else: data1d = pdata.ravel() if all(np.isnan(data1d)): print("All data missing/masked!") eda_arr = np.zeros((dur_bins.size-1, stat_bins.size-1)) eda_arr[:] = np.nan else: # if any(np.isnan(data1d)): # data1d = data1d[~np.isnan(data1d)] # When is data above threshold indata = data1d >= inthr # Make sure all events are well-bounded bounded = np.hstack(([0], indata, [0])) # Identify start and end of events diffs = np.diff(bounded) run_starts, = np.where(diffs > 0) run_ends, = np.where(diffs < 0) # Calculate durations durations = run_ends - run_starts if stat == 'amount': stat_data = np.array([np.sum(data1d[s:e]) for s, e in zip(run_starts, run_ends)]) elif stat == 'mean int': stat_data = np.array([np.mean(data1d[s:e]) for s, e in zip(run_starts, run_ends)]) elif stat == 'max int': stat_data = np.array([np.max(data1d[s:e]) for s, e in zip(run_starts, run_ends)]) stat_dict = {bint: stat_data[durations == bint] if bint in durations else np.nan for bint in dur_bins} eda_arr = np.array( [np.histogram(arr, bins=stat_bins)[0] for d, arr in stat_dict.items()]).swapaxes(0, 1) if calc_dry: dry = ~indata bounded_dry = np.hstack(([0], dry, [0])) diffs_dry = np.diff(bounded_dry) run_starts_dry, = np.where(diffs_dry > 0) run_ends_dry, = np.where(diffs_dry < 0) dry_durations = run_ends_dry - run_starts_dry dry_data = np.histogram(dry_durations, bins=dbins)[0] eda_arr = np.c_[dry_data, eda_arr] return eda_arr # Bins msg = "\t\n'N.B. 'duration_bins' must be provided!" assert duration_bins is not None, msg msg = "\t\n'N.B. 'statistic_bins' must be provided!" assert statistic_bins is not None, msg if dry_events: msg = "\t\n'dry_bins' must be provided if 'dry_events' is True!" assert dry_bins is not None, msg if keepdims: eda_comp = np.apply_along_axis( eda_calc, axis=axis, arr=data, inthr=thr, dur_bins=duration_bins, stat=event_statistic, stat_bins=statistic_bins, calc_dry=dry_events, dbins=dry_bins) else: eda_comp = eda_calc(data, inthr=thr, dur_bins=duration_bins, stat=event_statistic, stat_bins=statistic_bins, calc_dry=dry_events, dbins=dry_bins) return eda_comp