Source code for pyveg.src.plotting

"""
Plotting code.
"""

import os
import json
import datetime

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from pandas.plotting import register_matplotlib_converters

from pyveg.src.data_analysis_utils import (
    get_AR1_parameter_estimate,
    get_kendell_tau,
    write_to_json,
    stl_decomposition,
    get_max_lagged_cor,
    get_corrs_by_lag,
    get_datetime_xs,
)

# globally set image quality
DPI = 150


[docs]def plot_time_series(df, output_dir):
    """
    Given a time series DataFrames (constructed with `make_time_series`),
    plot the vegetitation and precipitation time series.

    Parameters
    ----------
    df : DataFrame
        Time series DataFrame.

    output_dir : str
        Directory to save the plots in.
    """

    def make_plot(df, veg_prefix, output_dir, veg_prefix_b=None, smoothing_option='smooth'):

        # handle the case where vegetation and precipitation have mismatched NaNs
        veg_df = df.dropna(subset=[veg_prefix+'_offset50_mean'])

        # get vegetation x values to datetime objects
        veg_xs = get_datetime_xs(veg_df)

        # get vegetation y values
        veg_means = veg_df[veg_prefix + '_offset50_mean']
        veg_std = veg_df[veg_prefix + '_offset50_std']

        # create a figure
        fig, ax = plt.subplots(figsize=(15, 4.5))
        plt.xlabel('Time', fontsize=14)

        # set up veg y axis
        color = 'tab:green'
        ax.set_ylabel(f'{veg_prefix} Offset50', color=color, fontsize=14)
        ax.tick_params(axis='y', labelcolor=color)
        ax.set_ylim([veg_means.min() - 1*veg_std.max(), veg_means.max() + 3*veg_std.max()])


        # plot smoothed vegetation means and std
        ax.plot(veg_xs, veg_means, marker='o', markersize=7,
                markeredgecolor=(0.9172, 0.9627, 0.9172), markeredgewidth=2,
                label='Offset50', linewidth=2, color='green')

        ax.fill_between(veg_xs, veg_means - veg_std, veg_means + veg_std,
                        facecolor='green', alpha=0.1, label='Std Dev')

        #    # get smoothed mean, std
        veg_means_smooth = veg_df[veg_prefix+'_offset50_'+smoothing_option+'_mean']

        # plot vegetation legend
        plt.legend(loc='upper left')

        # plot precipitation if availible
        if 'total_precipitation' in df.columns:
            # handle the case where vegetation and precipitation have mismatched NaNs
            precip_df = df.dropna(subset=['total_precipitation'])
            precip_ys = precip_df.total_precipitation

            # get precipitation x values to datetime objects
            precip_xs = get_datetime_xs(precip_df)

            # duplicate axis for preciptation
            ax2 = ax.twinx()
            color = 'tab:blue'
            ax2.set_ylabel(f'Precipitation [mm]', color=color, fontsize=14)
            ax2.tick_params(axis='y', labelcolor=color)
            ax2.set_ylim([min(precip_ys)-1*np.array(precip_ys).std(), max(precip_ys)+2*np.array(precip_ys).std()])

            # plot precipitation
            ax2.plot(precip_xs, precip_ys, linewidth=2, color=color, alpha=0.75)

            # add veg-precip correlation
            max_corr_smooth, max_corr = get_max_lagged_cor(os.path.dirname(output_dir), veg_prefix)
            textstr = f'$r_{{t-{max_corr[1]}}}={max_corr[0]:.2f}$'

            # old correlation just calculates the 0-lag correlation
            #raw_corr = veg_means.corr(precip_ys)
            #smoothed_corr = veg_means_smooth.corr(precip_ys)
            #textstr = f'$r={smoothed_corr:.2f}$ (${raw_corr:.2f}$ unsmoothed)'
            ax2.text(0.13, 0.95, textstr, transform=ax2.transAxes, fontsize=14, verticalalignment='top')

        # plot second vegetation time series if availible
        if veg_prefix_b:

            # handle the case where vegetation and precipitation have mismatched NaNs
            veg_df_b = df.dropna(subset=[veg_prefix_b+'_offset50_mean'])

            # get vegetation x values to datetime objects
            veg_xs_b = get_datetime_xs(veg_df_b)

            # get vegetation y values
            veg_means_b = veg_df_b[veg_prefix_b+'_offset50_mean']
            veg_means_smooth_b = veg_df_b[veg_prefix_b+'_offset50_smooth_mean']
            veg_stds_smooth_b = veg_df_b[veg_prefix_b+'_offset50_smooth_std']

            # plot secondary time series
            ax3 = ax.twinx()
            ax3.spines["left"].set_position(("axes", -0.08))
            ax3.spines["left"].set_visible(True)
            color = 'tab:purple'
            ax3.set_ylabel(veg_prefix_b + ' Offset50', color=color, fontsize=14)
            ax3.tick_params(axis='y', labelcolor=color)
            ax3.yaxis.tick_left()
            ax3.yaxis.set_label_position('left')
            ax3.set_ylim([veg_means.min() - 1*veg_std.max(), veg_means.max() + 3*veg_std.max()])

            # plot unsmoothed vegetation means
            ax.plot(veg_xs_b, veg_means_b, label='Unsmoothed', linewidth=1, color='indigo', linestyle='dashed', alpha=0.2)

            # plot smoothed vegetation means and std
            ax3.plot(veg_xs_b, veg_means_smooth_b, marker='o', markersize=7, 
                    markeredgecolor=(0.8172, 0.7627, 0.9172), markeredgewidth=2, 
                    label='Smoothed', linewidth=2, color=color)

            ax3.fill_between(veg_xs_b, veg_means_smooth_b - veg_stds_smooth_b, veg_means_smooth_b + veg_stds_smooth_b, 
                            facecolor='tab:purple', alpha=0.1, label='Std Dev')

            # add veg-veg correlation
            vegveg_corr = veg_means.corr(veg_means_b)
            vegveg_corr_smooth = veg_means_smooth.corr(veg_means_smooth_b)
            textstr = f'$r_{{vv}} = {vegveg_corr:.2f}$'
            ax2.text(0.55, 0.85, textstr, transform=ax2.transAxes, fontsize=14, verticalalignment='top')

            # update prefix for filename use
            veg_prefix = veg_prefix + '+' + veg_prefix_b

        # add autoregression info
        veg_means.index = veg_df.date

        # layout
        sns.set_style('white')
        fig.tight_layout()

        filename_suffix = '_' + smoothing_option

        # save the plot
        output_filename = veg_prefix + '-time-series' + filename_suffix + '.png'
        plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI)
        #plt.close(fig)

    # make output dir if necessary
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    # make plots for selected columns
    for column in df.columns:
        if 'offset50_mean' in column:
            veg_prefix = column.split('_')[0]
            print(f'Plotting {veg_prefix} time series.')
            make_plot(df, veg_prefix, output_dir)
            make_plot(df, veg_prefix, output_dir, smoothing_option='smooth_res')

    # if we have two vegetation time series availible, plot them both
    if np.sum(df.columns.str.contains('offset50_mean')) == 2:
        veg_columns = df.columns[np.where(df.columns.str.contains('offset50_mean'))].values
        veg_prefixes = [c.split('_')[0] for c in veg_columns]
        assert( len(veg_prefixes) == 2 )
        make_plot(df, veg_prefixes[0], output_dir, veg_prefix_b=veg_prefixes[1])


[docs]def plot_ndvi_time_series(df, output_dir):
    def make_plot(df, veg_prefix, col_name, utput_dir):

        veg_df = df.dropna(subset=[col_name])

        # get vegetation x values to datetime objects
        veg_xs = get_datetime_xs(veg_df)

        # get vegetation y values
        veg_means = veg_df[col_name]
        #veg_means = veg_df[veg_prefix + '_ndvi_veg_mean']
        if any([col_name.replace('mean', 'std') == c for c in df.columns]):
            veg_std = veg_df[col_name.replace('mean', 'std')]

        # create a figure
        fig, ax = plt.subplots(figsize=(15, 4.5))
        plt.xlabel('Time', fontsize=14)

        # set up veg y axis
        color = 'tab:green'
        ax.set_ylabel(f'{veg_prefix} NDVI', color=color, fontsize=14)
        ax.tick_params(axis='y', labelcolor=color)

        if any([col_name.replace('mean', 'std') == c for c in df.columns]):
            ax.set_ylim([veg_means.min() - 1*veg_std.max(), veg_means.max() + 3*veg_std.max()])

        # plot ndvi
        #ax.plot(veg_xs, ndvi_means, label='Unsmoothed', linewidth=1, color='dimgray', linestyle='dotted')

        ax.plot(veg_xs, veg_means, marker='o', markersize=7, 
                markeredgecolor=(0.9172, 0.9627, 0.9172), markeredgewidth=2,
                label='Smoothed', linewidth=2, color='green')

        if any([col_name.replace('mean', 'std') == c for c in df.columns]):
            ax.fill_between(veg_xs, veg_means - veg_std, veg_means + veg_std, 
                            facecolor='green', alpha=0.1, label='Std Dev')

        # plot precipitation if availible
        if 'total_precipitation' in df.columns:
            # handle the case where vegetation and precipitation have mismatched NaNs
            precip_df = df.dropna(subset=['total_precipitation'])
            precip_ys = precip_df.total_precipitation

            # get precipitation x values to datetime objects
            precip_xs = get_datetime_xs(precip_df)

            # duplicate axis for preciptation
            ax2 = ax.twinx()
            color = 'tab:blue'
            ax2.set_ylabel(f'Precipitation [mm]', color=color, fontsize=14)
            ax2.tick_params(axis='y', labelcolor=color)
            ax2.set_ylim([min(precip_ys)-1*np.array(precip_ys).std(), max(precip_ys)+2*np.array(precip_ys).std()])

            # plot precipitation
            ax2.plot(precip_xs, precip_ys, linewidth=2, color=color, alpha=0.75)

            # add correlation information
            correlations = get_corrs_by_lag(df[col_name], df['total_precipitation'])
            max_corr = np.max(np.array(correlations))
            max_corr_lag = np.array(np.argmax(correlations))
            textstr = f'$r_{{t-{max_corr_lag}}}={max_corr:.2f}$ '
            ax2.text(0.13, 0.95, textstr, transform=ax2.transAxes, fontsize=14, verticalalignment='top')

        # layout
        sns.set_style('white')
        fig.tight_layout()

        # save the plot
        output_filename = veg_prefix + '-ndvi-time-series.png'
        plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI)
        plt.close(fig)

    # make output dir if necessary
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    # make plots for selected columns
    for column in df.columns:
        if 'ndvi' in column and 'mean' in column:
            veg_prefix = column.split('_')[0]
            print(f'Plotting {veg_prefix} NDVI time series.')
            make_plot(df, veg_prefix, column, output_dir)

    
[docs]def plot_autocorrelation_function(df, output_dir, filename_suffix=''):
    """
    Given a time series DataFrames (constructed with `make_time_series`),
    plot the autocorrelation function relevant columns.

    Parameters
    ----------
    df : DataFrame
        Time series DataFrame.

    output_dir : str
        Directory to save the plots in.
    """

    def make_plots(series, output_dir, filename_suffix=''):

        # make the full autocorrelation function plot
        fig, _ = plt.subplots(figsize=(8,5))
        pd.plotting.autocorrelation_plot(series, label=series.name)
        plt.legend()

        # save the plot
        output_filename = series.name + '-autocorrelation-function' + filename_suffix + '.png'
        plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI)
        plt.close(fig)

        # use statsmodels for partial autocorrelation
        from statsmodels.graphics.tsaplots import plot_pacf
        fig, ax = plt.subplots(figsize=(8,5))
        plot_pacf(series, label=series.name, ax=ax, zero=False)
        plt.ylim([-1.0, 1.0])
        plt.xlabel('Lag')
        plt.ylabel('Partial Autocorrelation')

        # save the plot
        output_filename = series.name + '-partial-autocorrelation-function' + filename_suffix + '.png'
        plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI)
        #plt.close(fig)


    # make plots for selected columns
    for column in df.columns:
        if ('offset50' in column or 'ndvi' in column) and 'mean' in column or 'total_precipitation' in column:
            print(f'Plotting autocorrelation functions for "{column}"...')
            make_plots(df[column].dropna(), output_dir, filename_suffix=filename_suffix)


[docs]def plot_cross_correlations(df, output_dir):
    """
    Plot a scatterplot matrix showing correlations between vegetation
    and precipitation time series, with different lags. Additionally
    write out the correlations as a function of the lag for later use.

    Parameters
    ----------
    df: DataFrame
        Time-series data.
    output_dir : str
        Directory to save the plot in.
    """

    # check precipitation time series present
    if 'total_precipitation' not in df.columns:
        print('Missing precipitation time series, skipping cross correlation plots.')
        return

    def make_plot(veg_ys, precip_ys, output_dir):

        # set up
        lags = 9
        correlations = []

        # make a new df to ensure NaN veg values are explicit
        df_ = pd.DataFrame()
        df_['precip'] = precip_ys
        df_['offset50'] = veg_ys

        # create fig
        fig, axs = plt.subplots(3, 3, sharex='col', sharey='row', 
                                figsize=(8, 8))

        # loop through offsets
        for lag in range(0, lags):

            # select the relevant Axis object
            ax = axs.flat[lag]

            # format this subplot
            ax.set_title(f'$t-{lag}$')
            ax.grid(False)

            # plot data
            lagged_data = df_['offset50'].shift(-lag)
            corr = precip_ys.corr(lagged_data)
            correlations.append(round(corr,4))
            sns.regplot(precip_ys, lagged_data, label=f'$r={corr:.2f}$', ax=ax)
            
            # format axis label
            if lag < 6:
                ax.set_xlabel('')
            if lag % 3 != 0:
                ax.set_ylabel('')
                
            ax.legend()

        plt.tight_layout()

        # save the plot
        output_filename = veg_ys.name + '-scatterplot-matrix.png'
        plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI)
        #plt.close(fig)

        # write out correlations as a function of lag
        correlations_dict = {veg_ys.name + '_lagged_correlation': correlations}
        write_to_json(os.path.join(output_dir, 'lagged_correlations.json'), correlations_dict)
    
    # make plots for selected columns
    for column in df.columns:
        if 'offset50' in column and 'mean' in column:
            print(f'Plotting cross correlation matrix for "{column}"...')
            make_plot(df[column], df['total_precipitation'], output_dir)


[docs]def plot_feature_vector(output_dir):
    """
    Read feature vectors from csv (if they exist) and then
    make feature vector plots.

    Parameters
    ----------
    output_dir : str
        Directory to save the plot in.
    """

    # assume feature vectors have been saved in the above location
    fv_dir = os.path.join(os.path.dirname(output_dir), 'processed_data')
    if not os.path.exists(fv_dir):
        print('No feature vectors found, skipping plot!')
        return

    # get feature vectors for different collections
    fvs = [f for f in os.listdir(fv_dir) if '_feature_vectors.csv' in f]
    if len(fvs) == 0:
        print('No feature vectors found, skipping plot!')
        return

    # for each collection
    for fv_filename in fvs:

        # read feature vectors
        df = pd.read_csv(os.path.join(fv_dir, fv_filename)).dropna()

        # percentile columns
        cols = [c for c in df.columns if 'percentile' in c]
        
        # compute feature vector averaged over all sub-images
        feature_vector = df[cols].mean()
        feature_vector_std = df[cols].std()

        # generate x-values
        xs = np.linspace(0,100,len(feature_vector))
        
        # make the plot
        fig, _ = plt.subplots(figsize=(6,5))

        plt.errorbar(xs, feature_vector, marker='o', markersize=5, linestyle='', 
                        yerr=feature_vector_std, color='black', capsize=2, elinewidth=1)

        plt.xlabel('Pixel Rank (%)', fontsize=14)
        plt.ylabel('$X(V-E)$', fontsize=14)
        plt.tight_layout()

        # create new subdir for feature vectors
        fv_subdir = os.path.join(output_dir, 'feature-vectors')
        if not os.path.exists(fv_subdir):
            os.makedirs(fv_subdir, exist_ok=True)

        # save the plot
        output_filename = fv_filename.split('_')[0] + '-feature-vector-summary.png'
        print(f'Plotting feature vector "{os.path.abspath(output_filename)}"...')
        plt.savefig(os.path.join(fv_subdir, output_filename), dpi=DPI)
        plt.close(fig)

        feature_vecs = []
        feature_vecs_stds = []
        offset50s = []
        dates = []

        # loop through time points
        for date, group in df.groupby('date'):
            
            # calculate feature vector and offset50
            feature_vector = group.mean()[cols]
            feature_vecs.append(feature_vector)
            feature_vecs_stds.append(group.std()[cols])
            offset50s.append((feature_vector[-1] - feature_vector[len(feature_vector)//2]))
            dates.append(date)

        # get max and min
        imax = np.argmax(np.array(offset50s))
        imin = np.argmin(np.array(offset50s))

        max_fv = feature_vecs[imax]
        max_fv_std = feature_vecs_stds[imax]
        max_date = dates[imax]
        min_fv = feature_vecs[imin]
        min_fv_std = feature_vecs_stds[imin]
        min_date = dates[imin]

        # plot the min/max veg feature vectors
        fig, _ = plt.subplots(figsize=(6,5))

        # add to plot
        plt.errorbar(xs, max_fv, marker='o', markersize=5, linestyle='', label=f'max veg: {max_date}',
                        yerr=max_fv_std, color='tab:green', capsize=2, elinewidth=1)
        plt.errorbar(xs, min_fv, marker='o', markersize=5, linestyle='', label=f'min veg: {min_date}',
                        yerr=min_fv_std, color='tab:red', capsize=2, elinewidth=1)

        # format plot
        plt.xlabel('Pixel Rank (%)', fontsize=14)
        plt.ylabel('$X(V-E)$', fontsize=14)
        plt.legend()
        plt.tight_layout()
        
        # save the plot
        output_filename = fv_filename.split('_')[0] + '-feature-vector-minmax.png'
        print(f'Plotting minmax feature vector "{os.path.abspath(output_filename)}"...')
        plt.savefig(os.path.join(fv_subdir, output_filename), dpi=DPI)
        #plt.close(fig)


[docs]def plot_stl_decomposition(df, period, output_dir):
    """
     Run the STL decomposition and plot the results network centrality and
     precipitation DataFrames in `df`.

     Parameters
     ----------
     df : DataFrame
         The time-series results.
     period : float
        Periodicity to model.
     output_dir : str
         Directory to save the plot in.
     """

    def make_plot(df, column, output_dir):
        """
        Plot STL decomposition results.

        Parameters
        ----------
        df : DataFrame
            The input time-series.
        column : str
            Column name to run STL on.
        output_dir : str
            Directory to save the plot in.
        """

        # run fit
        res = stl_decomposition(df[column], period)

        # concert x values to datetime objects
        xs = get_datetime_xs(df)

        # formatting
        default_figsize = plt.rcParams['figure.figsize']
        default_fontsize = plt.rcParams['font.size']
        plt.rc('figure', figsize=(20, 8))
        plt.rc('font', size=15)

        fig = res.plot()
        ax_list = fig.axes
        for ax in ax_list[:-1]:
            ax.tick_params(labelbottom=False)

        # set xlabel with datetime object
        #ax_list[-1].set_xticklabels(xs, rotation=0, va="center")

        # save plot
        filename = os.path.join(output_dir, column+'_STL_decomposition.png')
        plt.savefig(filename, dpi=DPI)
        #plt.close(fig)

        # undo rc changes
        plt.rc('figure', figsize=default_figsize)
        plt.rc('font', size=default_fontsize)
    
    # make output dir if necessary
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    # make plots for selected columns
    for column in df.columns:
        if ('offset50' in column or 'ndvi' in column) and 'mean' in column or 'total_precipitation' in column:
            
            print(f'Plotting STL decomposition for "{column}"...')
            
            # produce plot
            make_plot(df.dropna(), column, output_dir)


[docs]def plot_moving_window_analysis(df, output_dir, filename_suffix=''):
    """
    Given a moving window time series DataFrame, plot the time series 
    of AR1 and Variance.

    Parameters
    ----------
    df : DataFrame
        The time-series results for variance and AR1.
    output_dir : str
        Directory to save the plot in.
    filename_suffix: str
        Add suffix string to file name
    """

    def make_plot(df, column, output_dir, smoothing_option):
        """
        Parameters
        ----------
        df : DataFrame
            The time-series results for variance and AR1.
        column : str
            Column name an offset50 variance column in df.
        output_dir : str
            Directory to save the plot in.
        smoothing_option: str
            Label for smoothing variable to be used
        """

        # get short string prefix on column name
        collection_prefix = column.split('_')[0] if 'offset50' in column else 'precipitation'
        
        # hand mismatched NaNs
        ar1_df = df.dropna(subset=[column.replace('var', 'ar1')])
        var_df = df.dropna(subset=[column])

        # extract x values and convert to datetime objects
        ar1_xs = get_datetime_xs(ar1_df)
        var_xs = get_datetime_xs(var_df)

        # extract individual time series
        variance = var_df[column]
        ar1 = ar1_df[column.replace('var', 'ar1')]
        ar1_se = ar1_df[column.replace('var', 'ar1_se')]

        if any([smoothing_option in c for c in df.columns]):
            variance_smooth = var_df[column.replace('offset50_mean', 'offset50_' + smoothing_option + '_mean')]
            ar1_smooth = ar1_df[column.replace('var', 'ar1').replace('offset50_mean', 'offset50_' + smoothing_option + '_mean')]
            ar1_se_smooth = ar1_df[column.replace('var', 'ar1_se').replace('offset50_mean', 'offset50_' + smoothing_option + '_mean')]

        # create a figure
        fig, ax = plt.subplots(figsize=(15, 5))
        plt.xlabel('Time', fontsize=12)

        # set up veg y axis
        color = 'tab:blue'
        ax.set_ylabel(f'{collection_prefix} AR1', color=color, fontsize=12)
        ax.tick_params(axis='y', labelcolor=color)

        # plot unsmoothed vegetation ar1 and std
        ax.plot(ar1_xs, ar1, label='AR1', linewidth=2, color='tab:blue')
        ax.fill_between(ar1_xs, ar1 - ar1_se, ar1 + ar1_se,
                        facecolor='blue', alpha=0.1, label='AR1 SE')

        if any([smoothing_option in c for c in df.columns]):
            # plot smoothed vegetation ar1 and std
            ax.plot(ar1_xs, ar1_smooth, label='AR1 Smoothed', linewidth=2, color='tab:blue', linestyle='dotted')
            ax.fill_between(ar1_xs, ar1_smooth - ar1_se_smooth, ar1_smooth + ar1_se_smooth,
                            facecolor='none', alpha=0.15, label='AR1 SE Smoothed', hatch='X', edgecolor='tab:blue')

        # set y lim
        try: # in case there are no ar1 values, the array will be empty
            ax.set_ylim([min(ar1-ar1_se)-0.8*max(ar1+ar1_se), 1.8*max(ar1+ar1_se)])
        except:
            return

        # plot legend
        plt.legend(loc='upper left')

        # duplicate x-axis for variance
        ax2 = ax.twinx()
        color = 'tab:red'
        ax2.set_ylabel(f'{collection_prefix} Variance', color=color, fontsize=12)
        ax2.tick_params(axis='y', labelcolor=color)

        # plot variance
        ax2.plot(var_xs, variance, linewidth=2, color=color, alpha=0.75, label='Variance')
        if any([smoothing_option in c for c in df.columns]):
            ax2.plot(var_xs, variance_smooth, linewidth=2, color=color, alpha=0.75, 
                     linestyle='dotted', label='Variance Smoothed')

        # set y lim
        ax2.set_ylim([0, 2*max(variance)])


        # add legend
        plt.legend(loc='lower left')

        # add Kendall tau

        tau, p = get_kendell_tau(ar1)
        tau_var, p_var = get_kendell_tau(variance)

        if any([smoothing_option in c for c in df.columns]):
            tau_smooth, p_smooth = get_kendell_tau(ar1_smooth)
            tau_var_smooth, p_var_smooth = get_kendell_tau(variance_smooth)

        # add to plot
        textstr = ''
        if any([smoothing_option in c for c in df.columns]):
            textstr += f'AR1 Kendall $\\tau,~p$-$\\mathrm{{value}}={tau_smooth:.2f}$, ${p_smooth:.2f}$'
        textstr += f' (${tau:.2f}$, ${p:.2f}$ unsmoothed)'
        ax2.text(0.43, 0.95, textstr, transform=ax2.transAxes, fontsize=14, verticalalignment='top')

        textstr = ''
        if any([smoothing_option in c for c in df.columns]):
            textstr += f'Variance Kendall $\\tau,~p$-$\\mathrm{{value}}={tau_var_smooth:.2f}$, ${p_var_smooth:.2f}$'
        textstr += f' (${tau_var:.2f}$, ${p_var:.2f}$ unsmoothed)'
        ax2.text(0.43, 0.85, textstr, transform=ax2.transAxes, fontsize=14, verticalalignment='top')
        
        # layout
        fig.tight_layout()

        # save the plot
        output_filename = collection_prefix + '-AR1-var-' + smoothing_option + '.png'
        print(f'Plotting {collection_prefix} moving window time series...')
        plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI)
        plt.close(fig)


    for column in df.columns:
        if (('offset50_mean' in column or 'total_precipitation' in column) and 
             'var' in column):
            make_plot(df, column, output_dir, 'smooth_res')


[docs]def plot_correlation_mwa(df, output_dir, filename_suffix=''):
    """
    Given a moving window time series DataFrame, plot the time series 
    of veg-precip correlation.

    Parameters
    ----------
    df : DataFrame
        The time-series results for veg-precip correlation coeff and lag.
    output_dir : str
        Directory to save the plot in.
    filename_suffix: str
        Add suffix string to file name
    """

    def make_plot(df, column_name, output_dir, filename_suffix):

        # get datetime x-values
        xs = get_datetime_xs(df)
        
        # create a figure
        fig, ax = plt.subplots(figsize=(15, 4.5))
        plt.plot(xs, df[column_name])

        # label axes
        plt.xlabel('Time', fontsize=12)
        plt.ylabel(column_name, fontsize=12)

        # calculate Kendall taus and annotate plot
        tau, p = get_kendell_tau(df[column_name].dropna())
        textstr = f'Kendall $\\tau,~p = {tau:.3f}$, ${p:.4f}$'
        ax.text(0.1, 0.95, textstr, transform=ax.transAxes, fontsize=14, verticalalignment='top')

        # save the plot
        output_filename = f'{column_name}' + filename_suffix + '.png'
        collection_prefix = column_name.split('_')[0]
        print(f'Plotting {collection_prefix} correlation moving window analysis...')
        plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI)
        plt.close(fig)

    for column in df.columns:
        if 'precip' in column and ('corr' in column or 'lag' in column):
            make_plot(df, column, output_dir, filename_suffix)


[docs]def plot_ews_resiliance(series_name, EWSmetrics_df, Kendalltau_df, dates, output_dir):
    """
    Make early warning signals resiliance plots using the output
    from the ewstools package.

    Parameters
    ----------
    series_name : str
        String containing data collection and time series variable.
    EWSmetrics_df : DataFrame
        DataFrame from ewstools containing ews time series.
    Kendalltau_df : DataFrame
        DataFrame from ewstools containing Kendall tau values for EWSmetrics_df time series
    output_dir: str
        Output dir to save plot in.
    """
    def zoom_out(ys):
        ymin = ys.mean() - 2*((ys.mean() - ys).abs().max())
        ymax = ys.mean() + 2*((ys.mean() - ys).abs().max())
        if np.isnan(ymin):
            ymin = -1
        if np.isnan(ymax):
            ymax = 1

        return [ymin, ymax]

    def annotate(text, xy=(6    , 70), size=10):
        if 'Kendall' in text:
            xy = (xy[0], 60)
        plt.gca().annotate(text, xy=xy, xycoords='axes points',
                           size=size, ha='left', va='top')

    dates = get_datetime_xs(pd.DataFrame(dates).dropna())
    fig, _ = plt.subplots(figsize=(4,8), sharex='col')

    ax1 = plt.subplot(611)
    ys = EWSmetrics_df['State variable']

    plt.plot(dates[-len(ys):], ys, color='black')
    plt.plot(dates[-len(ys):], EWSmetrics_df['Smoothing'], color='red', linestyle='dashed')
    plt.ylim(zoom_out(ys))
    annotate(series_name)

    ax2 = plt.subplot(612, sharex=ax1)
    ys = EWSmetrics_df['Residuals']
    plt.plot(dates[-len(ys):], ys, color='black', label='Time Series')
    plt.ylim(zoom_out(ys))
    annotate('Residuals')

    ax3 = plt.subplot(613, sharex=ax1)
    ys = EWSmetrics_df['Lag-1 AC']
    plt.plot(dates[-len(ys):], ys, color='black')
    plt.ylim(zoom_out(ys))
    annotate('Lag-1 AC')
    tau = Kendalltau_df['Lag-1 AC'].iloc[0]
    annotate(f'Kendall $\\tau = {tau:.2f}$', size=8)
    
    """ys = EWSmetrics_df['Lag-2 AC']
    plt.plot(ys, color='navy')
    plt.ylim(zoom_out(ys))
    annotate('Lag-1 AC', xy=(8, 65))
    tau = Kendalltau_df['Lag-1 AC'].iloc[0]
    annotate(f'Kendall $\\tau = {tau:.2f}$', xy=(8, 55), size=8)"""
        
    ax4 = plt.subplot(614, sharex=ax1)
    ys = EWSmetrics_df['Standard deviation']
    plt.plot(dates[-len(ys):], ys, color='black')
    plt.ylim(zoom_out(ys))
    annotate('Standard deviation')
    tau = Kendalltau_df['Standard deviation'].iloc[0]
    annotate(f'Kendall $\\tau = {tau:.2f}$', size=8)
    
    ax5 = plt.subplot(615, sharex=ax1)
    ys = EWSmetrics_df['Skewness']
    plt.plot(dates[-len(ys):], ys, color='black')
    plt.ylim(zoom_out(ys))
    annotate('Skewness')
    tau = Kendalltau_df['Skewness'].iloc[0]
    annotate(f'Kendall $\\tau = {tau:.2f}$', size=8)
    
    ax6 = plt.subplot(616, sharex=ax1)
    ys = EWSmetrics_df['Kurtosis']
    plt.plot(dates[-len(ys):], ys, color='black')
    plt.ylim(zoom_out(ys))
    annotate('Kurtosis')
    tau = Kendalltau_df['Kurtosis'].iloc[0]
    annotate(f'Kendall $\\tau = {tau:.2f}$', size=8)
    
    plt.xlabel('Time')

    # remove vertical space between plots
    plt.subplots_adjust(hspace=0.0)

    # tick formatting
    for ax in [ax1, ax2, ax3, ax4, ax5]:
        ax.tick_params(
            axis="both",
            which="both",
            bottom=True,
            top=False,
            labelbottom=False,
            left=True,
            labelleft=True,
            direction="out",
            labelsize=8,
        )

    # show x labels for bottom plot
    ax6.tick_params(
        axis="both",
        which="both",
        bottom=True,
        top=False,
        labelbottom=True,
        left=True,
        labelleft=True,
        direction="out",
        labelsize=8,
    )

    # save the plot
    output_filename = series_name.replace(' ', '-') + '-ews.png'
    print(f'Plotting {series_name} ews plots...')
    plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI, bbox_inches='tight')
    plt.close(fig)


[docs]def plot_sensitivity_heatmap(series_name, df, output_dir):
    """
    Produce heatmap plot for the sensitivy analysis

    Parameters
    ----------

    df: Dataframe
        The output dataframe from the sensitivity analysis function.
    output_dir:
        Path to the directory to save the produced figures
    """

    for column in df.columns:
        if column == "smooth" or column=="winsize":
            continue
        else:
            fig, ax = plt.subplots(figsize=(5, 5))
            piv = pd.pivot_table(df, values=column, index=["smooth"], columns=["winsize"], fill_value=0)
            ax = sns.heatmap(piv, square=True,cmap='viridis',vmin=-1, vmax=1)
            ax.set_title('Sensitivity for '+ column)
            plt.setp(ax.xaxis.get_majorticklabels(), rotation=90)
            plt.tight_layout()
            plt.xlabel('Rolling Window')
            plt.ylabel('Smoothing')
            output_filename = series_name.replace(' ', '-') + '-' + column + '-sensitivity.png'
            print(f'Plotting {series_name} {column} sensitivity plot...')
            plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI)
            plt.close(fig)


[docs]def kendall_tau_histograms(series_name, df, output_dir):
    """
    Produce histograms with kendall tau distribution from surrogates for significance analysis

    Parameters
    ----------
    series_name : str
        String containing data collection and time series variable.
    df: Dataframe
          The output dataframe from the sensitivity analysis function.
    output_dir:
          Path to the directory to save the produced figures
    """

    for column in df.columns:

        if column == "true_data":
            continue

        else:
            data_df = df[df["true_data"] == True][column]
            surrogates_df = df[df["true_data"] != True][column]
            fig, ax = plt.subplots(figsize=(5, 5))

            ax.hist(surrogates_df)
            plt.axvline(data_df.values, color="black", linestyle="solid", linewidth=2)
            plt.text(
                data_df.values,
                ax.get_ylim()[0] + 8,
                "Data",
                horizontalalignment="left",
                color="black",
            )
            plt.axvline(
                surrogates_df.quantile(0.95),
                color="black",
                linestyle="dashed",
                linewidth=2,
            )
            plt.text(
                surrogates_df.quantile(0.95),
                ax.get_ylim()[1] - 12,
                "0.95 \nquantile",
                horizontalalignment="left",
                color="black",
            )
            ax.set_title("Significance testing for " + column)
            plt.xlabel("Kendall tau")
            plt.ylabel("Frequency")
            output_filename = (
                series_name.replace(" ", "-") + "-" + column + "-significance.png"
            )

            plt.savefig(os.path.join(output_dir, output_filename), dpi=DPI)
            plt.close(fig)