#!/usr/bin/env python
"""
This script analyses summary statistics produced previously with `analyse_gee_data.py` for individual locations.
"""
import os
import argparse
import json
import re
import tempfile
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
sns.set_style("whitegrid")
try:
from pyveg.src.zenodo_utils import download_file, list_files, get_deposition_id
except:
print("Unable to import zenodo_utils")
[docs]def barplot_plots(df, output_dir):
"""
Create barplots of summary data.
Parameters
-----------
df : dataframe
Dataframe of summary data.
output_dir : str
Path to the directory to save plots to.
"""
plt.figure()
ax30 = sns.barplot(x='name',y='S2_offset50_mean_max',hue='total_precipitation_mean',data=df)
ax30.set_xlabel("Mean precipitation over time series")
ax30.set_ylabel("Max Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50_precipitation_bar.png'))
plt.figure()
ax31 = sns.barplot(x='name',y='S2_offset50_mean_max',hue='S2_offset50_mean_Lag-1 AC (0.99 rolling window)',data=df)
ax31.set_xlabel("Offset50 Lag-1 AC (0.99 rolling window)")
ax31.set_ylabel("Max Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50_lag1ACvalue_bar.png'))
[docs]def scatter_plots(df, output_dir):
"""
Create scatter plots and correlation plots of summary data.
Parameters
-----------
df : dataframe
Dataframe of summary data.
output_dir : str
Path to the directory to save plots to.
"""
plt.figure()
ax = sns.scatterplot(y="S2_offset50_mean_mean", x="total_precipitation_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax.set_xlabel("Mean precipitation over time series")
ax.set_ylabel("Mean Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50_vs_precipitation.png'))
plt.figure()
ax1 = sns.scatterplot(x="longitude", y="S2_offset50_mean_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax1.set_ylabel("Mean Offset50 over time series")
ax1.set_xlabel("Longitude")
plt.savefig(os.path.join(output_dir,'offset50_vs_Longitude.png'))
plt.figure()
ax2 = sns.scatterplot(x="latitude", y="S2_offset50_mean_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax2.set_ylabel("Mean Offset50 over time series")
ax2.set_xlabel("Latitude")
plt.savefig(os.path.join(output_dir,'offset50_vs_Latitude.png'))
plt.figure()
ax3 = sns.scatterplot(x="longitude", y="latitude", size="S2_offset50_mean_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax3.set_ylabel("Latitude")
ax3.set_xlabel("Longitude")
plt.savefig(os.path.join(output_dir,'lat_long_offset50.png'))
plt.figure()
ax4 = sns.scatterplot(y="S2_offset50_mean_Lag-1 AC (0.99 rolling window)", x="S2_offset50_mean_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax4.set_ylabel("Offset50 Lag-1 AC (0.99 rolling window)")
ax4.set_xlabel("Mean Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50_offset50AR1.png'))
plt.figure()
ax5 = sns.scatterplot(y="S2_offset50_mean_Variance (0.99 rolling window)", x="S2_offset50_mean_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax5.set_ylabel("Offset50 Variance (0.99 rolling window)")
ax5.set_xlabel("Mean Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50_offset50Variance.png'))
plt.figure()
ax6 = sns.scatterplot(y="S2_offset50_mean_Kendall tau Lag-1 AC (0.5 rolling window)", x="S2_offset50_mean_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax6.set_ylabel("Offset50 Kendal tau Lag-1 AC (0.5 rolling window)")
ax6.set_xlabel("Mean Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50_offset50KendaltauAR1.png'))
plt.figure()
ax7 = sns.scatterplot(y="S2_offset50_mean_Kendall tau Variance (0.5 rolling window)", x="S2_offset50_mean_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax7.set_ylabel("Offset50 Kendal tau Variance (0.5 rolling window)")
ax7.set_xlabel("Mean Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50_offset50KendaltauVariance.png'))
plt.figure()
ax8 = sns.scatterplot(y="S2_offset50_mean_max", x="total_precipitation_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax8.set_xlabel("Mean precipitation over time series")
ax8.set_ylabel("Max Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50max_vs_precipitation.png'))
plt.figure()
ax9 = sns.scatterplot(y="S2_offset50_mean_Lag-1 AC (0.99 rolling window)", x="S2_offset50_mean_max", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax9.set_ylabel("Offset50 Lag-1 AC (0.99 rolling window)")
ax9.set_xlabel("Max Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50max_offset50AR1.png'))
plt.figure()
ax10 = sns.scatterplot(y="S2_offset50_mean_Variance (0.99 rolling window)", x="S2_offset50_mean_max", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax10.set_ylabel("Offset50 Variance (0.99 rolling window)")
ax10.set_xlabel("Max Offset50 over time series")
plt.savefig(os.path.join(output_dir,'offset50max_offset50Variance.png'))
plt.figure()
ax11 = sns.scatterplot(y="total_precipitation_mean", x="S2_offset50_mean_Variance (0.99 rolling window)", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax11.set_ylabel("Mean precipitation over time series")
ax11.set_xlabel("Offset50 Variance (0.99 rolling window)")
plt.savefig(os.path.join(output_dir,'precipitation_vs_offset50Variance.png'))
plt.figure()
ax12 = sns.scatterplot(y="total_precipitation_mean", x="S2_offset50_mean_Lag-1 AC (0.99 rolling window)", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax12.set_ylabel("Mean precipitation over time series")
ax12.set_xlabel("Offset50 Lag-1 AC (0.99 rolling window)")
plt.savefig(os.path.join(output_dir,'precipitation_vs_offset50AR1.png'))
plt.figure()
ax13 = sns.scatterplot(y="S2_offset50_mean_max", x="S2_ndvi_mean_max", data=df, hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax13.set_ylabel("Max offset50 over time series")
ax13.set_xlabel("Max NDVI over time series")
plt.savefig(os.path.join(output_dir, 'offset50max_vs_NDVImax.png'))
plt.figure()
ax14 = sns.scatterplot(y="S2_offset50_mean_mean", x="S2_ndvi_mean_mean", data=df, hue="S2_offset50_mean_pattern_type",palette="Accent_r",edgecolor="k",linewidth=1)
ax14.set_ylabel("Mean offset50 over time series")
ax14.set_xlabel("Mean NDVI over time series")
plt.savefig(os.path.join(output_dir, 'offset50max_vs_NDVImean.png'))
plt.figure()
ax15 = sns.scatterplot(y="S2_offset50_mean_Lag-1 AC (0.99 rolling window) Seasonal", x="S2_offset50_mean_max", data=df,
hue="S2_offset50_mean_pattern_type", palette="Accent_r", edgecolor="k", linewidth=1)
ax15.set_ylabel("Offset50 Lag-1 AC (0.99 rolling window) Seasonal")
ax15.set_xlabel("Mean NDVI over time series")
plt.savefig(os.path.join(output_dir, 'offset50max_vs_offset50AR1_0.99_Seasonal.png'))
plt.figure()
ax16 = sns.scatterplot(y="S2_offset50_mean_Variance (0.99 rolling window) Seasonal", x="S2_offset50_mean_max", data=df,
hue="S2_offset50_mean_pattern_type", palette="Accent_r", edgecolor="k", linewidth=1)
ax16.set_ylabel("Offset50 Variance (0.99 rolling window) Seasonal")
ax16.set_xlabel("Max offset50")
plt.savefig(os.path.join(output_dir, 'offset50max_vs_offset50Variance_0.99_Seasonal.png'))
plt.figure()
ax17 = sns.scatterplot(y="S2_offset50_mean_Lag-1 AC (0.99 rolling window) Seasonal", x="total_precipitation_mean", data=df,
hue="S2_offset50_mean_pattern_type", palette="Accent_r", edgecolor="k", linewidth=1)
ax17.set_ylabel("Offset50 Lag-1 AC (0.99 rolling window) Seasonal")
ax17.set_xlabel("Mean precipitation")
plt.savefig(os.path.join(output_dir, 'precipitationmean_vs_offset50AR1_0.99_Seasonal.png'))
plt.figure()
ax18 = sns.scatterplot(y="S2_offset50_mean_Variance (0.99 rolling window) Seasonal", x="total_precipitation_mean", data=df,
hue="S2_offset50_mean_pattern_type", palette="Accent_r", edgecolor="k", linewidth=1)
ax18.set_ylabel("Offset50 Variance (0.99 rolling window) Seasonal")
ax18.set_xlabel("Mean precipitation")
plt.savefig(os.path.join(output_dir, 'precipitationmean_vs_offset50Variance_0.99_Seasonal.png'))
plt.figure()
ax19 = sns.scatterplot(y="offset50_cb_return_rate", x="S2_offset50_mean_max", data=df,
hue="S2_offset50_mean_pattern_type", palette="Accent_r", edgecolor="k", linewidth=1)
ax19.set_ylabel("Offset50 return rate")
ax19.set_xlabel("Max offset50")
plt.savefig(os.path.join(output_dir, 'offset50max_vs_offset50returnrate.png'))
plt.figure()
ax20 = sns.scatterplot(y="offset50_cb_return_rate", x="total_precipitation_mean", data=df,
hue="S2_offset50_mean_pattern_type", palette="Accent_r", edgecolor="k", linewidth=1)
ax20.set_ylabel("Offset50 return rate")
ax20.set_xlabel("Mean precipitation over time series")
plt.savefig(os.path.join(output_dir, 'meanprecipitation_vs_offset50returnrate.png'))
plt.figure()
ax21 = sns.scatterplot(y="ndvi_cb_return_rate", x="S2_ndvi_mean_max", data=df,
hue="S2_offset50_mean_pattern_type", palette="Accent_r", edgecolor="k", linewidth=1)
ax21.set_ylabel("NDVI crystall ball return rate")
ax21.set_xlabel("Max NDVI in time series")
plt.savefig(os.path.join(output_dir, 'NDVImax_vs_NDVIreturnrate.png'))
plt.figure()
ax22 = sns.scatterplot(y="ndvi_cb_return_rate", x="total_precipitation_mean", data=df,
hue="S2_offset50_mean_pattern_type", palette="Accent_r", edgecolor="k", linewidth=1)
ax22.set_ylabel("NDVI crystall ball return rate")
ax22.set_xlabel("Mean precipitation over time series")
plt.savefig(os.path.join(output_dir, 'meanprecipitation_vs_NDVIreturnrate.png'))
[docs]def correlation_plots(df, output_dir):
"""
Create correlation plots of summary data.
Parameters
-----------
df : dataframe
Dataframe of summary data.
output_dir : str
Path to the directory to save plots to.
"""
#Calculate Correlations and p-values
selected_df = df[["total_precipitation_mean","S2_offset50_mean_mean","S2_offset50_mean_max",
"S2_offset50_mean_Lag-1 AC (0.99 rolling window)","S2_offset50_mean_Variance (0.99 rolling window)",
"latitude","longitude"]]
df_labels = ["Precipitation mean", "Ofset50 mean", "Offset50 max", "Offset50 AR1 (0.99 r.w)",
"Offset50 variance (0.99 r.w)", "latitude","longitude"]
selected_corr=selected_df.corr(method="pearson")
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)
#Calculate p-value matrix
def calculate_pvalues(df):
df = df.dropna()._get_numeric_data()
dfcols = pd.DataFrame(columns=df.columns)
pvalues = dfcols.transpose().join(dfcols, how='outer')
for r in df.columns:
for c in df.columns:
pvalues[r][c] = round(pearsonr(df[r], df[c])[1], 4)
return pvalues
p_matrix = calculate_pvalues(selected_df)
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))
# p-value mask
p_mask = np.invert(p_matrix < 0.05)
p_mask_01 = np.invert(p_matrix < 0.1)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(selected_corr, cmap=cmap, vmax=1, vmin=-1, center=0, xticklabels=df_labels, yticklabels=df_labels,
square=True, annot=True, linewidths=.5, cbar=False, cbar_kws={"shrink": .5})
plt.savefig(os.path.join(output_dir,'pearsons_correlation_plot.png'))
sns.heatmap(selected_corr, cmap=cmap, vmax=1, vmin=-1, center=0,xticklabels=df_labels, yticklabels=df_labels,
square=True, annot=True, linewidths=.5, cbar=False, cbar_kws={"shrink": .5},mask=p_mask)
plt.savefig(os.path.join(output_dir, 'significant_0.05_pearsons_correlation.png'))
sns.heatmap(selected_corr, cmap=cmap, vmax=1, vmin=-1, center=0,xticklabels=df_labels, yticklabels=df_labels,
square=True, annot=True, linewidths=.5, cbar=False, cbar_kws={"shrink": .5},mask=p_mask_01)
plt.savefig(os.path.join(output_dir, 'significant_0.1_pearsons_correlation.png'))
selected_corr.to_csv(os.path.join(output_dir,"Pearson_correlation.csv"))
p_matrix.to_csv(os.path.join(output_dir,"Pearson_p_value.csv"))
[docs]def boxplot_plots(df, output_dir):
"""
Create boxplots of summary data.
Parameters
-----------
df : dataframe
Dataframe of summary data.
output_dir : str
Path to the directory to save plots to.
"""
plt.figure()
ax23 = sns.boxplot(x="S2_offset50_mean_pattern_type", y="S2_offset50_mean_max", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r")
ax23.set_ylabel("Max Offset50 Values")
ax23.set_xlabel("Pattern Type")
plt.savefig(os.path.join(output_dir, 'Max_offset50_boxplot.png'))
plt.figure()
ax24 = sns.boxplot(x="S2_offset50_mean_pattern_type", y="S2_offset50_mean_mean", data=df,hue="S2_offset50_mean_pattern_type",palette="Accent_r")
ax24.set_ylabel("Mean Offset50 Values")
ax24.set_xlabel("Pattern Type")
plt.savefig(os.path.join(output_dir, 'Mean_offset50_boxplot.png'))
[docs]def analyse_pyveg_summary_data(input_location, output_dir):
"""
Run analysis on summary statistics data
Parameters
----------
input_location : str
Location of summary statistics output files from analyse_gee_data.py.
Can be 'zenodo' or 'zenodo_test' for the production or sandbox Zenodo depositions, or
the path to the local directory containing the files.
output_dir: str,
Location for outputs of the analysis. If None, use input_dir
"""
df = process_input_data(input_location)
#add new variable to the dataframe
df["offset50_cb_return_rate"] = (df["S2_offset50_mean_CB_fit_alpha"] ** 2).divide(2.0)
df["ndvi_cb_return_rate"] = (df["S2_ndvi_mean_CB_fit_alpha"] ** 2).divide(2.0)
summary_plots = os.path.join(output_dir, "summary_plots")
if not os.path.exists(summary_plots):
os.makedirs(summary_plots, exist_ok=True)
scatter_plots(df,summary_plots)
barplot_plots(df,summary_plots)
correlation_plots(df,summary_plots)
boxplot_plots(df,summary_plots)
[docs]def main():
"""
CLI interface for gee data analysis.
"""
parser = argparse.ArgumentParser(
description="process json files with network centrality measures from from GEE images"
)
parser.add_argument(
"--input_location",
help="results directory from `download_gee_data` script, containing `summary_stats.csv`, OR 'zenodo' or 'zenodo_test' to download the files from the production or sandbox Zenodo depositions.",
)
parser.add_argument(
"--output_dir",
help="location where analysis plots will be put.",default="."
)
print("-" * 35)
print("Running analyse_pyveg_summary_data.py")
print("-" * 35)
# parse args
args = parser.parse_args()
# run analysis code
analyse_pyveg_summary_data(args.input_location,
args.output_dir)
if __name__ == "__main__":
main()