Source code for pyveg.scripts.create_analysis_report

from mdutils.mdutils import MdUtils
import os
import argparse
import glob
from pathlib import Path
import pypandoc
import json
import tempfile

from pyveg.src.azure_utils import download_rgb, download_summary_json


def get_collection_and_suffix(collection_name):
    """
    Look up the collection name and filename suffix based on the name of the
    collection as used by GEE.

    Parameters
    ==========
    collection_name: str, GEE name of the collection, e.g. 'COPERNICUS/S2'

    Returns
    =======
    collection: str, user-friendly name of the collection, e.g. 'Sentinel2'
    satellite_suffix: str, contraction of the collection name, used in the filenames of plots
    """
    if collection_name == 'COPERNICUS/S2':
        collection = 'Sentinel2'
        satellite_suffix = 'S2'
    elif collection_name == 'LANDSAT8':
        collection = 'Landsat8'
        satellite_suffix = 'L8'
    elif collection_name == 'LANDSAT7':
        collection = 'Landsat7'
        satellite_suffix = 'L7'
    elif collection_name == 'LANDSAT5':
        collection = 'Landsat5'
        satellite_suffix = 'L5'
    elif collection_name == 'LANDSAT4':
        collection = 'Landsat4'
        satellite_suffix = 'L4'
    else:
        raise RuntimeError("Unknown collection_name {}".format(collection_name))
    return collection, satellite_suffix
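
# Illustrative usage of get_collection_and_suffix (not part of the original module),
# showing the mapping from GEE collection names to report labels:
#
#   >>> get_collection_and_suffix('COPERNICUS/S2')
#   ('Sentinel2', 'S2')
#   >>> get_collection_and_suffix('LANDSAT8')
#   ('Landsat8', 'L8')
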
def add_time_series_plots(mdFile, analysis_plots_location, analysis_plots_location_type, satellite_suffix):
    if analysis_plots_location_type != "local":
        print("Only local disk location for analysis plots is currently supported")
        return mdFile, 0
    fig_count = 0

    # Time series figures
    mdFile.new_header(level=1, title='Time series')
    ts_path = os.path.join(analysis_plots_location, 'analysis', 'time-series')
    fig_count += 1
    mdFile.new_line(mdFile.new_inline_image(text='Time series Offset50',
                                            path=os.path.join(ts_path, satellite_suffix + '-time-series_smooth.png')))
    fig_count += 1
    mdFile.new_line(mdFile.new_inline_image(text='Time series NDVI',
                                            path=os.path.join(ts_path, satellite_suffix + '-ndvi-time-series.png')))
    mdFile.new_line('')

    # STL figures
    mdFile.new_header(level=1, title='STL decomposition')
    stl_path = os.path.join(analysis_plots_location, 'analysis', 'detrended', 'STL')
    fig_count += 1
    mdFile.new_line(mdFile.new_inline_image(text='STL Offset50',
                                            path=os.path.join(stl_path, satellite_suffix + '_offset50_mean_STL_decomposition.png')))
    fig_count += 1
    mdFile.new_line(mdFile.new_inline_image(text='STL NDVI',
                                            path=os.path.join(stl_path, satellite_suffix + '_ndvi_mean_STL_decomposition.png')))
    mdFile.new_line('')

    # Early warning figures (only added if the ewstools output directory exists)
    if os.path.exists(os.path.join(analysis_plots_location, 'analysis', 'resiliance', 'deseasonalised', 'ewstools')):
        mdFile.new_header(level=1, title='Early warnings analysis')
        ews_path = os.path.join(analysis_plots_location, 'analysis', 'resiliance', 'deseasonalised', 'ewstools')
        fig_count += 1
        mdFile.new_line(mdFile.new_inline_image(text='EWS Offset50',
                                                path=os.path.join(ews_path, satellite_suffix + '-offset50-mean-ews.png')))
        mdFile.new_line()
        fig_count += 1
        mdFile.new_line(mdFile.new_inline_image(text='EWS NDVI',
                                                path=os.path.join(ews_path, satellite_suffix + '-ndvi-mean-ews.png')))
        mdFile.new_line('')

    # Note: the CB-fit filenames below are hardcoded for Sentinel-2 ('S2')
    mdFile.new_header(level=1, title='Average annual time series CB fit')
    fig_count += 1
    mdFile.new_line(mdFile.new_inline_image(text='Offset50 CB fit',
                                            path=os.path.join(analysis_plots_location, 'analysis', 'fit_ts_CB_S2_offset50_mean.png')))
    mdFile.new_line('')
    fig_count += 1
    mdFile.new_line(mdFile.new_inline_image(text='NDVI CB fit',
                                            path=os.path.join(analysis_plots_location, 'analysis', 'fit_ts_CB_S2_ndvi_mean.png')))
    mdFile.new_line('')
    fig_count += 1
    mdFile.new_line(mdFile.new_inline_image(text='total precipitation CB fit',
                                            path=os.path.join(analysis_plots_location, 'analysis', 'fit_ts_CB_total_precipitation.png')))
    mdFile.new_line('')
    return mdFile, fig_count
def add_rgb_images(mdFile, rgb_location, rgb_location_type, fig_count):
    mdFile.new_header(level=1, title='RGB images through time')  # header style is 'atx' format by default

    rgb_filenames = []
    if rgb_location_type == "local":
        for root, dirs, files in os.walk(rgb_location):
            for filename in files:
                if filename.endswith("RGB.png") and not os.path.basename(filename).startswith('sub'):
                    # root already includes rgb_location, so join root and filename directly
                    rgb_filenames.append(os.path.join(root, filename))
    elif rgb_location_type == "azure":
        tmpdir = tempfile.mkdtemp()
        download_rgb(rgb_location, tmpdir)
        rgb_filenames = [os.path.join(tmpdir, fname) for fname in os.listdir(tmpdir)]
    else:
        print("""
        Trying to add RGB images to report - unknown value for rgb_location_type - {}.
        Currently accepted values are ['local', 'azure']
        """.format(rgb_location_type))
        return mdFile

    rgb_filenames.sort()
    for i, rgb_figure in enumerate(rgb_filenames):
        rgb_figure_name = os.path.basename(rgb_figure)
        mdFile.new_line(mdFile.new_inline_image(text=rgb_figure, path=rgb_figure))
        mdFile.new_line('Figure ' + str(fig_count) + ': ' + rgb_figure_name)
        mdFile.new_line('')
        fig_count += 1
    return mdFile
def create_markdown_pdf_report(analysis_plots_location,
                               analysis_plots_location_type,
                               rgb_location,
                               rgb_location_type,
                               do_timeseries,
                               output_dir=None,
                               collection_name=None,
                               metadata=None):
    # if no metadata was given, try to load it from a local results_summary.json,
    # or download it from Azure
    if not metadata:
        if os.path.exists(os.path.join(analysis_plots_location, "results_summary.json")):
            try:
                metadata = json.load(open(os.path.join(analysis_plots_location, "results_summary.json")))["metadata"]
            except:
                print("Couldn't retrieve metadata from {}".format(
                    os.path.join(analysis_plots_location, "results_summary.json")))
        elif rgb_location_type == "azure":
            tmpdir = tempfile.mkdtemp()
            download_summary_json(rgb_location, tmpdir)
            try:
                metadata = json.load(open(os.path.join(tmpdir, "results_summary.json")))["metadata"]
            except:
                print("Couldn't retrieve metadata from {}".format(
                    os.path.join(tmpdir, "results_summary.json")))

    if metadata and not collection_name:
        collection_name = metadata['collection']
    if not collection_name:
        raise RuntimeError("please provide either a metadata dictionary or a collection name (e.g. 'COPERNICUS/S2')")
    collection, satellite_suffix = get_collection_and_suffix(collection_name)

    coordinates = None
    if metadata:
        coordinates = (metadata['longitude'], metadata['latitude'])
    if not coordinates:
        raise RuntimeError("Unable to find coordinates in the metadata. Please provide a metadata dict.")

    if not output_dir:
        # put the output report in the same directory as the analysis plots
        output_dir = analysis_plots_location
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir,
                               'analysis_report_{}_{}_{}'.format(coordinates[0], coordinates[1], collection))

    # create the markdown file
    mdFile = MdUtils(file_name=output_path,
                     title='Results for {} and coordinates {} (longitude) and {} (latitude)'
                     .format(collection, coordinates[0], coordinates[1]))
    fig_count = 0
    if do_timeseries:
        mdFile, fig_count = add_time_series_plots(mdFile,
                                                  analysis_plots_location,
                                                  analysis_plots_location_type,
                                                  satellite_suffix)
    # add RGB images
    mdFile = add_rgb_images(mdFile, rgb_location, rgb_location_type, fig_count)

    # create a table of contents and save the file
    mdFile.new_table_of_contents(table_title='Contents', depth=2)
    mdFile.create_md_file()

    print('Converting to pdf.')
    output = pypandoc.convert_file(output_path + '.md', 'pdf', outputfile=output_path + ".pdf")
    assert output == ""
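
# A minimal programmatic usage sketch (not part of the original module).
# The directory "/path/to/gee_results" is hypothetical; it stands for a local
# results folder produced by the pyveg download/analysis scripts, containing
# results_summary.json, an 'analysis' sub-directory and the RGB png images.
#
#   create_markdown_pdf_report(
#       analysis_plots_location="/path/to/gee_results",
#       analysis_plots_location_type="local",
#       rgb_location="/path/to/gee_results",
#       rgb_location_type="local",
#       do_timeseries=True,
#   )
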
def main():
    """
    CLI interface for generating the GEE data analysis report.
    """
    parser = argparse.ArgumentParser(description="Collect all figures from the analysis and assemble them into a report")
    parser.add_argument("--input_analysis_plots_location",
                        help="results directory from the `download_gee_data` script, containing `results_summary.json` and an `analysis` directory")
    parser.add_argument("--input_analysis_plots_location_type",
                        help="currently supports 'local' or 'azure'",
                        default="local")
    parser.add_argument("--input_rgb_location",
                        help="location of the RGB plots - either an Azure container or a local directory")
    parser.add_argument("--input_rgb_location_type",
                        help="currently supports 'local' or 'azure'",
                        default="local")
    parser.add_argument("--output_dir",
                        help="(optional) directory to store the output report. If not specified, input_analysis_plots_location will be used")
    parser.add_argument("--do_timeseries",
                        action="store_true",
                        help="include time-series plots in the report")
    parser.add_argument("--collection",
                        help="GEE collection to be used in the report",
                        default='COPERNICUS/S2')

    print('-' * 35)
    print('Running create_analysis_report.py')
    print('-' * 35)

    # parse args
    args = parser.parse_args()
    input_analysis_plots_location = args.input_analysis_plots_location
    input_analysis_plots_location_type = args.input_analysis_plots_location_type
    input_rgb_location = args.input_rgb_location
    input_rgb_location_type = args.input_rgb_location_type
    output_dir = args.output_dir
    do_timeseries = args.do_timeseries
    collection = args.collection

    # build the markdown report and convert it to pdf
    create_markdown_pdf_report(input_analysis_plots_location,
                               input_analysis_plots_location_type,
                               input_rgb_location,
                               input_rgb_location_type,
                               do_timeseries,
                               output_dir,
                               collection)
if __name__ == "__main__":
    main()
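
# Example command-line invocation (illustrative; the paths are hypothetical):
#
#   python -m pyveg.scripts.create_analysis_report \
#       --input_analysis_plots_location /path/to/gee_results \
#       --input_rgb_location /path/to/gee_results \
#       --do_timeseries \
#       --collection COPERNICUS/S2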