# Source code for pyveg.scripts.generate_config_file

#!/usr/bin/env python

"""
Generate a config file pyveg/configs/<config_filename> for use when running
download and processing jobs with
pyveg_run_pipeline --config_file pyveg/configs/<config_filename>

User specifies:
* Coordinates, OR the id of a location in coordinates.py
* Date range
* Time per point (e.g. "1m", "1w")
* Satellite collection name (e.g. "Sentinel2", "Landsat8")
* run mode ("local" or "batch")
* whether to run in 'test' mode (fewer dates, and only a few sub-images).

These can be given directly as command-line arguments, or the user will
be prompted for them.

Usage
=====

pyveg_generate_config

then respond to prompts, or

pyveg_generate_config --help

to see a list of command line options.
(Note that command line options and prompted inputs can be mixed-and-matched).

"""

import os
import re
import argparse
import time

from pyveg.configs import collections
from pyveg.coordinates import coordinate_store
from pyveg.src.coordinate_utils import lookup_country

def get_template_text():
    """
    Read and return the text of the config template.

    The template is looked up at "../configs/config_template.py" relative
    to the directory containing this file.

    Returns
    =======
    str: the full contents of the template file.

    Raises
    ======
    RuntimeError: if the template file cannot be found.
    """
    template_filepath = os.path.join(
        os.path.dirname(__file__), "..", "configs", "config_template.py"
    )
    if not os.path.exists(template_filepath):
        raise RuntimeError("Unable to find template {}".format(template_filepath))
    # use a context manager so the file handle is closed as soon as we are
    # done reading, rather than whenever the garbage collector gets to it.
    with open(template_filepath) as template_file:
        return template_file.read()
def make_output_location(coords_id, collection_name, latitude, longitude, country):
    """
    Build the name of the output location from the job parameters.

    latitude and longitude are expected as strings; a leading "-" is
    dropped and replaced by an "S" / "W" suffix, otherwise an "N" / "E"
    suffix is appended.  If coords_id is set it is used as a prefix.

    Returns the pieces joined by "-".
    """
    # Azure container names only allow a restricted set of characters,
    # so express the hemisphere with NSEW instead of a minus sign.
    south = latitude.startswith("-")
    lat_label = (latitude[1:] + "S") if south else (latitude + "N")
    west = longitude.startswith("-")
    long_label = (longitude[1:] + "W") if west else (longitude + "E")

    name_parts = [collection_name, lat_label, long_label, country]
    if coords_id:
        name_parts.insert(0, coords_id)
    return "-".join(str(part) for part in name_parts)
def make_filename(configs_dir, test_mode, longitude, latitude, country, pattern_type,
                  start_date, end_date, time_per_point, region_size, collection_name,
                  run_mode, coords_id):
    """
    Build the path of the config file for the given parameters.

    The file name starts with "testconfig" in test mode and "config"
    otherwise, optionally followed by coords_id, then the remaining
    parameters separated by underscores, with a ".py" extension.
    The result is placed inside configs_dir.
    """
    prefix = "testconfig" if test_mode else "config"
    if coords_id:
        prefix = prefix + "_" + coords_id

    # order of the fields defines the layout of the file name
    fields = (
        prefix,
        collection_name,
        f"{latitude}N",
        f"{longitude}E",
        country,
        region_size,
        pattern_type,
        start_date,
        end_date,
        time_per_point,
        run_mode,
    )
    basename = "_".join(str(field) for field in fields) + ".py"
    return os.path.join(configs_dir, basename)
def write_file(configs_dir, output_location, longitude, latitude, country, pattern_type,
               start_date, end_date, time_per_point, region_size, collection_name,
               run_mode, n_threads, test_mode=False, coords_id=None):
    """
    Fill in the config template with the given values and write it to disk.

    The output path comes from make_filename(), the template text from
    get_template_text().  Each placeholder in the template is replaced
    by the corresponding argument (all expected as strings, apart from
    n_threads which is converted with str()).

    Returns the path of the file that was written.
    """
    filename = make_filename(configs_dir, test_mode, longitude, latitude, country,
                             pattern_type, start_date, end_date, time_per_point,
                             region_size, collection_name, run_mode, coords_id)

    # daily or weekly time steps use the daily weather collection
    if time_per_point.endswith(("d", "w")):
        weather_collection_name, weather_start_date = "ERA5_daily", start_date
    else:
        weather_collection_name = "ERA5"
        if test_mode:
            weather_start_date = start_date
        else:
            # also include historical weather data
            weather_start_date = collections.data_collections[weather_collection_name]["min_date"]

    text = get_template_text()

    if coords_id:
        coords_id_line = 'coords_id = "{}"'.format(coords_id)
    else:
        coords_id_line = ""

    # The order matters: a placeholder that contains another one as a
    # substring (OUTPUT_LOCATION_TYPE vs OUTPUT_LOCATION) must be
    # substituted first.
    substitutions = [
        ("CURRENT_TIME", time.strftime("%y-%m-%d %H:%M:%S")),
        ("COLLECTION_NAME", collection_name),
        ("WEATHER_COLL_NAME", weather_collection_name),
        ("OUTPUT_LOCATION_TYPE", "azure" if run_mode == "batch" else "local"),
        ("OUTPUT_LOCATION", output_location),
        ("LATITUDE", latitude),
        ("LONGITUDE", longitude),
        ("PATTERN_TYPE", pattern_type),
        ("START_DATE", start_date),
        ("WEATHER_STARTDATE", weather_start_date),
        ("END_DATE", end_date),
        ("TIME_PER_POINT", time_per_point),
        ("REGION_SIZE", region_size),
        ("RUN_MODE", run_mode),
        ("NUM_THREADS", str(n_threads)),
        ("NUM_SUBIMAGES", "10" if test_mode else "-1"),
        ("COORDS_ID_STRING", coords_id_line),
    ]
    for placeholder, value in substitutions:
        text = text.replace(placeholder, value)

    with open(filename, "w") as configfile:
        configfile.write(text)
    print("================================\nWrote file \n {}\nWe recommend that you add and commit this to your version control repository.\n================================".format(filename))
    return filename
def _prompt_float(prompt, is_valid, default=None):
    """
    Ask the user for a number until they give one accepted by is_valid.

    An empty reply returns `default` when one is provided.  Replies that
    cannot be parsed as a float are reported and the prompt is repeated
    rather than raising ValueError.
    """
    while True:
        reply = input(prompt)
        if len(reply) == 0 and default is not None:
            return default
        try:
            value = float(reply)
        except ValueError:
            print("Please enter a numeric value")
            continue
        if is_valid(value):
            return value


def main():
    """
    Collect the parameters for a pyveg config file and write it out.

    Every parameter can be given on the command line; anything missing
    or invalid is asked for interactively.  If --coords_id is given, the
    location details (latitude, longitude, country, region size, pattern
    type) are taken from coordinate_store where available.

    Returns None.  The config file is written via write_file() and the
    command to run the pipeline with it is printed.
    """
    # get lists of options for the user to choose from.
    collection_names = [
        name for name, details in collections.data_collections.items()
        if details["data_type"] == "vegetation"
    ]
    run_modes = ["local", "batch"]
    # raw strings avoid invalid-escape warnings; patterns are matched
    # against the whole input so stray characters are rejected.
    date_regex = re.compile(r"\d{4}-[01]\d-[0123]\d")
    time_per_point_regex = re.compile(r"\d+[dwmy]")
    lat_range = [-90., 90.]
    long_range = [-180., 180.]
    n_threads_range = range(1, 17)
    default_n_threads = 4

    # create argument parser in case user wants to use command line args
    parser = argparse.ArgumentParser(
        description="""
        Create a config file for running pyveg_pipeline.  If run with no arguments (recommended),
        the user will be prompted for each parameter, or can choose a default value.
        """
    )
    parser.add_argument(
        "--coords_id", help="(optional) ID of location in coordinates.py", type=str
    )
    parser.add_argument(
        "--configs_dir", help="path to directory containing config files"
    )
    parser.add_argument(
        "--collection_name", help="collection name (e.g. 'Sentinel2')"
    )
    parser.add_argument(
        "--output_dir", help="Directory for local output data", type=str
    )
    parser.add_argument(
        "--test_mode",
        help="Run in test mode, over fewer months and with fewer sub-images",
        action='store_true'
    )
    parser.add_argument(
        "--latitude", help="latitude in degrees N", type=float
    )
    parser.add_argument(
        "--longitude", help="longitude in degrees E", type=float
    )
    parser.add_argument(
        "--country", help="Country of location", type=str
    )
    parser.add_argument(
        "--start_date", help="start date, format YYYY-MM-DD", type=str
    )
    parser.add_argument(
        "--end_date", help="end date, format YYYY-MM-DD", type=str
    )
    parser.add_argument(
        "--time_per_point", help="frequency of image, e.g. '1m', '1w'", type=str
    )
    parser.add_argument(
        "--region_size",
        help="Size of region to download, in degrees lat/long",
        type=float
    )
    parser.add_argument(
        "--pattern_type",
        help="Type of patterned vegetation, e.g. 'spots', 'labyrinths'",
        type=str
    )
    # the accepted values are those in run_modes, i.e. 'local' or 'batch'
    parser.add_argument(
        "--run_mode",
        help="""
        'local' for running on local machine, 'batch' for running some time-consuming parts
        (i.e. vegetation image processing) on Azure batch
        """,
        type=str
    )
    parser.add_argument(
        "--n_threads",
        help="""
        How many threads (cores) to parallelize some processing functions over
        """,
        type=int
    )
    args = parser.parse_args()

    # sanity check - compare against None so that a coordinate of exactly
    # 0.0 still counts as "given on the command line".
    if args.coords_id and (args.latitude is not None or args.longitude is not None):
        print("Please select EITHER coords_id OR latitude/longitude")
        return

    #############
    # now go through any arguments not already set via command line,
    # and prompt user for them.

    # configs_dir
    configs_dir = args.configs_dir if args.configs_dir else ""
    if os.path.exists(os.path.join("pyveg", "configs")):
        default_configs_dir = os.path.join("pyveg", "configs")
    elif os.path.exists("configs"):
        default_configs_dir = "configs"
    else:
        default_configs_dir = "."
    while not os.path.exists(configs_dir):
        configs_dir = input("Enter path to directory containing config files, or press Return for default path ('{}') : ".format(default_configs_dir))
        if len(configs_dir) == 0:
            configs_dir = default_configs_dir

    # test mode
    test_mode = bool(args.test_mode)
    if not test_mode:
        do_test = input("Would you like to make a test config file, with fewer months, and only a subset of sub-images? Press 'y' if so, or press Return for a normal config. : ")
        test_mode = do_test.startswith(("y", "Y"))

    # collection name
    collection_name = args.collection_name if args.collection_name else None
    while collection_name not in collection_names:
        collection_name = input("Please enter a valid collection name from this list: {} : ".format(collection_names))

    # (optional) ID from coordinates.py
    coords_id = args.coords_id if args.coords_id else None
    latitude = None
    longitude = None
    country = None
    region_size = None
    pattern_type = None
    if coords_id:
        try:
            row = coordinate_store.loc[coords_id]
            latitude = row["latitude"]
            longitude = row["longitude"]
            country = row["country"]
            region_size = row["region_size"]
            pattern_type = row["type"]
        except KeyError:
            print("Unknown id {} - please enter coordinates manually".format(coords_id))

    # latitude and longitude - "is None" rather than truthiness, so that
    # 0.0 (equator / Greenwich meridian) is treated as a real value.
    if latitude is None:
        latitude = args.latitude
        if latitude is None or not (lat_range[0] < latitude < lat_range[1]):
            latitude = _prompt_float(
                "please enter Latitude (degrees N) in the range {} : ".format(lat_range),
                lambda value: lat_range[0] < value < lat_range[1]
            )
    if longitude is None:
        longitude = args.longitude
        if longitude is None or not (long_range[0] < longitude < long_range[1]):
            longitude = _prompt_float(
                "please enter Longitude (degrees E) in the range {} : ".format(long_range),
                lambda value: long_range[0] < value < long_range[1]
            )

    # country - command line value wins, otherwise keep whatever was found
    # in the coordinate store instead of discarding it.
    if args.country:
        country = args.country
    if not country:
        country = input("Enter name of country, or press return to use OpenCage country lookup based on coordinates : ")
        if len(country) == 0:
            country = lookup_country(latitude, longitude)
    # remove spaces
    country = re.sub(r"\s+", "", country)

    # start date
    start_date = args.start_date if args.start_date else ""
    if test_mode:
        default_start_date = "2019-01-01"
    else:
        default_start_date = collections.data_collections[collection_name]["min_date"]
    while not date_regex.fullmatch(start_date):
        start_date = input("Enter start date in format YYYY-MM-DD, or press Return for default ({}) : ".format(default_start_date))
        if len(start_date) == 0:
            start_date = default_start_date

    # end date
    end_date = args.end_date if args.end_date else ""
    if test_mode:
        default_end_date = "2019-03-01"
    else:
        default_end_date = collections.data_collections[collection_name]["max_date"]
    while not date_regex.fullmatch(end_date):
        end_date = input("Enter end date in format YYYY-MM-DD, or press Return for default ({}) : ".format(default_end_date))
        if len(end_date) == 0:
            end_date = default_end_date

    # time per point
    time_per_point = args.time_per_point if args.time_per_point else ""
    default_time_per_point = collections.data_collections[collection_name]["time_per_point"]
    while not time_per_point_regex.fullmatch(time_per_point):
        time_per_point = input("Enter time per point in format e.g. '1m' for 1 month, '1w' for 1 week, or press Return for default ({}) : ".format(default_time_per_point))
        if len(time_per_point) == 0:
            time_per_point = default_time_per_point

    # region size
    if not region_size:
        default_region_size = 0.08
        region_size = args.region_size
        if region_size is None or not (0. < region_size <= 0.08):
            region_size = _prompt_float(
                "Enter region size in degrees latitude/longitude, or press Return for max/default ({}) : ".format(default_region_size),
                lambda value: 0. < value <= 0.08,
                default=default_region_size
            )
    # now we've established it fulfils the requirements, convert to a str
    region_size = str(region_size)

    # pattern_type
    if not pattern_type:
        pattern_type = args.pattern_type if args.pattern_type else ""
        default_pattern_type = "unknown"
        while len(pattern_type) < 1:
            pattern_type = input("Enter type of patterned vegetation (e.g. 'spots', 'labyrinths', or press Return for default ('{}') : ".format(default_pattern_type))
            if len(pattern_type) == 0:
                pattern_type = default_pattern_type
    pattern_type = str(pattern_type).replace(" ", "-").lower()

    # run mode
    run_mode = args.run_mode if args.run_mode else ""
    default_run_mode = "local"
    while run_mode not in run_modes:
        run_mode = input("Would you like time-consuming functions to be run on the cloud? Choose from the following: {}, or press Return for default option '{}': ".format(run_modes, default_run_mode))
        if len(run_mode) == 0:
            run_mode = default_run_mode

    # output directory
    output_dir = args.output_dir if args.output_dir else ""
    if run_mode == "local" and not output_dir:
        output_dir = input("Enter location for output, or press Return for default ('.') : ")
        if len(output_dir) == 0:
            output_dir = "."

    lat_string = "{:.2f}".format(latitude)
    long_string = "{:.2f}".format(longitude)
    output_location = make_output_location(coords_id, collection_name,
                                           lat_string, long_string, country)
    if run_mode == "local":
        output_location = os.path.join(output_dir, output_location)

    # num threads
    n_threads = args.n_threads if args.n_threads else 0
    while not (isinstance(n_threads, int) and n_threads in n_threads_range):
        if run_mode == "local":
            n_threads = input("How many threads would you like time-consuming processing functions to use? (Many computers will have 4 or 8 threads available). Press return for default value {} : ".format(default_n_threads))
            if len(n_threads) == 0:
                n_threads = default_n_threads
            else:
                try:
                    n_threads = int(n_threads)
                except ValueError:
                    # only a failed int() conversion is expected here; the
                    # loop condition then re-prompts.
                    print("Please enter an integer value")
        else:
            n_threads = 1

    print("""
    output_location {}
    collection: {}
    latitude: {}
    longitude: {}
    country: {}
    pattern_type: {}
    start_date: {}
    end_date: {}
    time_per_point: {}
    region_size: {}
    run_mode: {}
    n_threads: {}
    """.format(output_location,
               collection_name,
               lat_string,
               long_string,
               country,
               pattern_type,
               start_date,
               end_date,
               time_per_point,
               region_size,
               run_mode,
               n_threads))

    config_filename = write_file(configs_dir,
                                 output_location,
                                 long_string,
                                 lat_string,
                                 country,
                                 pattern_type,
                                 start_date,
                                 end_date,
                                 time_per_point,
                                 region_size,
                                 collection_name,
                                 run_mode,
                                 n_threads,
                                 test_mode,
                                 coords_id)
    print("""
    To run pyveg using this configuration, do:

    pyveg_run_pipeline --config_file {}

    """.format(config_filename))
# Run the interactive config generator only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__": main()