Browse Source

Added kwargs to attempt handler methods.

This would enable a more robust interface.
New arguments could be added without changing
the signature of the methods.

Worked further on docstrings.
experiments/new-attempt-handling
Sven Karsten 2 years ago
parent
commit
0f704a6d1e
  1. 106
      scripts/run/attempt_handler_example.py
  2. 41
      scripts/run/parse_global_settings.py
  3. 13
      scripts/run/run.py

106
scripts/run/attempt_handler_example.py

@ -0,0 +1,106 @@
import os
class MyAttemptHandler():
    """Example class that handles your attempts.

    The name of the class is arbitrary.

    Mandatory attribute: `next_attempt`, which represents your next attempt.
    Its type must be convertible to a string by the str() function (typically
    a string or an integer). If all attempts are exhausted and you want to
    stop, it must be set to None.

    The class can have arbitrarily many additional optional attributes.
    However, all attributes must be serializable by the pickle library, see
    https://docs.python.org/3/library/pickle.html.
    The serialization into a file is necessary to store the state of the
    attempt handler over several jobs.

    IMPORTANT: If you want to start from scratch, you have to remove such
    files, which are stored as <run_name>_attempt_handler.obj in the root
    directory.

    Parameters can be arbitrary. In this example:

    :param root: Path to the root directory
    :type root: str
    """

    def __init__(self, root = "."):
        # initialize mandatory attribute self.next_attempt
        self.next_attempt = 1

        # optional arguments and members
        # it makes sense to memorize the root directory
        self.root = root

        # our maximal number of attempts
        self.max_attempts = 4

    def prepare_attempt(self, **kwargs):
        r"""
        Mandatory method to prepare the attempt.

        Do whatever is necessary to set up the next attempt, e.g. manipulating input files.
        In this example the prepared file ``input.nml.<next_attempt>`` is copied
        over ``input.nml`` below ``<root>/input/MOM5_Baltic``.

        :Keyword Arguments:
            * **start_date** (*int*) --
              Start date of the current job in format YYYYMMDD
            * **end_date** (*int*) --
              End date of the current job in format YYYYMMDD

        Keywords that are not passed are treated as `None`, unknown keywords
        are ignored. This keeps the method callable when the set of keywords
        passed by the caller changes.
        """
        # you can use the keyword arguments; .get() avoids a KeyError if the
        # caller does not pass them
        start_date = kwargs.get("start_date")
        end_date = kwargs.get("end_date")

        print("Peparing " + str(self.next_attempt) + " for start date " + str(start_date) + " and end date " + str(end_date))

        # copy some prepared files to the actual input file
        # NOTE(review): the command is built by string concatenation, so paths
        # containing spaces will break, and the exit status of cp is ignored.
        input_nml = self.root + "/input/MOM5_Baltic/input.nml"
        os.system("cp " + input_nml + "." + str(self.next_attempt) + " " + input_nml)

        return

    def evaluate_attempt(self, crashed, **kwargs):
        r"""
        Mandatory method to evaluate the attempt.

        In this method the setting of the next_attempt should typically happen, e.g. incrementation.
        Important: If all attempts are exhausted, next_attempt must be set to `None`.
        Important: If model has crashed, this function should return False otherwise following steps are ill-defined.

        :param crashed: `True`, if the model has crashed, `False`, otherwise
        :type crashed: bool

        :Keyword Arguments:
            * **start_date** (*int*) --
              Start date of the current job in format YYYYMMDD
            * **end_date** (*int*) --
              End date of the current job in format YYYYMMDD

        Keywords that are not passed are treated as `None`.

        :return: `True`, if attempt is accepted (work will be copied to output, hotstart folder is created), `False`, if attempt is not accepted (work will not be copied to output, no hotstart folder is created)
        :rtype: bool
        """
        # you can use the keyword arguments; .get() avoids a KeyError if the
        # caller does not pass them
        start_date = kwargs.get("start_date")
        end_date = kwargs.get("end_date")

        print("Evaluating " + str(self.next_attempt) + " for start date " + str(start_date) + " and end date " + str(end_date))

        # if the model has crashed, react here
        if crashed:
            # we have no attempts left, we should stop here
            # (">=" instead of "==": a counter that is already beyond the
            # maximum, e.g. restored from an older serialized state, must
            # also terminate instead of being incremented forever)
            if self.next_attempt >= self.max_attempts:
                self.next_attempt = None
                return False

            # there are attempts left, go to the next set of input files
            self.next_attempt += 1
            # throw away work of failed attempt (you might also store it somewhere for debugging)
            return False

        # if the model did succeed, we can go back to the previous input files
        if self.next_attempt > 1:
            self.next_attempt -= 1

        return True

41
scripts/run/parse_global_settings.py

@ -4,27 +4,20 @@ import glob
class GlobalSettings:
"""Class that contains the variables of global_settings.py as attributes.
Attributes:
All that are present in global_settings.py.
Attributes will be all that are present in global_settings.py.
additionally:
root_dir Memorized path of root directory.
attempt_handler_obj_file File which is used to serialize the current state of the attempt_handler attribute.
IMPORTANT: If you want to start from scratch, you have to remove this file.
Additionally there will be `root_dir` which is the memorized path of root directory and
`attempt_handler_obj_file`. The latter is the file which is used to serialize the current state of the attempt_handler attribute.
IMPORTANT: If you want to start from scratch, you have to remove this file.
:param root_dir: Path to the root directory
:type root_dir: str
:param global_settings: Path to the global_settings.py file, relative to root_dir, default "input/global_settings.py"
:type global_settings: str
"""
def __init__(self, root_dir, global_settings = "input/global_settings.py"):
"""
Imports the global_settings.py file and creates attributes in this class with the same name and values.
Parameters
----------
root_dir : str
Path to the root directory
global_settings : str, default "input/global_settings.py"
Path to the global_settings.py file, relative to root_dir.
"""
# create a local dictionary with content of the global_settings file
ldict = {}
@ -59,13 +52,6 @@ class GlobalSettings:
This function is used to store the state of the attempt_handler object at the end of a run.
This way it can be restored when starting a new run.
Parameters
----------
None
Returns
-------
None
"""
# if we have no attempt_handler there is nothing to do
@ -84,13 +70,6 @@ class GlobalSettings:
If a file named as attempt_handler_obj_file exists, the attempt_handler object is restored from it.
If there is no such file, nothing is done here and the attempt_handler object is initialized as implemented in its constructor.
Parameters
----------
None
Returns
-------
None
"""
# if we have no attempt_handler there is nothing to do

13
scripts/run/run.py

@ -115,7 +115,7 @@ for run in range(global_settings.runs_per_job):
# do the customer's preparation here
print("Prepare attempt " + str(attempt) + "...", flush = True)
global_settings.attempt_handler.prepare_attempt()
global_settings.attempt_handler.prepare_attempt(start_date=start_date, end_date=end_date)
print("Preparation of attempt " + str(attempt) + " done.", flush = True)
# if there is no attempt handling we only have attempt "1"
@ -283,7 +283,7 @@ for run in range(global_settings.runs_per_job):
print("Model has to pass the evaluation for attempt " + str(attempt) + "...", flush = True)
# evaluate this attempt: react to crash and/or check attempt's criterion
attempt_failed = not global_settings.attempt_handler.evaluate_attempt(crashed)
attempt_failed = not global_settings.attempt_handler.evaluate_attempt(crashed, start_date=start_date, end_date=end_date)
print("Evaluation for attempt " + str(attempt) + " done.", flush = True)
# store state of attempt_handler for next run
@ -302,8 +302,8 @@ for run in range(global_settings.runs_per_job):
print('Go on with next attempt.', flush=True)
# something went wrong: either model has crashed or the attempt has not passed the criterion
if attempt_failed or crashed:
# in both cases we through away the work and start a new job
if attempt_failed:
# if the attempt failed we throw away the work and start a new job
try:
global_settings.resubmit_command
except:
@ -313,6 +313,11 @@ for run in range(global_settings.runs_per_job):
print('Run failed. Try again.', flush=True)
os.system("cd " + IOW_ESM_ROOT + "/scripts/run; " + global_settings.resubmit_command)
sys.exit()
# if the crashed attempt was not evaluated to false, we stop here
if crashed:
print('Error: Attempt '+str(attempt)+' has crashed but has been successfully evaluated. Abort.', flush=True)
sys.exit()
print(' attempt '+str(attempt)+' succeeded.', flush=True)

Loading…
Cancel
Save