Commit

Merge pull request #211 from AutomatingSciencePipeline/python-logging
Python logging to separate log files per experiment
budak7273 authored May 3, 2023
2 parents 7d87d25 + f5218b8 commit 64a2f11
Showing 15 changed files with 366 additions and 117 deletions.
3 changes: 3 additions & 0 deletions apps/backend/.gitignore
@@ -1,6 +1,9 @@
# Ignore docker container volume contents
ExperimentFiles/

# Ignore log files produced by the system
logs/

# Python
__pycache__
.venv
5 changes: 5 additions & 0 deletions apps/backend/.pylintrc
@@ -8,3 +8,8 @@ disable=
C0116, # missing-function-docstring
C0115, # missing-class-docstring
C0103, # we chose to use camel case to be consistent with our frontend code
W1203, # we want to use f-strings for log message readability; the performance cost is not significant enough for us to care. Don't turn off W1201, though: if a dev wants to use % syntax, they should use lazy logging, since the readability is equivalent

# Load our own custom plugins
load-plugins=
linters.no_print
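
For context on the two pylint codes referenced in the comment above, here is a minimal illustration (not part of this PR) of the three logging call styles involved:

```python
import logging

logger = logging.getLogger(__name__)
count = 42

# f-string interpolation: W1203 flags this, but it is disabled above
# because the readability is worth the eager-formatting cost.
logger.debug(f"processed {count} items")

# %-formatting applied eagerly: W1201 still flags this, since the lazy
# form below is equally readable.
logger.debug("processed %d items" % count)

# Lazy logging: the message is only formatted if the record is actually emitted.
logger.debug("processed %d items", count)
```

Disabling W1203 while keeping W1201 preserves the team's preferred f-string style but still steers anyone using % syntax toward the lazy form.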
187 changes: 111 additions & 76 deletions apps/backend/app.py
@@ -5,7 +5,8 @@
import sys
import json
import time
from flask import Flask, jsonify, request
import typing
from flask import Flask, Response, jsonify, request
from flask_cors import CORS
import firebase_admin
from firebase_admin import credentials
@@ -14,35 +15,39 @@
import pymongo
from pymongo.errors import ConnectionFailure
from bson.binary import Binary
from modules.data.types import DocumentId, IncomingStartRequest

from modules.logging.gladosLogging import EXPERIMENT_LOGGER, SYSTEM_LOGGER, close_experiment_logger, configure_root_logger, open_experiment_logger, upload_experiment_log
from modules.runner import conduct_experiment
from modules.exceptions import CustomFlaskError, GladosInternalError, ExperimentAbort
from modules.output.plots import generateScatterPlot
from modules.configs import generate_config_files

from modules.data.experiment import ExperimentData, ExperimentType
from modules.data.parameters import ParamType, BoolParameter, FloatParam, IntegerParam, Parameter, StringParameter
from modules.data.parameters import Parameter, parseRawHyperparameterData

try:
import magic # Crashes on windows if you're missing the 'python-magic-bin' python package
except ImportError:
print("Failed to import the 'magic' package, you're probably missing a system level dependency")
logging.error("Failed to import the 'magic' package, you're probably missing a system level dependency")
sys.exit(1)

# Must override so that vscode doesn't incorrectly parse another env file and have bad values
HAS_DOTENV_FILE = load_dotenv("./.env", override=True)

ENV_FIREBASE_CREDENTIALS = "FIREBASE_KEY"
DB_COLLECTION_EXPERIMENTS = 'Experiments'
DB_COLLECTION_EXPERIMENTS = "Experiments"

# set up logger
configure_root_logger()
syslogger = logging.getLogger(SYSTEM_LOGGER)
explogger = logging.getLogger(EXPERIMENT_LOGGER)

FIREBASE_CREDENTIALS = os.environ.get(ENV_FIREBASE_CREDENTIALS)
if FIREBASE_CREDENTIALS is None:
if HAS_DOTENV_FILE:
raise AssertionError(f"Missing environment variable {ENV_FIREBASE_CREDENTIALS} in your `.env` file")
else:
raise AssertionError(f"Missing environment variable {ENV_FIREBASE_CREDENTIALS} - you need a `.env` file in this folder with it defined")
else:
print("Loaded firebase credentials from environment variables")
raise AssertionError(f"Missing environment variable {ENV_FIREBASE_CREDENTIALS} - you need a `.env` file in this folder with it defined")

firebaseCredentials = credentials.Certificate(json.loads(FIREBASE_CREDENTIALS))
firebaseApp = firebase_admin.initialize_app(firebaseCredentials)
@@ -60,15 +65,21 @@
MAX_WORKERS = 1
runner = ProcessPoolExecutor(MAX_WORKERS)

#setting up the Flask webserver (handles the uploaded experiment files)
#setting up the Flask webserver (handles the signal to run an experiment)
flaskApp = Flask(__name__)
CORS(flaskApp)

syslogger.info("GLADOS Backend Started")


@flaskApp.post("/experiment")
def recv_experiment():
runner.submit(handle_exceptions_from_run, request.get_json())
return 'OK'
data = request.get_json()
if _check_request_integrity(data):
runner.submit(run_batch_and_catch_exceptions, data)
return Response(status=200)
syslogger.error("Received malformed experiment request: %s", data)
return Response(status=400)
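
As a sketch of how a client might hit the hardened endpoint (the host, port, and experiment id here are illustrative assumptions, not taken from this PR):

```python
import requests

# _check_request_integrity only requires that data['experiment']['id'] exists,
# so this is the smallest well-formed payload.
resp = requests.post("http://localhost:5000/experiment",
                     json={"experiment": {"id": "some-experiment-id"}})
print(resp.status_code)  # 200 if accepted into the runner queue, 400 if malformed
```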


@flaskApp.get("/queue")
@@ -82,141 +93,162 @@ def glados_custom_flask_error(error):
return jsonify(error.to_dict()), error.status_code


def handle_exceptions_from_run(data):
def _check_request_integrity(data: typing.Any):
try:
return data['experiment']['id'] is not None
except KeyError:
return False


def run_batch_and_catch_exceptions(data: IncomingStartRequest):
try:
run_batch(data)
except Exception as err:
print(f"Unexpected exception while trying to run the experiment, this was not caught by our own code and needs to be handled better: {err}")
logging.exception(err)
syslogger.error("Unexpected exception while trying to run the experiment, this was not caught by our own code and needs to be handled better.")
syslogger.exception(err)
close_experiment_logger()
# Unsafe to upload experiment log files here
raise err


def run_batch(data):
print(f'Run_Batch starting with data {data}')
experiments = firebaseDb.collection(DB_COLLECTION_EXPERIMENTS)
def run_batch(data: IncomingStartRequest):
syslogger.info('Run_Batch starting with data %s', data)

# Obtain most basic experiment info
expId = data['experiment']['id']
print(f'received {expId}')
expRef = experiments.document(expId)
syslogger.debug('received %s', expId)

# Parsing the argument data
experimentData = expRef.get().to_dict()
open_experiment_logger(expId)

# Retrieve experiment details from firebase
try:
hyperparameters: "dict[str,Parameter]" = parseParams(json.loads(experimentData['hyperparameters'])['hyperparameters'])
experiments = firebaseDb.collection(DB_COLLECTION_EXPERIMENTS)
expRef = experiments.document(expId)
experimentData = expRef.get().to_dict()
except Exception as err: # pylint: disable=broad-exception-caught
explogger.error("Error retrieving experiment data from firebase, aborting")
close_experiment_run(expId, None, err)
return

# Parse hyperparameters into their datatypes. Required to parse the rest of the experiment data
try:
hyperparameters: "dict[str,Parameter]" = parseRawHyperparameterData(json.loads(experimentData['hyperparameters'])['hyperparameters'])
except KeyError as err:
raise GladosInternalError("Error generating configs - hyperparameters not found in experiment object") from err
print(type(hyperparameters))
explogger.error("Error generating configs - hyperparameters not found in experiment object, aborting")
close_experiment_run(expId, expRef, err)
return
experimentData['hyperparameters'] = hyperparameters

#Parsing into Datatype
experiment = ExperimentData(**experimentData)
experiment.postProcess = experiment.scatter
# Parsing into Datatype
try:
experiment = ExperimentData(**experimentData)
experiment.postProcess = experiment.scatter
except ValueError as err:
explogger.error("Experiment data was not formatted correctly, aborting")
close_experiment_run(expId, expRef, err)
return

#Downloading Experiment File
# If the program errors here right after you deleted the ExperimentFiles on your dev machine, restart the docker container; it seems to be volume mount weirdness
os.makedirs(f'ExperimentFiles/{expId}')
os.chdir(f'ExperimentFiles/{expId}')
filepath = download_experiment_files(experiment)

rawfiletype, filetype = determine_experiment_file_type(filepath)
experiment.experimentType = filetype
print(f"Raw Filetype: {rawfiletype}\n Filtered Filetype: {filetype.value}")
explogger.info(f"Raw Filetype: {rawfiletype}\n Filtered Filetype: {filetype.value}")

print(f"Generating configs and downloading to ExperimentFiles/{expId}/configFiles")
explogger.info(f"Generating configs and downloading to ExperimentFiles/{expId}/configFiles")

totalExperimentRuns = generate_config_files(experiment)
if totalExperimentRuns == 0:
raise GladosInternalError("Error generating configs - somehow no config files were produced?")
os.chdir('../..')
close_experiment_run(expId, expRef, GladosInternalError("Error generating configs - somehow no config files were produced?"))

experiment.totalExperimentRuns = totalExperimentRuns

expRef.update({"totalExperimentRuns": experiment.totalExperimentRuns})

possibleException = None
try:
conduct_experiment(experiment, expRef)

post_process_experiment(experiment)

upload_experiment_results(experiment)
except ExperimentAbort as err:
print(f'Experiment {expId} critical failure, not doing any result uploading or post processing')
logging.exception(err)
except Exception as err:
print(f'Uncaught exception "{err}" while running an experiment the GLADOS code needs to be changed to handle this in a cleaner manner')
logging.exception(err)
raise err
explogger.error(f'Experiment {expId} critical failure, not doing any result uploading or post processing')
possibleException = err
except Exception as err: # pylint: disable=broad-exception-caught
explogger.error(f'Uncaught exception "{err}" while running an experiment. The GLADOS code needs to be changed to handle this in a cleaner manner')
possibleException = err
finally:
expRef.update({'finished': True, 'finishedAtEpochMillis': int(time.time() * 1000)})
print(f'Exiting experiment {expId}')
close_experiment_run(expId, expRef, possibleException)
os.chdir('../..')


def close_experiment_run(expId: DocumentId, expRef: "typing.Any | None", error: "Exception | None"):
if error:
explogger.exception(error)
explogger.info(f'Exiting experiment {expId}')
if expRef:
expRef.update({'finished': True, 'finishedAtEpochMillis': int(time.time() * 1000)})
else:
syslogger.warning(f'No experiment ref supplied when closing {expId}, could not update it to finished')
close_experiment_logger()
upload_experiment_log(expId)


def determine_experiment_file_type(filepath):
rawfiletype = magic.from_file(filepath)
print(rawfiletype)
explogger.info("Raw filetype is %s", rawfiletype)
filetype = 'unknown'
if 'Python script' in rawfiletype or 'python3' in rawfiletype:
filetype = ExperimentType.PYTHON
elif 'Java archive data (JAR)' in rawfiletype:
filetype = ExperimentType.JAVA

if filetype == 'unknown':
print(f"{rawfiletype} could not be mapped to python or java, if it should consider updating the matching statements")
explogger.info(f"{rawfiletype} could not be mapped to python or java, if it should consider updating the matching statements")
raise NotImplementedError("Unknown experiment file type")
return rawfiletype, filetype
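
For reference, the strings matched above come from libmagic file descriptions. A sketch of typical values (the exact wording varies by libmagic version, so these are illustrative):

```python
import magic  # python-magic; needs 'python-magic-bin' on Windows, per app.py

# Typical libmagic descriptions (illustrative; exact text varies by version):
#   a Python experiment file -> "Python script, ASCII text executable"
#   a JAR experiment file    -> "Java archive data (JAR)"
print(magic.from_file("experimentSomeId"))
```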


def download_experiment_files(experiment: ExperimentData):
if experiment.trialExtraFile != '' or experiment.postProcess != '' or experiment.keepLogs:
print('There will be experiment outputs')
explogger.info('There will be experiment outputs')
os.makedirs('ResCsvs')
print(f'Downloading file for {experiment.expId}')
explogger.info(f'Downloading file for {experiment.expId}')

filepath = f'experiment{experiment.expId}'
experiment.file = filepath
print(f"Downloading {filepath} to ExperimentFiles/{experiment.expId}/{filepath}")
explogger.info(f"Downloading {filepath} to ExperimentFiles/{experiment.expId}/{filepath}")
try:
filedata = firebaseBucket.blob(filepath)
filedata.download_to_filename(filepath)
except Exception as err:
print(f"Error {err} occurred while trying to download experiment file")
explogger.error(f"Error {err} occurred while trying to download experiment file")
raise GladosInternalError('Failed to download experiment files') from err
print(f"Downloaded {filepath} to ExperimentFiles/{experiment.expId}/{filepath}")
explogger.info(f"Downloaded {filepath} to ExperimentFiles/{experiment.expId}/{filepath}")
return filepath


def parseParams(hyperparameters):
result = {}
for param in hyperparameters:
name = param['name']
del param['name']
if param['type'] == 'integer':
param['type'] = ParamType.INTEGER
result[name] = IntegerParam(**param)
elif param['type'] == 'float':
param['type'] = ParamType.FLOAT
result[name] = FloatParam(**param)
elif param['type'] == 'bool':
param['type'] = ParamType.BOOL
result[name] = BoolParameter(**param)
elif param['type'] == 'string':
param['type'] = ParamType.STRING
result[name] = StringParameter(**param)
else:
raise GladosInternalError(f"{param['type']} is not a supported type")
return result


def upload_experiment_results(experiment: ExperimentData):
print('Uploading Results to the frontend')
"""
Call this function when inside the experiment folder!
"""
explogger.info('Uploading Results to the frontend')
uploadBlob = firebaseBucket.blob(f"results/result{experiment.expId}.csv")
uploadBlob.upload_from_filename('results.csv')

_upload_to_mongodb(experiment)

if experiment.trialExtraFile != '' or experiment.postProcess:
print('Uploading Result Csvs')
if experiment.trialExtraFile != '' or experiment.postProcess or experiment.keepLogs:
explogger.info('Uploading Result Csvs')
try:
shutil.make_archive('ResultCsvs', 'zip', 'ResCsvs')

# TODO remove firebase usage once transition to mongo file storage is complete
uploadBlob = firebaseBucket.blob(f"results/result{experiment.expId}.zip")
uploadBlob.upload_from_filename('ResultCsvs.zip')

@@ -226,15 +258,15 @@ def upload_experiment_results(experiment: ExperimentData):
experimentZipEntry = {"_id": experiment.expId, "fileContent": encoded}
try:
resultZipId = mongoResultsZipCollections.insert_one(experimentZipEntry).inserted_id
print(f"inserted zip file into mongodb with id: {resultZipId}")
explogger.info(f"inserted zip file into mongodb with id: {resultZipId}")
except Exception as err:
raise GladosInternalError("Encountered error while inserting results into MongoDB") from err
except Exception as err:
raise GladosInternalError("Error uploading to firebase") from err
raise GladosInternalError("Error uploading experiment results") from err


def _upload_to_mongodb(experiment: ExperimentData):
print('Uploading to MongoDB')
explogger.info('Uploading to MongoDB')
try:
mongoClient.admin.command('ping')
except ConnectionFailure as err:
Expand All @@ -251,17 +283,20 @@ def _upload_to_mongodb(experiment: ExperimentData):
experimentResultEntry = {"_id": experiment.expId, "resultContent": experimentData}
try:
resultId = mongoResultsCollection.insert_one(experimentResultEntry).inserted_id
print(f"inserted result csv into mongodb with id: {resultId}")
explogger.info(f"inserted result csv into mongodb with id: {resultId}")
except Exception as err:
raise GladosInternalError("Encountered error while inserting results into MongoDB") from err


def post_process_experiment(experiment: ExperimentData):
"""
Call this function when inside the experiment folder!
"""
if experiment.postProcess:
print("Beginning post processing")
explogger.info("Beginning post processing")
try:
if experiment.scatter:
print("Creating Scatter Plot")
explogger.info("Creating Scatter Plot")
depVar = experiment.scatterDepVar
indVar = experiment.scatterIndVar
generateScatterPlot(indVar, depVar, 'results.csv', experiment.expId)
26 changes: 26 additions & 0 deletions apps/backend/legacy-tests/logging_tryme.py
@@ -0,0 +1,26 @@
import logging

logging.info('Starting logging try me')

logger = logging.getLogger('helloworld')

logger.setLevel(logging.DEBUG)

debug_file_log = logging.FileHandler(filename="TESTING.DEBUG.log", encoding="utf-8")
debug_file_log.setLevel(logging.DEBUG)
logger.addHandler(debug_file_log)

logger.debug('This message should go to only the first log file')

debug_file_log.close()
logger.removeHandler(debug_file_log)

debug_file_log_2 = logging.FileHandler(filename="TESTING2.DEBUG.log", encoding="utf-8")
debug_file_log_2.setLevel(logging.DEBUG)
logger.addHandler(debug_file_log_2)

logger.debug('This message should go to only the second log file')

debug_file_log_2.close()

logging.info('Logging try me concluded')
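
The try-me script above demonstrates the handler-swap pattern that modules/logging/gladosLogging.py (not shown in this view) presumably builds on. Below is a sketch of how open_experiment_logger and close_experiment_logger could work, inferred from their call sites in app.py and the new logs/ gitignore entry; the names, paths, and structure here are assumptions, not the PR's actual implementation:

```python
import logging
import os

# Assumed logger name: app.py imports EXPERIMENT_LOGGER from
# modules.logging.gladosLogging, but its value is not shown in this view.
EXPERIMENT_LOGGER = 'glados.experiment'

_current_handler = None  # file handler for the experiment currently being run


def open_experiment_logger(expId):
    """Attach a per-experiment file handler so messages land in logs/."""
    global _current_handler
    os.makedirs('logs', exist_ok=True)
    _current_handler = logging.FileHandler(f'logs/{expId}.log', encoding='utf-8')
    logging.getLogger(EXPERIMENT_LOGGER).addHandler(_current_handler)


def close_experiment_logger():
    """Detach and close the handler so later experiments get their own file."""
    global _current_handler
    if _current_handler is not None:
        logging.getLogger(EXPERIMENT_LOGGER).removeHandler(_current_handler)
        _current_handler.close()
        _current_handler = None
```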