
Extract unittests #19

Merged
merged 50 commits on Aug 19, 2024
Changes from 48 commits
Commits
50 commits
1de5521
removed unnecessary import
Aug 16, 2024
003a3ab
try to add workflow yml
Aug 16, 2024
b4b559d
Add GitHub Actions workflow
Aug 16, 2024
3886afa
linted imports
Aug 16, 2024
c5a21bf
linted imports
Aug 16, 2024
b14e398
updated actions workflow
Aug 16, 2024
8ebaeb4
made .toml
Aug 16, 2024
199da96
updated actions workflow
Aug 16, 2024
41095fe
updated actions workflow
Aug 16, 2024
054afaa
updated actions workflow
Aug 16, 2024
036e025
updated actions workflow
Aug 16, 2024
9f6ce0e
updated actions workflow
Aug 16, 2024
625f1c8
tweak ci.yml & setup.py
Aug 19, 2024
864e53e
removed logging from requirements as it is a standard lib
Aug 19, 2024
88e26f7
modify .yml
Aug 19, 2024
823a1cb
modify .yml
Aug 19, 2024
920d3b2
modify .yml
Aug 19, 2024
29fe2ee
modify .yml
Aug 19, 2024
db0b936
modify .yml
Aug 19, 2024
678a439
modify .yml
Aug 19, 2024
abe2cc6
fix import
Aug 19, 2024
5564084
fix import
Aug 19, 2024
15d9ea1
switched to lazy formatting for logging
Aug 19, 2024
caf7bcf
switched to lazy formatting for logging, changed setup.py python version
Aug 19, 2024
329c8ba
switched to lazy formatting for logging
Aug 19, 2024
8d667a1
switched to lazy formatting for logging
Aug 19, 2024
a6aaafe
switched to lazy formatting for logging
Aug 19, 2024
1671d95
aligned tests to match new logging structure
Aug 19, 2024
78a22de
update workflow name
Aug 19, 2024
1befa35
modified .yml so it can generate badges
Aug 19, 2024
a4886ea
modified .yml so it can generate badges
Aug 19, 2024
312c372
modified .yml so it can generate badges
Aug 19, 2024
74d6dea
modified .yml so it can generate badges
Aug 19, 2024
256440e
modified .yml so it can generate badges
Aug 19, 2024
fd18833
modified .yml so it can generate badges, fixed req.txt
Aug 19, 2024
6ff4552
modified .yml so it can generate badges
Aug 19, 2024
1c960ed
modified .yml to get it working again
Aug 19, 2024
9015d39
modified .yml to get it working again
Aug 19, 2024
f2089b0
modified .yml to get it working again
Aug 19, 2024
ea0341e
modified .yml to get it working again
Aug 19, 2024
2836824
modified .yml to get it working again
Aug 19, 2024
005e5fd
modified .yml to get correct coverage version
Aug 19, 2024
c0992e2
modified .yml to get correct coverage version
Aug 19, 2024
c51788c
modified .yml to get correct coverage version
Aug 19, 2024
de278d7
modified .yml to get correct coverage version
Aug 19, 2024
cd16d8a
roll back .yml to just run tests
Aug 19, 2024
d62aad4
adds a license.txt
Aug 19, 2024
d9d938c
Merge branch 'main' into extract_unittests
JoshuaMarden Aug 19, 2024
b8d64e4
try to remove logs
Aug 19, 2024
743a019
fix .gitignore
Aug 19, 2024
Binary file modified .DS_Store
Binary file not shown.
Binary file added .github/.DS_Store
Binary file not shown.
Binary file added .github/workflows/.DS_Store
Binary file not shown.
34 changes: 34 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,34 @@
name: CI

on: [push, pull_request]

jobs:
run-unittests:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4

- name: Create and activate virtual environment
run: |
python -m venv venv # Create a virtual environment
source venv/bin/activate # Activate the virtual environment

- name: Install dependencies
run: |
source venv/bin/activate # Ensure the virtual environment is active
pip install --upgrade pip # Upgrade pip within the virtual environment
pip install -r requirements.txt # Install dependencies from requirements.txt

- name: Set up environment
run: |
source venv/bin/activate # Ensure the virtual environment is active
source ./add_root_to_path.sh # Run the script to modify PYTHONPATH

- name: Run tests
run: |
source venv/bin/activate # Ensure the virtual environment is active
pytest # Run tests directly with pytest
1 change: 1 addition & 0 deletions .gitignore
@@ -10,6 +10,7 @@ data/*
*.feather
*.log


# Python Module Data
# (e.g. __pycache__ directories)
__pycache__/
21 changes: 21 additions & 0 deletions LICENSE.txt
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 J J Marden

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
12 changes: 6 additions & 6 deletions pipeline/extract_carbon.py
@@ -52,7 +52,7 @@ def fetch_data(self) -> Optional[Dict[str, Any]]:
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
self.logger.error(f"An error occurred: {e}")
self.logger.error("An error occurred: %s", e)
Collaborator comment:
Nice updating all logs to use %s
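For readers unfamiliar with the pattern being praised here, a minimal sketch of eager vs. lazy logging (the logger name is invented for illustration): with `%s`-style arguments, the `logging` module defers string interpolation until it knows the record will actually be emitted, so calls below the configured level cost almost no formatting work.

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("demo")  # hypothetical logger name

data = {"forecast": 123}

# Eager: the f-string is built unconditionally, even though DEBUG
# records are discarded at INFO level.
logger.debug(f"Fetched Data: {data}")

# Lazy: the "%s" placeholder is only interpolated if the record
# passes the level check, so this call skips formatting entirely here.
logger.debug("Fetched Data: %s", data)
```

This is also why linters such as pylint flag `logging-fstring-interpolation`, which matches the string of commits above switching every log call over to `%s`.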

return None


@@ -135,19 +135,18 @@ def execute(self) -> Optional[pd.DataFrame]:

if data:
self.logger.info("Data fetched successfully from API.")
self.logger.debug(f"Fetched Data: {data}")
self.logger.debug("Fetched Data: %s", data)

self.logger.info("Processing the fetched data.")
df = self.data_processor.process_data(data)

if df is not None:
self.logger.info("Data processed successfully into DataFrame.")
self.logger.debug("DataFrame of Carbon Forecast Data:")
self.logger.debug(df.to_string())
self.logger.debug("DataFrame of Carbon Forecast Data:\n%s", df.to_string())

self.logger.info("Saving the processed data locally.")
self.data_processor.save_data_locally(df)
self.logger.info(f"Data saved locally at `{self.data_processor.save_location}`.")
self.logger.info("Data saved locally at `%s`.", self.data_processor.save_location)

self.logger.info("Attempting to get S3 client.")
s3_client = self.data_processor.get_s3_client()
@@ -156,7 +155,8 @@ def execute(self) -> Optional[pd.DataFrame]:
self.logger.info("S3 client retrieved successfully.")
self.logger.info("Uploading the data to S3.")
self.data_processor.save_data_to_s3()
self.logger.info(f"Data successfully uploaded to S3 bucket `{self.s3_bucket}` as `{self.s3_file_name}`.")
self.logger.info("Data successfully uploaded to S3 bucket `%s` as `%s`.",
self.s3_bucket, self.s3_file_name)
else:
self.logger.error("Failed to get S3 client. Data was not uploaded to S3.")
return df
15 changes: 7 additions & 8 deletions pipeline/extract_demand.py
@@ -64,7 +64,7 @@ def fetch_data(self) -> Optional[Dict[str, Any]]:
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
self.logger.error(f"An error occurred: {e}")
self.logger.error("An error occurred: %s", e)
return None


@@ -100,7 +100,6 @@ def process_data(self, data: Dict[str, Any],
a pd.DataFrame, the second element is a dictionary containing the
time window over which the data was fetched.
"""
logger = logger or self.logger

if not data or "data" not in data:
logger.warning("No data found in response.")
@@ -159,16 +158,16 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, Dict[str, datetime]]]:
df, time_period = result

self.logger.debug("DataFrame of Demand Data:")
self.logger.debug(df.to_string()) # Log the entire DataFrame as a string
self.logger.debug("%s", df.to_string()) # Log the entire DataFrame as a string
self.logger.info("Head of the DataFrame:")
self.logger.info("\n" + df.head().to_string())
self.logger.info("\n%s", df.head().to_string())
self.logger.info("Time Period of Data:")
self.logger.info(time_period)
self.logger.info("%s", time_period)

# Saving data locally
self.logger.info("Saving data locally.")
local_save_path = self.data_processor.save_data_locally(df)
self.logger.info(f"Data successfully saved locally at {local_save_path}.")
self.logger.info("Data successfully saved locally at %s.", local_save_path)

# Uploading data to S3
self.logger.info("Preparing to upload data to S3.")
@@ -177,7 +176,7 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, Dict[str, datetime]]]:
if s3_client:
self.logger.info("S3 client initialized successfully.")
self.data_processor.save_data_to_s3()
self.logger.info(f"Data successfully uploaded to S3 at `{self.s3_file_name}`.")
self.logger.info("Data successfully uploaded to S3 at `%s`.", self.s3_file_name)
else:
self.logger.error("Failed to initialize S3 client.")

@@ -187,7 +186,7 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, Dict[str, datetime]]]:
else:
self.logger.error("Failed to retrieve data from API.")
except Exception as e:
self.logger.error(f"An error occurred during the execution: {e}")
self.logger.error("An error occurred during the execution: %s", e)

self.logger.info("Execution of the workflow completed.")
return None
22 changes: 8 additions & 14 deletions pipeline/extract_generation.py
@@ -67,7 +67,7 @@ def fetch_data(self) -> Optional[Dict[str, Any]]:
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
self.logger.error(f"An error occurred: {e}")
self.logger.error("An error occurred: %s", e)
return None


@@ -103,7 +103,7 @@ def process_data(self, data: Dict[str, Any]) -> Optional[Tuple[pd.DataFrame, Dic
"""

if not data or "data" not in data:
logger.warning("No data found in response.")
self.logger.warning("No data found in response.")
return None

df = pd.DataFrame(data["data"])
@@ -159,19 +159,14 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, Dict[str, datetime]]]:
if result is not None:
df, time_period = result

self.logger.debug("DataFrame of Demand Data:")
# Log the entire DataFrame as a string
self.logger.debug(df.to_string())
self.logger.info("Head of the DataFrame:")
self.logger.info("\n" + df.head().to_string())
self.logger.info("Time Period of Data:")
self.logger.info(time_period)
self.logger.debug("DataFrame of Demand Data:\n%s", df.to_string())
Collaborator comment:
Great reduction in Logging!

self.logger.info("Head of the DataFrame:\n%s", df.head().to_string())
self.logger.info("Time Period of Data: %s", time_period)

# Saving data locally
self.logger.info("Saving data locally.")
local_save_path = self.data_processor.save_data_locally(df)
self.logger.info(f"Data successfully saved locally at {
local_save_path}.")
self.logger.info("Data successfully saved locally at %s.", local_save_path)

# Uploading data to S3
self.logger.info("Preparing to upload data to S3.")
@@ -180,8 +175,7 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, Dict[str, datetime]]]:
if s3_client:
self.logger.info("S3 client initialized successfully.")
self.data_processor.save_data_to_s3()
self.logger.info(f"Data successfully uploaded to S3 at `{
self.s3_file_name}`.")
self.logger.info("Data successfully uploaded to S3 at `%s`.", self.s3_file_name)
else:
self.logger.error("Failed to initialize S3 client.")

@@ -191,7 +185,7 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, Dict[str, datetime]]]:
else:
self.logger.error("Failed to retrieve data from API.")
except Exception as e:
self.logger.error(f"An error occurred during the execution: {e}")
self.logger.error("An error occurred during the execution: %s", e)

self.logger.info("Execution of the workflow completed.")
return None
27 changes: 11 additions & 16 deletions pipeline/extract_price.py
@@ -52,7 +52,7 @@ def get_settlement_periods(self,
df = pd.read_feather(path_to_reference_data)
periods = df.groupby('settlementDate')['settlementPeriod'].unique().to_dict()
periods = {k: list(v) for k, v in periods.items()}
logger.info(f"Getting price data for {periods}")
logger.info("Getting price data for %s", periods)
return periods
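As an aside for reviewers, the groupby line in `get_settlement_periods` above can be sketched in isolation. The frame below is an invented stand-in for the feather reference data — the column names come from the diff, the dates and periods are made up:

```python
import pandas as pd

# Toy stand-in for the feather reference data (values are invented).
df = pd.DataFrame({
    "settlementDate": ["2024-08-19", "2024-08-19", "2024-08-20"],
    "settlementPeriod": [1, 2, 1],
})

# Map each settlement date to its unique settlement periods,
# mirroring the two lines in get_settlement_periods.
periods = df.groupby("settlementDate")["settlementPeriod"].unique().to_dict()
periods = {k: list(v) for k, v in periods.items()}
```

The `{k: list(v) ...}` comprehension converts the numpy arrays returned by `.unique()` into plain lists, which keeps the logged output readable.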

def construct_default_params(self, date: str, period: int) -> str:
@@ -76,8 +76,8 @@ def fetch_data(self, periods: Dict[str, List[int]]) -> List[Dict[str, Any]]:
response.raise_for_status()
response_list.append(response.json())
except requests.exceptions.RequestException as e:
self.logger.warning(f"An error occurred when requesting fuel data for {date}, period {period}!")
self.logger.warning(f"{e}")
self.logger.warning("An error occurred when requesting fuel data for %s, period %d!", date, period)
self.logger.warning("%s", e)

return response_list

@@ -110,7 +110,6 @@ def process_data(self, response_list: List[Dict[str, Any]],
"""
Takes a list of responses, merges them into a DataFrame, and returns it.
"""
logger = logger or self.logger

if not response_list:
logger.warning("No data found in response.")
@@ -153,7 +152,7 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, int]]:

try:
periods = self.api_client.get_settlement_periods(self.reference_data_path)
self.logger.info(f"Retrieved settlement periods: {periods}")
self.logger.info("Retrieved settlement periods: %s", periods)

response_list = self.api_client.fetch_data(periods)
if response_list:
Expand All @@ -163,17 +162,14 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, int]]:
if result is not None:
df, number_of_settlement_periods = result

self.logger.debug("DataFrame of Price Data:")
self.logger.debug(df.to_string()) # Log the entire DataFrame as a string
self.logger.info("Head of the DataFrame:")
self.logger.info("\n" + df.head().to_string())
self.logger.info("Number of Settlement Periods:")
self.logger.info(number_of_settlement_periods)
self.logger.debug("DataFrame of Price Data:\n%s", df.to_string())
self.logger.info("Head of the DataFrame:\n%s", df.head().to_string())
self.logger.info("Number of Settlement Periods: %d", number_of_settlement_periods)

# Saving data locally
self.logger.info("Saving data locally.")
local_save_path = self.data_processor.save_data_locally(df)
self.logger.info(f"Data successfully saved locally at {local_save_path}.")
self.logger.info("Data successfully saved locally at %s.", local_save_path)

# Uploading data to S3
self.logger.info("Preparing to upload data to S3.")
@@ -182,7 +178,7 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, int]]:
if s3_client:
self.logger.info("S3 client initialized successfully.")
self.data_processor.save_data_to_s3()
self.logger.info(f"Data successfully uploaded to S3 at `{self.data_processor.s3_file_name}`.")
self.logger.info("Data successfully uploaded to S3 at `%s`.", self.data_processor.s3_file_name)
else:
self.logger.error("Failed to initialize S3 client.")

@@ -192,7 +188,7 @@ def execute(self) -> Optional[Tuple[pd.DataFrame, int]]:
else:
self.logger.error("Failed to retrieve data from the API.")
except Exception as e:
self.logger.error(f"An error occurred during the execution: {e}")
self.logger.error("An error occurred during the execution: %s", e)

self.logger.info("Execution of the workflow completed.")
return None
@@ -227,6 +223,5 @@ def main() -> None:
logger.info("---> Data inserted and process completed for %s.", script_name)



if __name__ == "__main__":
main()
main()
5 changes: 3 additions & 2 deletions pipeline/extract_to_s3.py
@@ -5,9 +5,10 @@
from extract_demand import main as extract_demand
from extract_price import main as extract_price


from constants import Constants as ct
import config as cg
save_directory = cg.DATA

save_directory = ct.DATA
if not os.path.exists(save_directory):
os.makedirs(save_directory)
