refactoring and fixing tests

ViralRedditPosts · Apr 25, 2024 · 7913a72 · 7913a72
1 parent e6cd71c
commit 7913a72
Show file tree

Hide file tree

Showing 8 changed files with 30 additions and 30 deletions.
diff --git a/.gitignore b/.gitignore
@@ -225,7 +225,8 @@ crash.*.log
 # password, private keys, and other secrets. These should not be part of version 
 # control as they are data points which are potentially sensitive and subject 
 # to change depending on the environment.
-*.tfvars
+# We do not put secrets in .tfvars files, so it is fine to disregard the advice above and commit them
+#*.tfvars
 *.tfvars.json
 
 # Ignore override files as they are usually used to override resources locally and so

diff --git a/example_reddit.cfg b/example_reddit.cfg
@@ -1,4 +1,4 @@
-# rename this file reddit.cfg
+# rename this file to .reddit.cfg and place it in your home directory
 [reddit_api]
 CLIENTID: "XXXX"
 CLIENTSECRET: "XXXX"

diff --git a/.../getRedditDataFunction/lambda_function.py → ...t_reddit_data_function/lambda_function.py b/.../getRedditDataFunction/lambda_function.py → ...t_reddit_data_function/lambda_function.py
@@ -1,6 +1,6 @@
-import redditUtils as ru
+import reddit_utils as ru
 import viral_reddit_posts_utils.configUtils as cu
-import tableDefinition
+import table_definition
 import praw
 import boto3
 import os
@@ -34,7 +34,7 @@ def lambda_handler(event, context):
     print(f"Gathering data for {subreddit}")
     # Get Rising Reddit data
     print("\tGetting Rising Data")
-    schema = tableDefinition.schema
+    schema = table_definition.schema
     topN = 25
     view = 'rising'
     risingData = ru.get_reddit_data(reddit=reddit, subreddit=subreddit, view=view, schema=schema, top_n=topN)
@@ -47,7 +47,7 @@ def lambda_handler(event, context):
 
     # Get Hot Reddit data
     print("\tGetting Hot Data")
-    schema = tableDefinition.schema
+    schema = table_definition.schema
     topN = 3
     view = 'hot'
     hotData = ru.get_reddit_data(reddit=reddit, subreddit=subreddit, view=view, schema=schema, top_n=topN)

diff --git a/...ions/getRedditDataFunction/redditUtils.py → .../get_reddit_data_function/reddit_utils.py b/...ions/getRedditDataFunction/redditUtils.py → .../get_reddit_data_function/reddit_utils.py
@@ -1,6 +1,6 @@
 from datetime import datetime, UTC
 from collections import namedtuple, OrderedDict
-import tableDefinition
+import table_definition
 import json
 from decimal import Decimal
 import pickle
@@ -12,7 +12,7 @@ def get_reddit_data(
         subreddit: str,
         top_n: int = 25,
         view: str = 'rising',
-        schema: OrderedDict = tableDefinition.schema,
+        schema: OrderedDict = table_definition.schema,
         time_filter: str | None = None,
         verbose: bool = False
 ):

diff --git a/.../getRedditDataFunction/tableDefinition.py → ..._reddit_data_function/table_definition.py b/.../getRedditDataFunction/tableDefinition.py → ..._reddit_data_function/table_definition.py
diff --git a/...ions/getRedditDataFunction/test_lambda.py → ...s/get_reddit_data_function/test_lambda.py b/...ions/getRedditDataFunction/test_lambda.py → ...s/get_reddit_data_function/test_lambda.py
@@ -1,14 +1,11 @@
 from datetime import datetime, UTC, timedelta
 import pytest
-import redditUtils as ru
+import reddit_utils as ru
 import praw
-import tableDefinition
+import table_definition
 from collections import namedtuple
 import boto3
-import sys
 import os
-THIS_DIR = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(os.path.join(THIS_DIR, '../../'))
 import viral_reddit_posts_utils.configUtils as cu
 from moto import mock_dynamodb
 from unittest.mock import patch, Mock
@@ -80,7 +77,7 @@ def test_get_reddit_data(
     subreddit,
     top_n=25,
     view='rising',
-    schema=tableDefinition.schema,
+    schema=table_definition.schema,
     time_filter=None,
     verbose=True
   )
@@ -94,7 +91,7 @@ def test_get_reddit_data(
 
 @pytest.fixture(scope='module')
 def duplicated_data():
-  schema = tableDefinition.schema
+  schema = table_definition.schema
   columns = list(schema.keys())
   Row = namedtuple(typename="Row", field_names=columns)
   # these are identical examples except one has a later loadTSUTC
@@ -127,24 +124,26 @@ def class_set_up(self):
     dynamodb = boto3.resource('dynamodb', region_name='us-east-2')
     # create the table that the tests write sample data to
     table_name = 'rising'
-    td = tableDefinition.getTableDefinition(tableName=table_name)
-    self.testTable = dynamodb.create_table(**td)
-    self.schema = tableDefinition.schema
+    td = table_definition.getTableDefinition(tableName=table_name)
+    self.test_table = dynamodb.create_table(**td)
+    self.schema = table_definition.schema
     self.columns = self.schema.keys()
     self.Row = namedtuple(typename="Row", field_names=self.columns)
 
   @pytest.mark.xfail(reason="BatchWriter fails on duplicate keys. This might xpass, possibly a fault in mock object.")
   def test_duplicate_data(self):
     self.class_set_up()
-    testTable = self.testTable
+    testTable = self.test_table
     schema = self.schema
     Row=self.Row
 
     data = [
-      Row(loadDateUTC='2023-04-30', loadTimeUTC='05:03:44', loadTSUTC='2023-04-30 05:03:44', postId='133fkqz',
+      Row(subscribers=10000000, activeUsers=10000,
+          loadDateUTC='2023-04-30', loadTimeUTC='05:03:44', loadTSUTC='2023-04-30 05:03:44', postId='133fkqz',
          subreddit='pics', title='Magnolia tree blooming in my friends yard', createdTSUTC='2023-04-30 04:19:43',
          timeElapsedMin=44, score=3, numComments=0, upvoteRatio=1.0, numGildings=0),
-      Row(loadDateUTC='2023-04-30', loadTimeUTC='05:03:44', loadTSUTC='2023-04-30 05:03:44', postId='133fkqz',
+      Row(subscribers=10000000, activeUsers=10000,
+          loadDateUTC='2023-04-30', loadTimeUTC='05:03:44', loadTSUTC='2023-04-30 05:03:44', postId='133fkqz',
           subreddit='pics', title='Magnolia tree blooming in my friends yard', createdTSUTC='2023-04-30 04:19:43',
           timeElapsedMin=44, score=3, numComments=0, upvoteRatio=1.0, numGildings=0)
      ]
@@ -153,7 +152,7 @@ def test_duplicate_data(self):
 
   def test_unique_data(self):
     self.class_set_up()
-    testTable = self.testTable
+    test_table = self.test_table
     schema = self.schema
     Row = self.Row
 
@@ -167,12 +166,12 @@ def test_unique_data(self):
           subreddit='pics', title='A piece of wood sticking up in front of a fire.', createdTSUTC='2023-04-30 04:29:23',
           timeElapsedMin=34, score=0, numComments=0, upvoteRatio=0.4, numGildings=0)
     ]
-    ru.batch_writer(table=testTable, data=data, schema=schema)
+    ru.batch_writer(table=test_table, data=data, schema=schema)
     print("uniqueDataTester test complete")
 
   def test_diff_primary_index_same_second_index(self):
     self.class_set_up()
-    test_table = self.testTable
+    test_table = self.test_table
     schema = self.schema
     Row = self.Row
 

diff --git a/main.tf b/main.tf
@@ -36,15 +36,15 @@ locals {
 #     build_number = timestamp()
 #   }
 #   provisioner "local-exec" {
-#     command    = "./scripts/zipLambdaFunction.sh -f getRedditDataFunction"
+#     command    = "./scripts/zipLambdaFunction.sh -f get_reddit_data_function"
 #     on_failure = fail # OR continue
 #   }
 # }
 
 data "archive_file" "lambda_zip" {
   type        = "zip"
-  source_dir  = "./lambdaFunctions/getRedditDataFunction/"
-  output_path = "./scripts/zippedLambdaFunction/getRedditDataFunction.zip"
+  source_dir  = "./lambda_functions/get_reddit_data_function/"
+  output_path = "./scripts/zippedLambdaFunction/get_reddit_data_function.zip"
 }
 
 # zip the PRAW and boto3 packages
@@ -197,7 +197,7 @@ resource "aws_lambda_layer_version" "utils_layer" {
 resource "aws_lambda_function" "lambda_function" {
   # depends_on = [resource.null_resource.zip_function]
 
-  filename      = "./scripts/zippedLambdaFunction/getRedditDataFunction.zip"
+  filename      = "./scripts/zippedLambdaFunction/get_reddit_data_function.zip"
   function_name = "lambda-reddit-scraping-${var.env}"
   role          = aws_iam_role.iam_for_lambda.arn
   handler       = "lambda_function.lambda_handler"

diff --git a/scripts/zipLambdaFunction.sh b/scripts/zipLambdaFunction.sh
@@ -20,11 +20,11 @@ SCRIPT_PATH=${0%/*}  # https://stackoverflow.com/questions/6393551/what-is-the-m
 CWD=${pwd}
 cd $SCRIPT_PATH
 
-[ -d "../lambdaFunctions/${function_name}" ] && echo "Directory ../lambdaFunctions/${function_name} exists." || { echo "Error: Directory ../lambdaFunctions/${function_name} does not exist."; exit 1; }
+[ -d "../lambda_functions/${function_name}" ] && echo "Directory ../lambda_functions/${function_name} exists." || { echo "Error: Directory ../lambda_functions/${function_name} does not exist."; exit 1; }
 
 cd ./zippedLambdaFunction/
 rm -r ./${function_name} || true
-cp -r ../../lambdaFunctions/${function_name} ./  # copy lambda function files here
+cp -r ../../lambda_functions/${function_name} ./  # copy lambda function files here
 rm -rf ${function_name}.zip # remove first if it exists
 cd ./${function_name}/  # for some reason you have to zip from within this folder or it won't work; otherwise it wraps the files in another folder
 #rm -rf ./*.ipynb*  # remove any notebook stuff