Skip to content

Commit

Permalink
refactoring and fixing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kennethjmyers committed Apr 25, 2024
1 parent e6cd71c commit 7913a72
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 30 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ crash.*.log
# password, private keys, and other secrets. These should not be part of version
# control as they are data points which are potentially sensitive and subject
# to change depending on the environment.
*.tfvars
# we are not putting secrets in .tfvars, so it is fine to keep these files in version control (ignore rule disabled below)
#*.tfvars
*.tfvars.json

# Ignore override files as they are usually used to override resources locally and so
Expand Down
2 changes: 1 addition & 1 deletion example_reddit.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# rename this file reddit.cfg
# rename this file to .reddit.cfg and place it in your home directory
[reddit_api]
CLIENTID: "XXXX"
CLIENTSECRET: "XXXX"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import redditUtils as ru
import reddit_utils as ru
import viral_reddit_posts_utils.configUtils as cu
import tableDefinition
import table_definition
import praw
import boto3
import os
Expand Down Expand Up @@ -34,7 +34,7 @@ def lambda_handler(event, context):
print(f"Gathering data for {subreddit}")
# Get Rising Reddit data
print("\tGetting Rising Data")
schema = tableDefinition.schema
schema = table_definition.schema
topN = 25
view = 'rising'
risingData = ru.get_reddit_data(reddit=reddit, subreddit=subreddit, view=view, schema=schema, top_n=topN)
Expand All @@ -47,7 +47,7 @@ def lambda_handler(event, context):

# Get Hot Reddit data
print("\tGetting Hot Data")
schema = tableDefinition.schema
schema = table_definition.schema
topN = 3
view = 'hot'
hotData = ru.get_reddit_data(reddit=reddit, subreddit=subreddit, view=view, schema=schema, top_n=topN)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime, UTC
from collections import namedtuple, OrderedDict
import tableDefinition
import table_definition
import json
from decimal import Decimal
import pickle
Expand All @@ -12,7 +12,7 @@ def get_reddit_data(
subreddit: str,
top_n: int = 25,
view: str = 'rising',
schema: OrderedDict = tableDefinition.schema,
schema: OrderedDict = table_definition.schema,
time_filter: str | None = None,
verbose: bool = False
):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
from datetime import datetime, UTC, timedelta
import pytest
import redditUtils as ru
import reddit_utils as ru
import praw
import tableDefinition
import table_definition
from collections import namedtuple
import boto3
import sys
import os
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(THIS_DIR, '../../'))
import viral_reddit_posts_utils.configUtils as cu
from moto import mock_dynamodb
from unittest.mock import patch, Mock
Expand Down Expand Up @@ -80,7 +77,7 @@ def test_get_reddit_data(
subreddit,
top_n=25,
view='rising',
schema=tableDefinition.schema,
schema=table_definition.schema,
time_filter=None,
verbose=True
)
Expand All @@ -94,7 +91,7 @@ def test_get_reddit_data(

@pytest.fixture(scope='module')
def duplicated_data():
schema = tableDefinition.schema
schema = table_definition.schema
columns = list(schema.keys())
Row = namedtuple(typename="Row", field_names=columns)
# these are identical examples except one has a later loadTSUTC
Expand Down Expand Up @@ -127,24 +124,26 @@ def class_set_up(self):
dynamodb = boto3.resource('dynamodb', region_name='us-east-2')
# create table and write to sample data
table_name = 'rising'
td = tableDefinition.getTableDefinition(tableName=table_name)
self.testTable = dynamodb.create_table(**td)
self.schema = tableDefinition.schema
td = table_definition.getTableDefinition(tableName=table_name)
self.test_table = dynamodb.create_table(**td)
self.schema = table_definition.schema
self.columns = self.schema.keys()
self.Row = namedtuple(typename="Row", field_names=self.columns)

@pytest.mark.xfail(reason="BatchWriter fails on duplicate keys. This might xpass, possibly a fault in mock object.")
def test_duplicate_data(self):
self.class_set_up()
testTable = self.testTable
testTable = self.test_table
schema = self.schema
Row=self.Row

data = [
Row(loadDateUTC='2023-04-30', loadTimeUTC='05:03:44', loadTSUTC='2023-04-30 05:03:44', postId='133fkqz',
Row(subscribers=10000000, activeUsers=10000,
loadDateUTC='2023-04-30', loadTimeUTC='05:03:44', loadTSUTC='2023-04-30 05:03:44', postId='133fkqz',
subreddit='pics', title='Magnolia tree blooming in my friends yard', createdTSUTC='2023-04-30 04:19:43',
timeElapsedMin=44, score=3, numComments=0, upvoteRatio=1.0, numGildings=0),
Row(loadDateUTC='2023-04-30', loadTimeUTC='05:03:44', loadTSUTC='2023-04-30 05:03:44', postId='133fkqz',
Row(subscribers=10000000, activeUsers=10000,
loadDateUTC='2023-04-30', loadTimeUTC='05:03:44', loadTSUTC='2023-04-30 05:03:44', postId='133fkqz',
subreddit='pics', title='Magnolia tree blooming in my friends yard', createdTSUTC='2023-04-30 04:19:43',
timeElapsedMin=44, score=3, numComments=0, upvoteRatio=1.0, numGildings=0)
]
Expand All @@ -153,7 +152,7 @@ def test_duplicate_data(self):

def test_unique_data(self):
self.class_set_up()
testTable = self.testTable
test_table = self.test_table
schema = self.schema
Row = self.Row

Expand All @@ -167,12 +166,12 @@ def test_unique_data(self):
subreddit='pics', title='A piece of wood sticking up in front of a fire.', createdTSUTC='2023-04-30 04:29:23',
timeElapsedMin=34, score=0, numComments=0, upvoteRatio=0.4, numGildings=0)
]
ru.batch_writer(table=testTable, data=data, schema=schema)
ru.batch_writer(table=test_table, data=data, schema=schema)
print("uniqueDataTester test complete")

def test_diff_primary_index_same_second_index(self):
self.class_set_up()
test_table = self.testTable
test_table = self.test_table
schema = self.schema
Row = self.Row

Expand Down
8 changes: 4 additions & 4 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ locals {
# build_number = timestamp()
# }
# provisioner "local-exec" {
# command = "./scripts/zipLambdaFunction.sh -f getRedditDataFunction"
# command = "./scripts/zipLambdaFunction.sh -f get_reddit_data_function"
# on_failure = fail # OR continue
# }
# }

data "archive_file" "lambda_zip" {
type = "zip"
source_dir = "./lambdaFunctions/getRedditDataFunction/"
output_path = "./scripts/zippedLambdaFunction/getRedditDataFunction.zip"
source_dir = "./lambda_functions/get_reddit_data_function/"
output_path = "./scripts/zippedLambdaFunction/get_reddit_data_function.zip"
}

# zip the PRAW and boto3 packages
Expand Down Expand Up @@ -197,7 +197,7 @@ resource "aws_lambda_layer_version" "utils_layer" {
resource "aws_lambda_function" "lambda_function" {
# depends_on = [resource.null_resource.zip_function]

filename = "./scripts/zippedLambdaFunction/getRedditDataFunction.zip"
filename = "./scripts/zippedLambdaFunction/get_reddit_data_function.zip"
function_name = "lambda-reddit-scraping-${var.env}"
role = aws_iam_role.iam_for_lambda.arn
handler = "lambda_function.lambda_handler"
Expand Down
4 changes: 2 additions & 2 deletions scripts/zipLambdaFunction.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ SCRIPT_PATH=${0%/*} # https://stackoverflow.com/questions/6393551/what-is-the-m
CWD=${pwd}
cd $SCRIPT_PATH

[ -d "../lambdaFunctions/${function_name}" ] && echo "Directory ../lambdaFunctions/${function_name} exists." || { echo "Error: Directory ../lambdaFunctions/${function_name} does not exist."; exit 1; }
[ -d "../lambda_functions/${function_name}" ] && echo "Directory ../lambda_functions/${function_name} exists." || { echo "Error: Directory ../lambda_functions/${function_name} does not exist."; exit 1; }

cd ./zippedLambdaFunction/
rm -r ./${function_name} || true
cp -r ../../lambdaFunctions/${function_name} ./ # copy lambda function files here
cp -r ../../lambda_functions/${function_name} ./ # copy lambda function files here
rm -rf ${function_name}.zip # remove first if it exists
cd ./${function_name}/ # for some reason you have to zip from within this folder or it won't work; otherwise it wraps the files in another folder
#rm -rf ./*.ipynb* # remove any notebook stuff
Expand Down

0 comments on commit 7913a72

Please sign in to comment.