Skip to content

Commit

Permalink
use json preprocessed data
Browse files Browse the repository at this point in the history
- DirectoryWatcher is now integrated into the API. When a new article is added to the "data_from_A" directory, it is detected, processed, and written to the entity_mentions.json file
- Fixed up tests
- Removed unused imports in main.py
  • Loading branch information
FredTheNoob committed Nov 24, 2023
1 parent 05b8e40 commit a476aa8
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 96 deletions.
21 changes: 13 additions & 8 deletions lib/DirectoryWatcher.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,33 @@
# directory_watcher.py
import threading
import asyncio
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

class DirectoryWatcher:
def __init__(self, directory, callback):
def __init__(self, directory, async_callback):
self.directory = directory
self.callback = callback
self.async_callback = async_callback
self.is_watching = True
self.event_handler = FileSystemEventHandler()
self.event_handler.on_created = self.on_created
self.observer = Observer()
self.observer.schedule(self.event_handler, path=self.directory, recursive=False)

def on_created(self, event):
if event.is_directory:
return
# Call your callback method here
self.callback(event.src_path)
# Call the asynchronous callback using asyncio.run
asyncio.run(self.async_callback(event.src_path))

def start_watching(self):
# Define a thread target function
def run_observer():
self.observer.schedule(self.event_handler, path=self.directory, recursive=False)
self.observer.start()
try:
while self.is_watching:
pass
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(self.run_once())
except KeyboardInterrupt:
pass
finally:
Expand All @@ -38,7 +40,10 @@ def run_observer():
# Return the thread in case you want to manage it externally
return watcher_thread

async def run_once(self):
    """Suspend for one second, then return.

    The pause length is hard-coded; adjust it here if a different
    interval is needed.
    """
    await asyncio.sleep(1)

def stop_watching(self):
self.is_watching = False
self.observer.stop()
self.observer.join()
self.observer.join()
49 changes: 23 additions & 26 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,31 @@
import string
from components import *
from components.EntityLinker import entitylinkerFunc
from components.EntityLinker import GetAllEntities
import sys, json, os
from multiprocessing import Process
from lib.Exceptions.ArticleNotFoundException import ArticleNotFoundException
from lib.Exceptions.InputException import InputException
import json, os
from lib.Exceptions.UndetectedLanguageException import (
UndetectedLanguageException,
)
from lib.DirectoryWatcher import DirectoryWatcher
from langdetect import detect
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi import FastAPI, HTTPException, Request, Query
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pathlib import Path
from fastapi.templating import Jinja2Templates

templates = Jinja2Templates(directory="public")
app = FastAPI(title="API")

dirWatcher = DirectoryWatcher(directory = "data_from_A", callback=lambda file_path :print("whatever" + file_path))
DIRECTORY_TO_WATCH = "data_from_A/"

async def newFileCreated(file_path: str):
    """Directory-watcher callback: run ``main`` on the given file path."""
    await main(file_path)

dirWatcher = DirectoryWatcher(directory=DIRECTORY_TO_WATCH, async_callback=newFileCreated)

@app.on_event("startup")
async def startEvent():
dirWatcher.start_watching() #Starts DirectoryWatcher
dirWatcher.start_watching()

@app.on_event("shutdown")
def shutdown_event():
print("Shutting down...")
dirWatcher.stop_watching()

app.mount(
Expand All @@ -45,15 +40,17 @@ async def root(request: Request):
"index.html", {"request": request}
)




@app.get("/entitymentions")
async def getJson():
await main()
with open("entity_mentions.json", "r") as entityJson:
entityMentions = json.load(entityJson)
return entityMentions
async def get_json(article: str = Query(..., title="Article Filename")):
    """Process one article from the watched directory and return its mentions.

    Args:
        article: File name of an article located inside
            ``DIRECTORY_TO_WATCH``.

    Returns:
        The parsed contents of ``entity_mentions.json`` as left by ``main``
        after processing the article.

    Raises:
        HTTPException: 404 when ``article`` does not name an existing regular
            file inside ``DIRECTORY_TO_WATCH``.
    """
    # Keep the path exactly as before for ``main`` so downstream output that
    # embeds the file path is unchanged.
    path = DIRECTORY_TO_WATCH + article

    # ``article`` comes straight from the query string. Resolve it and verify
    # it stays inside the watched directory so values such as "../main.py"
    # cannot reach files elsewhere on disk.
    watched_root = os.path.realpath(DIRECTORY_TO_WATCH)
    try:
        resolved = os.path.realpath(path)
    except ValueError:
        # Unresolvable input (e.g. an embedded NUL byte) is simply "not found".
        resolved = None

    is_inside = resolved is not None and resolved.startswith(watched_root + os.sep)
    # isfile (not exists) so that a directory is never handed to the pipeline.
    if not is_inside or not os.path.isfile(resolved):
        raise HTTPException(status_code=404, detail="Article not found")

    await main(path)  # Regenerates entity_mentions.json for this article

    # Read with the same encoding ``main`` uses when writing the file.
    with open("entity_mentions.json", "r", encoding="utf8") as entity_json:
        entity_mentions = json.load(entity_json)
    return entity_mentions


@app.get("/{articlename}/entities")
Expand Down Expand Up @@ -82,19 +79,18 @@ async def checklang(request: Request):
return language


async def main():
if not os.path.exists("entity_mentions.json"):
open("entity_mentions.json", "w").close()
async def main(file_path: str = "Artikel.txt"):
open("entity_mentions.json", "w").close()

text = GetSpacyData.GetText(
"Artikel.txt"
file_path
) # Takes in title of article. Gets article text in string format
doc = GetSpacyData.GetTokens(
text
) # finds entities in text, returns entities in doc object

text = GetSpacyData.GetText(
"Artikel.txt"
file_path
) # Takes in title of article. Gets article text in string format

try:
Expand All @@ -114,14 +110,15 @@ async def main():
"./Database/DB.db"
) # makes the DB containing the entities of KG
# Returns JSON object containing an array of entity links

entLinks = await entitylinkerFunc(
ents
) # Returns JSON object containing an array of entity links

entsJSON = GetSpacyData.BuildJSONFromEntities(
entLinks,
doc,
"Artikel.txt"
file_path
)

with open("entity_mentions.json", "w", encoding="utf8") as entityJson:
Expand Down
27 changes: 15 additions & 12 deletions tests/integration/test_GetJSON.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
import pytest
from fastapi.testclient import TestClient
import sys
from unittest.mock import patch

sys.path.append(".")
from main import app
from main import app, DIRECTORY_TO_WATCH


@pytest.mark.asyncio
async def test_SlashEntityMentionsIsUp():
with TestClient(app) as client:
res = client.get("/entitymentions")
assert res.status_code == 200
client.__exit__
client.close()
with patch('main.DIRECTORY_TO_WATCH', 'data_from_A/'):
with TestClient(app) as client:
res = client.get("/entitymentions?article=test.txt")
assert res.status_code == 200
client.__exit__
client.close()


@pytest.mark.asyncio
async def test_SlashEntityMentionsReturnsJsonArray():
with TestClient(app) as client:
res = client.get("/entitymentions")
assert type(res.json()) == list
assert type(res.json()[0]) == dict
client.__exit__
client.close()
with patch('main.DIRECTORY_TO_WATCH', 'data_from_A/'):
with TestClient(app) as client:
res = client.get("/entitymentions?article=test.txt")
assert type(res.json()) == list
assert type(res.json()[0]) == dict
client.__exit__
client.close()
56 changes: 56 additions & 0 deletions tests/unit/test_DirectoryWatcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import pytest
from unittest.mock import MagicMock, patch
from lib.DirectoryWatcher import DirectoryWatcher
import asyncio

@pytest.mark.asyncio
async def test_on_created():
    """on_created passes the new file's path to the callback via asyncio.run."""
    # Stand-in for the async callback; calling it returns a placeholder object
    # that takes the place of the coroutine handed to asyncio.run.
    async_callback_mock = MagicMock()
    watcher = DirectoryWatcher(directory='/path/to/watch', async_callback=async_callback_mock)

    # Mock asyncio.run to avoid the RuntimeError it raises when invoked while
    # this test's own event loop is already running.
    with patch('asyncio.run') as run_mock:
        # Simulate a file (not directory) creation event
        event_mock = MagicMock(is_directory=False, src_path='/path/to/file.txt')
        watcher.on_created(event_mock)

    # The callback must receive the created file's path ...
    async_callback_mock.assert_called_once_with('/path/to/file.txt')
    # ... and whatever it returned must actually be handed to asyncio.run,
    # otherwise the callback's work would never be executed.
    run_mock.assert_called_once_with(async_callback_mock.return_value)

@pytest.mark.asyncio
async def test_start_and_stop_watching():
    """start_watching/stop_watching drive the observer's lifecycle methods."""
    fake_observer = MagicMock()
    fake_callback = MagicMock()

    # Build the watcher, then swap its observer for the mock so no real
    # filesystem watching takes place.
    watcher = DirectoryWatcher(directory='/path/to/watch', async_callback=fake_callback)
    watcher.observer = fake_observer

    thread = watcher.start_watching()

    # NOTE(review): these checks run immediately after start_watching()
    # returns; if schedule/start are invoked on the background thread the
    # assertions may be timing-sensitive -- confirm.
    fake_observer.schedule.assert_called_once()
    fake_observer.start.assert_called_once()

    watcher.stop_watching()

    # Stopping must both stop and join the observer.
    fake_observer.stop.assert_called_once()
    fake_observer.join.assert_called_once()

    # Wait for the watcher thread to finish before the test ends.
    thread.join()

@pytest.mark.asyncio
async def test_run_once():
    """run_once can be awaited to completion without raising."""
    # The callback is irrelevant here; an identity function is enough.
    watcher = DirectoryWatcher(
        directory='/path/to/watch',
        async_callback=lambda path: path,
    )

    # Passing means the await finished without an exception.
    await watcher.run_once()

# You can add more tests as needed, especially for edge cases and specific behaviors.
50 changes: 0 additions & 50 deletions tests/unit/test_FileWatcher.py

This file was deleted.

0 comments on commit a476aa8

Please sign in to comment.