Skip to content

Commit

Permalink
move modifyTxt code to GetText (only god knows why that works)
Browse files Browse the repository at this point in the history
  • Loading branch information
FredTheNoob committed Dec 8, 2023
1 parent 4fcb3c9 commit 06e32df
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 48 deletions.
15 changes: 8 additions & 7 deletions components/GetSpacyData.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json, os
import string
import sys
from langdetect import detect
from typing import List
Expand All @@ -21,11 +22,13 @@
# GetText shall get the text from pipeline part A
def GetText(title: str):
    """Read the text file at ``title`` and return it flattened to one string.

    The file content is split on blank lines (two consecutive newlines).
    Each piece that does not already end with a punctuation character gets
    ". " appended; pieces that do end with punctuation get a single space
    appended. The pieces are then joined with single spaces.

    Args:
        title: Path of the text file to read.

    Returns:
        The normalised text as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed. A bare
    # ``file.close`` (without parentheses) only references the method and
    # never closes anything.
    with open(title, "r") as file:
        content = file.read()

    lines = content.split('\n\n')
    # str.endswith accepts a tuple, so one call checks every punctuation mark.
    modified_lines = [
        line + '. ' if not line.endswith(tuple(string.punctuation)) else line + ' '
        for line in lines
    ]

    return ' '.join(modified_lines)


def GetTokens(text: str):
Expand Down Expand Up @@ -64,21 +67,19 @@ def BuildJSONFromEntities(entities: List[EntityLinked], doc, fileName: str) -> J
# Use the 'start' and 'end' indexes of the entity to get its index within its sentence
sentence = entity.sentence

print("sentence: ",sentence)

entityJSON = entity.getEntityJSON()

found = False
for sentence_info in sentences_json:
if sentence_info["sentence"] == sentence.replace("\n", ""):
if sentence_info["sentence"] == sentence:
sentence_info["entityMentions"].append(entityJSON)
found = True
break

if not found:
sentences_json.append(
{
"sentence": sentence.replace("\n", ""),
"sentence": sentence,
"sentenceStartIndex": entity.sentenceStartIndex,
"sentenceEndIndex": entity.sentenceEndIndex,
"entityMentions": [entityJSON],
Expand Down
4 changes: 3 additions & 1 deletion lib/DirectoryWatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import asyncio
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

import os

class DirectoryWatcher:
def __init__(self, directory, async_callback):
Expand All @@ -20,6 +20,8 @@ def on_created(self, event):
asyncio.run(self.async_callback(event.src_path))

def start_watching(self):
if not os.path.exists(self.directory):
os.mkdir(self.directory)
# Define a thread target function
def run_observer():
self.observer.schedule(
Expand Down
61 changes: 21 additions & 40 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from fastapi.templating import Jinja2Templates
import requests
from dotenv import load_dotenv
import uvicorn

templates = Jinja2Templates(directory="public")
app = FastAPI(title="API")

Expand All @@ -22,30 +24,26 @@
ACCESS_API_AUTHORIZATION = str(os.getenv("ACCESS_API_AUTHORIZATION"))

async def newFileCreated(file_path: str):
    """Process a newly created file and forward the result to pipeline C.

    Runs ``processInput`` on ``file_path`` and POSTs the result, wrapped in a
    list, to ``PIPELINE_C_URL`` with the configured authorization headers.
    The response is printed for debugging.

    Args:
        file_path: Path of the file that was just created.

    Returns:
        ``None`` on success, or ``{"error": <message>}`` when any step raised.
    """
    try:
        newFileProcessed = await processInput(file_path)
        print(newFileProcessed)
        Headers = {
            "Authorization": PIPELINE_C_AUTHORIZATION,
            "Access-Authorization": ACCESS_API_AUTHORIZATION,
        }
        r = requests.post(PIPELINE_C_URL, json=[newFileProcessed], headers=Headers)
        print(r.content)
        print(r)
        print(r.json())
    except Exception as e:
        # The server should not freeze every time an exception is thrown, so
        # the failure is reported and returned instead of propagated.
        print(f"An exception occurred: {str(e)}")
        return {"error": str(e)}

# Single module-level watcher so every event handler refers to the same
# instance.
dirWatcher = DirectoryWatcher(
    directory=DIRECTORY_TO_WATCH, async_callback=newFileCreated
)


@app.on_event("startup")
async def startEvent():
    """Make sure the watched directory exists, then start watching it once."""
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(DIRECTORY_TO_WATCH, exist_ok=True)
    dirWatcher.start_watching()


@app.on_event("shutdown")
Expand Down Expand Up @@ -79,7 +77,6 @@ async def get_all_json():
@app.get("/entitymentions")
async def get_json(article: str = Query(..., title="Article Filename")):
path = DIRECTORY_TO_WATCH + article
print(path)
if not os.path.exists(path):
raise HTTPException(status_code=404, detail="Article not found")
try:
Expand All @@ -94,34 +91,13 @@ async def get_json(article: str = Query(..., title="Article Filename")):
async def checklang(request: Request):
    """Detect the language of the text sent as the raw request body.

    Args:
        request: Incoming request whose body holds the text to classify.

    Returns:
        The value returned by ``detect`` for the body text.

    Raises:
        HTTPException: With status 400 when the text is shorter than four
            characters.
    """
    data = await request.body()
    # The body arrives as bytes. str(bytes) would give the "b'...'" repr,
    # adding three characters to the length check and passing quote/escape
    # noise to the detector, so decode it instead.
    stringdata = data.decode("utf-8", errors="replace")
    if len(stringdata) < 4:
        raise HTTPException(status_code=400, detail="Text is too short")

    language = detect(stringdata)

    return language


async def modifyTxt(file_path):
    """Rewrite the file at ``file_path`` in place as one line of sentences.

    The content is split on blank lines (two consecutive newlines). Each
    piece not ending with punctuation gets ". " appended, the others get a
    single space, and the pieces are joined with spaces and written back.

    Errors are reported on stdout rather than raised (best effort).

    Args:
        file_path: Path of the text file to rewrite.
    """
    # Imported locally: string.punctuation is needed below and no import of
    # ``string`` is visible in the shown part of this file.
    import string

    try:
        with open(file_path, 'r') as file:
            content = file.read()
        if not content:
            print("The file is empty.")
            # Nothing to rewrite; carrying on would turn an empty file
            # into ". ".
            return
        lines = content.split('\n\n')
        # Adds '. ' to lines not ending with punctuation, else adds a space.
        modified_lines = [
            line + '. ' if not line.endswith(tuple(string.punctuation)) else line + ' '
            for line in lines
        ]
        # The with-block closes the file; no explicit close() is needed.
        with open(file_path, 'w') as file:
            file.write(' '.join(modified_lines))
    except FileNotFoundError:
        print(f"The file at {file_path} could not be found.")
    except Exception as e:
        print(f"An error occurred: {e}")


async def processInput(file_path: str = "Artikel.txt"):
text = GetSpacyData.GetText(
file_path
Expand Down Expand Up @@ -158,3 +134,8 @@ async def processInput(file_path: str = "Artikel.txt"):
json.dump(entsJSON.allFiles, entityJson, ensure_ascii=False, indent=4)

return entsJSON.newFile

# Allows this file to be run like so: "python main.py"
# Useful for debugging
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)

0 comments on commit 06e32df

Please sign in to comment.