update to endpoints
- Rewrote the entity mentions endpoint so its response is only the newly processed file rather than all entity mentions (formerly /{articlename}/entities)
- Added an /entitymentions/all endpoint that returns the entire entity_mentions.json file (a usage sketch follows the change summary below)
FredTheNoob committed Nov 24, 2023
1 parent ea50934 commit 7ea46fe
Showing 6 changed files with 52 additions and 59 deletions.
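
A minimal sketch of how a client might call the two endpoints after this change. It assumes the service is running locally on FastAPI's default port and that the requests package is installed; the base URL and the article name test.txt are illustrative, not part of this commit.

import requests

BASE_URL = "http://localhost:8000"  # assumed local address, not defined in this repo

# /entitymentions now returns only the entry for the article that was just processed
single = requests.get(f"{BASE_URL}/entitymentions", params={"article": "test.txt"})
single.raise_for_status()
print(single.json()["fileName"])  # a single JSON object for that article

# /entitymentions/all returns the entire entity_mentions.json file
everything = requests.get(f"{BASE_URL}/entitymentions/all")
everything.raise_for_status()
print(len(everything.json()))  # a list with one entry per processed article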
24 changes: 2 additions & 22 deletions components/EntityLinker.py
@@ -1,32 +1,12 @@
from typing import List
from Levenshtein import distance
from components import Db
from lib.EntityLinked import EntityLinked
from lib.Entity import Entity
from fuzzywuzzy import fuzz


def GetAllEntities(entityMentions):
allEntities = []
fileName = ""
for file in entityMentions:
fileName = file["fileName"]
for sentence in file["sentences"]:
for entity in sentence["entityMentions"]:
newEntity = Entity(
name=entity["name"],
startIndex=entity["startIndex"],
endIndex=entity["endIndex"],
sentence=sentence["sentence"],
sentenceStartIndex=sentence["sentenceStartIndex"],
sentenceEndIndex=sentence["sentenceEndIndex"],
label=entity["label"],
type=entity["type"],
)
allEntities.append(newEntity)
return allEntities


async def entitylinkerFunc(entities, threshold=80):
async def entitylinkerFunc(entities: List[Entity], threshold:int=80):
iri_dict = {}
linked_entities = []
db_path = "./Database/DB.db"
17 changes: 9 additions & 8 deletions components/GetSpacyData.py
@@ -1,4 +1,4 @@
import spacy, json, os
import json, os
import sys
from langdetect import detect
from typing import List
@@ -7,6 +7,7 @@
from lib.Exceptions.UndetectedLanguageException import (
UndetectedLanguageException,
)
from lib.JSONEntityOutput import JSONEntityOutput

sys.path.append(".")
from lib.Entity import Entity
@@ -17,8 +18,8 @@
nlp_da = da_core_news_lg.load()


# GetText skal få text fra pipeline del A
def GetText(title):
# GetText shall get text from pipeline del A
def GetText(title: str):
file = open(title, "r")

stringWithText = file.read()
@@ -27,7 +28,7 @@ def GetText(title):
return stringWithText


def GetTokens(text):
def GetTokens(text: str):
result = DetectLang(text)
if result == "da":
return nlp_da(text)
@@ -37,14 +38,14 @@ def GetTokens(text):
raise UndetectedLanguageException()


def DetectLang(text):
def DetectLang(text: str):
stringdata = str(text)
language = detect(stringdata)
return language


# Method to fully extract entity mentions, find the sentences and calculate indexes and finally create a final JSON
def BuildJSONFromEntities(entities: List[EntityLinked], doc, fileName: str):
def BuildJSONFromEntities(entities: List[EntityLinked], doc, fileName: str) -> JSONEntityOutput:
# Create a list of sentences with their entities in the desired JSON format
currentJson = open("./entity_mentions.json", "r")
currentJson.seek(0, os.SEEK_END)
@@ -88,12 +89,12 @@ def BuildJSONFromEntities(entities: List[EntityLinked], doc, fileName: str):
if len(currentJson) != 0:
for index in currentJson:
if index["fileName"] == final_json["fileName"]:
return currentJson
return JSONEntityOutput(final_json, currentJson)
else:
currentJson.append(final_json)
else:
currentJson.append(final_json)
return currentJson
return JSONEntityOutput(final_json, currentJson)

def GetEntities(doc) -> List[Entity]:
entities = []
4 changes: 4 additions & 0 deletions lib/JSONEntityOutput.py
@@ -0,0 +1,4 @@
class JSONEntityOutput:
def __init__(self, newFile, allFiles):
self.newFile = newFile
self.allFiles = allFiles
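
A hedged illustration of the return contract this small container introduces: BuildJSONFromEntities (and processInput in main.py) now hand back both the newly built entry and the full accumulated list, so callers pick the field they need. The sample data below is invented.

from lib.JSONEntityOutput import JSONEntityOutput

new_entry = {"fileName": "data_from_A/test.txt", "sentences": []}  # hypothetical entry
output = JSONEntityOutput(newFile=new_entry, allFiles=[new_entry])

# main.py persists the whole list to entity_mentions.json ...
#     json.dump(output.allFiles, entityJson, ensure_ascii=False, indent=4)
# ... while the /entitymentions endpoint responds with only the new entry:
#     return output.newFile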
45 changes: 21 additions & 24 deletions main.py
@@ -16,55 +16,51 @@
DIRECTORY_TO_WATCH = "data_from_A/"

async def newFileCreated(file_path: str):
await main(file_path)
await processInput(file_path)

dirWatcher = DirectoryWatcher(directory=DIRECTORY_TO_WATCH, async_callback=newFileCreated)


@app.on_event("startup")
async def startEvent():
dirWatcher.start_watching()


@app.on_event("shutdown")
def shutdown_event():
dirWatcher.stop_watching()


app.mount(
"/static",
StaticFiles(directory="static"),
name="static",
)


@app.get('/')
async def root(request: Request):
return templates.TemplateResponse(
"index.html", {"request": request}
)

@app.get("/entitymentions/all")
async def get_all_json():
if not os.path.exists("entity_mentions.json"):
raise HTTPException(status_code=404, detail="mentions not found")

with open("entity_mentions.json", "r") as entity_json:
entity_mentions = json.load(entity_json)
return entity_mentions

@app.get("/entitymentions")
async def get_json(article: str = Query(..., title="Article Filename")):
path = DIRECTORY_TO_WATCH + article
if not os.path.exists(path):
raise HTTPException(status_code=404, detail="Article not found")

await main(path) # Pass the article parameter to your main function
with open("entity_mentions.json", "r") as entity_json:
entity_mentions = json.load(entity_json)
return entity_mentions


@app.get("/{articlename}/entities")
async def getentities(articlename: str):
await main()
with open("entity_mentions.json", "r") as entityJson:
entityMentions = json.load(entityJson)
for elem in entityMentions:
path = elem["fileName"]
name = path.split("/")
if name[-1] == articlename:
return elem
raise HTTPException(status_code=404, detail="Article not found")

newFile = await processInput(path)
return newFile

@app.post("/detectlanguage")
async def checklang(request: Request):
@@ -79,8 +75,9 @@ async def checklang(request: Request):
return language


async def main(file_path: str = "Artikel.txt"):
open("entity_mentions.json", "w").close()
async def processInput(file_path: str = "Artikel.txt"):
if not os.path.exists("entity_mentions.json"):
open("entity_mentions.json", "w").close()

text = GetSpacyData.GetText(
file_path
@@ -104,8 +101,6 @@ async def main(file_path: str = "Artikel.txt"):
doc
) # construct entities from text

# To prevent appending challenges, the final JSON is created in GetEntities()
# entMentions= GetSpacyData.entityMentionJson(ents) #Returns JSON object containing an array of entity mentions
await Db.InitializeIndexDB(
"./Database/DB.db"
) # makes the DB containing the entities of KG
@@ -122,4 +117,6 @@
)

with open("entity_mentions.json", "w", encoding="utf8") as entityJson:
json.dump(entsJSON, entityJson, ensure_ascii=False, indent=4)
json.dump(entsJSON.allFiles, entityJson, ensure_ascii=False, indent=4)

return entsJSON.newFile
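
For reference, a sketch of what one entry in entity_mentions.json appears to look like, inferred from the fields read by the removed GetAllEntities helper (fileName, sentences, and per-sentence entityMentions with name, indices, label and type). All concrete values are placeholders.

# Assumed shape of a single entry; field names come from the code above, values are invented.
example_entry = {
    "fileName": "data_from_A/test.txt",
    "sentences": [
        {
            "sentence": "An example sentence mentioning Aalborg.",
            "sentenceStartIndex": 0,
            "sentenceEndIndex": 39,
            "entityMentions": [
                {
                    "name": "Aalborg",
                    "startIndex": 31,
                    "endIndex": 38,
                    "label": "LOC",
                    "type": "named",
                }
            ],
        }
    ],
}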
17 changes: 13 additions & 4 deletions tests/integration/test_GetJSON.py
@@ -18,11 +18,20 @@ async def test_SlashEntityMentionsIsUp():


@pytest.mark.asyncio
async def test_SlashEntityMentionsReturnsJsonArray():
async def test_SlashEntityMentionsAllReturnsJsonArray():
with TestClient(app) as client:
res = client.get("/entitymentions/all")
print(type(res.json()))
assert type(res.json()) == list
assert type(res.json()[0]) == dict
client.__exit__
client.close()

@pytest.mark.asyncio
async def test_SlashEntityMentionsReturnsJson():
with patch('main.DIRECTORY_TO_WATCH', 'data_from_A/'):
with TestClient(app) as client:
res = client.get("/entitymentions?article=test.txt")
assert type(res.json()) == list
assert type(res.json()[0]) == dict
assert type(res.json()) == dict
client.__exit__
client.close()
client.close()
4 changes: 3 additions & 1 deletion tests/unit/test_GetSpacyData.py
@@ -82,12 +82,14 @@ def test_GetEntities():
)
)

entsJSON = GetSpacyData.BuildJSONFromEntities(
entsJSONOutput = GetSpacyData.BuildJSONFromEntities(
entLinks,
docFile,
filename
)

entsJSON = entsJSONOutput.allFiles

testIndex = 0
for i in range(len(entsJSON)):
if entsJSON[i]["fileName"] == "Testing2023":
