Skip to content

Commit

Permalink
Separate main.py and main functionalities
Browse files Browse the repository at this point in the history
  • Loading branch information
erthium committed Jan 14, 2024
1 parent 0d3f1af commit 629c7d6
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 64 deletions.
68 changes: 68 additions & 0 deletions python3/distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
"""

def recursive_lev(a: str, b: str) -> int:
    """Return the Levenshtein distance between ``a`` and ``b`` recursively.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``a`` into ``b``.

    The recursion works on index pairs instead of string slices and is
    memoized, so each ``(i, j)`` suffix pair is solved once. This avoids the
    exponential blow-up of the naive formulation.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The edit distance as a non-negative integer.

    Note:
        Recursion depth grows with ``len(a) + len(b)``; very long inputs can
        still hit the interpreter's recursion limit.
    """
    from functools import lru_cache

    a_len, b_len = len(a), len(b)

    @lru_cache(maxsize=None)
    def _lev(i: int, j: int) -> int:
        # Distance between the suffixes a[i:] and b[j:].
        if i == a_len:
            return b_len - j  # only insertions remain
        if j == b_len:
            return a_len - i  # only deletions remain
        if a[i] == b[j]:
            return _lev(i + 1, j + 1)  # matching heads cost nothing
        return 1 + min(
            _lev(i + 1, j),      # delete a[i]
            _lev(i, j + 1),      # insert b[j]
            _lev(i + 1, j + 1),  # substitute a[i] -> b[j]
        )

    return _lev(0, 0)


def iterative_lev(a: str, b: str, print_matrix: bool = False) -> int:
    """Compute the Levenshtein distance with a full dynamic-programming table.

    Args:
        a: First string (one table row per character, plus a header row).
        b: Second string (one table column per character, plus a header column).
        print_matrix: When true, print the finished table row by row followed
            by a ``Distance: N`` line.

    Returns:
        The edit distance between ``a`` and ``b``. If either string is empty
        the length of the other is returned immediately (nothing is printed).
    """
    rows, cols = len(a), len(b)
    if not rows:
        return cols
    if not cols:
        return rows

    # Header row is 0..cols; every other row starts with its own index.
    table = [list(range(cols + 1))]
    table.extend([r] + [0] * cols for r in range(1, rows + 1))

    for r, ch_a in enumerate(a, start=1):
        above, current = table[r - 1], table[r]
        for c, ch_b in enumerate(b, start=1):
            deletion = above[c] + 1
            insertion = current[c - 1] + 1
            # A mismatch adds 1 to the diagonal cell, a match adds 0.
            substitution = above[c - 1] + (1 if ch_a != ch_b else 0)
            current[c] = min(deletion, insertion, substitution)

    result = table[rows][cols]
    if print_matrix:
        for row in table:
            for cell in row:
                print(cell, end=' ')
            print()
        print(f'Distance: {result}')
    return result


def simplyfy(s_input: str) -> str:
    """Reduce ``s_input`` to lowercase ASCII.

    The text is NFKD-normalized so accented letters split into a base letter
    plus combining marks; every code point that is not ASCII is then dropped
    and the remainder is lower-cased.
    """
    import unicodedata

    decomposed = unicodedata.normalize('NFKD', s_input)
    ascii_only = decomposed.encode('ascii', 'ignore')
    return ascii_only.decode('utf-8').lower()


def similarity_percentage(a: str, b: str) -> float:
    """Return a similarity score between ``a`` and ``b`` as a ratio.

    Both strings are first passed through ``simplyfy`` (lowercase ASCII).
    The score is ``1 - distance / longest_length``, where ``distance`` comes
    from ``iterative_lev``. Despite the name, the result is a fraction
    (1.0 means identical after simplification), not a value out of 100.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The similarity ratio, or ``0.0`` when either string is empty after
        simplification.
    """
    a = simplyfy(a)
    b = simplyfy(b)
    # Check emptiness AFTER simplification: input made only of non-ASCII
    # characters collapses to "", which previously caused a division by zero
    # when both sides collapsed.
    if not a or not b:
        return 0.0
    return 1 - iterative_lev(a, b) / max(len(a), len(b))


def word_in_word(word: str, word_in: str) -> bool:
    """Tell whether ``word`` occurs inside ``word_in`` after simplification.

    Both arguments are passed through ``simplyfy`` (lowercase ASCII) before
    the substring test.

    Args:
        word: The candidate substring.
        word_in: The text to search in.

    Returns:
        ``True`` if the simplified ``word`` is a substring of the simplified
        ``word_in``; ``False`` otherwise, including when either one is empty
        after simplification.
    """
    word = simplyfy(word)
    word_in = simplyfy(word_in)
    # Check emptiness AFTER simplification: "" is a substring of every
    # string, so a word that collapses to "" would otherwise always match.
    if not word or not word_in:
        return False
    return word in word_in
69 changes: 5 additions & 64 deletions python3/main.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,8 @@
def recursive_lev(a: str, b: str) -> int:
    """Return the Levenshtein distance between ``a`` and ``b`` recursively.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``a`` into ``b``.

    The recursion works on index pairs instead of string slices and is
    memoized, so each ``(i, j)`` suffix pair is solved once. This avoids the
    exponential blow-up of the naive formulation.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The edit distance as a non-negative integer.

    Note:
        Recursion depth grows with ``len(a) + len(b)``; very long inputs can
        still hit the interpreter's recursion limit.
    """
    from functools import lru_cache

    a_len, b_len = len(a), len(b)

    @lru_cache(maxsize=None)
    def _lev(i: int, j: int) -> int:
        # Distance between the suffixes a[i:] and b[j:].
        if i == a_len:
            return b_len - j  # only insertions remain
        if j == b_len:
            return a_len - i  # only deletions remain
        if a[i] == b[j]:
            return _lev(i + 1, j + 1)  # matching heads cost nothing
        return 1 + min(
            _lev(i + 1, j),      # delete a[i]
            _lev(i, j + 1),      # insert b[j]
            _lev(i + 1, j + 1),  # substitute a[i] -> b[j]
        )

    return _lev(0, 0)
from distance import similarity_percentage, word_in_word, simplyfy
import os


def iterative_lev(a: str, b: str, print_matrix: bool = False) -> int:
    """Compute the Levenshtein distance with a full dynamic-programming table.

    Args:
        a: First string (one table row per character, plus a header row).
        b: Second string (one table column per character, plus a header column).
        print_matrix: When true, print the finished table row by row followed
            by a ``Distance: N`` line.

    Returns:
        The edit distance between ``a`` and ``b``. If either string is empty
        the length of the other is returned immediately (nothing is printed).
    """
    rows, cols = len(a), len(b)
    if not rows:
        return cols
    if not cols:
        return rows

    # Header row is 0..cols; every other row starts with its own index.
    table = [list(range(cols + 1))]
    table.extend([r] + [0] * cols for r in range(1, rows + 1))

    for r, ch_a in enumerate(a, start=1):
        above, current = table[r - 1], table[r]
        for c, ch_b in enumerate(b, start=1):
            deletion = above[c] + 1
            insertion = current[c - 1] + 1
            # A mismatch adds 1 to the diagonal cell, a match adds 0.
            substitution = above[c - 1] + (1 if ch_a != ch_b else 0)
            current[c] = min(deletion, insertion, substitution)

    result = table[rows][cols]
    if print_matrix:
        for row in table:
            for cell in row:
                print(cell, end=' ')
            print()
        print(f'Distance: {result}')
    return result


def simplyfy(s_input: str) -> str:
    """Reduce ``s_input`` to lowercase ASCII.

    The text is NFKD-normalized so accented letters split into a base letter
    plus combining marks; every code point that is not ASCII is then dropped
    and the remainder is lower-cased.
    """
    import unicodedata

    decomposed = unicodedata.normalize('NFKD', s_input)
    ascii_only = decomposed.encode('ascii', 'ignore')
    return ascii_only.decode('utf-8').lower()


def similarity_percentage(a: str, b: str) -> float:
    """Return a similarity score between ``a`` and ``b`` as a ratio.

    Both strings are first passed through ``simplyfy`` (lowercase ASCII).
    The score is ``1 - distance / longest_length``, where ``distance`` comes
    from ``iterative_lev``. Despite the name, the result is a fraction
    (1.0 means identical after simplification), not a value out of 100.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The similarity ratio, or ``0.0`` when either string is empty after
        simplification.
    """
    a = simplyfy(a)
    b = simplyfy(b)
    # Check emptiness AFTER simplification: input made only of non-ASCII
    # characters collapses to "", which previously caused a division by zero
    # when both sides collapsed.
    if not a or not b:
        return 0.0
    return 1 - iterative_lev(a, b) / max(len(a), len(b))


def word_in_word(word: str, word_in: str) -> bool:
    """Tell whether ``word`` occurs inside ``word_in`` after simplification.

    Both arguments are passed through ``simplyfy`` (lowercase ASCII) before
    the substring test.

    Args:
        word: The candidate substring.
        word_in: The text to search in.

    Returns:
        ``True`` if the simplified ``word`` is a substring of the simplified
        ``word_in``; ``False`` otherwise, including when either one is empty
        after simplification.
    """
    word = simplyfy(word)
    word_in = simplyfy(word_in)
    # Check emptiness AFTER simplification: "" is a substring of every
    # string, so a word that collapses to "" would otherwise always match.
    if not word or not word_in:
        return False
    return word in word_in
# Directory that contains this script (realpath resolves symlinks first).
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# Location of the security-news CSV, anchored to the script's directory so it
# does not depend on the current working directory.
NEWS_PATH = os.path.join(SCRIPT_DIR, '../local_data/security_news.csv')


def check_word_eng_dict(word:str):
Expand All @@ -74,7 +15,7 @@ def check_word_eng_dict(word:str):


def check_word_news(word:str, limit:int = 0.72) -> None:
with open('../../local_data/security_news.csv', 'r') as file:
with open(NEWS_PATH, 'r') as file:
for line in file:
line = line.split(';;')
title_pieces = line[0].split()
Expand Down

0 comments on commit 629c7d6

Please sign in to comment.