Move the C++ and Python sources out of src/ into cpp/ and python3/, replace
the old src/cpp/Makefile with cpp/Makefile, and rename simplyfy_string to
simplyfy in python3/main.py.

Review notes. Line breaks and indentation of this patch were reconstructed:
the leading whitespace of context and removed lines is assumed (4 spaces per
level) and must be checked against the pre-image before applying. The first
cpp/src/main.cpp hunk anchors on trailing context only, because the header
names of the pre-image #include lines are not available. The "index" blob
ids and similarity scores are those of the original commit. cpp/bin/test is
a build product of cpp/Makefile and is intentionally not part of this patch.

diff --git a/cpp/Makefile b/cpp/Makefile
new file mode 100644
index 0000000..94f8968
--- /dev/null
+++ b/cpp/Makefile
@@ -0,0 +1,35 @@
+# Builds every src/*.cpp into build/*.o and links them into bin/test.
+CXX := g++
+CXXFLAGS := -Wall -std=c++11 -Iinclude/SDL2
+LDLIBS := -lSDL2 -lSDL2_image
+
+SRC_DIR := src
+OBJ_DIR := build
+BIN_DIR := bin
+
+SRC := $(wildcard $(SRC_DIR)/*.cpp)
+OBJ := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(SRC))
+EXECUTABLE := $(BIN_DIR)/test
+
+.PHONY: all clean run
+
+# Drop a half-written target when its recipe fails.
+.DELETE_ON_ERROR:
+
+# `clean` is deliberately not a prerequisite: it would force a full rebuild
+# on every run and race with the compile rules under `make -j`.
+all: $(EXECUTABLE)
+
+$(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp
+	@mkdir -p $(@D)
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+$(EXECUTABLE): $(OBJ)
+	@mkdir -p $(@D)
+	$(CXX) $(LDFLAGS) $^ -o $@ $(LDLIBS)
+
+clean:
+	$(RM) $(OBJ_DIR)/*.o $(EXECUTABLE)
+
+run: $(EXECUTABLE)
+	./$(EXECUTABLE)
diff --git a/src/cpp/main.cpp b/cpp/src/main.cpp
similarity index 68%
rename from src/cpp/main.cpp
rename to cpp/src/main.cpp
index 12f2057..8cbc2c5 100644
--- a/src/cpp/main.cpp
+++ b/cpp/src/main.cpp
@@ -4,3 +4,4 @@
+#include <cctype>
 
 using namespace std;
 
@@ -57,6 +58,20 @@ int iterative_lev(const string& a, const string& b, bool print_matrix = false){
     return d[a_len][b_len];
 }
 
+// Reduce a UTF-8 string to its lowercase ASCII characters.
+// Bytes >= 0x80 only appear inside multi-byte UTF-8 sequences, so skipping
+// them drops every non-ASCII character without decoding the string.
+// NOTE: python3/main.py's simplyfy NFKD-normalizes first, so the two
+// versions can differ on accented input.
+string simplyfy(const string& word){
+    string reencoded;
+    reencoded.reserve(word.size());
+    for (unsigned char c : word){
+        if (c < 128) reencoded += static_cast<char>(std::tolower(c));
+    }
+    return reencoded;
+}
+
 double similarity_percentage(const string& a, const string& b){
     if (a.length() == 0 || b.length() == 0) return 0;
     int lev_dist = iterative_lev(a, b);
@@ -64,9 +79,23 @@ double similarity_percentage(const string& a, const string& b){
     return 1 - lev_dist / max_len;
 }
 
+// Return true when `a` occurs as a contiguous substring of `b`.
+// An empty `a` or `b` never matches.
+bool word_in_word(const string& a, const string& b){
+    if (a.empty() || b.empty()) return false;
+    return b.find(a) != string::npos;
+}
+
+// Each line of the news file has the form: title;;link;;date
+const string news_path = "../../local_data/security_news.csv";
+// Not implemented yet: the body is still empty.
+void check_word_news(const string& word, double limit = 0.72){
+
+}
+
 int main(){
-    iterative_lev("4321", "1234", true);
-    
+    cout << "Ertuğrul Şentürk" << endl;
+    cout << simplyfy("Ertuğrul Şentürk") << endl;
 
     return 0;
 }
diff --git a/src/python3/data_scrapping.py b/python3/data_scrapping.py
similarity index 100%
rename from src/python3/data_scrapping.py
rename to python3/data_scrapping.py
diff --git a/src/python3/main.py b/python3/main.py
similarity index 83%
rename from src/python3/main.py
rename to python3/main.py
index 276879a..e86ad9d 100644
--- a/src/python3/main.py
+++ b/python3/main.py
@@ -41,32 +41,26 @@ def iterative_lev(a:str, b:str, print_matrix:bool = False) -> int:
             print()
         print(f'Distance: {d[a_len][b_len]}')
     return d[a_len][b_len]
-"""
-double similarity_percentage(const string& a, const string& b){
-    if (a.length() == 0 || b.length() == 0) return 0;
-    int lev_dist = iterative_lev(a, b);
-    double max_len = max(a.length(), b.length());
-    return 1 - lev_dist / max_len;
-}
-"""
 from unicodedata import normalize
-def simplyfy_string(s_input:str) -> str:
+def simplyfy(s_input:str) -> str:
     normalized = normalize('NFKD', s_input)
     reencoded = normalized.encode('ascii', 'ignore').decode('utf-8')
     return reencoded.lower()
 
+
 def similarity_percentage(a:str, b:str) -> float:
     if len(a) == 0 or len(b) == 0:
         return 0
-    a = simplyfy_string(a)
-    b = simplyfy_string(b)
+    a = simplyfy(a)
+    b = simplyfy(b)
     return 1 - iterative_lev(a, b) / max(len(a), len(b))
 
+
 def word_in_word(word:str, word_in:str) -> bool:
     if len(word) == 0 or len(word_in) == 0:
         return False
-    word = simplyfy_string(word)
-    word_in = simplyfy_string(word_in)
+    word = simplyfy(word)
+    word_in = simplyfy(word_in)
     return word in word_in
 
 
@@ -79,8 +73,8 @@ def check_word_eng_dict(word:str):
                 print(f'Word: {line[0]} - Similarity: %{similarity * 100} - Meaning: {line[2]}')
 
 
-def check_word_news(word:str, limit:int = 0.72):
-    with open('security_news.csv', 'r') as file:
+def check_word_news(word:str, limit:float = 0.72) -> None:
+    with open('../../local_data/security_news.csv', 'r') as file:
         for line in file:
             line = line.split(';;')
             title_pieces = line[0].split()
@@ -92,6 +86,7 @@ def check_word_news(word:str, limit:int = 0.72):
                     print(f'Similarity: %{(similarity*100):.2f}')
                     print(f'Link: {line[1]}')
 
+
 def main():
     # General test for functions above
     """
@@ -115,12 +110,10 @@ def main():
     print(similarity_percentage("asd", "asddd"))
     print(similarity_percentage("asd", "asdddd"))
 
-    print(simplyfy_string("Staré Město"))
+    print(simplyfy("Staré Město"))
     """
-    #check_word_news("Google")
-    #print(simplyfy_string('Ertuğrul Şentürk'))
-
-
+    check_word_news("gpt")
+    #print(simplyfy('Ertuğrul Şentürk'))
 
 
 if __name__ == '__main__':
diff --git a/src/cpp/Makefile b/src/cpp/Makefile
deleted file mode 100644
index 0ffef85..0000000
--- a/src/cpp/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-all: test
-
-test: main.cpp
-	g++ main.cpp -o test
-
-run: test
-	./test
-
-clean:
-	rm -f test
\ No newline at end of file