-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlang_reco.py
36 lines (27 loc) · 1.06 KB
/
lang_reco.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import nltk
# Download the NLTK stopwords corpus (a data package, not a code library).
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk import word_tokenize
# Languages recognized by the stopwords corpus; each name is passed to
# stopwords.words() by reclangue().
language = [
    'turkish', 'tajik', 'swedish', 'spanish', 'slovene', 'russian',
    'romanian', 'portuguese', 'norwegian', 'nepali', 'kazakh', 'italian',
    'indonesian', 'hungarian', 'greek', 'german', 'french', 'finnish',
    'english', 'dutch', 'danish', 'azerbaijani', 'arabic',
]
# Empty module-level containers created at import time. The French names
# read "potential", "recognized" and "dictionary".
potentiel, reconnu = [], []
dico = dict()
def reclangue(text=None):
    """Guess the language of a text from its stopword hits and print it.

    The text is tokenized with ``word_tokenize``. Every token found in a
    language's stopword list counts as one hit for that language, and the
    language with the most hits wins. Ties go to the language that comes
    first in the module-level ``language`` list.

    Args:
        text: Text to classify. When omitted (the original calling
            convention), the user is prompted on standard input.

    Returns:
        The name of the best-scoring language, or ``None`` when no token
        matched any stopword list. The outcome is also printed.

    NOTE(review): ``word_tokenize`` needs NLTK tokenizer data (the "punkt"
    models); this file only downloads ``stopwords`` -- confirm the tokenizer
    data is installed, otherwise tokenizing raises ``LookupError``.
    """
    if text is None:
        text = input('type here ')

    # Fold case before comparing: the stopword lists are expected to be
    # lowercase (TODO confirm for every language), so capitalized words
    # ("The", "Le", ...) would otherwise never match.
    tokens = [token.lower() for token in word_tokenize(text)]

    # Scores are deliberately local: module-level accumulators would carry
    # hits from one call into the next and skew later results.
    scores = {}
    for lang in language:
        # A set gives O(1) membership tests instead of a list scan per token.
        stop_set = set(stopwords.words(lang))
        hits = sum(1 for token in tokens if token in stop_set)
        if hits:
            scores[lang] = hits

    if not scores:
        # max() on an empty mapping raises ValueError; report the miss instead.
        print('no language recognized')
        return None

    # max() returns the first maximal key, i.e. the earliest entry of
    # `language` among the tied candidates.
    best = max(scores, key=scores.get)
    print(best)
    return best
# Run the interactive prompt only when this file is executed as a script,
# so that importing the module does not block waiting on input().
if __name__ == "__main__":
    reclangue()