-
Notifications
You must be signed in to change notification settings - Fork 48
Laboratory work #3, Vladislava Tsvetkova - 22FPL2 #165
Conversation
lab_3_generate_by_ngrams/main.py
Outdated
@@ -26,7 +24,7 @@ def __init__(self, end_of_word_token: str) -> None: | |||
end_of_word_token (str): A token denoting word boundary | |||
""" | |||
self._end_of_word_token = end_of_word_token | |||
self._storage = {end_of_word_token: 0} | |||
self._storage = {'_': 0} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is '_'?
lab_3_generate_by_ngrams/main.py
Outdated
return token[0] | ||
|
||
return None | ||
return self._storage[element] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
are you sure?
for token in element: | ||
if token.isalpha(): | ||
self._put(token) | ||
if token in (' ', self._end_of_word_token): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You create it on each iteration. Why?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Create it once before loop
@@ -1 +1 @@ | |||
10 | |||
0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed
@@ -105,13 +104,9 @@ def get_token(self, element_id: int) -> Optional[str]: | |||
""" | |||
if not isinstance(element_id, int): | |||
return None | |||
if element_id not in self._storage.values()): | |||
return None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is the implementation?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You removed it. Right now only the checks are in the function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it's here now
print(greedy_text_generator.run(51, 'Vernon')) | ||
n_gram_language_model = NGramLanguageModel(encoded[:100], 7) | ||
print(n_gram_language_model.build()) | ||
greedy_text_generator = GreedyTextGenerator(n_gram_language_model, text_processor) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add build, please
for token in element: | ||
if token.isalpha(): | ||
self._put(token) | ||
if token in (' ', self._end_of_word_token): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Create it once before loop
lab_3_generate_by_ngrams/main.py
Outdated
@@ -272,6 +260,7 @@ class NGramLanguageModel: | |||
_encoded_corpus (tuple): Encoded text | |||
""" | |||
|
|||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove
lab_3_generate_by_ngrams/main.py
Outdated
max_freq_tokens = [token for token, freq in tokens.items() if freq == max_freq] | ||
max_freq_tokens = sorted(max_freq_tokens, reverse=True) | ||
encoded_prompt += (max_freq_tokens[0],) | ||
best_predictions = [token for token, freq in next_tokens.items() if freq == max(next_tokens.values())] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can calculate max
only once, not in the loop
@@ -105,13 +104,9 @@ def get_token(self, element_id: int) -> Optional[str]: | |||
""" | |||
if not isinstance(element_id, int): | |||
return None | |||
if element_id not in self._storage.values()): | |||
return None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You removed it. Right now only the checks are in the function.
lab_3_generate_by_ngrams/main.py
Outdated
|
||
for n_gram in set(n_grams): | ||
number_of_n_grams = n_grams.count(n_gram) | ||
context_count = len([context for context in n_grams |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use `collections.Counter` instead of calling `.count()` inside the loop.
No description provided.