Skip to content

Commit

Permalink
for 4
Browse files: browse the repository at this point in the history
  • Loading branch information
RostislavHmelevski committed Jun 2, 2024
1 parent 32a90fb commit 600e3f6
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 12 deletions.
3 changes: 2 additions & 1 deletion lab_5_scrapper/scrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def _fill_article_with_meta_information(self, article_soup: BeautifulSoup) -> No
topics = article_soup.find(class_="argcat")
self.article.topics = topics.text.replace('\n', '')
time = article_soup.find('time', itemprop="datePublished")
self.article.time = time.text.replace('\n', '')
self.article.time = self.unify_date_format(time.attrs.get('datetime'))

def unify_date_format(self, date_str: str) -> datetime.datetime:
"""
Expand All @@ -339,6 +339,7 @@ def unify_date_format(self, date_str: str) -> datetime.datetime:
datetime.datetime: Datetime object
"""


def parse(self) -> Union[Article, bool, list]:
"""
Parse each article.
Expand Down
9 changes: 6 additions & 3 deletions lab_6_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Pipeline for CONLL-U formatting.
"""
# pylint: disable=too-few-public-methods, unused-import, undefined-variable, too-many-nested-blocks
import pathlib
import stanza

try:
Expand All @@ -11,9 +10,13 @@
DiGraph = None # type: ignore
print('No libraries installed. Failed to import.')

from core_utils.article.article import Article, get_article_id_from_filepath, split_by_sentence
import pathlib
import spacy_udpipe

from core_utils.article.article import (Article, ArtifactType, get_article_id_from_filepath,
split_by_sentence)
from core_utils.article.io import from_raw, to_cleaned
from core_utils.constants import ASSETS_PATH
from core_utils.constants import ASSETS_PATH, UDPIPE_MODEL_PATH
from core_utils.pipeline import (AbstractCoNLLUAnalyzer, CoNLLUDocument, LibraryWrapper,
PipelineProtocol, StanzaDocument, TreeNode)

Expand Down
16 changes: 8 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
beautifulsoup4==4.12.3
bs4==0.0.2
requests~=2.31.0
pydantic~=2.7.2
DateTime~=5.5
stanza~=1.8.2
spacy~=3.7.4
networkx~=3.2.1
pillow~=10.3.0
matplotlib~=3.8.4
datetime==5.5
matplotlib==3.8.4
networkx==3.2.1
pillow==10.3.0
pydantic==2.7.2
requests==2.31.0
spacy==3.7.4
stanza==1.8.2

0 comments on commit 600e3f6

Please sign in to comment.