diff --git a/lparchive2epub/lib.py b/lparchive2epub/lib.py index 036f7fe..256a575 100644 --- a/lparchive2epub/lib.py +++ b/lparchive2epub/lib.py @@ -20,7 +20,7 @@ class Chapters: new_href: str def __hash__(self): - return hash((self.original_href, str(self.txt.string))) + return hash((self.original_href, str(self.txt.text))) def __post_init__(self): self.sort_index = self.num @@ -75,10 +75,10 @@ def intro(url: str, p: BeautifulSoup) -> Intro: content = p.find("div", id="content") chapters = Extractor.all_chapters(url, p) for c in chapters: - a = content.find("a", text=str(c.txt.string)) + a = content.find("a", string=str(c.txt)) a['href'] = c.new_href - images = Extractor.all_images("introduction", content) + images = Extractor.all_images(content) return Intro( title=title, language=language, author=author, intro=content, images=images, chapters=chapters @@ -101,7 +101,7 @@ def build_chapter(chap: Tuple[int, BeautifulSoup]) -> Chapters: return Chapters( num=i, original_href=original_href, - txt=c, + txt=c.text, new_href=f"update_{i}.xhtml" ) @@ -110,7 +110,7 @@ def build_chapter(chap: Tuple[int, BeautifulSoup]) -> Chapters: return chapters @staticmethod - def all_images(prefix: str, content: BeautifulSoup) -> List[Image]: + def all_images(content: BeautifulSoup) -> List[Image]: images = content.find_all("img") r = [] for i, x in enumerate(images): @@ -125,7 +125,7 @@ def all_images(prefix: str, content: BeautifulSoup) -> List[Image]: @staticmethod def get_update(prefix: str, p: BeautifulSoup) -> Update: content = p.find("div", id="content") - images = Extractor.all_images(prefix, content) + images = Extractor.all_images(content) return Update(content=content, images=images) @@ -184,7 +184,7 @@ def replace_img_name(content: BeautifulSoup, image: IndexedEpubImage) -> Beautif return content async def build_update(session: aiohttp.ClientSession, chapter: Chapters, data: Update, intro: Intro) -> Page: - update_chapter = epub.EpubHtml(title=str(chapter.txt.string), file_name=chapter.new_href, + update_chapter = epub.EpubHtml(title=str(chapter.txt), file_name=chapter.new_href, lang=intro.language) # TODO: fix language update_chapter.add_item(get_style_item()) diff --git a/tests/test_lparchive2epub.py b/tests/test_lparchive2epub.py index 64946d4..c3d8582 100644 --- a/tests/test_lparchive2epub.py +++ b/tests/test_lparchive2epub.py @@ -40,178 +40,237 @@ def test_extractor_chapters(): b = BeautifulSoup(re1, "html.parser") chapters = Extractor.all_chapters("https://lparchive.org/Resident-Evil-1", b) assert len(chapters) == 58 - assert chapters[0] == Chapters(new_href="update_0.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%201/', txt='Foreword:') - assert chapters[1] == Chapters(new_href="update_1.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%202/', + assert chapters[0] == Chapters(num=0, + new_href="update_0.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%201', + txt='Foreword:') + assert chapters[1] == Chapters(num=1, + new_href="update_1.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%202', txt="Episode I: C'mon C'mon") - assert chapters[2] == Chapters(new_href="update_2.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%203/', + assert chapters[2] == Chapters(num=2, + new_href="update_2.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%203', txt='Episode II: Break on Through to the Other Side') - assert chapters[3] == Chapters(new_href="update_3.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%204/', + assert chapters[3] == Chapters(num=3, + new_href="update_3.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%204', txt='Episode III: Monster Mash') - assert chapters[4] == Chapters(new_href="update_4.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%205/', + assert chapters[4] == Chapters(num=4, + new_href="update_4.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%205', txt='Episode IV: Walk This Way') - assert chapters[5] == Chapters(new_href="update_5.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%206/', + assert chapters[5] == Chapters(num=5, + new_href="update_5.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%206', txt='Episode V: With a Little Help from My Friends') - assert chapters[6] == Chapters(new_href="update_6.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%207/', + assert chapters[6] == Chapters(num=6, + new_href="update_6.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%207', txt="Episode VI: You've Got A Friend In Me") - assert chapters[7] == Chapters(new_href="update_7.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%208/', + assert chapters[7] == Chapters(num=7, + new_href="update_7.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%208', txt='Episode VII: The Devil Went Down to Georgia') - assert chapters[8] == Chapters(new_href="update_8.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%209/', + assert chapters[8] == Chapters(num=8, + new_href="update_8.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%209', txt='Episode VIII: Hurt') - assert chapters[9] == Chapters(new_href="update_9.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2010/', + assert chapters[9] == Chapters(num=9, + new_href="update_9.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2010', txt='Episode IX: The Best Things in Life are Free') - assert chapters[10] == Chapters(new_href="update_10.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2011/', + assert chapters[10] == Chapters(num=10, + new_href="update_10.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2011', txt='Episode X: Another One Bites the Dust') - assert chapters[11] == Chapters(new_href="update_11.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2012/', + assert chapters[11] == Chapters(num=11, + new_href="update_11.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2012', txt='Episode XI: Moonlight Sonata') - assert chapters[12] == Chapters(new_href="update_12.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2013/', + assert chapters[12] == Chapters(num=12, + new_href="update_12.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2013', txt="Episode XII: I've Gotta Get a Message to You") - assert chapters[13] == Chapters(new_href="update_13.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2014/', + assert chapters[13] == Chapters(num=13, + new_href="update_13.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2014', txt='Episode XIII: Communication Breakdown') - assert chapters[14] == Chapters(new_href="update_14.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2015/', + assert chapters[14] == Chapters(num=14, + new_href="update_14.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2015', txt='Episode XIV: Thriller') - assert chapters[15] == Chapters(new_href="update_15.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2016/', + assert chapters[15] == Chapters(num=15, + new_href="update_15.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2016', txt='Tangent: Adagio') - assert chapters[16] == Chapters(new_href="update_16.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2017/', + assert chapters[16] == Chapters(num=16, + new_href="update_16.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2017', txt='Tangent: Baroque') - assert chapters[17] == Chapters(new_href="update_17.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2018/', + assert chapters[17] == Chapters(num=17, + new_href="update_17.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2018', txt='Tangent: Cadenza') - assert chapters[18] == Chapters(new_href="update_18.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2019/', + assert chapters[18] == Chapters(num=18, + new_href="update_18.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2019', txt='Tangent: Dissonance') - assert chapters[19] == Chapters(new_href="update_19.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2020/', + assert chapters[19] == Chapters(num=19, + new_href="update_19.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2020', txt='Tangent: Dissonance B') - assert chapters[20] == Chapters(new_href="update_20.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2021/', + assert chapters[20] == Chapters(num=20, + new_href="update_20.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2021', txt='Tangent: Dissonance C') - assert chapters[21] == Chapters(new_href="update_21.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2022/', + assert chapters[21] == Chapters(num=21, + new_href="update_21.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2022', txt='Tangent: Dissonance D') - assert chapters[22] == Chapters(new_href="update_22.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2023/', + assert chapters[22] == Chapters(num=22, + new_href="update_22.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2023', txt='Tangent: Dissonance E') - assert chapters[23] == Chapters(new_href="update_23.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2024/', + assert chapters[23] == Chapters(num=23, + new_href="update_23.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2024', txt='Tangent: Etude') - assert chapters[24] == Chapters(new_href="update_24.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2025/', + assert chapters[24] == Chapters(num=24, + new_href="update_24.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2025', txt='Tangent: Etude B') - assert chapters[25] == Chapters(new_href="update_25.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2026/', + assert chapters[25] == Chapters(num=25, + new_href="update_25.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2026', txt='Tangent: Etude C') - assert chapters[26] == Chapters(new_href="update_26.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2027/', + assert chapters[26] == Chapters(num=26, + new_href="update_26.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2027', txt='Episode XV: Mexican Radio') - assert chapters[27] == Chapters(new_href="update_27.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2028/', + assert chapters[27] == Chapters(num=27, + new_href="update_27.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2028', txt='Episode XVI: Wishing You Were Here') - assert chapters[28] == Chapters(new_href="update_28.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2029/', + assert chapters[28] == Chapters(num=28, + new_href="update_28.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2029', txt='Chapter 29') - assert chapters[29] == Chapters(new_href="update_29.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2030/', + assert chapters[29] == Chapters(num=29, + new_href="update_29.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2030', txt='Episode XVII: Funky Town') - assert chapters[30] == Chapters(new_href="update_30.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2031/', + assert chapters[30] == Chapters(num=30, + new_href="update_30.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2031', txt='Episode XVIII: Beneath the Sea') - assert chapters[31] == Chapters(new_href="update_31.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2032/', + assert chapters[31] == Chapters(num=31, + new_href="update_31.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2032', txt='Episode XIX: Somewhere Over the Rainbow') - assert chapters[32] == Chapters(new_href="update_32.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2033/', + assert chapters[32] == Chapters(num=32, + new_href="update_32.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2033', txt='Episode XX: Another Brick in the Wall') - assert chapters[33] == Chapters(new_href="update_33.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2034/', + assert chapters[33] == Chapters(num=33, + new_href="update_33.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2034', txt='Episode XXI: Black Dog') - assert chapters[34] == Chapters(new_href="update_34.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2035/', + assert chapters[34] == Chapters(num=34, + new_href="update_34.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2035', txt='Episode XXII: Gotta Get Away') - assert chapters[35] == Chapters(new_href="update_35.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2036/', + assert chapters[35] == Chapters(num=35, + new_href="update_35.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2036', txt='Episode XXIII: Dazed and Confused') - assert chapters[36] == Chapters(new_href="update_36.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2037/', + assert chapters[36] == Chapters(num=36, + new_href="update_36.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2037', txt="Episode XXIV: It Won't Be Long") - assert chapters[37] == Chapters(new_href="update_37.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2038/', + assert chapters[37] == Chapters(num=37, + new_href="update_37.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2038', txt='Episode XV: Comfortably Numb') - assert chapters[38] == Chapters(new_href="update_38.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2039/', + assert chapters[38] == Chapters(num=38, + new_href="update_38.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2039', txt="Episode XXVI: Blowin' in the Wind") - assert chapters[39] == Chapters(new_href="update_39.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2040/', + assert chapters[39] == Chapters(num=39, + new_href="update_39.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2040', txt="Episode XXVII: Don't Stop 'Til You Get Enough") - assert chapters[40] == Chapters(new_href="update_40.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2041/', + assert chapters[40] == Chapters(num=40, + new_href="update_40.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2041', txt='Episode XXVIII: Already Gone') - assert chapters[41] == Chapters(new_href="update_41.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2042/', + assert chapters[41] == Chapters(num=41, + new_href="update_41.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2042', txt='Episode XXIX: Losing my Religion') - assert chapters[42] == Chapters(new_href="update_42.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2043/', + assert chapters[42] == Chapters(num=42, + new_href="update_42.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2043', txt='Episode XXX: Everybody Hurts') - assert chapters[43] == Chapters(new_href="update_43.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2044/', + assert chapters[43] == Chapters(num=43, + new_href="update_43.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2044', txt="Bonus Episode: Wesker's Report II (Part I)") - assert chapters[44] == Chapters(new_href="update_44.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2045/', + assert chapters[44] == Chapters(num=44, + new_href="update_44.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2045', txt='Bonus Episode: Wesker\'s Report II - "Alexia 1"') - assert chapters[45] == Chapters(new_href="update_45.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2046/', + assert chapters[45] == Chapters(num=45, + new_href="update_45.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2046', txt='Bonus Episode: Wesker\'s Report II - "Alexia 2"') - assert chapters[46] == Chapters(new_href="update_46.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2047/', + assert chapters[46] == Chapters(num=46, + new_href="update_46.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2047', txt='Bonus Episode: Wesker\'s Report II - "Nemesis"') - assert chapters[47] == Chapters(new_href="update_47.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2048/', + assert chapters[47] == Chapters(num=47, + new_href="update_47.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2048', txt='Bonus Episode: Wesker\'s Report II - "G-Virus"') - assert chapters[48] == Chapters(new_href="update_48.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2049/', + assert chapters[48] == Chapters(num=48, + new_href="update_48.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2049', txt='Episode XXXI: Weird Science') - assert chapters[49] == Chapters(new_href="update_49.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2050/', + assert chapters[49] == Chapters(num=49, + new_href="update_49.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2050', txt='Episode XXXII: Video Killed the Radio Star') - assert chapters[50] == Chapters(new_href="update_50.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2051/', + assert chapters[50] == Chapters(num=50, + new_href="update_50.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2051', txt='Episode XXXIII: Walking on Sunshine') - assert chapters[51] == Chapters(new_href="update_51.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2052/', + assert chapters[51] == Chapters(num=51, + new_href="update_51.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2052', txt="XXXIV: Singin' in the Rain") - assert chapters[52] == Chapters(new_href="update_52.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2053/', + assert chapters[52] == Chapters(num=52, + new_href="update_52.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2053', txt="Episode XXXV: Knockin' on Heaven's Door") - assert chapters[53] == Chapters(new_href="update_53.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2054/', + assert chapters[53] == Chapters(num=53, + new_href="update_53.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2054', txt='Episode XXXVI: Helter Skelter') - assert chapters[54] == Chapters(new_href="update_54.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2055/', + assert chapters[54] == Chapters(num=54, + new_href="update_54.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2055', txt='Episode XXXVII: Stairway to Heaven') - assert chapters[55] == Chapters(new_href="update_55.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2056/', + assert chapters[55] == Chapters(num=55, + new_href="update_55.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2056', txt='Episode XXXVIII: We Will Rock You') - assert chapters[56] == Chapters(new_href="update_56.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2057/', + assert chapters[56] == Chapters(num=56, + new_href="update_56.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2057', txt="Episode XXXIX: Don't Fear the Reaper") - assert chapters[57] == Chapters(new_href="update_57.xhtml", - original_href='https://lparchive.org/Resident-Evil-1/Update%2058/', + assert chapters[57] == Chapters(num=57, + new_href="update_57.xhtml", + original_href='https://lparchive.org/Resident-Evil-1/Update%2058', txt='Episode XL: Paint it, Black') b = BeautifulSoup(headshoots, "html.parser")