# Patch summary (preamble text placed ahead of the `diff --git` header; not part of any hunk).
#
# Target file: packages/langchain_community/test/document_loaders/web_test.dart
# Single hunk, `-23,5 +23,21`: 5 context lines are kept and 16 lines are added.
#
# The added lines introduce one test case,
# 'Test that non-ASCII characters are handled correctly', which:
#   1. builds a `WebBaseLoader` for a single URL and awaits `loader.load()`;
#   2. expects exactly one loaded document (`hasLength(1)`);
#   3. expects `pageContent` to contain a CJK (non-ASCII) string;
#   4. expects `metadata['source']` to equal the URL and `metadata['title']`
#      to contain the same CJK string.
#
# NOTE(review): the patch below is collapsed onto one physical line; the
# original line breaks and indentation have to be restored before it can be
# applied.
# NOTE(review): the test presumably fetches the page live over plain HTTP from
# a third-party host, so it may fail if the site is unreachable or the page
# changes - confirm whether a local fixture or mocked client is preferred.
diff --git a/packages/langchain_community/test/document_loaders/web_test.dart b/packages/langchain_community/test/document_loaders/web_test.dart index 5b4059d0..c1e681f8 100644 --- a/packages/langchain_community/test/document_loaders/web_test.dart +++ b/packages/langchain_community/test/document_loaders/web_test.dart @@ -23,5 +23,21 @@ void main() { expect(metadata['title'], contains('Wikipedia')); expect(metadata['language'], 'en'); }); + + test('Test that non-ASCII characters are handled correctly', () async { + const url = 'http://web.tccf.org.tw/lib/addon.php?act=post&id=4975'; + + const loader = WebBaseLoader([url]); + final loadedDocs = await loader.load(); + + expect(loadedDocs, hasLength(1)); + + final loadedDoc = loadedDocs.first; + expect(loadedDoc.pageContent, contains('子宮內膜癌的介紹及治療現況')); + + final metadata = loadedDoc.metadata; + expect(metadata['source'], url); + expect(metadata['title'], contains('子宮內膜癌的介紹及治療現況')); + }); }); }