Skip to content

Commit

Permalink
test: Add non-ASCII character test for web loader
Browse files: browse the repository at this point in the history
  • Loading branch information
davidmigloz committed Nov 29, 2024
1 parent 48e64d5 commit 9f8fe44
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions packages/langchain_community/test/document_loaders/web_test.dart
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,5 +23,21 @@ void main() {
expect(metadata['title'], contains('Wikipedia'));
expect(metadata['language'], 'en');
});

// Loads a single page through WebBaseLoader and checks that the expected
// non-ASCII (CJK) text appears intact in both the page content and the
// `title` metadata, and that `source` echoes the requested URL.
test('Test that non-ASCII characters are handled correctly', () async {
  const pageUrl = 'http://web.tccf.org.tw/lib/addon.php?act=post&id=4975';
  // The same phrase is expected in the body and in the title.
  const expectedText = '子宮內膜癌的介紹及治療現況';

  final docs = await const WebBaseLoader([pageUrl]).load();
  expect(docs, hasLength(1));

  final doc = docs.first;
  expect(doc.pageContent, contains(expectedText));

  final meta = doc.metadata;
  expect(meta['source'], pageUrl);
  expect(meta['title'], contains(expectedText));
});
});
}

0 comments on commit 9f8fe44

Please sign in to comment.