Skip to content

Commit

Permalink
fix(community): hide console errors on RecursiveUrlLoader (#7679)
Browse files Browse the repository at this point in the history
Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
  • Loading branch information
jfromaniello and jacoblee93 authored Feb 11, 2025
1 parent 6bb5db8 commit ed298b2
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
/* eslint-disable no-process-env */
/* eslint-disable @typescript-eslint/no-non-null-assertion */
import { test } from "@jest/globals";
import { test, jest } from "@jest/globals";
import { compile } from "html-to-text";
import { RecursiveUrlLoader } from "../web/recursive_url.js";

describe("RecursiveUrlLoader", () => {
// Wrap console.error in a jest spy before every test so individual tests
// can assert on whether it was called (e.g. `expect(console.error)...`).
beforeEach(() => {
jest.spyOn(console, "error");
});

// Undo the spy after every test so the console.error spy (and any other
// mocks a test created) does not leak into the next test.
afterEach(() => {
jest.restoreAllMocks();
});

test("loading valid url", async () => {
const url = "https://js.langchain.com/docs/introduction";

Expand Down Expand Up @@ -84,4 +92,16 @@ describe("RecursiveUrlLoader", () => {
false
);
});

// Regression check: crawl the LangSmith docs site and verify that documents
// are produced while nothing is reported through console.error. The
// console.error assertion relies on the spy installed in beforeEach.
// NOTE(review): this hits the live network, so it is presumably meant to run
// as an integration test — confirm against the test runner configuration.
test("load docs from langsmith without reporting errors", async () => {
  const loadedDocs = await new RecursiveUrlLoader(
    "https://docs.smith.langchain.com/",
    {
      maxDepth: 5,
      timeout: 5000,
    }
  ).load();

  // More than one document is expected from the crawl.
  expect(loadedDocs.length).toBeGreaterThan(1);
  // No error output may have been produced while loading.
  expect(console.error).not.toHaveBeenCalled();
});
});
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import { JSDOM } from "jsdom";
import { JSDOM, VirtualConsole } from "jsdom";
import { Document } from "@langchain/core/documents";
import { AsyncCaller } from "@langchain/core/utils/async_caller";
import {
BaseDocumentLoader,
DocumentLoader,
} from "@langchain/core/document_loaders/base";

// Module-level jsdom VirtualConsole that is passed to the JSDOM instances
// created in this file (via the `{ virtualConsole }` option) instead of
// letting them use jsdom's default console handling.
const virtualConsole = new VirtualConsole();
// Register a no-op "error" listener: "error" events emitted on this virtual
// console are deliberately discarded rather than surfaced.
// NOTE(review): presumably this is what keeps errors raised while parsing
// fetched pages out of the host process's console — confirm against the
// jsdom VirtualConsole documentation.
virtualConsole.on("error", () => {});

export interface RecursiveUrlLoaderOptions {
excludeDirs?: string[];
extractor?: (text: string) => string;
Expand Down Expand Up @@ -62,7 +65,7 @@ export class RecursiveUrlLoader

private getChildLinks(html: string, baseUrl: string): Array<string> {
const allLinks = Array.from(
new JSDOM(html).window.document.querySelectorAll("a")
new JSDOM(html, { virtualConsole }).window.document.querySelectorAll("a")
).map((a) => a.href);
const absolutePaths = [];
// eslint-disable-next-line no-script-url
Expand Down Expand Up @@ -117,7 +120,7 @@ export class RecursiveUrlLoader
private extractMetadata(rawHtml: string, url: string) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const metadata: Record<string, any> = { source: url };
const { document } = new JSDOM(rawHtml).window;
const { document } = new JSDOM(rawHtml, { virtualConsole }).window;

const title = document.getElementsByTagName("title")[0];
if (title) {
Expand Down

0 comments on commit ed298b2

Please sign in to comment.