Skip to content

Commit

Permalink
fix(background_job): expose crawler job error (#3910)
Browse files: browse the repository at this point in the history
Signed-off-by: Wei Zhang <kweizh@tabbyml.com>
  • Loading branch information
zwpaper authored Feb 27, 2025
1 parent 3477544 commit b945a2c
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
5 changes: 4 additions & 1 deletion crates/tabby-crawler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod types;

use std::process::Stdio;

use anyhow::anyhow;
use async_stream::stream;
use futures::{Stream, StreamExt};
use readable_readability::Readability;
Expand Down Expand Up @@ -45,7 +46,9 @@ async fn crawl_url(
.stdout(Stdio::piped())
.stderr(Stdio::piped());

let mut child = command.spawn()?;
let mut child = command
.spawn()
.map_err(|e| anyhow!("Failed to run katana: {}", e))?;

let stdout = child.stdout.take().expect("Failed to acquire stdout");
let mut stdout = tokio::io::BufReader::new(stdout).lines();
Expand Down
17 changes: 11 additions & 6 deletions ee/tabby-webserver/src/service/background_job/web_crawler.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::{sync::Arc, time::Duration};

use anyhow::anyhow;
use chrono::Utc;
use futures::StreamExt;
use serde::{Deserialize, Serialize};
Expand All @@ -9,6 +10,7 @@ use tabby_index::public::{
StructuredDocWebFields,
};
use tabby_inference::Embedding;
use tabby_schema::CoreError;

use super::helper::Job;

Expand Down Expand Up @@ -115,20 +117,23 @@ impl WebCrawlerJob {

pub async fn run(self, embedding: Arc<dyn Embedding>) -> tabby_schema::Result<()> {
let url = self.url.clone();
if tokio::time::timeout(
tokio::time::timeout(
Duration::from_secs(CRAWLER_TIMEOUT_SECS),
self.run_impl(embedding),
)
.await
.is_err()
{
.map_err(|_| {
logkit::warn!(
"Crawled for url: {} timeout after {} seconds",
url,
CRAWLER_TIMEOUT_SECS
CRAWLER_TIMEOUT_SECS,
);
}
Ok(())
CoreError::Other(anyhow!(
"Crawled for url: {} timeout after {} seconds",
url,
CRAWLER_TIMEOUT_SECS
))
})?
}
}

Expand Down

0 comments on commit b945a2c

Please sign in to comment.