Skip to content

Commit

Permalink
Improve project detection algorithm (#1035)
Browse files Browse the repository at this point in the history
- Improve support for `Tectonic.toml` projects
- Cache project detection results between LSP requests
  • Loading branch information
pfoerster authored Mar 17, 2024
1 parent 628b4e0 commit da24320
Show file tree
Hide file tree
Showing 28 changed files with 703 additions and 472 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Improve support for handling `Tectonic.toml` projects ([#1014](https://github.com/latex-lsp/texlab/issues/1014))
- Cache results of project detection to improve performance

### Fixed

- Don't return document symbol with empty name if `\item[]` is encountered ([#1040](https://github.com/latex-lsp/texlab/issues/1040))
Expand Down
11 changes: 11 additions & 0 deletions crates/base-db/src/deps.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//! Project dependency handling: project roots, the document dependency
//! graph, and filesystem discovery of related files.

mod discover;
mod graph;
mod project;
mod root;

pub use self::{
    discover::{discover, watch},
    graph::{DirectLinkData, Edge, EdgeData, Graph},
    project::{parents, Project},
    root::ProjectRoot,
};
117 changes: 117 additions & 0 deletions crates/base-db/src/deps/discover.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
use std::path::PathBuf;

use distro::Language;
use itertools::Itertools;
use rustc_hash::FxHashSet;

use crate::Workspace;

use super::ProjectRoot;

/// Registers the relevant directories of every project root in the
/// workspace with the file watcher.
///
/// Only `file://` documents are considered. Each directory is registered
/// at most once: `watched_dirs` remembers what has already been handed to
/// the watcher, and errors from the watcher itself are ignored.
pub fn watch(
    workspace: &mut Workspace,
    watcher: &mut dyn notify::Watcher,
    watched_dirs: &mut FxHashSet<PathBuf>,
) {
    let project_roots = workspace
        .iter()
        .filter(|document| document.dir.scheme() == "file")
        .map(|document| &document.dir)
        .unique()
        .map(|dir| ProjectRoot::walk_and_find(workspace, dir));

    for root in project_roots {
        let candidates = [&root.src_dir, &root.aux_dir, &root.log_dir, &root.pdf_dir];
        for uri in candidates {
            let Ok(path) = uri.to_file_path() else {
                continue;
            };

            // Watch each directory only once, non-recursively.
            let newly_seen = watched_dirs.insert(path.clone());
            if newly_seen {
                let _ = watcher.watch(&path, notify::RecursiveMode::NonRecursive);
            }
        }
    }
}

/// Expands the workspace until a fixed point is reached: alternately loads
/// parent-directory project files and missing child documents until neither
/// pass adds anything new. Every path that was loaded is recorded in
/// `checked_paths`.
pub fn discover(workspace: &mut Workspace, checked_paths: &mut FxHashSet<PathBuf>) {
    let mut changed = true;
    while changed {
        // Both passes must run each round; `|` (not `||`) avoids short-circuiting.
        let parents_changed = discover_parents(workspace, checked_paths);
        let children_changed = discover_children(workspace, checked_paths);
        changed = parents_changed | children_changed;
    }
}

/// Scans the ancestor directories of all workspace documents for files that
/// influence project structure (TeX sources, root files, `Tectonic.toml`,
/// `latexmkrc`) and loads any that are not yet in the workspace.
/// Returns `true` if at least one new document was loaded.
fn discover_parents(workspace: &mut Workspace, checked_paths: &mut FxHashSet<PathBuf>) -> bool {
    // Candidate directories: every proper ancestor of each non-BibTeX document
    // that the workspace accepts (NOTE(review): `Workspace::contains` presumably
    // checks the path lies inside a workspace folder — confirm against its impl).
    let dirs = workspace
        .iter()
        .filter(|document| document.language != Language::Bib)
        .filter_map(|document| document.path.as_deref())
        .flat_map(|path| path.ancestors().skip(1))
        .filter(|path| workspace.contains(path))
        .map(|path| path.to_path_buf())
        .collect::<FxHashSet<_>>();

    let mut changed = false;
    for dir in dirs {
        // Skip directories at or below an existing project marker (a root file
        // or a Tectonic project file): that subtree is already delimited.
        if workspace
            .iter()
            .filter(|document| matches!(document.language, Language::Root | Language::Tectonic))
            .filter_map(|document| document.path.as_deref())
            .filter_map(|path| path.parent())
            .any(|marker| dir.starts_with(marker))
        {
            continue;
        }

        // Unreadable directories are silently skipped (best-effort discovery).
        let Ok(entries) = std::fs::read_dir(dir) else {
            continue;
        };

        for file in entries
            .flatten()
            .filter(|entry| entry.file_type().map_or(false, |type_| type_.is_file()))
            .map(|entry| entry.path())
        {
            let Some(lang) = Language::from_path(&file) else {
                continue;
            };

            // Only these languages can affect how projects are detected.
            if !matches!(
                lang,
                Language::Tex | Language::Root | Language::Tectonic | Language::Latexmkrc
            ) {
                continue;
            }

            // Load only files not already tracked; re-check existence since the
            // directory listing may be stale by now.
            if workspace.lookup_path(&file).is_none() && file.exists() {
                changed |= workspace.load(&file, lang).is_ok();
                checked_paths.insert(file);
            }
        }
    }

    changed
}

fn discover_children(workspace: &mut Workspace, checked_paths: &mut FxHashSet<PathBuf>) -> bool {
let files = workspace
.graphs()
.values()
.flat_map(|graph| graph.missing.iter())
.filter(|uri| uri.scheme() == "file")
.flat_map(|uri| uri.to_file_path())
.collect::<FxHashSet<_>>();

let mut changed = false;
for file in files {
let language = Language::from_path(&file).unwrap_or(Language::Tex);

if workspace.lookup_path(&file).is_none() && file.exists() {
changed |= workspace.load(&file, language).is_ok();
checked_paths.insert(file);
}
}

changed
}
256 changes: 256 additions & 0 deletions crates/base-db/src/deps/graph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
use std::{ffi::OsStr, path::PathBuf, rc::Rc};

use distro::Language;
use itertools::Itertools;
use once_cell::sync::Lazy;
use percent_encoding::percent_decode_str;
use rustc_hash::FxHashSet;
use url::Url;

use crate::{semantics, Document, Workspace};

use super::ProjectRoot;

/// Lazily-resolved home directory of the current user (`None` when it cannot be determined).
pub(crate) static HOME_DIR: Lazy<Option<PathBuf>> = Lazy::new(dirs::home_dir);

/// A directed connection between two documents of the workspace.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct Edge {
    /// URI of the document the edge originates from.
    pub source: Url,
    /// URI of the document the edge points to.
    pub target: Url,
    /// Why the two documents are connected.
    pub data: EdgeData,
}

/// Classifies how an [`Edge`] between two documents was established.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub enum EdgeData {
    /// An explicit link in the source document's semantics (see [`DirectLinkData`]).
    DirectLink(DirectLinkData),
    /// The target is listed among the additional files of the project root.
    AdditionalFiles,
    /// The target is a build artifact (aux/log file) associated with the source.
    Artifact,
}

/// Payload of an [`EdgeData::DirectLink`] edge.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct DirectLinkData {
    /// The link in the source document that produced this edge.
    pub link: semantics::tex::Link,
    /// A replacement project root for the target, present when the link
    /// carries a base directory that changes where relative paths resolve.
    pub new_root: Option<ProjectRoot>,
}

/// Borrowed context for one traversal step: the document being expanded and
/// the project root that currently applies to it.
#[derive(Debug, Clone, Copy)]
struct Start<'a, 'b> {
    source: &'a Document,
    root: &'b ProjectRoot,
}

/// The dependency graph of a project, rooted at a start document.
#[derive(Debug)]
pub struct Graph {
    /// URIs that were referenced but could not be found in the workspace.
    pub missing: Vec<Url>,
    /// All edges discovered during traversal.
    pub edges: Vec<Edge>,
    /// URI of the document the traversal started from.
    pub start: Url,
}

impl Graph {
    /// Builds the dependency graph of the project containing `start`,
    /// following direct links, build artifacts and additional project files
    /// transitively until no unvisited target remains.
    pub fn new(workspace: &Workspace, start: &Document) -> Self {
        let mut graph = Self {
            missing: Vec::new(),
            edges: Vec::new(),
            start: start.uri.clone(),
        };

        let root = ProjectRoot::walk_and_find(workspace, &start.dir);

        // Depth-first traversal. Each stack entry carries the project root
        // that applies to that document; roots are shared via `Rc` because
        // most documents inherit the root of their parent.
        let mut stack = vec![(start, Rc::new(root))];
        let mut visited = FxHashSet::default();

        while let Some((source, root)) = stack.pop() {
            let index = graph.edges.len();

            graph.process(
                workspace,
                Start {
                    source,
                    root: &root,
                },
            );

            // Inspect only the edges that the `process` call above appended.
            for edge in &graph.edges[index..] {
                if visited.insert(edge.target.clone()) {
                    // A direct link may carry a new project root for the target.
                    let new_root = match &edge.data {
                        EdgeData::DirectLink(data) => data.new_root.clone(),
                        _ => None,
                    };

                    // Fall back to the current root when the link does not change it.
                    let new_root = new_root.map_or_else(|| Rc::clone(&root), Rc::new);

                    // `unwrap` is safe: edges are only created for targets that
                    // `workspace.lookup` resolved (see the `process` helpers).
                    stack.push((workspace.lookup(&edge.target).unwrap(), new_root));
                }
            }
        }

        graph
    }

    /// Iterates over the documents of the graph in discovery order, starting
    /// with the start document. Duplicate targets are yielded only once and
    /// URIs missing from the workspace are skipped.
    pub fn preorder<'a: 'b, 'b>(
        &'b self,
        workspace: &'a Workspace,
    ) -> impl DoubleEndedIterator<Item = &'a Document> + '_ {
        std::iter::once(&self.start)
            .chain(self.edges.iter().map(|group| &group.target))
            .unique()
            .filter_map(|uri| workspace.lookup(uri))
    }

    /// Appends all outgoing edges of `start.source` to the graph.
    fn process(&mut self, workspace: &Workspace, start: Start) {
        self.add_direct_links(workspace, start);
        self.add_artifacts(workspace, start);
        self.add_additional_files(workspace, start);
    }

    /// Connects the source document to every additional file declared by the
    /// project root; unresolved URIs are recorded in `missing`.
    fn add_additional_files(&mut self, workspace: &Workspace, start: Start) {
        for uri in &start.root.additional_files {
            match workspace.lookup(uri) {
                Some(target) => {
                    self.edges.push(Edge {
                        source: start.source.uri.clone(),
                        target: target.uri.clone(),
                        data: EdgeData::AdditionalFiles,
                    });
                }
                None => {
                    self.missing.push(uri.clone());
                }
            }
        }
    }

    /// Adds an edge for every link in the source document's TeX semantics.
    /// Returns `None` (doing nothing) when the document has no TeX data.
    fn add_direct_links(&mut self, workspace: &Workspace, start: Start) -> Option<()> {
        let data = start.source.data.as_tex()?;

        for link in &data.semantics.links {
            self.add_direct_link(workspace, start, link);
        }

        Some(())
    }

    /// Resolves a single link to a target document. Candidate URIs are tried
    /// in order: each file name (the raw stem, then stem + each extension the
    /// link kind allows) relative to the project's source directory, then
    /// matches from the distro's file-name database. The first candidate
    /// found in the workspace wins; all candidates tried before it are
    /// recorded as missing.
    fn add_direct_link(
        &mut self,
        workspace: &Workspace,
        start: Start,
        link: &semantics::tex::Link,
    ) {
        let home_dir = HOME_DIR.as_deref();

        let stem = &link.path.text;
        let mut file_names = vec![stem.clone()];
        link.kind
            .extensions()
            .iter()
            .map(|ext| format!("{stem}.{ext}"))
            .for_each(|name| file_names.push(name));

        // Distro candidates are restricted to files under the user's home
        // directory or BibTeX files, to avoid pulling in the whole TeX tree.
        let file_name_db = &workspace.distro().file_name_db;
        let distro_files = file_names
            .iter()
            .filter_map(|name| file_name_db.get(name))
            .filter(|path| {
                home_dir.map_or(false, |dir| path.starts_with(dir))
                    || Language::from_path(path) == Some(Language::Bib)
            })
            .flat_map(Url::from_file_path);

        for target_uri in file_names
            .iter()
            .flat_map(|file_name| start.root.src_dir.join(file_name))
            .chain(distro_files)
        {
            match workspace.lookup(&target_uri) {
                Some(target) => {
                    // A link with a base dir re-anchors the target's project root.
                    let new_root = link
                        .base_dir
                        .as_deref()
                        .and_then(|path| start.root.src_dir.join(path).ok())
                        .map(|dir| ProjectRoot::walk_and_find(workspace, &dir));

                    let link_data = DirectLinkData {
                        link: link.clone(),
                        new_root,
                    };

                    self.edges.push(Edge {
                        source: start.source.uri.clone(),
                        target: target.uri.clone(),
                        data: EdgeData::DirectLink(link_data),
                    });

                    // Stop at the first match; earlier candidates take precedence.
                    break;
                }
                None => {
                    self.missing.push(target_uri);
                }
            };
        }
    }

    /// Connects a TeX document to its build artifacts (`.aux`, `.log`),
    /// probing the configured aux/log directories and the compile directory.
    fn add_artifacts(&mut self, workspace: &Workspace, start: Start) {
        if start.source.language != Language::Tex {
            return;
        }

        let root = start.root;
        // NOTE(review): `make_relative` is unwrapped — this assumes the source
        // URI always lies under the compile dir; confirm that invariant holds.
        let relative_path = root.compile_dir.make_relative(&start.source.uri).unwrap();

        self.add_artifact(
            workspace,
            start.source,
            &root.aux_dir.join(&relative_path).unwrap(),
            "aux",
        );

        self.add_artifact(workspace, start.source, &root.aux_dir, "aux");
        self.add_artifact(workspace, start.source, &root.compile_dir, "aux");

        self.add_artifact(
            workspace,
            start.source,
            &root.log_dir.join(&relative_path).unwrap(),
            "log",
        );

        self.add_artifact(workspace, start.source, &root.log_dir, "log");
        self.add_artifact(workspace, start.source, &root.compile_dir, "log");
    }

    /// Probes `dir` for an artifact named after `source` with `extension`,
    /// adding an `Artifact` edge if the file is in the workspace and
    /// recording it as missing otherwise.
    fn add_artifact(
        &mut self,
        workspace: &Workspace,
        source: &Document,
        dir: &Url,
        extension: &str,
    ) {
        // Percent-decode the URI path so the file name matches what is on disk.
        let mut path = PathBuf::from(
            percent_decode_str(source.uri.path())
                .decode_utf8_lossy()
                .as_ref(),
        );

        path.set_extension(extension);
        let Some(target_uri) = path
            .file_name()
            .and_then(OsStr::to_str)
            .and_then(|name| dir.join(name).ok())
        else {
            return;
        };

        match workspace.lookup(&target_uri) {
            Some(target) => {
                self.edges.push(Edge {
                    source: source.uri.clone(),
                    target: target.uri.clone(),
                    data: EdgeData::Artifact,
                });
            }
            None => {
                self.missing.push(target_uri);
            }
        }
    }
}
Loading

0 comments on commit da24320

Please sign in to comment.