diff --git a/.github/scripts/cross.sh b/.github/scripts/cross.sh new file mode 100755 index 0000000..0c8703c --- /dev/null +++ b/.github/scripts/cross.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -e + +if test "$BUILD_CMD" != "cross" +then + echo "cross.sh - is a helper to assist only in cross compiling environments" >&2 + echo "To use this tool set the BUILD_CMD env var to the \"cross\" value" >&2 + exit 111 +fi + +if test -z "$CROSS_IMAGE" +then + echo "The CROSS_IMAGE env var should be provided" >&2 + exit 111 +fi + +docker run --rm -v /home/runner:/home/runner -w "$PWD" "$CROSS_IMAGE" "$@" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..aeef6ac --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,92 @@ +name: Build + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + +jobs: + build: + name: ${{ matrix.platform }} (${{ matrix.target }}) (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + timeout-minutes: 40 + strategy: + fail-fast: false + matrix: + platform: + - linux-arm + - linux-arm64 + - linux-x64 + - linux-x86 + - macos-arm64 + - macos-x64 + + include: + # When adding a new `target`: + # 1. Define a new platform alias above + # 2. Add a new record to a matrix map in `cli/npm/install.js` + - { platform: linux-arm , target: arm-unknown-linux-gnueabi , os: ubuntu-latest } + - { platform: linux-arm64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest } + - { platform: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-latest } + - { platform: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest } + - { platform: macos-arm64 , target: aarch64-apple-darwin , os: macos-14 } + - { platform: macos-x64 , target: x86_64-apple-darwin , os: macos-12 } + + env: + BUILD_CMD: cargo + + defaults: + run: + shell: bash + + steps: + - uses: actions/checkout@v4 + + - run: rustup toolchain install stable --profile minimal + - run: rustup target add ${{ matrix.target }} + + - name: Install cross + if: ${{ matrix.os == 'ubuntu-latest' }} + uses: taiki-e/install-action@v2 + with: + tool: cross + + - name: Build custom cross image + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + target="${{ matrix.target }}" + image=ghcr.io/cross-rs/$target:custom + echo "CROSS_IMAGE=$image" >> $GITHUB_ENV + + echo "[target.$target]" >> Cross.toml + echo "image = \"$image\"" >> Cross.toml + echo "CROSS_CONFIG=$PWD/Cross.toml" >> $GITHUB_ENV + + echo "FROM ghcr.io/cross-rs/$target:edge" >> Dockerfile + echo "RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash -" >> Dockerfile + echo "RUN apt-get update && apt-get -y install nodejs" >> Dockerfile + docker build -t $image . 
+
+      - name: Setup env extras
+        if: ${{ matrix.os == 'ubuntu-latest' }}
+        env:
+          TARGET: ${{ matrix.target }}
+        run: |
+          PATH="$PWD/.github/scripts:$PATH"
+          echo "$PWD/.github/scripts" >> $GITHUB_PATH
+
+          echo "ROOT=$PWD" >> $GITHUB_ENV
+          echo "TARGET=$TARGET" >> $GITHUB_ENV
+
+          echo "BUILD_CMD=cross" >> $GITHUB_ENV
+          runner=$(BUILD_CMD=cross cross.sh bash -c "env | sed -nr '/^CARGO_TARGET_.*_RUNNER=/s///p'")
+          [ -n "$runner" ] && echo "CROSS_RUNNER=$runner" >> $GITHUB_ENV
+
+      - run: $BUILD_CMD build --release --target=${{ matrix.target }}
+
+      - name: Upload CLI artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: tsdl.${{ matrix.platform }}
+          path: target/${{ matrix.target }}/release/tsdl
+          if-no-files-found: error
+          retention-days: 7
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..44db6fb
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,31 @@
+name: CI
+on:
+  pull_request:
+  push:
+    branches:
+      - "master"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
+
+jobs:
+  checks:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - run: rustup toolchain install stable --profile minimal
+
+      - name: Install just
+        uses: taiki-e/install-action@v2
+        with:
+          tool: just
+
+      - run: just lint
+
+  build:
+    uses: ./.github/workflows/build.yml
+
+  test:
+    uses: ./.github/workflows/test.yml
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..06fff41
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,68 @@
+name: Release
+on:
+  workflow_dispatch:
+  push:
+    tags:
+      - v[0-9]+.[0-9]+.[0-9]+
+
+jobs:
+  build:
+    uses: ./.github/workflows/build.yml
+    with:
+      run_test: false
+
+  release:
+    name: Release
+    runs-on: ubuntu-latest
+    needs: build
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: artifacts
+
+      - name: Display structure of downloaded files
+        run: ls -lR
+        working-directory: artifacts
+
+      - name: Prepare release artifacts
+        run: |
+          mkdir -p target
+          for platform in $(cd artifacts; ls | sed 's/^tsdl\.//'); do
+            exe=$(ls artifacts/tsdl.$platform/tsdl*)
+            gzip --stdout --name $exe > target/tsdl-$platform.gz
+          done
+          rm -rf artifacts
+          ls -l target/
+
+      - name: Create release
+        uses: softprops/action-gh-release@v2
+        with:
+          name: ${{ github.ref_name }}
+          tag_name: ${{ github.ref_name }}
+          fail_on_unmatched_files: true
+          files: |
+            target/tsdl-*.gz
+
+  crates_io:
+    name: Publish CLI to Crates.io
+    runs-on: ubuntu-latest
+    needs: release
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Rust
+        uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Publish crates to Crates.io
+        uses: katyo/publish-crates@v2
+        with:
+          registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..43149ff
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,33 @@
+name: Test
+
+env:
+  CARGO_TERM_COLOR: always
+  RUSTFLAGS: "-D warnings"
+
+jobs:
+  build:
+    name: ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 40
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - macos-latest
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - run: rustup toolchain install stable
--profile minimal + + - name: Install test tools + uses: taiki-e/install-action@v2 + with: + tool: cargo-nextest,just + + - run: just test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..07f214b --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# rust +debug/ +target/ +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# tsp +parsers/ +tmp/ +parsers.toml diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..34737d5 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,88 @@ +[package] +authors = ["Firas al-Khalil "] +build = "build.rs" +description = "A downloader/builder of many tree-sitter parsers" +edition = "2021" +name = "tsdl" +version = "0.99.0" + +[lib] +name = "tsdl" +path = "src/lib.rs" + +[[bin]] +name = "tsdl" +path = "src/main.rs" + +[package.metadata.tsdl] +build-dir = "tmp" +config = "parsers.toml" +fresh = false +from = "https://github.com/tree-sitter/tree-sitter-" +out = "parsers" +prefix = "libtree-sitter-" +ref = "master" +show-config = false +sys = false + +[package.metadata.tree-sitter] +repo = "https://github.com/tree-sitter/tree-sitter" +version = "0.23.0" + +[dependencies] +atty = "0.2" +better-panic = "0.3.0" +clap = { version = "4.5", features = ["derive", "cargo"] } +clap-verbosity-flag = "2.2" +console = "0.15" +derive_more = { version = "1.0", features = [ + "as_ref", + "deref", + "display", + "from", + "from_str", + "into", +] } +diff-struct = "0.5" +enum_dispatch = "0.3" +figment = { version = "0.10", features = ["toml", "env"] } +human-panic = "2.0.1" +ignore = "0.4" +indicatif = "0.17" +log = "0.4" +miette = { version = "7.2.0", features = ["fancy"] } +num_cpus = "1.16" +serde = { version = "1.0", features = ["derive"] } +thiserror = "1" +tokio = { version = "1", features = [ + "fs", + "process", + "rt-multi-thread", + "sync", + "time", +] } +toml = "0.8" +tracing = "0.1" +tracing-appender = "0.2" +tracing-error = "0.2" +tracing-log = "0.2" +tracing-subscriber = "0.3" +url = "2.5" + +[dev-dependencies] +assert_cmd = "2.0" +assert_fs = "1.1" +indoc = "2" +predicates = "3.1" +pretty_assertions = "1.4" +rstest = "0.22.0" + +[build-dependencies] +cargo_metadata = "0.18" +const-str = "0.5" +indoc = "2" +serde_json = "1.0" + +[lints.clippy] +pedantic = { level = "warn", priority = -1 } +missing-errors-doc = "allow" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4c6e2b0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 Firas al-Khalil + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..64a0de6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# TSDL
+
+A generator of tree-sitter parsers as dynamic or static libraries
+
+> :warning: Under Construction!
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..6029619
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,51 @@
+# TODO
+
+## CI
+
+- [ ] Cross-platform builds: support all tree-sitter platforms.
+  - [ ] Package
+    - [ ] Linux
+    - [ ] Mac
+    - [ ] Tar / Zip
+  - [ ] Release binaries and separate linux/mac distribution packages.
+    - [ ] GitHub
+    - [ ] crates.io
+
+## Commands
+
+- [ ] check command: check that all the necessary tools are installed (gunzip, wget, curl, git)
+
+## Configuration
+
+- [ ] Investigate a figment replacement / custom impl to merge different configuration
+      sources.
+
+## Maintenance
+
+- [ ] A sane way to produce change logs
+- [ ] just release {{arg}}
+  - [ ] {{arg}} is a version number => tag with v{{arg}}
+  - [ ] Handle changelog
+  - [ ] push to main repo with tags
+  - [ ] CI should kick in
+
+### Options
+
+- [ ] --sys-ts, false by default
+  - [x] Add the flag.
+  - [ ] Use [TREE_SITTER_LIBDIR](https://github.com/tree-sitter/tree-sitter/blob/4f97cf850535a7b23e648aba6e355caed1f10231/cli/loader/src/lib.rs#L177)
+        by default
+  - [ ] Use pkg-config for sys libs
+
+## Tests
+
+- [ ] Use assert_cmd (see the sketch below)
+  - [ ] Test --force
+  - [ ] Test --sys-ts
+- [ ] Config
+  - [ ] with default config
+    - [ ] You can always download a parser even if it's not in the config.
+    - [ ] Verify it's actually HEAD when the parser is not in the config using git in the test.
+  - [ ] with custom config file.
+    - [ ] ask for parsers defined in the config file
+    - [ ] ask for parsers !defined in the config file and verify they're from the repo's HEAD.
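The `assert_cmd` item above could start from a sketch like the following (the test name and the asserted `build-dir` key are illustrative; `config default` and the kebab-case serialization come from `src/args.rs`):

```rust
use assert_cmd::Command;
use predicates::str::contains;

#[test]
fn config_default_prints_toml() {
    // `tsdl config default` prints the default BuildCommand as TOML,
    // so the kebab-case `build-dir` key should appear on stdout.
    Command::cargo_bin("tsdl")
        .unwrap()
        .args(["config", "default"])
        .assert()
        .success()
        .stdout(contains("build-dir"));
}
```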
diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..5a9bab9 --- /dev/null +++ b/build.rs @@ -0,0 +1,100 @@ +use std::env; +use std::ffi::OsString; +use std::fs; +use std::path::Path; +use std::path::PathBuf; + +use cargo_metadata::MetadataCommand; +use indoc::formatdoc; + +const TARGETS: &[(&str, &str)] = &[ + ("linux-arm", "arm-unknown-linux-gnueabi"), + ("linux-arm64", "aarch64-unknown-linux-gnu"), + ("linux-x64", "x86_64-unknown-linux-gnu"), + ("linux-x86", "i686-unknown-linux-gnu"), + ("macos-arm64", "aarch64-apple-darwin"), + ("macos-x64", "x86_64-apple-darwin"), +]; + +const fn platform_for_target(target: &str) -> &str { + let mut i = 0; + while i < TARGETS.len() { + if const_str::equal!(TARGETS[i].1, target) { + return TARGETS[i].0; + } + i += 1; + } + target +} + +fn main() { + let out_dir = env::var_os("OUT_DIR").unwrap(); + let build_target = env::var_os("TARGET").unwrap(); + let metadata = MetadataCommand::new().exec().unwrap(); + let meta = metadata + .root_package() + .unwrap() + .metadata + .as_object() + .unwrap(); + write_tree_sitter_consts(meta, &build_target, &out_dir); + write_tsdl_consts(meta, &out_dir); +} + +fn write_tsdl_consts(meta: &serde_json::Map, out_dir: &OsString) { + let root = PathBuf::from(file!()); + let tsdl_bin_build_dir = root.parent().unwrap().join("src").canonicalize().unwrap(); + let tsdl_bin_build_dir = tsdl_bin_build_dir.to_str().unwrap(); + let tsdl = meta.get("tsdl").unwrap(); + let tsdl_build_dir = tsdl.get("build-dir").unwrap().as_str().unwrap(); + let tsdl_config_file = tsdl.get("config").unwrap().as_str().unwrap(); + let tsdl_fresh = tsdl.get("fresh").unwrap().as_bool().unwrap(); + let tsdl_from = tsdl.get("from").unwrap().as_str().unwrap(); + let tsdl_out_dir = tsdl.get("out").unwrap().as_str().unwrap(); + let tsdl_prefix = tsdl.get("prefix").unwrap().as_str().unwrap(); + let tsdl_ref = tsdl.get("ref").unwrap().as_str().unwrap(); + let tsdl_show_config = tsdl.get("show-config").unwrap().as_bool().unwrap(); + let tsdl_sys = tsdl.get("sys").unwrap().as_bool().unwrap(); + let tsdl_consts = Path::new(&out_dir).join("tsdl_consts.rs"); + fs::write( + tsdl_consts, + formatdoc!( + r#" + pub const TSDL_BIN_BUILD_DIR: &str = "{tsdl_bin_build_dir}/"; + pub const TSDL_BUILD_DIR: &str = "{tsdl_build_dir}"; + pub const TSDL_CONFIG_FILE: &str = "{tsdl_config_file}"; + pub const TSDL_FRESH: bool = {tsdl_fresh}; + pub const TSDL_FROM: &str = "{tsdl_from}"; + pub const TSDL_OUT_DIR: &str = "{tsdl_out_dir}"; + pub const TSDL_PREFIX: &str = "{tsdl_prefix}"; + pub const TSDL_REF: &str = "{tsdl_ref}"; + pub const TSDL_SHOW_CONFIG: bool = {tsdl_show_config}; + pub const TSDL_SYS: bool = {tsdl_sys}; + "# + ), + ) + .unwrap(); +} + +fn write_tree_sitter_consts( + meta: &serde_json::Map, + build_target: &OsString, + out_dir: &OsString, +) { + let tree_sitter = meta.get("tree-sitter").unwrap(); + let tree_sitter_version = tree_sitter.get("version").unwrap().as_str().unwrap(); + let tree_sitter_repo = tree_sitter.get("repo").unwrap().as_str().unwrap(); + let tree_sitter_platform = platform_for_target(build_target.to_str().unwrap()); + let tree_sitter_consts = Path::new(out_dir).join("tree_sitter_consts.rs"); + fs::write( + tree_sitter_consts, + formatdoc!( + r#" + pub const TREE_SITTER_PLATFORM: &str = "{tree_sitter_platform}"; + pub const TREE_SITTER_REPO: &str = "{tree_sitter_repo}"; + pub const TREE_SITTER_VERSION: &str = "{tree_sitter_version}"; + "# + ), + ) + .unwrap(); +} diff --git a/cliff.toml b/cliff.toml new file mode 100644 index 
0000000..33131e0 --- /dev/null +++ b/cliff.toml @@ -0,0 +1,70 @@ +[changelog] +# changelog header +header = """ +# Changelog\n +""" +# template for the changelog body +# https://tera.netlify.app/docs/#introduction +body = """ +{% if version %}\ + ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} +{% else %}\ + ## [unreleased] +{% endif %}\ +{% for group, commits in commits | group_by(attribute="group") %} + ### {{ group | striptags | upper_first }} + {% for commit in commits%}\ + {% if not commit.scope %}\ + - {{ commit.message | upper_first }}\ + {% if commit.github.pr_number %} (){%- endif %} + {% endif %}\ + {% endfor %}\ + {% for group, commits in commits | group_by(attribute="scope") %}\ + {% for commit in commits %}\ + - **{{commit.scope}}**: {{ commit.message | upper_first }}\ + {% if commit.github.pr_number %} (){%- endif %} + {% endfor %}\ + {% endfor %} +{% endfor %} +""" +# remove the leading and trailing whitespace from the template +trim = true + +[git] +# parse the commits based on https://www.conventionalcommits.org +conventional_commits = true +# filter out the commits that are not conventional +filter_unconventional = false +# process each line of a commit as an individual commit +split_commits = false +# regex for preprocessing the commit messages +# commit_preprocessors = [] +# regex for parsing and grouping commits +commit_parsers = [ + { message = "!:", group = "Breaking" }, + { message = "^feat", group = "Features" }, + { message = "^fix", group = "Bug Fixes" }, + { message = "^perf", group = "Performance" }, + { message = "^doc", group = "Documentation" }, + { message = "^refactor", group = "Refactor" }, + { message = "^test", group = "Testing" }, + { message = "^build", group = "Build System and CI" }, + { message = "^ci", group = "Build System and CI" }, + { message = ".*", group = "Other" }, +] +# filter out the commits that are not matched by commit parsers +filter_commits = false +# glob pattern for matching git tags +tag_pattern = "v[0-9]*" +# regex for skipping tags +# skip_tags = "v0.1.0-beta.1" +# regex for ignoring tags +ignore_tags = "" +# sort the tags chronologically +date_order = false +# sort the commits inside sections by oldest/newest order +sort_commits = "oldest" + +[remote.github] +owner = "stackmystack" +repo = "tsdl" diff --git a/justfile b/justfile new file mode 100644 index 0000000..5b3b419 --- /dev/null +++ b/justfile @@ -0,0 +1,68 @@ +#!/usr/bin/env -S just --justfile +alias t := test +alias c := check +alias b := build +alias w := watch +alias r := run + +changelog-update: + echo >> CHANGELOG.md + git log --pretty='format:- %s' >> CHANGELOG.md + +check: fmt clippy test + #!/usr/bin/env bash + set -euxo pipefail + git diff --no-ext-diff --quiet --exit-code + VERSION=`sed -En 's/version[[:space:]]*=[[:space:]]*"([^"]+)"/\1/p' Cargo.toml | head -1` + grep "^\[$VERSION\]" CHANGELOG.md + +clippy: + cargo clippy --all --all-targets -- --deny warnings + +clippy-fix *args: + cargo clippy --fix {{args}} + +clippy-fix-now: + @just clippy-fix --allow-dirty --allow-staged + +fmt: + cargo fmt --all + +fmt-check: + cargo fmt --all -- --check + +install-hooks: + @mkdir -p .git/hooks + @cp scripts/pre-commit.sh .git/hooks/pre-commit + @chmod +x .git/hooks/pre-commit + @echo "Pre-commit hook installed successfully" + +lint: clippy fmt-check + +log: + #!/usr/bin/env bash + latest=$$(sed -rne '/^## tsdl v[0-9]+\.[0-9]+-[0-9]+-g/{s///;s/ \(.*//;p;q;}' NEWS.md); \ + git log --reverse "$latest..HEAD" + +notes: + grep 
'master' README.md + +publish: + #!/usr/bin/env bash + set -euxo pipefail + rm -rf tmp/release + git clone # TODO + cd tmp/release + ! grep 'master' README.md + VERSION=`sed -En 's/version[[:space:]]*=[[:space:]]*"([^"]+)"/\1/p' Cargo.toml | head -1` + git tag -a $VERSION -m "Release $VERSION" + git push origin $VERSION + cargo publish + cd ../.. + rm -rf tmp/release + +test *args: + cargo nextest run {{args}} + +contributors: + git shortlog -sne | sed -r 's/^[[:space:]]*[0-9]+[[:space:]]*//' | sort > Contributors diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..b1483ac --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,5 @@ +# This file intentionally left almost blank +# +# The empty `rustfmt.toml` makes rustfmt use the default configuration, +# overriding any which may be found in the contributor's home or parent +# folders. diff --git a/scripts/pre-commit.sh b/scripts/pre-commit.sh new file mode 100644 index 0000000..3925d3a --- /dev/null +++ b/scripts/pre-commit.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +IFS=$'\n\t' +set -eou pipefail + +just clippy diff --git a/src/args.rs b/src/args.rs new file mode 100644 index 0000000..975b3c5 --- /dev/null +++ b/src/args.rs @@ -0,0 +1,232 @@ +use std::{collections::BTreeMap, fmt, path::PathBuf}; + +use clap_verbosity_flag::{InfoLevel, Verbosity}; +use diff::Diff; +use serde::{Deserialize, Serialize}; + +use crate::consts::{ + TREE_SITTER_PLATFORM, TREE_SITTER_REPO, TREE_SITTER_VERSION, TSDL_BUILD_DIR, TSDL_CONFIG_FILE, + TSDL_FRESH, TSDL_OUT_DIR, TSDL_PREFIX, TSDL_SHOW_CONFIG, TSDL_SYS, +}; + +/// Command-line arguments. +#[derive(Clone, Debug, Deserialize, clap::Parser, Serialize)] +#[command(author, version, about, long_about = None, allow_external_subcommands = true)] +pub struct Args { + #[command(subcommand)] + pub command: Command, + + /// Path to the config file (TOML). + #[arg(short, long, default_value = TSDL_CONFIG_FILE, global = true)] + pub config: PathBuf, + + /// Path to the logging file. If unspecified, it will go to `build_dir/log`. + #[arg(short, long, global = true)] + pub log: Option, + + /// Whether to emit colored logs. + #[arg(long, value_enum, default_value_t = LogColor::Auto, global = true)] + pub log_color: LogColor, + + /// Progress style. + #[arg(long, value_enum, default_value_t = ProgressStyle::Auto, global = true)] + pub progress: ProgressStyle, + + /// Verbosity level: -v, -vv, or -q, -qq. + // clap_verbosity_flag, as of now, refuses to add a serialization feature, so this will not be part of the config file. + // It's global by default, so we don't need to specify it. + #[serde(skip_serializing, skip_deserializing)] + #[command(flatten)] + pub verbose: Verbosity, +} + +#[derive(clap::ValueEnum, Clone, Debug, Deserialize, Serialize)] +pub enum LogColor { + Auto, + No, + Yes, +} + +#[derive(clap::ValueEnum, Clone, Debug, Deserialize, Serialize)] +pub enum ProgressStyle { + Auto, + Fancy, + Plain, +} + +#[derive(clap::Subcommand, Clone, Debug, Deserialize, Serialize)] +pub enum Command { + /// Build one or many parsers. + #[command(visible_alias = "b")] + Build(BuildCommand), + + /// Configuration helpers. 
+    #[serde(skip_serializing, skip_deserializing)]
+    #[command(visible_alias = "c")]
+    Config {
+        #[command(subcommand)]
+        command: ConfigCommand,
+    },
+}
+
+impl Command {
+    #[must_use]
+    pub fn as_build(&self) -> Option<&BuildCommand> {
+        if let Command::Build(build) = self {
+            Some(build)
+        } else {
+            None
+        }
+    }
+
+    #[must_use]
+    pub fn as_config(&self) -> Option<&ConfigCommand> {
+        if let Command::Config { command } = self {
+            Some(command)
+        } else {
+            None
+        }
+    }
+}
+
+#[allow(clippy::struct_excessive_bools)]
+#[derive(clap::Args, Clone, Debug, Deserialize, Diff, PartialEq, Eq, Serialize)]
+#[diff(attr(
+    #[derive(Debug, PartialEq)]
+))]
+#[serde(rename_all = "kebab-case")]
+pub struct BuildCommand {
+    /// Parsers to compile as key=value pairs.
+    /// Values can be either:
+    /// 1. a simple value denoting the ref of the parser from the default remote.
+    /// 2. of the format `ref:remote-ref,from:remote-url`.
+    /// `ref` and `from` are both optional, and will use defaults.
+    #[serde(skip_serializing, skip_deserializing)]
+    #[arg(verbatim_doc_comment)]
+    pub languages: Option<Vec<String>>,
+
+    /// Configured Parsers.
+    #[clap(skip)]
+    pub parsers: Option<BTreeMap<String, ParserConfig>>,
+
+    /// Build Directory.
+    #[serde(default)]
+    #[arg(short, long, default_value = TSDL_BUILD_DIR)]
+    pub build_dir: PathBuf,
+
+    /// Number of threads; defaults to the number of available CPUs.
+    #[arg(short, long, default_value_t = num_cpus::get())]
+    #[serde(default)]
+    pub ncpus: usize,
+
+    /// Clears the `build_dir` and starts a fresh build.
+    #[arg(short, long, default_value_t = TSDL_FRESH)]
+    #[serde(default)]
+    pub fresh: bool,
+
+    /// Output Directory.
+    #[arg(short, long, default_value = TSDL_OUT_DIR)]
+    #[serde(default)]
+    pub out_dir: PathBuf,
+
+    /// Prefix parser names.
+    #[arg(short, long, default_value = TSDL_PREFIX)]
+    #[serde(default)]
+    pub prefix: String,
+
+    /// Show Config.
+    #[arg(long, default_value_t = TSDL_SHOW_CONFIG)]
+    #[serde(default)]
+    pub show_config: bool,
+
+    #[command(flatten)]
+    #[serde(default)]
+    pub tree_sitter: TreeSitter,
+
+    /// Use the system-installed tree-sitter.
+    /// All other tree-sitter flags will be ignored.
+    #[arg(long, default_value_t = TSDL_SYS)]
+    #[serde(default)]
+    pub sys: bool,
+}
+
+impl Default for BuildCommand {
+    fn default() -> Self {
+        Self {
+            languages: None,
+            parsers: None,
+            build_dir: PathBuf::from(TSDL_BUILD_DIR),
+            fresh: TSDL_FRESH,
+            ncpus: num_cpus::get(),
+            out_dir: PathBuf::from(TSDL_OUT_DIR),
+            prefix: String::from(TSDL_PREFIX),
+            show_config: TSDL_SHOW_CONFIG,
+            sys: TSDL_SYS,
+            tree_sitter: TreeSitter::default(),
+        }
+    }
+}
+
+#[derive(Clone, Debug, Deserialize, Diff, Serialize, PartialEq, Eq)]
+#[diff(attr(
+    #[derive(Debug, PartialEq)]
+))]
+#[serde(untagged)]
+#[serde(rename_all = "kebab-case")]
+pub enum ParserConfig {
+    Full {
+        #[serde(alias = "cmd", alias = "script")]
+        build_script: Option<String>,
+        #[serde(rename = "ref")]
+        #[diff(attr(
+            #[derive(Debug, PartialEq)]
+        ))]
+        git_ref: String,
+        #[diff(attr(
+            #[derive(Debug, PartialEq)]
+        ))]
+        from: Option<String>,
+    },
+    Ref(String),
+}
+
+#[derive(clap::Args, Clone, Debug, Diff, Deserialize, PartialEq, Eq, Serialize)]
+#[diff(attr(
+    #[derive(Debug, PartialEq)]
+))]
+pub struct TreeSitter {
+    /// Tree-sitter version.
+    #[arg(short = 'V', long = "tree-sitter-version", default_value = TREE_SITTER_VERSION)]
+    pub version: String,
+
+    /// Tree-sitter repo.
+    #[arg(short = 'R', long = "tree-sitter-repo", default_value = TREE_SITTER_REPO)]
+    pub repo: String,
+
+    /// Tree-sitter platform to build. Change at your own risk.
+ #[clap(long = "tree-sitter-platform", default_value = TREE_SITTER_PLATFORM)] + pub platform: String, +} + +impl Default for TreeSitter { + fn default() -> Self { + Self { + version: TREE_SITTER_VERSION.to_string(), + repo: TREE_SITTER_REPO.to_string(), + platform: TREE_SITTER_PLATFORM.to_string(), + } + } +} + +#[derive(clap::Subcommand, Clone, Debug, Default)] +pub enum ConfigCommand { + #[default] + Current, + Default, +} + +impl fmt::Display for ConfigCommand { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", format!("{self:?}").to_lowercase()) + } +} diff --git a/src/build.rs b/src/build.rs new file mode 100644 index 0000000..1178289 --- /dev/null +++ b/src/build.rs @@ -0,0 +1,231 @@ +use std::{ + collections::{BTreeMap, HashSet}, + env::consts::DLL_EXTENSION, + fs::{self, create_dir_all}, + path::PathBuf, + sync::{Arc, Mutex}, +}; + +use miette::{miette, Context, IntoDiagnostic, Result}; +use tokio::time; +use tracing::error; +use url::Url; + +use crate::{ + args::{BuildCommand, ParserConfig}, + config, + consts::TSDL_FROM, + display::{Handle, Progress, ProgressState, TICK_CHARS}, + error, + git::Ref, + parser::{build_languages, Language, NUM_STEPS}, + tree_sitter, SafeCanonicalize, +}; + +pub fn run(command: &BuildCommand, mut progress: Progress) -> Result<()> { + if command.show_config { + config::show(command)?; + } + clear(command, &mut progress)?; + build(command, progress)?; + // if let Err(errs) = { + // errs.iter().for_each(|err| eprintln!("{err:?}\n")); + // std::process::exit(1); + // } + Ok(()) +} + +fn clear(command: &BuildCommand, progress: &mut Progress) -> Result<()> { + if command.fresh && command.build_dir.exists() { + let handle = progress.register("Fresh Build", 1); + let disp = &command.build_dir.display(); + fs::remove_dir_all(&command.build_dir) + .into_diagnostic() + .wrap_err(format!("Removing the build_dir {disp} for a fresh build"))?; + handle.fin(format!("Cleaned {disp}")); + } + fs::create_dir_all(&command.build_dir) + .into_diagnostic() + .wrap_err("Creating the build dir")?; + Ok(()) +} + +fn build(command: &BuildCommand, progress: Progress) -> Result<()> { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .worker_threads(command.ncpus) + .build(); + if let Err(ref err) = rt { + error!("Failed to initialize tokio."); + error!("{err}"); + return Err(miette!("Failed to spawn the tokio runtime")); + } + let rt = rt.unwrap(); + let _guard = rt.enter(); + let screen = Arc::new(Mutex::new(progress)); + rt.spawn(update_screen(screen.clone())); + let ts_cli = rt + .block_on(tree_sitter::prepare(command, screen.clone())) + .wrap_err("Preparing tree-sitter") + .unwrap(); + let languages = languages( + ts_cli, + screen, + &command.languages, + &command.parsers, + command.build_dir.clone(), + command.out_dir.clone(), + &command.prefix, + ) + .unwrap(); + create_dir_all(&command.out_dir) + .into_diagnostic() + .wrap_err(format!( + "Creating output dir {}", + &command.out_dir.display() + )) + .unwrap(); + rt.block_on(build_languages(languages)) +} + +async fn update_screen(progress: Arc>) { + let mut interval = time::interval(time::Duration::from_millis( + 1000 / TICK_CHARS.chars().count() as u64, + )); + loop { + interval.tick().await; + if let Ok(s) = progress.try_lock() { + s.tick(); + } + } +} + +fn languages( + ts_cli: PathBuf, + progress: Arc>, + requested_languages: &Option>, + defined_parsers: &Option>, + build_dir: PathBuf, + out_dir: PathBuf, + prefix: &str, +) -> Result, error::LanguageCollection> { + let 
(res, errs) = unique_languages( + ts_cli, + build_dir, + out_dir, + prefix, + requested_languages, + defined_parsers, + progress, + ); + if errs.is_empty() { + Ok(res.into_iter().map(Result::unwrap).collect()) + } else { + Err(error::LanguageCollection { + related: errs.into_iter().map(Result::unwrap_err).collect(), + }) + } +} + +type Languages = ( + Vec>, + Vec>, +); + +#[allow(clippy::needless_pass_by_value)] +fn unique_languages( + ts_cli: PathBuf, + build_dir: PathBuf, + out_dir: PathBuf, + prefix: &str, + requested_languages: &Option>, + defined_parsers: &Option>, + progress: Arc>, +) -> Languages { + let ts_cli = Arc::new(ts_cli); + let final_languages = requested_languages + .clone() + .filter(|arr| !arr.is_empty()) + .or_else(|| { + defined_parsers + .as_ref() + .map(|map| map.keys().cloned().collect()) + }) + .unwrap_or_default(); + final_languages + .into_iter() + .collect::>() + .into_iter() + .map(|language| { + let (build_script, git_ref, url) = coords(&language, defined_parsers); + url.map(|repo| { + Language::new( + build_dir.join(&language).canon().unwrap(), + build_script, + git_ref, + progress.lock().unwrap().register(&language, NUM_STEPS), + language.clone(), + out_dir + .join(format!("{prefix}{language}.{DLL_EXTENSION}")) + .canon() + .unwrap(), + repo, + ts_cli.clone(), + ) + }) + .map_err(|err| error::Language { + name: language, + source: err.into(), + }) + }) + .partition(Result::is_ok) +} + +fn coords( + language: &str, + defined_parsers: &Option>, +) -> (Option, Ref, Result) { + match defined_parsers.as_ref().and_then(|p| p.get(language)) { + Some(ParserConfig::Ref(git_ref)) => { + (None, resolve_git_ref(git_ref), default_repo(language)) + } + Some(ParserConfig::Full { + build_script, + git_ref, + from, + }) => ( + build_script.clone(), + resolve_git_ref(git_ref), + from.as_ref().map_or_else( + || default_repo(language), + |f| { + Url::parse(f) + .into_diagnostic() + .wrap_err(format!("Parsing {f} for {language}")) + }, + ), + ), + _ => (None, String::from("HEAD").into(), default_repo(language)), + } +} + +fn resolve_git_ref(git_ref: &str) -> Ref { + Some(git_ref) + .filter(|f| f.len() != 40 && !f.starts_with('v')) + .and_then(|f| { + let versions = f.split('.').collect::>(); + if !versions.is_empty() && versions.iter().all(|f| f.parse::().is_ok()) { + Some(format!("v{f}").into()) + } else { + None + } + }) + .unwrap_or_else(|| git_ref.to_string().into()) +} + +fn default_repo(language: &str) -> Result { + let url = format!("{TSDL_FROM}{language}"); + Url::parse(&url) + .into_diagnostic() + .wrap_err(format!("Creating url {url} for {language}")) +} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..928ab6a --- /dev/null +++ b/src/config.rs @@ -0,0 +1,100 @@ +use std::path::Path; + +use diff::Diff; +use figment::{ + providers::{Format, Serialized, Toml}, + Figment, +}; +use miette::{Context, IntoDiagnostic, Result}; +use tracing::debug; + +use crate::{ + args::{BuildCommand, ConfigCommand}, + git, +}; + +pub fn run(command: &ConfigCommand, config: &Path) -> Result<()> { + match command { + ConfigCommand::Current => { + let config: BuildCommand = current(config, None)?; + println!( + "{}", + toml::to_string(&config) + .into_diagnostic() + .wrap_err("Generating default TOML config")? + ); + } + ConfigCommand::Default => println!( + "{}", + toml::to_string(&BuildCommand::default()).into_diagnostic()? 
+ ), + }; + Ok(()) +} + +pub fn current(config: &Path, command: Option<&BuildCommand>) -> Result { + let from_default = BuildCommand::default(); + let mut from_file: BuildCommand = Figment::new() + .merge(Serialized::defaults(from_default.clone())) + .merge(Toml::file(config)) + .extract() + .into_diagnostic() + .wrap_err("Merging default and config file")?; + match command { + Some(from_command) => { + debug!("Merging cli args + config files"); + let diff = from_default.diff(from_command); + log::debug!("diff default command = {:?}", diff); + from_file.apply(&diff); + } + None => { + debug!("Skipping cli args + config file merger."); + } + }; + log::debug!("from_both = {:?}", from_file); + // TODO: read from env vars. + // Figment is screwing with me, and it's overrinding config coming + // from Env::prefixed("TSDL_"). + // The scary thing is that I might have to write my own config + // joiner, where I need to track provenance of the config, and also + // whether it was explicitly set or taken from default … Figment + // has many features I don't care about. + Ok(from_file) +} + +pub fn print_indent(s: &str, indent: &str) { + s.lines().for_each(|line| println!("{indent}{line}")); +} + +pub fn show(command: &BuildCommand) -> Result<()> { + match &command.languages { + Some(langs) => { + println!("Building the following languages:"); + println!(); + println!( + "{}", + String::from_utf8( + git::column(&langs.join(" "), " ", 80) + .wrap_err("Printing requested languages")? + .stdout + ) + .into_diagnostic() + .wrap_err("Converting column-formatted languages to a string for printing")? + ); + } + None => { + println!("Building all languages."); + println!(); + } + } + println!("Running with the following configuration:"); + println!(); + print_indent( + &toml::to_string(&command) + .into_diagnostic() + .wrap_err("Showing config")?, + " ", + ); + println!(); + Ok(()) +} diff --git a/src/consts.rs b/src/consts.rs new file mode 100644 index 0000000..8fc0fd3 --- /dev/null +++ b/src/consts.rs @@ -0,0 +1,4 @@ +// Include the generated constants. +// See build.rs . +include!(concat!(env!("OUT_DIR"), "/tree_sitter_consts.rs")); +include!(concat!(env!("OUT_DIR"), "/tsdl_consts.rs")); diff --git a/src/display.rs b/src/display.rs new file mode 100644 index 0000000..2121c7f --- /dev/null +++ b/src/display.rs @@ -0,0 +1,336 @@ +use std::{ + borrow::Cow, + fmt::Display, + sync::{Arc, Mutex}, + time, +}; + +use console::style; +use enum_dispatch::enum_dispatch; +use miette::{Context, IntoDiagnostic, Result}; + +use crate::{args::ProgressStyle, format_duration}; + +/// TODO: Get rid of the stupid progress bar crate. +/// +/// The API is not nice, and I can't change the number of steps on the fly. +/// Which I need for repos declaring multiple parsers like php. I can't +/// change what's in the tick position easily too. And let's not mention +/// code duplication … +/// +/// What Ineed is a single class that handles plain and fancy progress strategies, +/// instead of having to handle them with static dispatch via `enum_dispatch`. +/// +/// PS: What' _"bad"_ about working with `enum_dispatch` is the language server. +/// Any modificaiton to the trait you're dispatching will not properly propagate +/// and your diagnostics will be behind reality. 
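The module comment above wishes for a single progress type instead of static dispatch through `enum_dispatch`; a rough, hypothetical sketch of that direction (not part of this crate) would hold each handle behind a trait object:

```rust
// Hypothetical alternative to the enum_dispatch design described above:
// one concrete progress type owning its handles as trait objects.
trait AnyHandle {
    fn tick(&self);
    fn is_done(&self) -> bool;
}

struct DynProgress {
    handles: Vec<Box<dyn AnyHandle>>,
}

impl DynProgress {
    fn tick(&self) {
        // Dynamic dispatch replaces the generated enum match arms.
        for handle in &self.handles {
            handle.tick();
        }
    }

    fn is_done(&self) -> bool {
        self.handles.iter().all(|handle| handle.is_done())
    }
}
```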
+ +pub const TICK_CHARS: &str = "⠷⠯⠟⠻⠽⠾⠿"; + +#[must_use] +pub fn current(progress: &ProgressStyle) -> Progress { + if match progress { + ProgressStyle::Auto => atty::is(atty::Stream::Stdout), + ProgressStyle::Fancy => true, + ProgressStyle::Plain => false, + } { + Progress::Fancy(Fancy::default()) + } else { + Progress::Plain(Plain::default()) + } +} + +#[derive(Debug, Clone)] +#[enum_dispatch(ProgressState)] +pub enum Progress { + Plain(Plain), + Fancy(Fancy), +} + +#[derive(Debug, Clone, Default)] +pub struct Plain { + handles: Vec, +} + +#[derive(Debug, Clone, Default)] +pub struct Fancy { + handles: Vec, + multi: indicatif::MultiProgress, +} + +#[enum_dispatch] +pub trait ProgressState { + fn clear(&self) -> Result<()>; + fn register(&mut self, name: impl Into, num_tasks: usize) -> ProgressHandle; + fn tick(&self); + fn is_done(&self) -> bool; +} + +#[derive(Debug, Clone)] +#[enum_dispatch(Handle)] +pub enum ProgressHandle { + Plain(PlainHandle), + Fancy(FancyHandle), +} + +#[derive(Debug, Clone)] +pub struct PlainHandle { + cur_task: Arc>, + name: Arc, + num_tasks: usize, + t_start: Option, +} + +#[derive(Debug, Clone)] +pub struct FancyHandle { + bar: indicatif::ProgressBar, + name: Arc, + num_tasks: usize, + t_start: Option, +} + +pub trait HandleMessage: Into> + Display {} +impl HandleMessage for T where T: Into> + Display {} + +#[enum_dispatch] +pub trait Handle { + /// Declares end of execution with an error. + fn err(&self, msg: impl HandleMessage); + /// Declares end of execution with an success. + fn fin(&self, msg: impl HandleMessage); + /// Changes the displayed message for the current step. + fn msg(&self, msg: impl HandleMessage); + /// Declares transition to next step. + fn step(&self, msg: impl HandleMessage); + /// Through err or fin. + fn is_done(&self) -> bool; + /// Declares transition to first strp. + fn start(&mut self, msg: impl HandleMessage); + /// Useful for `Fancy` to redraw time and ticker. + fn tick(&self); +} + +// Implementations. 
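A minimal sketch of how the two traits above are driven (the language name and messages are illustrative; the sequence of `register`, `start`, `step`, `msg`, and `fin` mirrors the calls made in `src/parser.rs`):

```rust
use tsdl::args::ProgressStyle;
use tsdl::display::{current, Handle, ProgressState};

fn drive_progress() {
    // Plain or Fancy is picked from the requested style; Plain here.
    let mut progress = current(&ProgressStyle::Plain);
    // One handle per parser, with NUM_STEPS (3) steps: clone, generate, build.
    let mut handle = progress.register("rust", 3);
    handle.start("Cloning");
    handle.step("Generating");
    handle.msg("Building grammar");
    handle.fin("HEAD");
    // The async update loop periodically calls tick() to redraw spinners.
    progress.tick();
}
```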
+ +impl Fancy { + #[must_use] + pub fn new() -> Self { + Fancy::default() + } +} + +impl Drop for Fancy { + fn drop(&mut self) { + for handle in &self.handles { + handle.bar.finish(); + } + } +} + +impl ProgressState for Fancy { + fn clear(&self) -> Result<()> { + self.multi + .clear() + .into_diagnostic() + .wrap_err("Clearing the multi-progress bar") + } + + fn register(&mut self, name: impl Into, num_tasks: usize) -> ProgressHandle { + let style = + indicatif::ProgressStyle::with_template("{prefix:.bold.dim} {spinner} {wide_msg}") + .unwrap() + .tick_chars(TICK_CHARS); + let bar = self + .multi + .add(indicatif::ProgressBar::new(num_tasks as u64)); + bar.set_prefix(format!("[?/{num_tasks}]")); + bar.set_style(style); + let handle = FancyHandle { + name: Arc::new(name.into()), + bar, + num_tasks, + t_start: None, + }; + self.handles.push(handle.clone()); + ProgressHandle::Fancy(handle) + } + + fn tick(&self) { + for bar in &self.handles { + bar.tick(); + } + } + + fn is_done(&self) -> bool { + self.handles.iter().all(Handle::is_done) + } +} + +impl ProgressState for Plain { + fn clear(&self) -> Result<()> { + Ok(()) + } + + fn register(&mut self, name: impl Into, num_tasks: usize) -> ProgressHandle { + let handle = PlainHandle { + cur_task: Arc::new(Mutex::new(0)), + name: Arc::new(name.into()), + num_tasks, + t_start: None, + }; + self.handles.push(handle.clone()); + ProgressHandle::Plain(handle) + } + + fn tick(&self) {} + + fn is_done(&self) -> bool { + self.handles.iter().all(Handle::is_done) + } +} + +impl FancyHandle { + fn format_elapsed(&self) -> String { + self.t_start + .map(|start| { + format!( + " in {}", + style(format_duration(time::Instant::now().duration_since(start))).yellow() + ) + }) + .unwrap_or_default() + } +} + +impl Handle for FancyHandle { + fn err(&self, msg: impl HandleMessage) { + self.bar.abandon_with_message(format!( + "{} {} {}{}", + *self.name, + style(msg.into()).blue(), + style("failed").red(), + self.format_elapsed() + )); + } + + fn fin(&self, msg: impl HandleMessage) { + self.bar.inc(1); + self.bar + .set_prefix(format!("[{}/{}]", self.bar.position(), self.num_tasks)); + self.bar.finish_with_message(format!( + "{} {} {}{}", + *self.name, + style(msg).blue(), + style("done").green(), + self.format_elapsed() + )); + } + + fn msg(&self, msg: impl HandleMessage) { + self.bar + .set_prefix(format!("[{}/{}]", self.bar.position(), self.num_tasks)); + self.bar.set_message(format!("{} {}", *self.name, msg)); + } + + fn step(&self, msg: impl HandleMessage) { + self.bar.inc(1); + self.bar + .set_prefix(format!("[{}/{}]", self.bar.position(), self.num_tasks)); + self.bar.set_message(format!("{}: {}", *self.name, msg)); + } + + fn is_done(&self) -> bool { + self.bar.is_finished() + } + + fn start(&mut self, msg: impl HandleMessage) { + self.t_start = Some(time::Instant::now()); + self.bar.inc(1); + self.bar + .set_prefix(format!("[{}/{}]", self.bar.position(), self.num_tasks)); + self.bar.set_message(format!("{} {}", *self.name, msg)); + } + + fn tick(&self) { + self.bar.tick(); + } +} + +impl PlainHandle { + fn format_elapsed(&self) -> String { + self.t_start + .map(|start| { + format!( + " in {}", + format_duration(time::Instant::now().duration_since(start)) + ) + }) + .unwrap_or_default() + } +} + +impl Handle for PlainHandle { + fn err(&self, msg: impl HandleMessage) { + eprintln!( + "[{}/{}] {} {} {}{}", + self.cur_task.lock().unwrap(), + self.num_tasks, + *self.name, + style(msg.into()).blue(), + style("failed").red(), + self.format_elapsed() + ); + } + + fn 
fin(&self, msg: impl HandleMessage) { + let cur_task = { + let mut res = self.cur_task.lock().unwrap(); + *res += 1; + *res + }; + eprintln!( + "[{}/{}] {} {} {}{}", + cur_task, + self.num_tasks, + *self.name, + style(msg).blue(), + style("done").green(), + self.format_elapsed() + ); + } + + fn msg(&self, msg: impl HandleMessage) { + eprintln!( + "[{}/{}] {}: {}", + self.cur_task.lock().unwrap(), + self.num_tasks, + *self.name, + msg + ); + } + + fn step(&self, msg: impl HandleMessage) { + let cur_task = { + let mut res = self.cur_task.lock().unwrap(); + *res += 1; + *res + }; + eprintln!("[{}/{}] {} {}", cur_task, self.num_tasks, *self.name, msg); + } + + fn is_done(&self) -> bool { + *self.cur_task.lock().unwrap() != self.num_tasks + } + + fn start(&mut self, msg: impl HandleMessage) { + self.t_start = Some(time::Instant::now()); + let cur_task = { + let mut res = self.cur_task.lock().unwrap(); + *res += 1; + *res + }; + eprintln!("[{}/{}] {} {}", cur_task, self.num_tasks, *self.name, msg); + } + + fn tick(&self) {} +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..ff6c2b4 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,56 @@ +use std::path::PathBuf; + +use derive_more::derive::Display; +use miette::Diagnostic; +use thiserror::Error; + +#[derive(Debug, Diagnostic, Error)] +#[error("{msg}\nStdOut:\n{stdout}\nStdErr:\n{stderr}")] +pub struct Command { + pub msg: String, + pub stderr: String, + pub stdout: String, +} + +#[derive(Debug, Diagnostic, Error)] +#[error("Could not figure out all languages")] +pub struct LanguageCollection { + #[related] + pub related: Vec, +} + +#[derive(Debug, Error, Diagnostic)] +#[error("{name}")] +pub struct Language { + pub name: String, + #[source] + #[diagnostic_source] + pub source: Box, +} + +#[derive(Debug, Diagnostic, Error)] +#[error("Could not build all parsers")] +pub struct Parser { + #[related] + pub related: Vec>, +} + +#[derive(Debug, Error, Diagnostic)] +#[error("{name}: {kind}")] +pub struct Step { + pub name: String, + pub kind: ParserOp, + #[source] + #[diagnostic_source] + pub source: Box, +} + +#[derive(Debug, Display)] +pub enum ParserOp { + #[display("Could not build in {}", dir.display())] + Build { dir: PathBuf }, + #[display("Could not clone to {}", dir.display())] + Clone { dir: PathBuf }, + #[display("Could not generate in {}", dir.display())] + Generate { dir: PathBuf }, +} diff --git a/src/git.rs b/src/git.rs new file mode 100644 index 0000000..a870a9c --- /dev/null +++ b/src/git.rs @@ -0,0 +1,144 @@ +use std::{ + fmt, + io::Write, + path::Path, + process::{Output, Stdio}, +}; + +use derive_more::{AsRef, Deref, From, FromStr, Into}; +use miette::{IntoDiagnostic, Result}; +use tokio::{fs, process::Command}; + +use crate::sh::Exec; + +#[derive(AsRef, Clone, Debug, Deref, From, FromStr, Hash, Into, PartialEq, Eq)] +#[as_ref(str, [u8], String)] +pub struct Ref(pub String); + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub enum Tag { + Exact { label: String, sha1: Ref }, + Ref(Ref), +} + +impl Tag { + #[must_use] + pub fn git_ref(&self) -> &Ref { + match self { + Tag::Exact { sha1, .. } => sha1, + Tag::Ref(r) => r, + } + } +} + +impl fmt::Display for Ref { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let git_ref = if self.0.len() == 40 && self.0.chars().all(|c| c.is_ascii_hexdigit()) { + &self.0[..7] + } else { + &self.0 + }; + write!(f, "{git_ref}") + } +} + +impl fmt::Display for Tag { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Tag::Exact { label, .. 
} => write!(f, "{label}"), + Tag::Ref(ref_) => write!(f, "{ref_}"), + } + } +} + +#[tracing::instrument] +pub async fn clone_fast(repo: &str, git_ref: &str, cwd: &Path) -> Result<()> { + if cwd.exists() { + let head_sha1 = String::from_utf8( + Command::new("git") + .current_dir(cwd) + .args(["rev-parse", "HEAD"]) + .exec() + .await? + .stdout, + ) + .into_diagnostic()?; + if head_sha1.trim() != git_ref { + Command::new("git") + .current_dir(cwd) + .args(["reset", "--hard", "HEAD"]) + .exec() + .await?; + fetch_and_checkout(cwd, git_ref).await?; + } + } else { + fs::create_dir_all(cwd).await.into_diagnostic()?; + Command::new("git") + .current_dir(cwd) + .arg("init") + .exec() + .await?; + Command::new("git") + .current_dir(cwd) + .args(["remote", "add", "origin", repo]) + .exec() + .await?; + fetch_and_checkout(cwd, git_ref).await?; + } + Ok(()) +} + +#[tracing::instrument] +async fn fetch_and_checkout(cwd: &Path, git_ref: &str) -> Result<()> { + Command::new("git") + .env("GIT_TERMINAL_PROMPT", "0") + .current_dir(cwd) + .args(["fetch", "origin", "--depth", "1", git_ref]) + .exec() + .await?; + Command::new("git") + .current_dir(cwd) + .args(["reset", "--hard", "FETCH_HEAD"]) + .exec() + .await?; + Ok(()) +} + +#[tracing::instrument] +pub async fn fetch_tags(cwd: &Path) -> Result<()> { + Command::new("git") + .current_dir(cwd) + .args(["fetch", "--tags", "--all"]) + .exec() + .await + .and(Ok(())) +} + +#[tracing::instrument] +pub async fn tag_for_ref(cwd: &Path, git_ref: &str) -> Result { + let output = Command::new("git") + .current_dir(cwd) + .args(["describe", "--abbrev=0", "--tags", git_ref]) + .exec() + .await?; + Ok(String::from_utf8(output.stdout) + .into_diagnostic()? + .trim() + .to_string()) +} + +pub fn column(input: &str, indent: &str, width: usize) -> Result { + let mut child = std::process::Command::new("git") + .arg("column") + .arg("--mode=always") + .arg(format!("--indent={indent}")) + .arg(format!("--width={width}",)) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .into_diagnostic()?; + if let Some(mut stdin) = child.stdin.take() { + stdin.write_all(input.as_bytes()).into_diagnostic()?; + } + child.wait_with_output().into_diagnostic() +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c0203ad --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,62 @@ +use std::{ + env, + path::{Path, PathBuf}, + time, +}; + +use miette::{IntoDiagnostic, Result}; + +extern crate log; + +pub mod args; +pub mod build; +pub mod config; +pub mod consts; +pub mod display; +pub mod error; +pub mod git; +pub mod logging; +pub mod parser; +#[macro_use] +pub mod sh; +pub mod tree_sitter; + +pub trait SafeCanonicalize { + fn canon(&self) -> Result; +} + +impl SafeCanonicalize for Path { + fn canon(&self) -> Result { + if self.is_absolute() { + Ok(self.to_path_buf()) + } else { + let current_dir = env::current_dir().into_diagnostic()?; + Ok(current_dir.join(self)) + } + } +} + +impl SafeCanonicalize for PathBuf { + fn canon(&self) -> Result { + self.as_path().canon() + } +} +fn format_duration(duration: time::Duration) -> String { + let total_seconds = duration.as_secs(); + let milliseconds = duration.subsec_millis(); + if total_seconds < 60 { + format!("{total_seconds}.{milliseconds:#02}s") + } else { + format!("{}mn {}s", total_seconds % 60, total_seconds / 60) + } +} + +pub fn relative_to_cwd(dir: &Path) -> PathBuf { + let canon = dir.canon().unwrap_or_else(|_| dir.to_path_buf()); + let cwd = env::current_dir().unwrap_or_else(|_| dir.to_path_buf()); + if canon != 
cwd && canon.starts_with(&cwd) { + dir.strip_prefix(cwd).map_or(canon, Path::to_path_buf) + } else { + canon + } +} diff --git a/src/logging.rs b/src/logging.rs new file mode 100644 index 0000000..4425ae4 --- /dev/null +++ b/src/logging.rs @@ -0,0 +1,98 @@ +use std::{ + fs::{self, File}, + path::{Path, PathBuf}, +}; + +use miette::{Context, IntoDiagnostic as _, Result}; +use tracing::level_filters::LevelFilter; +use tracing_appender::non_blocking::WorkerGuard; +#[cfg(debug_assertions)] +use tracing_error::ErrorLayer; +use tracing_log::AsTrace; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; + +use crate::{ + args::{Args, LogColor}, + config::current, + consts::TSDL_BUILD_DIR, + SafeCanonicalize, +}; + +pub fn init(args: &Args) -> Result { + let color = match args.log_color { + LogColor::Auto => atty::is(atty::Stream::Stdout), + LogColor::No => false, + LogColor::Yes => true, + }; + console::set_colors_enabled(color); + let filter = args.verbose.log_level_filter().as_trace(); + let without_time = std::env::var("TSDL_LOG_TIME") + .map(|v| !matches!(v.to_lowercase().as_str(), "1" | "y" | "yes")) + .unwrap_or(true); + let file = init_log_file(args)?; + Ok(init_tracing(file, color, filter, without_time)) +} + +fn init_tracing(file: File, color: bool, filter: LevelFilter, without_time: bool) -> WorkerGuard { + let (writer, guard) = tracing_appender::non_blocking(file); + let fmt_layer = tracing_subscriber::fmt::layer() + .compact() + .with_ansi(color) + .with_file(true) + .with_level(true) + .with_line_number(true) + .with_target(true) + .with_thread_ids(true) + .with_writer(writer); + if without_time { + let fmt_layer = fmt_layer.without_time(); + let registry = tracing_subscriber::registry().with(fmt_layer).with(filter); + #[cfg(debug_assertions)] + { + registry.with(ErrorLayer::default()).init(); + } + #[cfg(not(debug_assertions))] + { + registry.init(); + } + } else { + let registry = tracing_subscriber::registry().with(fmt_layer).with(filter); + #[cfg(debug_assertions)] + { + registry.with(ErrorLayer::default()).init(); + } + #[cfg(not(debug_assertions))] + { + registry.init(); + } + }; + guard +} + +fn init_log_file(args: &Args) -> Result { + let log = args + .log + .as_ref() + .filter(|l| { + l.canon() + .ok() + .and_then(|p| p.parent().map(Path::exists)) + .unwrap_or_default() + }) + .cloned() + .or_else(|| { + current(&args.config, args.command.as_build()) + .map(|c| Some(c.build_dir.clone().join("log"))) + .unwrap_or_default() + }) + .unwrap_or(PathBuf::from(TSDL_BUILD_DIR).join("log")); + let parent = log.parent().unwrap_or(Path::new(".")); + if !parent.exists() { + fs::create_dir_all(parent) + .into_diagnostic() + .wrap_err("Preparing log directory")?; + } + File::create(&log) + .into_diagnostic() + .wrap_err("Creating log file") +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..77b6e97 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,51 @@ +use clap::Parser; +use miette::Result; +use tracing::{error, info}; + +use tsdl::{args, build, config, display, logging}; + +fn main() -> Result<()> { + set_panic_hook(); + let args = args::Args::parse(); + let _guard = logging::init(&args)?; + info!("Starting"); + run(&args)?; + info!("Done"); + Ok(()) +} + +fn run(args: &args::Args) -> Result<()> { + match &args.command { + args::Command::Build(command) => build::run( + &config::current(&args.config, Some(command))?, + display::current(&args.progress), + ), + args::Command::Config { command } => config::run(command, &args.config), + } +} + +pub fn 
set_panic_hook() { + std::panic::set_hook(Box::new(move |info| { + #[cfg(not(debug_assertions))] + { + use human_panic::{handle_dump, print_msg, Metadata}; + let meta = Metadata::new(env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")) + .authors(env!("CARGO_PKG_AUTHORS").replace(':', ", ")) + .homepage(env!("CARGO_PKG_HOMEPAGE")); + + let file_path = handle_dump(&meta, info); + print_msg(file_path, &meta) + .expect("human-panic: printing error message to console failed"); + } + #[cfg(debug_assertions)] + { + better_panic::Settings::auto() + .most_recent_first(false) + .lineno_suffix(true) + .verbosity(better_panic::Verbosity::Full) + .create_panic_handler()(info); + } + error!("{}", info); + std::process::exit(1); + })); +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..b117a92 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,306 @@ +use std::{ + path::{Path, PathBuf}, + sync::Arc, +}; + +use ignore::{overrides::OverrideBuilder, types::TypesBuilder, WalkBuilder}; +use miette::{IntoDiagnostic, Result}; +use tokio::{fs, process::Command, sync::mpsc}; +use url::Url; + +use crate::{ + display::{Handle, ProgressHandle}, + error, + git::{clone_fast, Ref}, + sh::{Exec, Script}, + SafeCanonicalize, +}; + +pub const NUM_STEPS: usize = 3; + +#[tracing::instrument] +pub async fn build_languages(languages: Vec) -> Result<()> { + let buffer = if languages.is_empty() { + 64 + } else { + languages.len() + }; + let (tx, mut rx) = mpsc::channel(buffer); + for mut language in languages { + let tx = tx.clone(); + tokio::spawn(async move { + language.process(tx).await; + }); + } + drop(tx); + let mut errs = Vec::new(); + while let Some(msg) = rx.recv().await { + if let Err(err) = msg { + errs.push(err.into()); + } + } + if errs.is_empty() { + Ok(()) + } else { + Err(error::Parser { related: errs }.into()) + } +} + +#[derive(Clone, Debug)] +pub struct Language { + build_dir: PathBuf, + build_script: Option, + git_ref: Ref, + handle: ProgressHandle, + name: String, + out: PathBuf, + repo: Url, + ts_cli: Arc, +} + +impl Language { + #[allow(clippy::too_many_arguments)] + #[must_use] + pub fn new( + build_dir: PathBuf, + build_script: Option, + git_ref: Ref, + handle: ProgressHandle, + name: String, + out: PathBuf, + repo: Url, + ts_cli: Arc, + ) -> Self { + Language { + build_dir, + build_script, + git_ref, + handle, + name, + out, + repo, + ts_cli, + } + } + + #[tracing::instrument] + async fn process(&mut self, tx: mpsc::Sender>) { + let res = self.steps().await; + if res.is_err() { + tx.send(res).await.unwrap(); + self.handle.err(self.git_ref.to_string()); + } else { + self.handle.fin(self.git_ref.to_string()); + tx.send(Ok(())).await.unwrap(); + } + } + + #[tracing::instrument] + async fn steps(&mut self) -> Result<()> { + self.handle.start(format!("Cloning {}", self.git_ref)); + self.clone().await?; + self.handle.step(format!("Generating {}", self.git_ref)); + // TODO: parallel + for dir in self.collect_grammars() { + let dir_name = dir.file_name().unwrap().to_str().unwrap().to_string(); + if self.name != dir_name { + let mut new_out = self.out.clone(); + new_out.pop(); + new_out.push( + self.out + .file_name() + .unwrap() + .to_str() + .unwrap() + .replace(&self.name, &dir_name), + ); + self.out = new_out; + } + self.handle + .msg(format!("Generating {} in {}", self.git_ref, dir_name)); + self.build_grammar(dir).await?; + #[cfg(target_os = "macos")] + self.macos_clean().await?; + } + Ok(()) + } + + #[cfg_attr(not(target_os = "macos"), allow(dead_code))] + #[inline] + 
#[tracing::instrument] + async fn macos_clean(&mut self) -> Result<()> { + self.handle.msg("Cleaning"); + let mut out_dsym = self.out.clone(); + out_dsym.set_extension(format!( + "{}.dSYM", + self.out.extension().unwrap().to_str().unwrap() + )); + if out_dsym.exists() && out_dsym.is_dir() { + fs::remove_dir_all(out_dsym).await.into_diagnostic()?; + } + Ok(()) + } + + #[tracing::instrument] + async fn build_grammar(&self, dir: PathBuf) -> Result<()> { + self.generate(&dir).await?; + self.handle.msg(format!( + "Building {} parser: {}", + self.git_ref, + dir.file_name().unwrap().to_str().unwrap(), + )); + self.build(&dir).await?; + Ok(()) + } + + #[tracing::instrument] + async fn build(&self, dir: &Path) -> Result<()> { + self.build_script + .as_ref() + .map_or_else( + || { + let mut cmd = Command::new(&*self.ts_cli); + cmd.args(["build", "-o", self.out.canon().unwrap().to_str().unwrap()]); + cmd + }, + |script| Command::from_str(script), + ) + .current_dir(dir) + .exec() + .await + .map_err(|err| { + error::Step { + name: self.name.clone(), + kind: error::ParserOp::Build { + dir: self.build_dir.clone(), + }, + source: err.into(), + } + .into() + }) + .and(Ok(())) + } + + fn collect_grammars(&self) -> Vec { + let mut types_builder = TypesBuilder::new(); + types_builder.add_def("js:*.js").unwrap(); + let types = types_builder.select("js").build().unwrap(); + let mut overrides_builder = OverrideBuilder::new(&self.build_dir); + overrides_builder.case_insensitive(true).unwrap(); + overrides_builder + .add("!(.github|bindings|doc|docs|examples|queries|script|scripts|test|tests)/**") + .unwrap(); + let overrides = overrides_builder.build().unwrap(); + let mut walker = WalkBuilder::new(&self.build_dir); + walker + .git_global(false) + .git_ignore(true) + .hidden(false) + .overrides(overrides) + .types(types); + walker + .build() + .filter_map(|entry| { + entry.ok().filter(|dir| { + dir.file_type().unwrap().is_file() && dir.file_name() == "grammar.js" + }) + }) + .map(|entry| { + entry + .path() + .to_path_buf() + .parent() + .unwrap() + .canon() + .unwrap() + }) + .collect() + } + + // async fn copy(&self, dir: impl Into) -> Result<(), StepError> { + // let dir = dir.into(); + // let name = dir.file_name().unwrap().to_str().unwrap(); + // let dst = self.out_dir.clone().join(format!("{name}.{DLL_EXTENSION}")); + + // let mut files = fs::read_dir(&dir).await.unwrap(); + // let mut dlls = Vec::with_capacity(1); + // while let Ok(Some(entry)) = files.next_entry().await { + // let file_name = entry.file_name(); + // let name = file_name.as_os_str().to_str().unwrap(); + // if entry.file_type().await.unwrap().is_file() + // && name.ends_with(&format!(".{DLL_EXTENSION}")) + // { + // dlls.push(dir.clone().join(name)); + // } + // } + + // if dlls.len() == 0 { + // return Err(StepError { + // name: self.name.clone(), + // kind: ParserOp::Copy { + // src: self.out_dir.clone(), + // dst, + // }, + // source: miette!("Couldn't find any {DLL_EXTENSION} file"), + // }); + // } else if dlls.len() > 1 { + // return Err(StepError { + // name: self.name.clone(), + // kind: ParserOp::Copy { + // src: self.out_dir.clone(), + // dst, + // }, + // source: miette!("Found many {DLL_EXTENSION} files: {dlls:?}"), + // }); + // } + + // fs::copy(&dlls[0], &dst) + // .await + // .wrap_err_with(|| format!("cp {} {}", dlls[0].display(), dst.display())) + // .map_err(|err| StepError { + // name: self.name.clone(), + // kind: ParserOp::Copy { + // src: dlls.pop().unwrap(), + // dst, + // }, + // source: err, + // }) + // 
.and(Ok(())) + // } + + #[tracing::instrument] + async fn clone(&self) -> Result<()> { + clone_fast(self.repo.as_str(), &self.git_ref, &self.build_dir) + .await + .map_err(|err| { + error::Step { + name: self.name.clone(), + kind: error::ParserOp::Clone { + dir: self.build_dir.clone(), + }, + source: err.into(), + } + .into() + }) + } + + #[tracing::instrument] + async fn generate(&self, dir: &Path) -> Result<()> { + Command::new(&*self.ts_cli) + .current_dir(dir) + .arg("generate") + .exec() + .await + .map_err(|err| { + error::Step { + name: self.name.clone(), + kind: error::ParserOp::Generate { + dir: self.build_dir.clone(), + }, + source: err.into(), + } + .into() + }) + .and(Ok(())) + } +} diff --git a/src/sh.rs b/src/sh.rs new file mode 100644 index 0000000..0c2c8b3 --- /dev/null +++ b/src/sh.rs @@ -0,0 +1,132 @@ +use std::{ + env, + fmt::Write, + os::unix::process::ExitStatusExt, + path::{Path, PathBuf}, + process::Output, +}; + +use miette::{miette, IntoDiagnostic, Result}; +use tokio::process::Command; +use tracing::error; + +use crate::{error, relative_to_cwd}; + +pub trait Exec { + fn exec(&mut self) -> impl std::future::Future<Output = Result<Output>>; + fn display(&self) -> String; +} + +pub trait Script { + fn from_str(script: &str) -> Command; +} + +impl Exec for Command { + #[tracing::instrument] + async fn exec(&mut self) -> Result<Output> { + let output = self.output().await.into_diagnostic()?; + if output.status.success() { + Ok(output) + } else { + let program = self.as_std().get_program().to_str().unwrap(); + let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); + let msg = if let Some(code) = output.status.code() { + format!("{} failed with exit status {}.", self.display(), code) + } else { + format!( + "{} interrupted by signal {}.", + program, + output.status.signal().unwrap() + ) + }; + error!("{msg}\nStdOut:\n{stdout}\nStdErr:\n{stderr}"); + Err(error::Command { + msg, + stderr, + stdout, + } + .into()) + } + } + + // This is needlessly complicated, trying to minimize allocations, like grown-ups, + // not because it's needed —I didn't even measure anything— but because I'm exercising my rust. + fn display(&self) -> String { + let program = self.as_std().get_program(); + let args = self.as_std().get_args(); + let cwd = self.as_std().get_current_dir(); + let capacity = program.len() + 1 + args.len() + 1; // + 1 for spaces + let mut res = String::with_capacity( + capacity + + cwd.map_or( + 0, + // + 3 = 2 brackets and a space. + // we always overallocate by 1 (alignment aside); see the formatting of args. + |a| a.to_str().unwrap().len() + 3, + ), + ); + if let Some(path) = cwd { + write!(res, "[{}] ", relative_to_cwd(path).to_str().unwrap()).unwrap(); + }; + write!(res, "{} ", program.to_str().unwrap()).unwrap(); + let mut args_iter = args.enumerate(); + if let Some((_, first_arg)) = args_iter.next() { + write!(res, "{}", first_arg.to_str().unwrap()).unwrap(); + for (_, arg) in args_iter { + write!(res, " {}", arg.to_str().unwrap()).unwrap(); + } + } + res + } +} + +impl Script for Command { + fn from_str(script: &str) -> Command { + let shell = env::var("SHELL").unwrap_or_else(|_| String::from("sh")); + let mut cmd = Command::new(shell); + cmd.arg("-c").arg(script); + cmd + } +} + +/// Your local hometown one-eyed which. +/// +/// stdin, stdout, and stderr are ignored. 
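+/// +/// E.g. `which("curl").await` should resolve to something like "/usr/bin/curl"; the exact path depends on the system.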
+#[tracing::instrument] +pub async fn which(prog: &str) -> Result<PathBuf> { + let output = Command::new("which").arg(prog).exec().await?; + Ok(PathBuf::from( + String::from_utf8_lossy(&output.stdout).trim(), + )) +} + +#[tracing::instrument] +pub async fn chmod_x(prog: &Path) -> Result<Output> { + Command::new("chmod").arg("+x").arg(prog).exec().await +} + +#[tracing::instrument] +pub async fn download(out: &Path, url: &str) -> Result<Output> { + let which_prog = match which("curl").await { + Ok(path) => Ok(path), + Err(_) => which("wget").await, + }?; + let prog = which_prog + .file_name() + .and_then(|p| p.to_str()) + .ok_or(miette!("Could not find curl or wget"))?; + let out = out + .to_str() + .ok_or(miette!("Retrieving string from out path"))?; + match prog { + "curl" => Command::new(prog).args(["-o", out, "-L", url]).exec().await, + "wget" => Command::new(prog).args(["-O", out, url]).exec().await, + _ => unreachable!(), + } +} + +#[tracing::instrument] +pub async fn gunzip(gz: &Path) -> Result<Output> { + Command::new("gunzip").arg(gz).exec().await +} diff --git a/src/tree_sitter.rs b/src/tree_sitter.rs new file mode 100644 index 0000000..0a73b15 --- /dev/null +++ b/src/tree_sitter.rs @@ -0,0 +1,103 @@ +use std::borrow::Cow; +use std::collections::HashMap; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::{Arc, Mutex}; + +use miette::{miette, Context, IntoDiagnostic, Result}; +use tokio::process::Command; +use tracing::debug; +use url::Url; + +use crate::display::ProgressHandle; +use crate::git::{self, Ref}; +use crate::SafeCanonicalize; +use crate::{ + args::BuildCommand, + display::{Handle, Progress, ProgressState}, + git::{clone_fast, Tag}, + sh::{chmod_x, download, gunzip, Exec}, +}; + +#[allow(clippy::missing_panics_doc)] +#[tracing::instrument] +pub async fn tag(repo: &str, version: &str) -> Result<Tag> { + let output = Command::new("git") + .args(["ls-remote", "--refs", "--tags", repo]) + .exec() + .await?; + let stdout = String::from_utf8(output.stdout).into_diagnostic()?; + let mut refs = HashMap::new(); + for line in stdout.lines() { + let ref_line = line.split('\t').map(str::trim).collect::<Vec<_>>(); + let (sha1, full_ref) = (ref_line[0], ref_line[1]); + if let Some(tag) = full_ref.split('/').last() { + debug!("insert {tag} -> {sha1}"); + refs.insert(tag.to_string(), sha1.to_string()); + } + } + Ok(refs + .get_key_value(&format!("v{version}")) + .or_else(|| refs.get_key_value(version)) + .map_or_else( + || Tag::Ref(Ref::from_str(version).unwrap()), + |(k, v)| { + debug!("Found! {k} -> {v}"); + Tag::Exact { + sha1: Ref::from_str(v).unwrap(), + label: k.to_string(), + } + }, + )) +} + +#[tracing::instrument] +async fn cli(args: &BuildCommand, tag: &Tag, handle: &ProgressHandle) -> Result<PathBuf> { + let build_dir = &args.build_dir; + let platform = &args.tree_sitter.platform; + let repo = &args.tree_sitter.repo; + let tag = match tag { + Tag::Exact { label, .. } => Cow::Borrowed(label), + Tag::Ref(git_ref) => { + handle.msg(format!("Figuring out the exact tag for {tag}",)); + let tree_sitter = PathBuf::new().join(build_dir).join("tree-sitter"); + clone_fast(repo, tag.git_ref(), &tree_sitter).await?; + git::fetch_tags(&tree_sitter).await?; + Cow::Owned(git::tag_for_ref(&tree_sitter, git_ref).await?) 
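+ // With the tag resolved, the prebuilt CLI is fetched below from the repo's release assets; e.g. tag v0.22.0 with platform linux-arm64 is expected to map to .../releases/download/v0.22.0/tree-sitter-linux-arm64.gz.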
+ } + }; + let cli = format!("tree-sitter-{platform}"); + let res = PathBuf::new().join(build_dir).join(&cli).canon()?; + if !res.exists() { + handle.msg(format!("Downloading {tag}",)); + let gz_basename = format!("{cli}.gz"); + let url = format!("{repo}/releases/download/{tag}/{gz_basename}"); + let gz = PathBuf::new().join(build_dir).join(gz_basename); + + download(&gz, &url).await?; + gunzip(&gz).await?; + chmod_x(&res).await?; + } + Ok(res) +} + +#[tracing::instrument] +pub async fn prepare(args: &BuildCommand, progress: Arc<Mutex<Progress>>) -> Result<PathBuf> { + let mut handle = { + progress + .lock() + .map(|mut lock| lock.register("tree-sitter-cli", 3)) + .or(Err(miette!("Acquiring progress lock")))? + }; + + let repo = Url::parse(&args.tree_sitter.repo) + .into_diagnostic() + .wrap_err("Parsing the tree-sitter URL")?; + let version = &args.tree_sitter.version; + handle.start(format!("Figuring out tag from version {version}")); + let tag = tag(repo.as_str(), version).await?; + handle.step(format!("Fetching {tag}",)); + let cli = cli(args, &tag, &handle).await?; + handle.fin(format!("{tag}")); + Ok(cli) +} diff --git a/tests/cli.rs b/tests/cli.rs new file mode 100644 index 0000000..71d4a39 --- /dev/null +++ b/tests/cli.rs @@ -0,0 +1,21 @@ +mod cmd; + +use predicates::{self as p, prelude::PredicateBooleanExt}; + +use cmd::Sandbox; + +#[test] +fn empty_dir_no_command_shows_help() { + let mut sandbox = Sandbox::new(); + sandbox + .cmd + .assert() + .failure() + .stderr( + p::str::starts_with(env!("CARGO_PKG_DESCRIPTION")).and(p::str::contains(format!( + "Usage: {} [OPTIONS] <COMMAND>", + env!("CARGO_PKG_NAME") + ))), + ); + assert!(sandbox.is_empty()); +} diff --git a/tests/cmd/build.rs b/tests/cmd/build.rs new file mode 100644 index 0000000..4ff1437 --- /dev/null +++ b/tests/cmd/build.rs @@ -0,0 +1,205 @@ +use std::env::consts::DLL_EXTENSION; + +use assert_fs::prelude::*; +use indoc::indoc; +use predicates::{self as p}; +use rstest::*; + +use tsdl::consts::{ + TREE_SITTER_VERSION, TSDL_BUILD_DIR, TSDL_CONFIG_FILE, TSDL_OUT_DIR, TSDL_PREFIX, +}; + +use crate::cmd::Sandbox; + +#[rstest] +fn no_args_should_build_tree_sitter() { + let mut sandbox = Sandbox::new(); + sandbox.cmd.arg("build"); + sandbox + .cmd + .assert() + .success() + .stderr(p::str::contains(format!( + "tree-sitter-cli v{TREE_SITTER_VERSION} done" + ))); + assert!(!sandbox.is_empty()); + sandbox + .tmp + .child(TSDL_BUILD_DIR) + .child("log") + .assert(p::path::exists()) + .assert(p::path::is_file()); +} + +#[rstest] +#[case::no_leading_v("0.22.0", "v0.22.0")] +#[case::leading_v("v0.22.0", "v0.22.0")] +#[case::sha1("12fb31826b8469cc7b9788e72bceee5af1cf0977", "12fb318")] +fn no_args_should_build_tree_sitter_with_specific_version( + #[case] requested: &str, + #[case] version: &str, +) { + let mut sandbox = Sandbox::new(); + sandbox + .cmd + .args(["build", "--tree-sitter-version", requested]); + sandbox + .cmd + .assert() + .success() + .stderr(p::str::contains(format!("tree-sitter-cli {version} done"))); + assert!(!sandbox.is_empty()); + sandbox + .tmp + .child(TSDL_BUILD_DIR) + .child("log") + .assert(p::path::exists()) + .assert(p::path::is_file()); +} + +#[rstest] +#[case::gringo(vec!["gringo"])] +#[case::gringo_bringo(vec!["gringo", "bringo"])] +fn unknown_parser_should_fail(#[case] languages: Vec<&str>) { + let mut sandbox = Sandbox::new(); + sandbox.cmd.arg("build").args(&languages); + let mut assert = sandbox.cmd.assert().failure(); + for lang in &languages { + assert = assert.stderr(p::str::contains(format!("{lang} HEAD failed"))); + } + 
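+ // Even when every requested parser fails, the build dir with its log and the out dir should still exist, while no dylib is produced for the failed languages; the assertions that follow check exactly that.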
assert!(!sandbox.is_empty()); + sandbox + .tmp + .child(TSDL_BUILD_DIR) + .child("log") + .assert(p::path::exists()) + .assert(p::path::is_file()); + sandbox + .tmp + .child(TSDL_OUT_DIR) + .assert(p::path::exists()) + .assert(p::path::is_dir()); + for lang in languages { + sandbox + .tmp + .child(TSDL_OUT_DIR) + .child(format!("{lang}.{DLL_EXTENSION}")) + .assert(p::path::missing()); + } +} + +#[rstest] +#[case::json(vec!["json"])] +#[case::json_rust(vec!["json", "rust"])] +fn no_config_should_build_valid_parser_from_head(#[case] languages: Vec<&str>) { + let mut sandbox = Sandbox::new(); + sandbox.cmd.arg("build").args(&languages); + let mut assert = sandbox.cmd.assert().success(); + for lang in &languages { + assert = assert.stderr(p::str::contains(format!("{lang} HEAD done"))); + } + assert!(!sandbox.is_empty()); + sandbox + .tmp + .child(TSDL_BUILD_DIR) + .child("log") + .assert(p::path::exists()) + .assert(p::path::is_file()); + sandbox + .tmp + .child(TSDL_OUT_DIR) + .assert(p::path::exists()) + .assert(p::path::is_dir()); + for lang in &languages { + let dylib = sandbox + .tmp + .child(TSDL_OUT_DIR) + .child(format!("{TSDL_PREFIX}{lang}.{DLL_EXTENSION}")); + dylib.assert(p::path::exists()).assert(p::path::is_file()); + } +} + +#[rstest] +#[case::pinned_hash_and_from_cobol("cobol", "6a46906")] +#[case::pinned_no_leading_v_json("json", "v0.21.0")] +#[case::pinned_leading_v_java("java", "v0.21.0")] +#[case::unpinned_rust("rust", "HEAD")] +// #[case::pinned::cmd::typescript("typescript", "v0.21.0")] +fn build_pinned_and_unpinned(#[case] language: &str, #[case] version: &str) { + let config = indoc! { + r#" + [parsers] + json = "0.21.0" + java = "v0.21.0" + typescript = { ref = "0.21.0", cmd = "make" } + cobol = { ref = "6a469068cacb5e3955bb16ad8dfff0dd792883c9", from = "https://github.com/yutaro-sakamoto/tree-sitter-cobol" } + "# + }; + let mut sandbox = Sandbox::new(); + sandbox + .tmp + .child(TSDL_CONFIG_FILE) + .write_str(config) + .unwrap(); + sandbox + .cmd + .arg("build") + .arg(language) + .assert() + .success() + .stderr(p::str::contains(format!("{language} {version} done"))); + assert!(!sandbox.is_empty()); + sandbox + .tmp + .child(TSDL_BUILD_DIR) + .child("log") + .assert(p::path::exists()) + .assert(p::path::is_file()); + sandbox + .tmp + .child(TSDL_OUT_DIR) + .assert(p::path::exists()) + .assert(p::path::is_dir()); + let dylib = sandbox + .tmp + .child(TSDL_OUT_DIR) + .child(format!("{TSDL_PREFIX}{language}.{DLL_EXTENSION}")); + dylib.assert(p::path::exists()).assert(p::path::is_file()); +} + +#[test] +fn multi_parsers_no_cmd() { + let php = "php"; + let version = "HEAD"; + let languages = ["php", "php_only"]; + let mut sandbox = Sandbox::new(); + let mut assert = sandbox.cmd.arg("build").arg(php).assert().success(); + for language in languages { + assert = assert.stderr(p::str::contains(format!( + "{php}: Building {version} parser: {language}" + ))); + } + assert!(!sandbox.is_empty()); + sandbox + .tmp + .child(TSDL_BUILD_DIR) + .child("log") + .assert(p::path::exists()) + .assert(p::path::is_file()); + sandbox + .tmp + .child(TSDL_OUT_DIR) + .assert(p::path::exists()) + .assert(p::path::is_dir()); + for language in languages { + let dylib = sandbox + .tmp + .child(TSDL_OUT_DIR) + .child(format!("{TSDL_PREFIX}{language}.{DLL_EXTENSION}")); + dylib.assert(p::path::exists()).assert(p::path::is_file()); + } +} + +// TODO: +// #[case::pinned::cmd::typescript("typescript", "v0.21.0")] +// multi_parsers_cmd diff --git a/tests/cmd/cmd.rs b/tests/cmd/cmd.rs new file mode 
100644 index 0000000..e69de29 diff --git a/tests/cmd/config.rs b/tests/cmd/config.rs new file mode 100644 index 0000000..95e5769 --- /dev/null +++ b/tests/cmd/config.rs @@ -0,0 +1,84 @@ +use assert_fs::prelude::*; +use indoc::formatdoc; +use predicates::{self as p}; + +use tsdl::{args::BuildCommand, consts::TSDL_BUILD_DIR}; + +use crate::cmd::Sandbox; + +#[test] +fn no_args_shows_help() { + let mut sandbox = Sandbox::new(); + sandbox + .cmd + .args(["config"]) + .assert() + .failure() + .stderr(p::str::starts_with("Configuration helpers")) + .stderr(p::str::contains(format!( + "Usage: {} config [OPTIONS] <COMMAND>", + env!("CARGO_PKG_NAME") + ))); + assert!(sandbox.is_empty()); +} + +#[test] +fn default_is_default_toml() { + let mut sandbox = Sandbox::new(); + sandbox.cmd.args(["config", "default"]); + sandbox.cmd.assert().success().stdout(p::str::contains( + toml::to_string(&BuildCommand::default()).unwrap(), + )); + assert!(!sandbox.is_empty()); + sandbox + .tmp + .child(TSDL_BUILD_DIR) + .child("log") + .assert(p::path::exists()) + .assert(p::path::is_file()); +} + +#[test] +fn current_uses_default() { + let mut sandbox = Sandbox::new(); + sandbox.cmd.args(["config", "current"]); + sandbox + .cmd + .assert() + .success() + .stdout(p::str::contains(toml::to_string(&sandbox.build).unwrap())); + assert!(!sandbox.is_empty()); + sandbox + .tmp + .child(TSDL_BUILD_DIR) + .child("log") + .assert(p::path::exists()) + .assert(p::path::is_file()); +} + +#[test] +fn current_uses_config_file() { + let build_dir = "build-dir"; + let out_dir = "out-dir"; + let config = formatdoc! { + r#" + build-dir = "{build_dir}" + out = "{out_dir}" + "# + }; + let mut sandbox = Sandbox::new(); + sandbox.config(&config); + sandbox.cmd.args(["config", "current"]); + sandbox + .cmd + .assert() + .success() + .stdout(p::str::contains(toml::to_string(&sandbox.build).unwrap())); + assert!(!sandbox.is_empty()); + sandbox + .tmp + .child(build_dir) + .child("log") + .assert(p::path::exists()) + .assert(p::path::is_file()); +} diff --git a/tests/cmd/mod.rs b/tests/cmd/mod.rs new file mode 100644 index 0000000..2ffd610 --- /dev/null +++ b/tests/cmd/mod.rs @@ -0,0 +1,52 @@ +#[cfg(test)] +mod build; +#[cfg(test)] +mod config; + +use std::{fs, path::Path}; + +use assert_cmd::Command; +use assert_fs::TempDir; +use figment::{ + providers::{Format, Serialized, Toml}, + Figment, +}; + +use tsdl::{args::BuildCommand, consts::TSDL_CONFIG_FILE}; + +pub struct Sandbox { + pub build: BuildCommand, + pub cmd: Command, + pub tmp: TempDir, +} + +impl Sandbox { + pub fn new() -> Self { + let tmp = TempDir::new().unwrap(); + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + cmd.current_dir(tmp.path()); + Sandbox { + build: BuildCommand::default(), + cmd, + tmp, + } + } + + pub fn config(&mut self, config: &str) -> &mut Self { + self.config_at(config, &self.tmp.path().join(TSDL_CONFIG_FILE)) + } + + pub fn config_at(&mut self, config: &str, dst: &Path) -> &mut Self { + self.build = Figment::new() + .merge(Serialized::defaults(BuildCommand::default())) + .merge(Toml::string(config)) + .extract() + .unwrap(); + fs::write(dst, config).unwrap(); + self + } + + pub fn is_empty(&self) -> bool { + fs::read_dir(&self.tmp).is_ok_and(|mut dir| dir.next().is_none()) + } +} diff --git a/tests/config.rs b/tests/config.rs new file mode 100644 index 0000000..c9cf3e9 --- /dev/null +++ b/tests/config.rs @@ -0,0 +1,109 @@ +use assert_fs::prelude::*; +use indoc::{formatdoc, indoc}; +use miette::{IntoDiagnostic, Result}; +#[cfg(test)] +use 
pretty_assertions::{assert_eq, assert_ne}; + +use tsdl::{ + args::BuildCommand, + config, + consts::{ + TREE_SITTER_PLATFORM, TREE_SITTER_REPO, TREE_SITTER_VERSION, TSDL_BUILD_DIR, TSDL_FRESH, + TSDL_OUT_DIR, TSDL_SHOW_CONFIG, + }, +}; + +#[test] +fn current_from_generated_default() -> Result<()> { + let temp = assert_fs::TempDir::new().into_diagnostic()?; + let generated = temp.child("generated.toml"); + let def = BuildCommand::default(); + generated + .write_str(&toml::to_string(&def).into_diagnostic()?) + .into_diagnostic()?; + assert_eq!(def, config::current(&generated, None).unwrap()); + Ok(()) +} + +#[test] +fn current_from_empty() -> Result<()> { + let temp = assert_fs::TempDir::new().into_diagnostic()?; + let generated = temp.child("generated.toml"); + let def = BuildCommand::default(); + generated.touch().into_diagnostic()?; + assert_eq!(def, config::current(&generated, None).unwrap()); + Ok(()) +} + +#[test] +fn current_preserve_languages() -> Result<()> { + let temp = assert_fs::TempDir::new().into_diagnostic()?; + let generated = temp.child("generated.toml"); + let mut def = BuildCommand::default(); + generated.touch().into_diagnostic()?; + def.languages = None; + assert_eq!(def, config::current(&generated, Some(&def)).unwrap()); + def.languages = Some(vec![]); + assert_eq!(def, config::current(&generated, Some(&def)).unwrap()); + def.languages = Some(vec!["rust".to_string()]); + assert_eq!(def, config::current(&generated, Some(&def)).unwrap()); + def.languages = Some(vec!["rust".to_string(), "ruby".to_string()]); + assert_eq!(def, config::current(&generated, Some(&def)).unwrap()); + Ok(()) +} + +#[test] +fn current_default_is_default() -> Result<()> { + let config = formatdoc! { + r#" + build-dir = "{}" + fresh = {} + out = "{}" + show-config = {} + + [tree-sitter] + version = "{}" + repo = "{}" + platform = "{}" + "#, + TSDL_BUILD_DIR, + TSDL_FRESH, + TSDL_OUT_DIR, + TSDL_SHOW_CONFIG, + TREE_SITTER_VERSION, + TREE_SITTER_REPO, + TREE_SITTER_PLATFORM, + }; + let temp = assert_fs::TempDir::new().into_diagnostic()?; + let generated = temp.child("generated.toml"); + let def = BuildCommand::default(); + generated.write_str(&config).into_diagnostic()?; + assert_eq!(def, config::current(&generated, None).unwrap()); + assert_eq!(def, config::current(&generated, Some(&def)).unwrap()); + Ok(()) +} + +#[test] +fn current_overrides_default() -> Result<()> { + let config = indoc! { + r#" + build-dir = "/root" + fresh = true + out = "tree-sitter-parsers" + show-config = true + + [tree-sitter] + version = "1.0.0" + repo = "https://gitlab.com/tree-sitter/tree-sitter" + platform = "linux-arm64" + "# + }; + let temp = assert_fs::TempDir::new().into_diagnostic()?; + let generated = temp.child("generated.toml"); + let def = BuildCommand::default(); + generated.write_str(config).into_diagnostic()?; + generated.assert(config); + assert_ne!(def, config::current(&generated, None).unwrap()); + assert_ne!(def, config::current(&generated, Some(&def)).unwrap()); + Ok(()) +}