Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script to backfill team alumni #1125

Closed
dtolnay opened this issue Nov 27, 2023 · 0 comments
Closed

Script to backfill team alumni #1125

dtolnay opened this issue Nov 27, 2023 · 0 comments

Comments

@dtolnay
Copy link
Member

dtolnay commented Nov 27, 2023

I put this together for due diligence in #1123. It looks at the history of https://github.com/rust-lang/team and https://github.com/rust-lang/prev.rust-lang.org to find former team members who are not listed as alumni in the teams they were members of.

The output looks like this:

Kimundi ["libs-api"]
alexcrichton ["core", "devtools", "infra", "libs-api", "release", "rustup", "wg-security-response", "wg-wasm"]
aturon ["cargo", "core", "infra", "lang", "libs-api", "wg-wasm"]
brson ["community", "core", "devtools", "infra", "libs-api", "style"]
huonw ["core", "lang", "libs-api"]
// [dependencies]
// anyhow = "1"
// chrono = "0.4"
// git2 = "0.18"
// serde = { version = "1", features = ["derive"] }
// serde_yaml = "0.9"
// toml = "0.8"

use anyhow::{Context, Result};
use chrono::{DateTime, NaiveDate};
use git2::{Commit, Object, ObjectType, Repository};
use serde::Deserialize;
use std::borrow::Cow;
use std::collections::{BTreeMap as Map, BTreeSet as Set};
use std::path::Path;
use std::str;

/// Filesystem path of a local clone of the rust-lang/team repository.
/// Placeholder value: edit before running. `main` opens this repository and
/// walks its history starting from `origin/master`.
const TEAM_REPO: &str = "/path/to/rust-lang/team";

/// Filesystem path of a local clone of the rust-lang/prev.rust-lang.org
/// repository. Placeholder value: edit before running. `main` opens this
/// repository and walks its history starting from `origin/master`.
const OLD_WEBSITE_REPO: &str = "/path/to/rust-lang/prev.rust-lang.org";

/// The subset of a team TOML file (a blob under `teams/` in the team
/// repository) that this script reads. Deserialized with `toml::from_str`.
#[derive(Deserialize, Debug)]
struct Toml {
    /// Team name as written in the file; passed through `normalize_team`
    /// (with the subdirectory name prepended for files in a nested directory).
    name: String,
    /// The `people` table holding the member lists.
    people: TomlTeam,
}

/// The `people` table of a team TOML file.
#[derive(Deserialize, Debug)]
struct TomlTeam {
    /// Listed members; this key is required for deserialization to succeed.
    members: Vec<String>,
    /// Listed alumni; deserializes to an empty list when the key is absent.
    #[serde(default)]
    alumni: Vec<String>,
}

/// The subset of the old website's team data that this script reads. It is
/// deserialized either from `_data/team.yml` or from the YAML front matter of
/// `en-US/team.md` / `team.md`.
#[derive(Deserialize, Debug)]
struct Yaml {
    /// Teams listed in the document; deserializes to an empty list when the
    /// `teams` key is absent.
    #[serde(default)]
    teams: Vec<YamlTeam>,
}

/// One team entry in the old website's team data.
#[derive(Deserialize, Debug)]
struct YamlTeam {
    /// Team name as written on the old website; passed through
    /// `normalize_team` before being recorded.
    name: String,
    /// Members listed for the team.
    members: Vec<String>,
}

fn main() -> Result<()> {
    // person -> set of teams
    let mut historical_membership = Map::<String, Set<String>>::new();
    let mut present_membership = historical_membership.clone();

    let team_repo = Repository::open(TEAM_REPO)?;
    let master_object = team_repo.revparse_single("origin/master")?;
    for_each_commit(&master_object, |commit| {
        let tree = commit.tree()?;
        let Some(teams_dir) = tree.get_name("teams") else {
            return Ok(());
        };
        let object = teams_dir.to_object(&team_repo)?;
        let mut nested_trees = vec![(String::new(), object.into_tree().unwrap())];
        let timestamp = DateTime::from_timestamp(commit.committer().when().seconds(), 0)
            .unwrap()
            .date_naive();
        while let Some((prefix, nested_tree)) = nested_trees.pop() {
            for entry in &nested_tree {
                let object = entry.to_object(&team_repo)?;
                match entry.kind() {
                    Some(ObjectType::Blob) => {}
                    Some(ObjectType::Tree) => {
                        assert_eq!(prefix, "");
                        let nested_prefix = entry.name().unwrap().to_owned();
                        nested_trees.push((nested_prefix, object.into_tree().unwrap()));
                        continue;
                    }
                    _ => unreachable!(),
                }
                let blob = object.into_blob().unwrap();
                let content = str::from_utf8(blob.content())?;
                let de: Toml = toml::from_str(content)
                    .with_context(|| format!("failed to parse {}", entry.name().unwrap()))?;
                let team_name = if prefix.is_empty() {
                    Cow::Borrowed(&de.name)
                } else {
                    Cow::Owned(format!("{}/{}", prefix, de.name))
                };
                let Some(team_name) = normalize_team(&team_name, timestamp) else {
                    continue;
                };
                for member in de.people.members.into_iter().chain(de.people.alumni) {
                    historical_membership
                        .entry(member)
                        .or_insert_with(Set::new)
                        .insert(team_name.to_owned());
                }
            }
        }
        if commit.id() == master_object.id() {
            present_membership = historical_membership.clone();
        }
        Ok(())
    })?;

    let old_website_repo = Repository::open(OLD_WEBSITE_REPO)?;
    let master_object = old_website_repo.revparse_single("origin/master")?;
    for_each_commit(&master_object, |commit| {
        let de: Yaml = if let Some(yaml) = read_path(&old_website_repo, commit, "_data/team.yml")? {
            serde_yaml::from_slice(&yaml)
        } else if let Some(content) = read_path(&old_website_repo, commit, "en-US/team.md")? {
            let content = str::from_utf8(&content)?;
            let (yaml, _markdown) = content.split_once("\n---").unwrap();
            serde_yaml::from_str(yaml)
        } else if let Some(content) = read_path(&old_website_repo, commit, "team.md")? {
            let content = str::from_utf8(&content)?;
            let (yaml, _markdown) = content.split_once("\n---").unwrap();
            serde_yaml::from_str(yaml)
        } else {
            return Ok(());
        }?;
        let timestamp = DateTime::from_timestamp(commit.committer().when().seconds(), 0)
            .unwrap()
            .date_naive();
        for team in de.teams {
            let Some(team_name) = normalize_team(&team.name, timestamp) else {
                continue;
            };
            for member in team.members {
                historical_membership
                    .entry(member)
                    .or_insert_with(Set::new)
                    .insert(team_name.to_owned());
            }
        }
        Ok(())
    })?;

    for (member, historical_teams) in &historical_membership {
        let uncredited = if let Some(present_teams) = present_membership.get(member) {
            historical_teams
                .difference(present_teams)
                .filter(|team| !eclipsed(team, present_teams))
                .collect()
        } else {
            Vec::from_iter(historical_teams)
        };
        if !uncredited.is_empty() {
            println!("{} {:?}", member, uncredited);
        }
    }

    Ok(())
}

/// Maps a team name found in historical data to the name used for comparison,
/// or returns `None` for entries that should not be tracked at all.
///
/// A trailing `" team"` is ignored when looking the name up. Names without an
/// entry in the tables below are returned exactly as given (including any
/// `" team"` suffix). `libs` is special: it maps to `libs-api` only when `when`
/// is before 2021-06-13, and is otherwise returned unchanged.
fn normalize_team(name: &str, when: NaiveDate) -> Option<&str> {
    /// Names that are skipped entirely.
    const DROPPED: &[&str] = &[
        "Rust team alumni",
        "alumni",
        "cloud-compute",
        "leads",
        "wg-leads",
    ];

    /// (historical aliases, name to record) pairs.
    const RENAMED: &[(&[&str], &str)] = &[
        (&["Cargo"], "cargo"),
        (&["Community"], "community"),
        (&["Compiler"], "compiler"),
        (&["Core"], "core"),
        (&["Crates.io"], "crates-io"),
        (&["Dev tools peers"], "archive/devtools-peers"),
        (&["Dev tools", "Tools"], "devtools"),
        (&["Documentation peers", "docs-peers"], "archive/docs-peers"),
        (&["Documentation", "docs"], "archive/docs"),
        (&["IDEs and editors", "ides"], "archive/ides"),
        (&["Infrastructure", "Tooling and infrastructure"], "infra"),
        (&["Language team shepherds"], "archive/lang-shepherds"),
        (&["Language", "Language design"], "lang"),
        (&["Library team shepherds"], "libs-contributors"),
        (&["Library"], "libs-api"),
        (&["Moderation"], "mods"),
        (&["Release"], "release"),
        (&["Rustdoc"], "rustdoc"),
        (&["Style"], "style"),
        (&["aarch64"], "arm"),
        (&["core-observers"], "archive/core-observers"),
        (&["docsrs-ops"], "docs-rs"),
        (&["ecosystem"], "archive/ecosystem"),
        (&["infra-admins"], "archive/infra-admins"),
        (&["interim-leadership-chat"], "archive/interim-leadership-chat"),
        (&["lang-shepherds"], "archive/lang-shepherds"),
        (&["libc"], "crate-maintainers"),
        (&["library-reviewers"], "libs-contributors"),
        (&["mods-discord"], "archive/mods-discord"),
        (&["production"], "archive/production"),
        (&["project-foundation"], "archive/project-foundation"),
        (&["reference"], "archive/reference"),
        (&["rust-analyzer-contributors", "wg-rls-2-triage"], "rust-analyzer"),
        (&["rust-by-example"], "wg-rust-by-example"),
        (&["security", "wg-security"], "wg-security-response"),
        (&["test-tools"], "testing-devex"),
        (&["wg-async-await", "wg-async-foundations"], "wg-async"),
        (&["wg-clippy"], "clippy"),
        (&["wg-codegen"], "archive/wg-codegen"),
        (&["wg-governance"], "archive/wg-governance"),
        (&["wg-learning"], "archive/wg-learning"),
        (&["wg-localization"], "community-localization"),
        (&["wg-meta"], "archive/wg-meta"),
        (&["wg-net"], "archive/wg-net"),
        (&["wg-net-async"], "wg-async"),
        (&["wg-net-embedded"], "wg-embedded"),
        (&["wg-net-web"], "archive/wg-net-web"),
        (&["wg-nll", "wg-compiler-nll"], "archive/wg-nll"),
        (&["wg-parselib"], "archive/wg-parselib"),
        (&["wg-rls-2", "wg-rls-2.0"], "rust-analyzer"),
        (&["wg-rustfix"], "archive/wg-rustfix"),
        (&["wg-rustfmt"], "rustfmt"),
        (&["wg-rustup"], "rustup"),
        (&["wg-traits"], "types"),
        (&["wg-unsafe-code-guidelines"], "opsem"),
    ];

    let key = name.strip_suffix(" team").unwrap_or(name);

    if DROPPED.contains(&key) {
        return None;
    }

    // The only time-dependent mapping: `libs` is rewritten to `libs-api` only
    // for dates before the cutoff; afterwards the name is kept as given.
    if key == "libs" {
        let cutoff = NaiveDate::from_ymd_opt(2021, 6, 13).unwrap();
        return Some(if when < cutoff { "libs-api" } else { name });
    }

    let recorded = RENAMED
        .iter()
        .find(|(aliases, _)| aliases.contains(&key))
        .map_or(name, |&(_, canonical)| canonical);
    Some(recorded)
}

/// Reports whether a missing historical `team` is made redundant by a team the
/// person is currently listed in.
///
/// `compiler-contributors` is covered by `compiler`; `libs-contributors` is
/// covered by either `libs` or `libs-api`. Every other team is never covered.
fn eclipsed(team: &str, present_teams: &Set<String>) -> bool {
    let listed_in = |name: &str| present_teams.contains(name);

    if team == "compiler-contributors" {
        listed_in("compiler")
    } else if team == "libs-contributors" {
        listed_in("libs") || listed_in("libs-api")
    } else {
        false
    }
}

/// Calls `f` on the commit `head` resolves to and then on each first-parent
/// ancestor in turn, stopping after a commit that has no parents.
///
/// An error returned by `f` is reported on stderr together with the commit id
/// and does not stop the walk. Only parent 0 is followed, so commits reachable
/// solely through other parents of a merge are not visited.
///
/// # Errors
///
/// Fails if `head` cannot be peeled to a commit or if a parent commit cannot
/// be loaded.
fn for_each_commit(head: &Object, mut f: impl FnMut(&Commit) -> Result<()>) -> Result<()> {
    // Peel rather than assume `head` is already a commit, so that an object
    // such as an annotated tag yields its commit (or an error) instead of a panic.
    let mut commit = head.peel_to_commit()?;
    loop {
        if let Err(err) = f(&commit) {
            eprintln!("at commit {}: {:#}", commit.id(), err);
        }
        if commit.parent_count() == 0 {
            return Ok(());
        }
        commit = commit.parent(0)?;
    }
}

/// Reads the contents of the file at `path` in the tree of `commit`.
///
/// Returns `Ok(None)` when the path lookup in the commit's tree fails, which
/// is how an absent file shows up.
///
/// # Errors
///
/// Fails if the commit's tree or the entry's object cannot be loaded, or if
/// `path` resolves to something other than a blob (for example a directory).
fn read_path(
    repo: &Repository,
    commit: &Commit,
    path: impl AsRef<Path>,
) -> Result<Option<Vec<u8>>> {
    let path = path.as_ref();
    let tree = commit.tree()?;
    let Ok(tree_entry) = tree.get_path(path) else {
        return Ok(None);
    };
    // Report a non-blob entry as an error instead of panicking, so the caller
    // can skip this commit and keep going.
    let blob = tree_entry
        .to_object(repo)?
        .into_blob()
        .ok()
        .with_context(|| format!("{} is not a file", path.display()))?;
    Ok(Some(blob.content().to_owned()))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant