From 71b1627aacb86be387a90174f530ea930f5f5e72 Mon Sep 17 00:00:00 2001
From: Tobias Bieniek <tobias@bieniek.cloud>
Date: Thu, 16 Sep 2021 03:03:46 +0200
Subject: [PATCH 1/2] CI: Ensure that tests from all workspace projects are run

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index de025eefc78..e8dbc319727 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -218,11 +218,11 @@ jobs:
 
       - name: Run tests (with coverage report)
         if: matrix.rust == 'stable'
-        run: cargo tarpaulin --avoid-cfg-tarpaulin
+        run: cargo tarpaulin --avoid-cfg-tarpaulin --workspace
 
       - name: Run tests
         if: matrix.rust != 'stable'
-        run: cargo test
+        run: cargo test --workspace
 
       - name: Prune unnecessary cache
         run: script/ci/prune-cache.sh

From f9871c2e5be24fa8a280f548fade046011fc688b Mon Sep 17 00:00:00 2001
From: Tobias Bieniek <tobias@bieniek.cloud>
Date: Thu, 16 Sep 2021 03:08:01 +0200
Subject: [PATCH 2/2] Extract `cio_markdown` subpackage

This allows us to isolate our markdown rendering code in a dedicated package, which could potentially have a positive effect on the compile times of the full project. It also means we can iterate on this part of the code independent from the other parts, and with a well-specified API interface.
---
 Cargo.lock                  |  14 +-
 Cargo.toml                  |   4 +-
 src/admin/render_readmes.rs |   2 +-
 src/markdown/Cargo.toml     |  18 ++
 src/markdown/lib.rs         | 559 ++++++++++++++++++++++++++++++++++++
 src/render.rs               | 558 +----------------------------------
 6 files changed, 591 insertions(+), 564 deletions(-)
 create mode 100644 src/markdown/Cargo.toml
 create mode 100644 src/markdown/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 3cda25be285..1193e9a619c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -243,14 +243,13 @@ checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
 name = "cargo-registry"
 version = "0.2.2"
 dependencies = [
- "ammonia",
  "anyhow",
  "base64",
  "cargo-registry-s3",
  "chrono",
+ "cio_markdown",
  "claim",
  "clap",
- "comrak",
  "conduit",
  "conduit-conditional-get",
  "conduit-cookie",
@@ -274,7 +273,6 @@ dependencies = [
  "git2",
  "handlebars",
  "hex",
- "htmlescape",
  "http",
  "hyper",
  "hyper-tls",
@@ -346,6 +344,16 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "cio_markdown"
+version = "0.0.0"
+dependencies = [
+ "ammonia",
+ "comrak",
+ "htmlescape",
+ "url",
+]
+
 [[package]]
 name = "cipher"
 version = "0.2.5"
diff --git a/Cargo.toml b/Cargo.toml
index 04cfa632163..0cc0359ead4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,13 +30,12 @@ rustdoc-args = [
 ]
 
 [dependencies]
-ammonia = "3.0.0"
 anyhow = "1.0"
 base64 = "0.13"
 cargo-registry-s3 = { path = "src/s3", version = "0.2.0" }
 chrono = { version = "0.4.0", features = ["serde"] }
+cio_markdown = { path = "src/markdown" }
 clap = "=3.0.0-beta.4"
-comrak = { version = "0.10.1", default-features = false }
 
 conduit = "0.9.0-alpha.5"
 conduit-conditional-get = "0.9.0-alpha.3"
@@ -61,7 +60,6 @@ futures-util = "0.3"
 git2 = "0.13.0"
 handlebars = "4.1.3"
 hex = "0.4"
-htmlescape = "0.3.1"
 http = "0.2"
 hyper = { version = "0.14", features = ["client", "http1"] }
 indexmap = { version = "1.0.2", features = ["serde-1"] }
diff --git a/src/admin/render_readmes.rs b/src/admin/render_readmes.rs
index 957fb87add9..1d89dcbcba8 100644
--- a/src/admin/render_readmes.rs
+++ b/src/admin/render_readmes.rs
@@ -1,13 +1,13 @@
 use crate::{
     config, db,
     models::Version,
-    render::readme_to_html,
     schema::{crates, readme_renderings, versions},
     uploaders::Uploader,
 };
 use std::{io::Read, path::Path, sync::Arc, thread};
 
 use chrono::{TimeZone, Utc};
+use cio_markdown::readme_to_html;
 use clap::Clap;
 use diesel::{dsl::any, prelude::*};
 use flate2::read::GzDecoder;
diff --git a/src/markdown/Cargo.toml b/src/markdown/Cargo.toml
new file mode 100644
index 00000000000..c5831846327
--- /dev/null
+++ b/src/markdown/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+
+name = "cio_markdown"
+version = "0.0.0"
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-lang/crates.io"
+description = "crates.io markdown renderer"
+edition = "2018"
+resolver = "2"
+
+[lib]
+path = "lib.rs"
+
+[dependencies]
+ammonia = "3.1.2"
+comrak = { version = "0.10.1", default-features = false }
+htmlescape = "0.3.1"
+url = "2.2.2"
diff --git a/src/markdown/lib.rs b/src/markdown/lib.rs
new file mode 100644
index 00000000000..089e96347af
--- /dev/null
+++ b/src/markdown/lib.rs
@@ -0,0 +1,559 @@
+//! Render README files to HTML.
+
+use ammonia::{Builder, UrlRelative, UrlRelativeEvaluate};
+use comrak::nodes::{AstNode, NodeValue};
+use htmlescape::encode_minimal;
+use std::borrow::Cow;
+use std::path::Path;
+use url::Url;
+
+/// Context for markdown to HTML rendering.
+struct MarkdownRenderer<'a> {
+    html_sanitizer: Builder<'a>,
+}
+
+impl<'a> MarkdownRenderer<'a> {
+    /// Creates a new renderer instance.
+    ///
+    /// Per `readme_to_html`, `base_url` is the base URL prepended to any
+    /// relative links in the input document.  See that function for more detail.
+    fn new(base_url: Option<&'a str>, base_dir: &'a str) -> MarkdownRenderer<'a> {
+        let allowed_classes = hashmap(&[(
+            "code",
+            hashset(&[
+                "language-bash",
+                "language-clike",
+                "language-glsl",
+                "language-go",
+                "language-ini",
+                "language-javascript",
+                "language-json",
+                "language-markup",
+                "language-protobuf",
+                "language-ruby",
+                "language-rust",
+                "language-scss",
+                "language-sql",
+                "language-toml",
+                "language-yaml",
+            ]),
+        )]);
+        let sanitize_url = UrlRelative::Custom(Box::new(SanitizeUrl::new(base_url, base_dir)));
+
+        let mut html_sanitizer = Builder::default();
+        html_sanitizer
+            .add_tags(&["input"])
+            .link_rel(Some("nofollow noopener noreferrer"))
+            .add_generic_attributes(&["align"])
+            .add_tag_attributes("a", &["id", "target"])
+            .add_tag_attributes("input", &["checked", "disabled", "type"])
+            .allowed_classes(allowed_classes)
+            .url_relative(sanitize_url)
+            .id_prefix(Some("user-content-"));
+        MarkdownRenderer { html_sanitizer }
+    }
+
+    /// Renders the given markdown to HTML using the current settings.
+    fn to_html(&self, text: &str) -> String {
+        use comrak::{
+            format_html, parse_document, Arena, ComrakExtensionOptions, ComrakOptions,
+            ComrakRenderOptions,
+        };
+
+        let options = ComrakOptions {
+            render: ComrakRenderOptions {
+                unsafe_: true, // The output will be sanitized with `ammonia`
+                ..ComrakRenderOptions::default()
+            },
+            extension: ComrakExtensionOptions {
+                autolink: true,
+                strikethrough: true,
+                table: true,
+                tagfilter: true,
+                tasklist: true,
+                header_ids: Some("user-content-".to_string()),
+                ..ComrakExtensionOptions::default()
+            },
+            ..ComrakOptions::default()
+        };
+
+        let arena = Arena::new();
+        let root = parse_document(&arena, text, &options);
+
+        // Tweak annotations of code blocks.
+        iter_nodes(root, &|node| {
+            if let NodeValue::CodeBlock(ref mut ncb) = node.data.borrow_mut().value {
+                // If annot includes invalid UTF-8 char, do nothing.
+                if let Ok(mut orig_annot) = String::from_utf8(ncb.info.to_vec()) {
+                    // Ignore characters after a comma for syntax highlighting to work correctly.
+                    if let Some(offset) = orig_annot.find(',') {
+                        let _ = orig_annot.drain(offset..orig_annot.len());
+                        ncb.info = orig_annot.as_bytes().to_vec();
+                    }
+                }
+            }
+        });
+
+        let mut html = Vec::new();
+        format_html(root, &options, &mut html).unwrap();
+        let rendered = String::from_utf8(html).unwrap();
+        self.html_sanitizer.clean(&rendered).to_string()
+    }
+}
+
+/// Iterate the nodes in the CommonMark AST, used in comrak.
+fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F)
+where
+    F: Fn(&'a AstNode<'a>),
+{
+    f(node);
+    for c in node.children() {
+        iter_nodes(c, f);
+    }
+}
+
+/// Add trailing slash and remove `.git` suffix of base URL.
+fn canon_base_url(mut base_url: String) -> String {
+    if !base_url.ends_with('/') {
+        base_url.push('/');
+    }
+    if base_url.ends_with(".git/") {
+        let offset = base_url.len() - 5;
+        base_url.drain(offset..offset + 4);
+    }
+    base_url
+}
+
+/// Sanitize relative URLs in README files.
+struct SanitizeUrl {
+    base_url: Option<String>,
+    base_dir: String,
+}
+
+impl SanitizeUrl {
+    fn new(base_url: Option<&str>, base_dir: &str) -> Self {
+        let base_url = base_url
+            .and_then(|base_url| Url::parse(base_url).ok())
+            .and_then(|url| match url.host_str() {
+                Some("github.com") | Some("gitlab.com") | Some("bitbucket.org") => {
+                    Some(canon_base_url(url.into()))
+                }
+                _ => None,
+            });
+        Self {
+            base_url,
+            base_dir: base_dir.to_owned(),
+        }
+    }
+}
+
+/// Groups media-related URL info
+struct MediaUrl {
+    is_media: bool,
+    add_sanitize_query: bool,
+}
+
+/// Determine whether the given URL has a media file extension.
+/// Also check if `sanitize=true` must be added to the query string,
+/// which is required to load SVGs properly from GitHub.
+fn is_media_url(url: &str) -> MediaUrl {
+    Path::new(url)
+        .extension()
+        .and_then(std::ffi::OsStr::to_str)
+        .map_or(
+            MediaUrl {
+                is_media: false,
+                add_sanitize_query: false,
+            },
+            |e| match e {
+                "svg" => MediaUrl {
+                    is_media: true,
+                    add_sanitize_query: true,
+                },
+                "png" | "jpg" | "jpeg" | "gif" | "mp4" | "webm" | "ogg" | "webp" => MediaUrl {
+                    is_media: true,
+                    add_sanitize_query: false,
+                },
+                _ => MediaUrl {
+                    is_media: false,
+                    add_sanitize_query: false,
+                },
+            },
+        )
+}
+
+impl UrlRelativeEvaluate for SanitizeUrl {
+    fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>> {
+        if url.starts_with('#') {
+            // Always allow fragment URLs.
+            return Some(Cow::Borrowed(url));
+        }
+        self.base_url.as_ref().map(|base_url| {
+            let mut new_url = base_url.clone();
+            // Assumes GitHub’s URL scheme. GitHub renders text and markdown
+            // better in the "blob" view, but images need to be served raw.
+            let MediaUrl {
+                is_media,
+                add_sanitize_query,
+            } = is_media_url(url);
+            new_url += if is_media { "raw/HEAD" } else { "blob/HEAD" };
+            if !self.base_dir.is_empty() {
+                new_url += "/";
+                new_url += &self.base_dir;
+            }
+            if !url.starts_with('/') {
+                new_url.push('/');
+            }
+            new_url += url;
+            if add_sanitize_query {
+                if let Ok(mut parsed_url) = Url::parse(&new_url) {
+                    parsed_url.query_pairs_mut().append_pair("sanitize", "true");
+                    new_url = parsed_url.into();
+                }
+            }
+            Cow::Owned(new_url)
+        })
+    }
+}
+
+/// Renders Markdown text to sanitized HTML with a given `base_url`.
+/// See `readme_to_html` for the interpretation of `base_url`.
+fn markdown_to_html(text: &str, base_url: Option<&str>, base_dir: &str) -> String {
+    let renderer = MarkdownRenderer::new(base_url, base_dir);
+    renderer.to_html(text)
+}
+
+/// Any readme with a filename ending in one of these extensions will be rendered as Markdown.
+/// Note we also render a readme as Markdown if _no_ extension is on the filename.
+static MARKDOWN_EXTENSIONS: [&str; 7] =
+    ["md", "markdown", "mdown", "mdwn", "mkd", "mkdn", "mkdown"];
+
+/// Renders a readme to sanitized HTML.  An appropriate rendering method is chosen depending
+/// on the extension of the supplied `filename`.
+///
+/// The returned text will not contain any harmful HTML tag or attribute (such as iframe,
+/// onclick, onmouseover, etc.).
+///
+/// The `base_url` parameter will be used as the base for any relative links found in the
+/// Markdown, as long as its host part is github.com, gitlab.com, or bitbucket.org.  The
+/// supplied URL will be used as a directory base whether or not the relative link is
+/// prefixed with '/'.  If `None` is passed, relative links will be omitted.
+///
+/// # Examples
+///
+/// ```
+/// use cio_markdown::readme_to_html;
+///
+/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!";
+/// let rendered = readme_to_html(text, "README.md", None);
+/// ```
+pub fn readme_to_html(text: &str, readme_path: &str, base_url: Option<&str>) -> String {
+    let readme_path = Path::new(readme_path);
+    let readme_dir = readme_path.parent().and_then(|p| p.to_str()).unwrap_or("");
+
+    if readme_path.extension().is_none() {
+        return markdown_to_html(text, base_url, readme_dir);
+    }
+
+    if let Some(ext) = readme_path.extension().and_then(|ext| ext.to_str()) {
+        if MARKDOWN_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
+            return markdown_to_html(text, base_url, readme_dir);
+        }
+    }
+
+    encode_minimal(text).replace("\n", "<br>\n")
+}
+
+/// Helper function to build a new `HashSet` from the items slice.
+fn hashset<T>(items: &[T]) -> std::collections::HashSet<T>
+where
+    T: Clone + Eq + std::hash::Hash,
+{
+    items.iter().cloned().collect()
+}
+
+/// Helper function to build a new `HashMap` from a slice of key-value pairs.
+fn hashmap<K, V>(items: &[(K, V)]) -> std::collections::HashMap<K, V>
+where
+    K: Clone + Eq + std::hash::Hash,
+    V: Clone,
+{
+    items.iter().cloned().collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn empty_text() {
+        let text = "";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(result, "");
+    }
+
+    #[test]
+    fn text_with_script_tag() {
+        let text = "foo_readme\n\n<script>alert('Hello World')</script>";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(
+            result,
+            "<p>foo_readme</p>\n&lt;script&gt;alert(\'Hello World\')&lt;/script&gt;\n"
+        );
+    }
+
+    #[test]
+    fn text_with_iframe_tag() {
+        let text = "foo_readme\n\n<iframe>alert('Hello World')</iframe>";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(
+            result,
+            "<p>foo_readme</p>\n&lt;iframe&gt;alert(\'Hello World\')&lt;/iframe&gt;\n"
+        );
+    }
+
+    #[test]
+    fn text_with_unknown_tag() {
+        let text = "foo_readme\n\n<unknown>alert('Hello World')</unknown>";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(result, "<p>foo_readme</p>\n<p>alert(\'Hello World\')</p>\n");
+    }
+
+    #[test]
+    fn text_with_inline_javascript() {
+        let text = r#"foo_readme\n\n<a href="https://crates.io/crates/cargo-registry" onclick="window.alert('Got you')">Crate page</a>"#;
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(
+            result,
+            "<p>foo_readme\\n\\n<a href=\"https://crates.io/crates/cargo-registry\" rel=\"nofollow noopener noreferrer\">Crate page</a></p>\n"
+        );
+    }
+
+    // See https://github.com/kivikakk/comrak/issues/37. This panic happened
+    // in comrak 0.1.8 but was fixed in 0.1.9.
+    #[test]
+    fn text_with_fancy_single_quotes() {
+        let text = "wb’";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(result, "<p>wb’</p>\n");
+    }
+
+    #[test]
+    fn code_block_with_syntax_highlighting() {
+        let code_block = r#"```rust \
+                            println!("Hello World"); \
+                           ```"#;
+        let result = markdown_to_html(code_block, None, "");
+        assert!(result.contains("<code class=\"language-rust\">"));
+    }
+
+    #[test]
+    fn code_block_with_syntax_highlighting_even_if_annot_has_no_run() {
+        let code_block = r#"```rust  ,  no_run \
+                            println!("Hello World"); \
+                           ```"#;
+        let result = markdown_to_html(code_block, None, "");
+        assert!(result.contains("<code class=\"language-rust\">"));
+    }
+
+    #[test]
+    fn text_with_forbidden_class_attribute() {
+        let text = "<p class='bad-class'>Hello World!</p>";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(result, "<p>Hello World!</p>\n");
+    }
+
+    #[test]
+    fn relative_links() {
+        let absolute = "[hi](/hi)";
+        let relative = "[there](there)";
+        let image = "![alt](img.png)";
+        let html_image = "<img src=\"img.png\" alt=\"alt\">";
+        let svg = "![alt](sanitize.svg)";
+
+        for host in &["github.com", "gitlab.com", "bitbucket.org"] {
+            for (&extra_slash, &dot_git) in [true, false].iter().zip(&[true, false]) {
+                let url = format!(
+                    "https://{}/rust-lang/test{}{}",
+                    host,
+                    if dot_git { ".git" } else { "" },
+                    if extra_slash { "/" } else { "" },
+                );
+
+                let result = markdown_to_html(absolute, Some(&url), "");
+                assert_eq!(
+                    result,
+                    format!(
+                        "<p><a href=\"https://{}/rust-lang/test/blob/HEAD/hi\" rel=\"nofollow noopener noreferrer\">hi</a></p>\n",
+                        host
+                    )
+                );
+
+                let result = markdown_to_html(relative, Some(&url), "");
+                assert_eq!(
+                    result,
+                    format!(
+                        "<p><a href=\"https://{}/rust-lang/test/blob/HEAD/there\" rel=\"nofollow noopener noreferrer\">there</a></p>\n",
+                        host
+                    )
+                );
+
+                let result = markdown_to_html(image, Some(&url), "");
+                assert_eq!(
+                    result,
+                    format!(
+                        "<p><img src=\"https://{}/rust-lang/test/raw/HEAD/img.png\" alt=\"alt\"></p>\n",
+                        host
+                    )
+                );
+
+                let result = markdown_to_html(html_image, Some(&url), "");
+                assert_eq!(
+                    result,
+                    format!(
+                        "<img src=\"https://{}/rust-lang/test/raw/HEAD/img.png\" alt=\"alt\">\n",
+                        host
+                    )
+                );
+
+                let result = markdown_to_html(svg, Some(&url), "");
+                assert_eq!(
+                    result,
+                    format!(
+                        "<p><img src=\"https://{}/rust-lang/test/raw/HEAD/sanitize.svg?sanitize=true\" alt=\"alt\"></p>\n",
+                        host
+                    )
+                );
+
+                let result = markdown_to_html(svg, Some(&url), "subdir");
+                assert_eq!(
+                    result,
+                    format!(
+                        "<p><img src=\"https://{}/rust-lang/test/raw/HEAD/subdir/sanitize.svg?sanitize=true\" alt=\"alt\"></p>\n",
+                        host
+                    )
+                );
+
+                let result = markdown_to_html(svg, Some(&url), "subdir1/subdir2");
+                assert_eq!(
+                    result,
+                    format!(
+                        "<p><img src=\"https://{}/rust-lang/test/raw/HEAD/subdir1/subdir2/sanitize.svg?sanitize=true\" alt=\"alt\"></p>\n",
+                        host
+                    )
+                );
+            }
+        }
+
+        let result = markdown_to_html(absolute, Some("https://google.com/"), "");
+        assert_eq!(
+            result,
+            "<p><a rel=\"nofollow noopener noreferrer\">hi</a></p>\n"
+        );
+    }
+
+    #[test]
+    fn absolute_links_dont_get_resolved() {
+        let readme_text =
+            "[![Crates.io](https://img.shields.io/crates/v/clap.svg)](https://crates.io/crates/clap)";
+        let repository = "https://github.com/kbknapp/clap-rs/";
+        let result = markdown_to_html(readme_text, Some(repository), "");
+
+        assert_eq!(
+            result,
+            "<p><a href=\"https://crates.io/crates/clap\" rel=\"nofollow noopener noreferrer\"><img src=\"https://img.shields.io/crates/v/clap.svg\" alt=\"Crates.io\"></a></p>\n"
+        );
+    }
+
+    #[test]
+    fn readme_to_html_renders_markdown() {
+        for f in &[
+            "README",
+            "readme.md",
+            "README.MARKDOWN",
+            "whatever.mkd",
+            "s/readme.md",
+            "s1/s2/readme.md",
+        ] {
+            assert_eq!(
+                readme_to_html("*lobster*", f, None),
+                "<p><em>lobster</em></p>\n"
+            );
+        }
+
+        assert_eq!(
+            readme_to_html("*[lobster](docs/lobster)*", "readme.md", Some("https://github.com/rust-lang/test")),
+            "<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
+        );
+        assert_eq!(
+            readme_to_html("*[lobster](docs/lobster)*", "s/readme.md", Some("https://github.com/rust-lang/test")),
+            "<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
+        );
+        assert_eq!(
+            readme_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test")),
+            "<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
+        );
+    }
+
+    #[test]
+    fn readme_to_html_renders_other_things() {
+        for f in &["readme.exe", "readem.org", "blah.adoc"] {
+            assert_eq!(
+                readme_to_html("<script>lobster</script>\n\nis my friend\n", f, None),
+                "&lt;script&gt;lobster&lt;/script&gt;<br>\n<br>\nis my friend<br>\n"
+            );
+        }
+    }
+
+    #[test]
+    fn header_has_tags() {
+        let text = "# My crate\n\nHello, world!\n";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(
+            result,
+            "<h1><a href=\"#my-crate\" id=\"user-content-my-crate\" rel=\"nofollow noopener noreferrer\"></a>My crate</h1>\n<p>Hello, world!</p>\n"
+        );
+    }
+
+    #[test]
+    fn manual_anchor_is_sanitized() {
+        let text =
+            "<h1><a href=\"#my-crate\" id=\"my-crate\"></a>My crate</h1>\n<p>Hello, world!</p>\n";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(
+            result,
+            "<h1><a href=\"#my-crate\" id=\"user-content-my-crate\" rel=\"nofollow noopener noreferrer\"></a>My crate</h1>\n<p>Hello, world!</p>\n"
+        );
+    }
+
+    #[test]
+    fn tables_with_rowspan_and_colspan() {
+        let text = "<table><tr><th rowspan=\"1\" colspan=\"2\">Target</th></tr></table>\n";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(
+            result,
+            "<table><tbody><tr><th rowspan=\"1\" colspan=\"2\">Target</th></tr></tbody></table>\n"
+        );
+    }
+
+    #[test]
+    fn text_alignment() {
+        let text = "<h1 align=\"center\">foo-bar</h1>\n<h5 align=\"center\">Hello World!</h5>\n";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(
+            result,
+            "<h1 align=\"center\">foo-bar</h1>\n<h5 align=\"center\">Hello World!</h5>\n"
+        );
+    }
+
+    #[test]
+    fn image_alignment() {
+        let text =
+            "<p align=\"center\"><img src=\"https://img.shields.io/crates/v/clap.svg\" alt=\"\"></p>\n";
+        let result = markdown_to_html(text, None, "");
+        assert_eq!(
+            result,
+            "<p align=\"center\"><img src=\"https://img.shields.io/crates/v/clap.svg\" alt=\"\"></p>\n"
+        );
+    }
+}
diff --git a/src/render.rs b/src/render.rs
index f5383605cbe..3ba19b52321 100644
--- a/src/render.rs
+++ b/src/render.rs
@@ -1,273 +1,11 @@
 //! Render README files to HTML.
 
-use ammonia::{Builder, UrlRelative, UrlRelativeEvaluate};
-use comrak::nodes::{AstNode, NodeValue};
-use htmlescape::encode_minimal;
-use std::borrow::Cow;
-use std::path::Path;
+use cio_markdown::readme_to_html;
 use swirl::PerformError;
-use url::Url;
 
 use crate::background_jobs::Environment;
 use crate::models::Version;
 
-/// Context for markdown to HTML rendering.
-struct MarkdownRenderer<'a> {
-    html_sanitizer: Builder<'a>,
-}
-
-impl<'a> MarkdownRenderer<'a> {
-    /// Creates a new renderer instance.
-    ///
-    /// Per `readme_to_html`, `base_url` is the base URL prepended to any
-    /// relative links in the input document.  See that function for more detail.
-    fn new(base_url: Option<&'a str>, base_dir: &'a str) -> MarkdownRenderer<'a> {
-        let allowed_classes = hashmap(&[(
-            "code",
-            hashset(&[
-                "language-bash",
-                "language-clike",
-                "language-glsl",
-                "language-go",
-                "language-ini",
-                "language-javascript",
-                "language-json",
-                "language-markup",
-                "language-protobuf",
-                "language-ruby",
-                "language-rust",
-                "language-scss",
-                "language-sql",
-                "language-toml",
-                "language-yaml",
-            ]),
-        )]);
-        let sanitize_url = UrlRelative::Custom(Box::new(SanitizeUrl::new(base_url, base_dir)));
-
-        let mut html_sanitizer = Builder::default();
-        html_sanitizer
-            .add_tags(&["input"])
-            .link_rel(Some("nofollow noopener noreferrer"))
-            .add_generic_attributes(&["align"])
-            .add_tag_attributes("a", &["id", "target"])
-            .add_tag_attributes("input", &["checked", "disabled", "type"])
-            .allowed_classes(allowed_classes)
-            .url_relative(sanitize_url)
-            .id_prefix(Some("user-content-"));
-        MarkdownRenderer { html_sanitizer }
-    }
-
-    /// Renders the given markdown to HTML using the current settings.
-    fn to_html(&self, text: &str) -> String {
-        use comrak::{
-            format_html, parse_document, Arena, ComrakExtensionOptions, ComrakOptions,
-            ComrakRenderOptions,
-        };
-
-        let options = ComrakOptions {
-            render: ComrakRenderOptions {
-                unsafe_: true, // The output will be sanitized with `ammonia`
-                ..ComrakRenderOptions::default()
-            },
-            extension: ComrakExtensionOptions {
-                autolink: true,
-                strikethrough: true,
-                table: true,
-                tagfilter: true,
-                tasklist: true,
-                header_ids: Some("user-content-".to_string()),
-                ..ComrakExtensionOptions::default()
-            },
-            ..ComrakOptions::default()
-        };
-
-        let arena = Arena::new();
-        let root = parse_document(&arena, text, &options);
-
-        // Tweak annotations of code blocks.
-        iter_nodes(root, &|node| {
-            if let NodeValue::CodeBlock(ref mut ncb) = node.data.borrow_mut().value {
-                // If annot includes invalid UTF-8 char, do nothing.
-                if let Ok(mut orig_annot) = String::from_utf8(ncb.info.to_vec()) {
-                    // Ignore characters after a comma for syntax highlighting to work correctly.
-                    if let Some(offset) = orig_annot.find(',') {
-                        let _ = orig_annot.drain(offset..orig_annot.len());
-                        ncb.info = orig_annot.as_bytes().to_vec();
-                    }
-                }
-            }
-        });
-
-        let mut html = Vec::new();
-        format_html(root, &options, &mut html).unwrap();
-        let rendered = String::from_utf8(html).unwrap();
-        self.html_sanitizer.clean(&rendered).to_string()
-    }
-}
-
-/// Iterate the nodes in the CommonMark AST, used in comrak.
-fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F)
-where
-    F: Fn(&'a AstNode<'a>),
-{
-    f(node);
-    for c in node.children() {
-        iter_nodes(c, f);
-    }
-}
-
-/// Add trailing slash and remove `.git` suffix of base URL.
-fn canon_base_url(mut base_url: String) -> String {
-    if !base_url.ends_with('/') {
-        base_url.push('/');
-    }
-    if base_url.ends_with(".git/") {
-        let offset = base_url.len() - 5;
-        base_url.drain(offset..offset + 4);
-    }
-    base_url
-}
-
-/// Sanitize relative URLs in README files.
-struct SanitizeUrl {
-    base_url: Option<String>,
-    base_dir: String,
-}
-
-impl SanitizeUrl {
-    fn new(base_url: Option<&str>, base_dir: &str) -> Self {
-        let base_url = base_url
-            .and_then(|base_url| Url::parse(base_url).ok())
-            .and_then(|url| match url.host_str() {
-                Some("github.com") | Some("gitlab.com") | Some("bitbucket.org") => {
-                    Some(canon_base_url(url.into()))
-                }
-                _ => None,
-            });
-        Self {
-            base_url,
-            base_dir: base_dir.to_owned(),
-        }
-    }
-}
-
-/// Groups media-related URL info
-struct MediaUrl {
-    is_media: bool,
-    add_sanitize_query: bool,
-}
-
-/// Determine whether the given URL has a media file extension.
-/// Also check if `sanitize=true` must be added to the query string,
-/// which is required to load SVGs properly from GitHub.
-fn is_media_url(url: &str) -> MediaUrl {
-    Path::new(url)
-        .extension()
-        .and_then(std::ffi::OsStr::to_str)
-        .map_or(
-            MediaUrl {
-                is_media: false,
-                add_sanitize_query: false,
-            },
-            |e| match e {
-                "svg" => MediaUrl {
-                    is_media: true,
-                    add_sanitize_query: true,
-                },
-                "png" | "jpg" | "jpeg" | "gif" | "mp4" | "webm" | "ogg" | "webp" => MediaUrl {
-                    is_media: true,
-                    add_sanitize_query: false,
-                },
-                _ => MediaUrl {
-                    is_media: false,
-                    add_sanitize_query: false,
-                },
-            },
-        )
-}
-
-impl UrlRelativeEvaluate for SanitizeUrl {
-    fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>> {
-        if url.starts_with('#') {
-            // Always allow fragment URLs.
-            return Some(Cow::Borrowed(url));
-        }
-        self.base_url.as_ref().map(|base_url| {
-            let mut new_url = base_url.clone();
-            // Assumes GitHub’s URL scheme. GitHub renders text and markdown
-            // better in the "blob" view, but images need to be served raw.
-            let MediaUrl {
-                is_media,
-                add_sanitize_query,
-            } = is_media_url(url);
-            new_url += if is_media { "raw/HEAD" } else { "blob/HEAD" };
-            if !self.base_dir.is_empty() {
-                new_url += "/";
-                new_url += &self.base_dir;
-            }
-            if !url.starts_with('/') {
-                new_url.push('/');
-            }
-            new_url += url;
-            if add_sanitize_query {
-                if let Ok(mut parsed_url) = Url::parse(&new_url) {
-                    parsed_url.query_pairs_mut().append_pair("sanitize", "true");
-                    new_url = parsed_url.into();
-                }
-            }
-            Cow::Owned(new_url)
-        })
-    }
-}
-
-/// Renders Markdown text to sanitized HTML with a given `base_url`.
-/// See `readme_to_html` for the interpretation of `base_url`.
-fn markdown_to_html(text: &str, base_url: Option<&str>, base_dir: &str) -> String {
-    let renderer = MarkdownRenderer::new(base_url, base_dir);
-    renderer.to_html(text)
-}
-
-/// Any readme with a filename ending in one of these extensions will be rendered as Markdown.
-/// Note we also render a readme as Markdown if _no_ extension is on the filename.
-static MARKDOWN_EXTENSIONS: [&str; 7] =
-    ["md", "markdown", "mdown", "mdwn", "mkd", "mkdn", "mkdown"];
-
-/// Renders a readme to sanitized HTML.  An appropriate rendering method is chosen depending
-/// on the extension of the supplied `filename`.
-///
-/// The returned text will not contain any harmful HTML tag or attribute (such as iframe,
-/// onclick, onmouseover, etc.).
-///
-/// The `base_url` parameter will be used as the base for any relative links found in the
-/// Markdown, as long as its host part is github.com, gitlab.com, or bitbucket.org.  The
-/// supplied URL will be used as a directory base whether or not the relative link is
-/// prefixed with '/'.  If `None` is passed, relative links will be omitted.
-///
-/// # Examples
-///
-/// ```
-/// use render::render_to_html;
-///
-/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!";
-/// let rendered = readme_to_html(text, "README.md", None)?;
-/// ```
-pub fn readme_to_html(text: &str, readme_path: &str, base_url: Option<&str>) -> String {
-    let readme_path = Path::new(readme_path);
-    let readme_dir = readme_path.parent().and_then(|p| p.to_str()).unwrap_or("");
-
-    if readme_path.extension().is_none() {
-        return markdown_to_html(text, base_url, readme_dir);
-    }
-
-    if let Some(ext) = readme_path.extension().and_then(|ext| ext.to_str()) {
-        if MARKDOWN_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
-            return markdown_to_html(text, base_url, readme_dir);
-        }
-    }
-
-    encode_minimal(text).replace("\n", "<br>\n")
-}
-
 #[swirl::background_job]
 pub fn render_and_upload_readme(
     conn: &PgConnection,
@@ -294,297 +32,3 @@ pub fn render_and_upload_readme(
         Ok(())
     })
 }
-
-/// Helper function to build a new `HashSet` from the items slice.
-fn hashset<T>(items: &[T]) -> std::collections::HashSet<T>
-where
-    T: Clone + Eq + std::hash::Hash,
-{
-    items.iter().cloned().collect()
-}
-
-/// Helper function to build a new `HashMap` from a slice of key-value pairs.
-fn hashmap<K, V>(items: &[(K, V)]) -> std::collections::HashMap<K, V>
-where
-    K: Clone + Eq + std::hash::Hash,
-    V: Clone,
-{
-    items.iter().cloned().collect()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn empty_text() {
-        let text = "";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(result, "");
-    }
-
-    #[test]
-    fn text_with_script_tag() {
-        let text = "foo_readme\n\n<script>alert('Hello World')</script>";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(
-            result,
-            "<p>foo_readme</p>\n&lt;script&gt;alert(\'Hello World\')&lt;/script&gt;\n"
-        );
-    }
-
-    #[test]
-    fn text_with_iframe_tag() {
-        let text = "foo_readme\n\n<iframe>alert('Hello World')</iframe>";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(
-            result,
-            "<p>foo_readme</p>\n&lt;iframe&gt;alert(\'Hello World\')&lt;/iframe&gt;\n"
-        );
-    }
-
-    #[test]
-    fn text_with_unknown_tag() {
-        let text = "foo_readme\n\n<unknown>alert('Hello World')</unknown>";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(result, "<p>foo_readme</p>\n<p>alert(\'Hello World\')</p>\n");
-    }
-
-    #[test]
-    fn text_with_inline_javascript() {
-        let text = r#"foo_readme\n\n<a href="https://crates.io/crates/cargo-registry" onclick="window.alert('Got you')">Crate page</a>"#;
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(
-            result,
-            "<p>foo_readme\\n\\n<a href=\"https://crates.io/crates/cargo-registry\" rel=\"nofollow noopener noreferrer\">Crate page</a></p>\n"
-        );
-    }
-
-    // See https://github.com/kivikakk/comrak/issues/37. This panic happened
-    // in comrak 0.1.8 but was fixed in 0.1.9.
-    #[test]
-    fn text_with_fancy_single_quotes() {
-        let text = "wb’";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(result, "<p>wb’</p>\n");
-    }
-
-    #[test]
-    fn code_block_with_syntax_highlighting() {
-        let code_block = r#"```rust \
-                            println!("Hello World"); \
-                           ```"#;
-        let result = markdown_to_html(code_block, None, "");
-        assert!(result.contains("<code class=\"language-rust\">"));
-    }
-
-    #[test]
-    fn code_block_with_syntax_highlighting_even_if_annot_has_no_run() {
-        let code_block = r#"```rust  ,  no_run \
-                            println!("Hello World"); \
-                           ```"#;
-        let result = markdown_to_html(code_block, None, "");
-        assert!(result.contains("<code class=\"language-rust\">"));
-    }
-
-    #[test]
-    fn text_with_forbidden_class_attribute() {
-        let text = "<p class='bad-class'>Hello World!</p>";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(result, "<p>Hello World!</p>\n");
-    }
-
-    #[test]
-    fn relative_links() {
-        let absolute = "[hi](/hi)";
-        let relative = "[there](there)";
-        let image = "![alt](img.png)";
-        let html_image = "<img src=\"img.png\" alt=\"alt\">";
-        let svg = "![alt](sanitize.svg)";
-
-        for host in &["github.com", "gitlab.com", "bitbucket.org"] {
-            for (&extra_slash, &dot_git) in [true, false].iter().zip(&[true, false]) {
-                let url = format!(
-                    "https://{}/rust-lang/test{}{}",
-                    host,
-                    if dot_git { ".git" } else { "" },
-                    if extra_slash { "/" } else { "" },
-                );
-
-                let result = markdown_to_html(absolute, Some(&url), "");
-                assert_eq!(
-                    result,
-                    format!(
-                        "<p><a href=\"https://{}/rust-lang/test/blob/HEAD/hi\" rel=\"nofollow noopener noreferrer\">hi</a></p>\n",
-                        host
-                    )
-                );
-
-                let result = markdown_to_html(relative, Some(&url), "");
-                assert_eq!(
-                    result,
-                    format!(
-                        "<p><a href=\"https://{}/rust-lang/test/blob/HEAD/there\" rel=\"nofollow noopener noreferrer\">there</a></p>\n",
-                        host
-                    )
-                );
-
-                let result = markdown_to_html(image, Some(&url), "");
-                assert_eq!(
-                    result,
-                    format!(
-                        "<p><img src=\"https://{}/rust-lang/test/raw/HEAD/img.png\" alt=\"alt\"></p>\n",
-                        host
-                    )
-                );
-
-                let result = markdown_to_html(html_image, Some(&url), "");
-                assert_eq!(
-                    result,
-                    format!(
-                        "<img src=\"https://{}/rust-lang/test/raw/HEAD/img.png\" alt=\"alt\">\n",
-                        host
-                    )
-                );
-
-                let result = markdown_to_html(svg, Some(&url), "");
-                assert_eq!(
-                    result,
-                    format!(
-                        "<p><img src=\"https://{}/rust-lang/test/raw/HEAD/sanitize.svg?sanitize=true\" alt=\"alt\"></p>\n",
-                        host
-                    )
-                );
-
-                let result = markdown_to_html(svg, Some(&url), "subdir");
-                assert_eq!(
-                    result,
-                    format!(
-                        "<p><img src=\"https://{}/rust-lang/test/raw/HEAD/subdir/sanitize.svg?sanitize=true\" alt=\"alt\"></p>\n",
-                        host
-                    )
-                );
-
-                let result = markdown_to_html(svg, Some(&url), "subdir1/subdir2");
-                assert_eq!(
-                    result,
-                    format!(
-                        "<p><img src=\"https://{}/rust-lang/test/raw/HEAD/subdir1/subdir2/sanitize.svg?sanitize=true\" alt=\"alt\"></p>\n",
-                        host
-                    )
-                );
-            }
-        }
-
-        let result = markdown_to_html(absolute, Some("https://google.com/"), "");
-        assert_eq!(
-            result,
-            "<p><a rel=\"nofollow noopener noreferrer\">hi</a></p>\n"
-        );
-    }
-
-    #[test]
-    fn absolute_links_dont_get_resolved() {
-        let readme_text =
-            "[![Crates.io](https://img.shields.io/crates/v/clap.svg)](https://crates.io/crates/clap)";
-        let repository = "https://github.com/kbknapp/clap-rs/";
-        let result = markdown_to_html(readme_text, Some(repository), "");
-
-        assert_eq!(
-            result,
-            "<p><a href=\"https://crates.io/crates/clap\" rel=\"nofollow noopener noreferrer\"><img src=\"https://img.shields.io/crates/v/clap.svg\" alt=\"Crates.io\"></a></p>\n"
-        );
-    }
-
-    #[test]
-    fn readme_to_html_renders_markdown() {
-        for f in &[
-            "README",
-            "readme.md",
-            "README.MARKDOWN",
-            "whatever.mkd",
-            "s/readme.md",
-            "s1/s2/readme.md",
-        ] {
-            assert_eq!(
-                readme_to_html("*lobster*", f, None),
-                "<p><em>lobster</em></p>\n"
-            );
-        }
-
-        assert_eq!(
-            readme_to_html("*[lobster](docs/lobster)*", "readme.md", Some("https://github.com/rust-lang/test")),
-            "<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
-        );
-        assert_eq!(
-            readme_to_html("*[lobster](docs/lobster)*", "s/readme.md", Some("https://github.com/rust-lang/test")),
-            "<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
-        );
-        assert_eq!(
-            readme_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test")),
-            "<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
-        );
-    }
-
-    #[test]
-    fn readme_to_html_renders_other_things() {
-        for f in &["readme.exe", "readem.org", "blah.adoc"] {
-            assert_eq!(
-                readme_to_html("<script>lobster</script>\n\nis my friend\n", f, None),
-                "&lt;script&gt;lobster&lt;/script&gt;<br>\n<br>\nis my friend<br>\n"
-            );
-        }
-    }
-
-    #[test]
-    fn header_has_tags() {
-        let text = "# My crate\n\nHello, world!\n";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(
-            result,
-            "<h1><a href=\"#my-crate\" id=\"user-content-my-crate\" rel=\"nofollow noopener noreferrer\"></a>My crate</h1>\n<p>Hello, world!</p>\n"
-        );
-    }
-
-    #[test]
-    fn manual_anchor_is_sanitized() {
-        let text =
-            "<h1><a href=\"#my-crate\" id=\"my-crate\"></a>My crate</h1>\n<p>Hello, world!</p>\n";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(
-            result,
-            "<h1><a href=\"#my-crate\" id=\"user-content-my-crate\" rel=\"nofollow noopener noreferrer\"></a>My crate</h1>\n<p>Hello, world!</p>\n"
-        );
-    }
-
-    #[test]
-    fn tables_with_rowspan_and_colspan() {
-        let text = "<table><tr><th rowspan=\"1\" colspan=\"2\">Target</th></tr></table>\n";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(
-            result,
-            "<table><tbody><tr><th rowspan=\"1\" colspan=\"2\">Target</th></tr></tbody></table>\n"
-        );
-    }
-
-    #[test]
-    fn text_alignment() {
-        let text = "<h1 align=\"center\">foo-bar</h1>\n<h5 align=\"center\">Hello World!</h5>\n";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(
-            result,
-            "<h1 align=\"center\">foo-bar</h1>\n<h5 align=\"center\">Hello World!</h5>\n"
-        );
-    }
-
-    #[test]
-    fn image_alignment() {
-        let text =
-            "<p align=\"center\"><img src=\"https://img.shields.io/crates/v/clap.svg\" alt=\"\"></p>\n";
-        let result = markdown_to_html(text, None, "");
-        assert_eq!(
-            result,
-            "<p align=\"center\"><img src=\"https://img.shields.io/crates/v/clap.svg\" alt=\"\"></p>\n"
-        );
-    }
-}