From 0217cf19b3ba5c974f14ac70fdbc722b705a591d Mon Sep 17 00:00:00 2001
From: Philip Sampaio
Date: Wed, 24 Jan 2024 20:37:00 -0300
Subject: [PATCH] Improve benchmark files (#528)

* Add benchee_html reporter for Benchee

* Prepare to receive more benchmark files

* Add example of bench file

* Improve benchmark files and add new ones
---
Reviewer notes (commentary only; not part of the commit message or the diff):

* All four scripts under benchs/ gain the same preamble: `tag` is the output
  of `git describe --tags` with trailing whitespace trimmed, or "unknown"
  when the command exits with a non-zero status.
* Each Benchee.run call now passes `save:` with the path
  benchs/results/<name>-<tag>.benchee and the same tag.
* After the run, each script globs benchs/results/<name>-*.benchee. When more
  than one file matches and Benchee.report/1 is exported, the saved runs are
  loaded and rendered with the Console and HTML formatters; the HTML goes to
  benchs/results/<name>.html with auto_open enabled.
* finder.exs and raw_html.exs parse each input file with
  Floki.parse_document!/1 before benchmarking, so the benchmarked functions
  receive the parsed document rather than the raw file contents.
* parse_document.exs switches the benchmarked calls from Floki.parse_document
  to Floki.parse_document!; tokenizers.exs lowers time from 20 to 10 and
  memory_time from 4 to 2.
* NOTE(review): System.cmd is called without `stderr_to_stdout`, so on failure
  `error` presumably holds only git's stdout. Confirm whether the message is
  expected to include git's stderr text.
* NOTE(review): the save/report paths are relative ("benchs/results/..."),
  while the input HTML files are resolved from `__ENV__.file`. Presumably the
  scripts are meant to be run from the repository root, as in the
  `mix run benchs/tokenizers.exs` comment. Confirm.

 .formatter.exs            |  1 +
 .gitignore                |  2 ++
 benchs/finder.exs         | 54 +++++++++++++++++++++++++++++++++++++++
 benchs/parse_document.exs | 35 ++++++++++++++++++++++---
 benchs/raw_html.exs       | 51 ++++++++++++++++++++++++++++++++++++
 benchs/tokenizers.exs     | 33 ++++++++++++++++++++++--
 mix.exs                   |  1 +
 mix.lock                  |  2 ++
 8 files changed, 174 insertions(+), 5 deletions(-)
 create mode 100644 benchs/finder.exs
 create mode 100644 benchs/raw_html.exs

diff --git a/.formatter.exs b/.formatter.exs
index 8632f811..6f926492 100644
--- a/.formatter.exs
+++ b/.formatter.exs
@@ -2,6 +2,7 @@
   inputs: [
     "lib/**/*.{ex,exs}",
     "test/**/*.{ex,exs}",
+    "benchs/*.exs",
     "mix.exs"
   ]
 ]
diff --git a/.gitignore b/.gitignore
index 093fef8f..9c526431 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,5 @@ erl_crash.dump
 /priv/plts/*.plt
 /priv/plts/*.plt.hash
 /benchs/*.html
+/benchs/assets
+/benchs/results
diff --git a/benchs/finder.exs b/benchs/finder.exs
new file mode 100644
index 00000000..7299de63
--- /dev/null
+++ b/benchs/finder.exs
@@ -0,0 +1,54 @@
+tag =
+  case System.cmd("git", ["describe", "--tags"]) do
+    {reference, 0} ->
+      String.trim_trailing(reference)
+
+    {error, _other} ->
+      IO.puts("cannot get human readable name from git: #{inspect(error)}")
+      "unknown"
+  end
+
+IO.puts("tag in use for this benchmark is: #{tag}")
+
+read_file = fn name ->
+  __ENV__.file
+  |> Path.dirname()
+  |> Path.join(name)
+  |> File.read!()
+  |> Floki.parse_document!()
+end
+
+inputs = %{
+  "big" => read_file.("big.html"),
+  "medium" => read_file.("medium.html"),
+  "small" => read_file.("small.html")
+}
+
+Benchee.run(
+  %{
+    "class" => fn doc -> Floki.find(doc, ".class-mw-redirect") end,
+    "class multiple" => fn doc -> Floki.find(doc, ".class-mw-redirect, .reference") end,
+    "tag name (type)" => fn doc -> Floki.find(doc, "a") end,
+    "id" => fn doc -> Floki.find(doc, "#cite_note-15") end
+  },
+  time: 10,
+  inputs: inputs,
+  save: [path: "benchs/results/finder-#{tag}.benchee", tag: tag],
+  memory_time: 2
+)
+
+results = Path.wildcard("benchs/results/finder-*.benchee")
+
+if Enum.count(results) > 1 and function_exported?(Benchee, :report, 1) do
+  html_path = "benchs/results/finder.html"
+
+  Benchee.report(
+    load: results,
+    formatters: [
+      Benchee.Formatters.Console,
+      {Benchee.Formatters.HTML, file: html_path, auto_open: true}
+    ]
+  )
+
+  IO.puts("open the HTML version in: #{html_path}")
+end
diff --git a/benchs/parse_document.exs b/benchs/parse_document.exs
index 542d483b..cd0e9c8a 100644
--- a/benchs/parse_document.exs
+++ b/benchs/parse_document.exs
@@ -1,3 +1,15 @@
+tag =
+  case System.cmd("git", ["describe", "--tags"]) do
+    {reference, 0} ->
+      String.trim_trailing(reference)
+
+    {error, _other} ->
+      IO.puts("cannot get human readable name from git: #{inspect(error)}")
+      "unknown"
+  end
+
+IO.puts("tag in use for this benchmark is: #{tag}")
+
 read_file = fn name ->
   __ENV__.file
   |> Path.dirname()
@@ -15,15 +27,32 @@ Application.ensure_all_started(:fast_html)
 
 Benchee.run(
   %{
-    "mochiweb" => fn input -> Floki.parse_document(input) end,
+    "mochiweb" => fn input -> Floki.parse_document!(input) end,
     "html5ever" => fn input ->
-      Floki.parse_document(input, html_parser: Floki.HTMLParser.Html5ever)
+      Floki.parse_document!(input, html_parser: Floki.HTMLParser.Html5ever)
     end,
     "fast_html" => fn input ->
-      Floki.parse_document(input, html_parser: Floki.HTMLParser.FastHtml)
+      Floki.parse_document!(input, html_parser: Floki.HTMLParser.FastHtml)
     end
   },
   time: 10,
   inputs: inputs,
+  save: [path: "benchs/results/parse-document-#{tag}.benchee", tag: tag],
   memory_time: 2
 )
+
+results = Path.wildcard("benchs/results/parse-document-*.benchee")
+
+if Enum.count(results) > 1 and function_exported?(Benchee, :report, 1) do
+  html_path = "benchs/results/parse-document.html"
+
+  Benchee.report(
+    load: results,
+    formatters: [
+      Benchee.Formatters.Console,
+      {Benchee.Formatters.HTML, file: html_path, auto_open: true}
+    ]
+  )
+
+  IO.puts("open the HTML version in: #{html_path}")
+end
diff --git a/benchs/raw_html.exs b/benchs/raw_html.exs
new file mode 100644
index 00000000..f7058335
--- /dev/null
+++ b/benchs/raw_html.exs
@@ -0,0 +1,51 @@
+tag =
+  case System.cmd("git", ["describe", "--tags"]) do
+    {reference, 0} ->
+      String.trim_trailing(reference)
+
+    {error, _other} ->
+      IO.puts("cannot get human readable name from git: #{inspect(error)}")
+      "unknown"
+  end
+
+IO.puts("tag in use for this benchmark is: #{tag}")
+
+read_file = fn name ->
+  __ENV__.file
+  |> Path.dirname()
+  |> Path.join(name)
+  |> File.read!()
+  |> Floki.parse_document!()
+end
+
+inputs = %{
+  "big" => read_file.("big.html"),
+  "medium" => read_file.("medium.html"),
+  "small" => read_file.("small.html")
+}
+
+Benchee.run(
+  %{
+    "bench" => fn doc -> Floki.raw_html(doc, pretty: true) end
+  },
+  time: 10,
+  inputs: inputs,
+  save: [path: "benchs/results/raw-html-#{tag}.benchee", tag: tag],
+  memory_time: 2
+)
+
+results = Path.wildcard("benchs/results/raw-html-*.benchee")
+
+if Enum.count(results) > 1 and function_exported?(Benchee, :report, 1) do
+  html_path = "benchs/results/raw-html.html"
+
+  Benchee.report(
+    load: results,
+    formatters: [
+      Benchee.Formatters.Console,
+      {Benchee.Formatters.HTML, file: html_path, auto_open: true}
+    ]
+  )
+
+  IO.puts("open the HTML version in: #{html_path}")
+end
diff --git a/benchs/tokenizers.exs b/benchs/tokenizers.exs
index 7b492657..db1ed22f 100644
--- a/benchs/tokenizers.exs
+++ b/benchs/tokenizers.exs
@@ -10,6 +10,18 @@
 # mix run benchs/tokenizers.exs
 #
 
+tag =
+  case System.cmd("git", ["describe", "--tags"]) do
+    {reference, 0} ->
+      String.trim_trailing(reference)
+
+    {error, _other} ->
+      IO.puts("cannot get human readable name from git: #{inspect(error)}")
+      "unknown"
+  end
+
+IO.puts("tag in use for this benchmark is: #{tag}")
+
 read_file = fn name ->
   __ENV__.file
   |> Path.dirname()
@@ -28,7 +40,24 @@ Benchee.run(
     "mochiweb" => fn input -> :floki_mochi_html.tokens(input) end,
     "floki" => fn input -> Floki.HTML.Tokenizer.tokenize(input) end
   },
-  time: 20,
+  time: 10,
   inputs: inputs,
-  memory_time: 4
+  save: [path: "benchs/results/tokenizers-#{tag}.benchee", tag: tag],
+  memory_time: 2
 )
+
+results = Path.wildcard("benchs/results/tokenizers-*.benchee")
+
+if Enum.count(results) > 1 and function_exported?(Benchee, :report, 1) do
+  html_path = "benchs/results/tokenizers.html"
+
+  Benchee.report(
+    load: results,
+    formatters: [
+      Benchee.Formatters.Console,
+      {Benchee.Formatters.HTML, file: html_path, auto_open: true}
+    ]
+  )
+
+  IO.puts("open the HTML version in: #{html_path}")
+end
diff --git a/mix.exs b/mix.exs
index 5794b408..b942a298 100644
--- a/mix.exs
+++ b/mix.exs
@@ -47,6 +47,7 @@ defmodule Floki.Mixfile do
       {:earmark, "~> 1.2", only: :dev},
       {:ex_doc, "~> 0.31.0", only: :dev, runtime: false},
       {:benchee, "~> 1.3.0", only: :dev},
+      {:benchee_html, "~> 1.0", only: :dev},
       {:credo, ">= 0.0.0", only: [:dev, :test]},
       {:dialyxir, "~> 1.0", only: [:dev], runtime: false},
       {:html5ever, ">= 0.8.0", optional: true, only: [:dev, :test]},
diff --git a/mix.lock b/mix.lock
index 6b7c67e2..93f5919d 100644
--- a/mix.lock
+++ b/mix.lock
@@ -1,5 +1,7 @@
 %{
   "benchee": {:hex, :benchee, "1.3.0", "f64e3b64ad3563fa9838146ddefb2d2f94cf5b473bdfd63f5ca4d0657bf96694", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}, {:statistex, "~> 1.0", [hex: :statistex, repo: "hexpm", optional: false]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "34f4294068c11b2bd2ebf2c59aac9c7da26ffa0068afdf3419f1b176e16c5f81"},
+  "benchee_html": {:hex, :benchee_html, "1.0.1", "1e247c0886c3fdb0d3f4b184b653a8d6fb96e4ad0d0389267fe4f36968772e24", [:mix], [{:benchee, ">= 0.99.0 and < 2.0.0", [hex: :benchee, repo: "hexpm", optional: false]}, {:benchee_json, "~> 1.0", [hex: :benchee_json, repo: "hexpm", optional: false]}], "hexpm", "b00a181af7152431901e08f3fc9f7197ed43ff50421a8347b0c80bf45d5b3fef"},
+  "benchee_json": {:hex, :benchee_json, "1.0.0", "cc661f4454d5995c08fe10dd1f2f72f229c8f0fb1c96f6b327a8c8fc96a91fe5", [:mix], [{:benchee, ">= 0.99.0 and < 2.0.0", [hex: :benchee, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "da05d813f9123505f870344d68fb7c86a4f0f9074df7d7b7e2bb011a63ec231c"},
   "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"},
   "castore": {:hex, :castore, "1.0.4", "ff4d0fb2e6411c0479b1d965a814ea6d00e51eb2f58697446e9c41a97d940b28", [:mix], [], "hexpm", "9418c1b8144e11656f0be99943db4caf04612e3eaecefb5dae9a2a87565584f8"},
   "credo": {:hex, :credo, "1.7.3", "05bb11eaf2f2b8db370ecaa6a6bda2ec49b2acd5e0418bc106b73b07128c0436", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "35ea675a094c934c22fb1dca3696f3c31f2728ae6ef5a53b5d648c11180a4535"},