Skip to content

Commit

Permalink
Speed up the lexer for Ruby 3.4+ (#1832)
Browse files Browse the repository at this point in the history
* Speed up lexing

* Bump msrv to 3.0 (from 2.7)

* Normalize test for ruby-head compat

* Fix bug when parsing negative numbers
  • Loading branch information
ianks authored Oct 23, 2024
1 parent b233b3d commit b355378
Show file tree
Hide file tree
Showing 12 changed files with 246 additions and 82 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/liquid.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ jobs:
strategy:
matrix:
entry:
- { ruby: 2.7, allowed-failure: false } # minimum supported
- { ruby: 3.0, allowed-failure: false } # minimum supported
- { ruby: 3.2, allowed-failure: false }
- { ruby: 3.3, allowed-failure: false } # latest
- { ruby: ruby-head, allowed-failure: true }
- { ruby: ruby-head, allowed-failure: false }
name: Test Ruby ${{ matrix.entry.ruby }}
steps:
- uses: actions/checkout@v3
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ pkg
.rvmrc
.bundle
.byebug_history
Gemfile.lock
2 changes: 1 addition & 1 deletion .ruby-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.3.0
3.3.4
75 changes: 0 additions & 75 deletions Gemfile.lock

This file was deleted.

8 changes: 8 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ namespace :benchmark do
task :strict do
ruby "./performance/benchmark.rb strict"
end

desc "Run unit benchmarks"
task :unit do
  # Every file matching performance/unit/*_benchmark.rb is announced and then
  # handed to Rake's `ruby` helper, one after another.
  benchmark_files = Dir.glob("./performance/unit/*_benchmark.rb")

  benchmark_files.each do |benchmark_file|
    puts "🧪 Running #{benchmark_file}"
    ruby(benchmark_file)
  end
end
end

namespace :profile do
Expand Down
156 changes: 155 additions & 1 deletion lib/liquid/lexer.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# frozen_string_literal: true

require "strscan"

module Liquid
class Lexer
class Lexer1
SPECIALS = {
'|' => :pipe,
'.' => :dot,
Expand Down Expand Up @@ -58,4 +59,157 @@ def tokenize
@output << [:end_of_string]
end
end

# Byte-oriented lexer for Liquid expressions.
#
# Instead of trying a list of regular expressions at every position, the
# lexer peeks at the next byte and uses it as an index into precomputed
# lookup tables (plain Arrays indexed by byte value). It relies on
# StringScanner#peek_byte and StringScanner#scan_byte, so it can only be
# used when the loaded strscan provides those methods.
#
# Tokens are two-element arrays of the form [type, text]; the token stream
# always ends with [:end_of_string]. Fixed tokens (punctuation, comparison
# operators) are shared frozen constants and must not be mutated by callers.
class Lexer2
  CLOSE_ROUND = [:close_round, ")"].freeze
  CLOSE_SQUARE = [:close_square, "]"].freeze
  COLON = [:colon, ":"].freeze
  COMMA = [:comma, ","].freeze
  COMPARISON_NOT_EQUAL = [:comparison, "!="].freeze
  # Misspelled name kept as an alias so existing references keep working.
  COMPARISION_NOT_EQUAL = COMPARISON_NOT_EQUAL
  COMPARISON_CONTAINS = [:comparison, "contains"].freeze
  COMPARISON_EQUAL = [:comparison, "=="].freeze
  COMPARISON_GREATER_THAN = [:comparison, ">"].freeze
  COMPARISON_GREATER_THAN_OR_EQUAL = [:comparison, ">="].freeze
  COMPARISON_LESS_THAN = [:comparison, "<"].freeze
  COMPARISON_LESS_THAN_OR_EQUAL = [:comparison, "<="].freeze
  COMPARISON_NOT_EQUAL_ALT = [:comparison, "<>"].freeze
  CONTAINS = /contains(?=\s)/
  DASH = [:dash, "-"].freeze
  DOT = [:dot, "."].freeze
  DOTDOT = [:dotdot, ".."].freeze
  DOT_ORD = ".".ord
  DOUBLE_STRING_LITERAL = /"[^\"]*"/
  EOS = [:end_of_string].freeze
  IDENTIFIER = /[a-zA-Z_][\w-]*\??/
  NUMBER_LITERAL = /-?\d+(\.\d+)?/
  OPEN_ROUND = [:open_round, "("].freeze
  OPEN_SQUARE = [:open_square, "["].freeze
  PIPE = [:pipe, "|"].freeze
  QUESTION = [:question, "?"].freeze
  RUBY_WHITESPACE = [" ", "\t", "\r", "\n", "\f"].freeze
  SINGLE_STRING_LITERAL = /'[^\']*'/
  WHITESPACE_OR_NOTHING = /\s*/

  # First byte of a comparison operator => table of second byte => token.
  # The whitespace entries let "< " / "> " resolve (and consume the
  # whitespace byte) with a single lookup.
  COMPARISON_JUMP_TABLE = [].tap do |table|
    table["=".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_EQUAL
      sub_table.freeze
    end
    table["!".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_NOT_EQUAL
      sub_table.freeze
    end
    table["<".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_LESS_THAN_OR_EQUAL
      sub_table[">".ord] = COMPARISON_NOT_EQUAL_ALT
      RUBY_WHITESPACE.each { |c| sub_table[c.ord] = COMPARISON_LESS_THAN }
      sub_table.freeze
    end
    table[">".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_GREATER_THAN_OR_EQUAL
      RUBY_WHITESPACE.each { |c| sub_table[c.ord] = COMPARISON_GREATER_THAN }
      sub_table.freeze
    end
    table.freeze
  end

  # Operators that are complete on their own. Used when the byte after "<"
  # or ">" does not extend the operator (e.g. "a<1") or when the operator is
  # the last byte of the input. "=" and "!" are deliberately absent: they are
  # only valid as the first half of "==" / "!=".
  SINGLE_COMPARISON_TABLE = [].tap do |table|
    table["<".ord] = COMPARISON_LESS_THAN
    table[">".ord] = COMPARISON_GREATER_THAN
    table.freeze
  end

  # First byte => [token type, pattern to scan] for variable-length tokens.
  NEXT_MATCHER_JUMP_TABLE = [].tap do |table|
    "a".upto("z") do |c|
      table[c.ord] = [:id, IDENTIFIER].freeze
      table[c.upcase.ord] = [:id, IDENTIFIER].freeze
    end
    table["_".ord] = [:id, IDENTIFIER].freeze

    "0".upto("9") do |c|
      table[c.ord] = [:number, NUMBER_LITERAL].freeze
    end
    table["-".ord] = [:number, NUMBER_LITERAL].freeze

    table["'".ord] = [:string, SINGLE_STRING_LITERAL].freeze
    table["\"".ord] = [:string, DOUBLE_STRING_LITERAL].freeze
    table.freeze
  end

  # Byte => single-character punctuation token.
  SPECIAL_TABLE = [].tap do |table|
    table["|".ord] = PIPE
    table[".".ord] = DOT
    table[":".ord] = COLON
    table[",".ord] = COMMA
    table["[".ord] = OPEN_SQUARE
    table["]".ord] = CLOSE_SQUARE
    table["(".ord] = OPEN_ROUND
    table[")".ord] = CLOSE_ROUND
    table["?".ord] = QUESTION
    table["-".ord] = DASH
    table.freeze
  end

  # Byte => true for ASCII digits.
  NUMBER_TABLE = [].tap do |table|
    "0".upto("9") do |c|
      table[c.ord] = true
    end
    table.freeze
  end

  # @param input [String] the expression source to tokenize
  def initialize(input)
    @ss = StringScanner.new(input)
  end

  # Tokenizes the whole input.
  #
  # @return [Array<Array>] tokens in source order, terminated by
  #   [:end_of_string]
  # @raise [SyntaxError] when a byte cannot start any token, or when "=" or
  #   "!" is not followed by "="
  # rubocop:disable Metrics/BlockNesting
  def tokenize
    @output = []

    until @ss.eos?
      @ss.skip(WHITESPACE_OR_NOTHING)

      break if @ss.eos?

      peeked = @ss.peek_byte

      if (special = SPECIAL_TABLE[peeked])
        @ss.scan_byte
        # Special case for ".."
        if special == DOT && @ss.peek_byte == DOT_ORD
          @ss.scan_byte
          @output << DOTDOT
        elsif special == DASH
          # Special case for negative numbers. peek_byte is nil at the end of
          # the input, and indexing an Array with nil raises TypeError, so
          # check for nil before the table lookup.
          if (next_byte = @ss.peek_byte) && NUMBER_TABLE[next_byte]
            # Step back onto the "-" so NUMBER_LITERAL captures the sign.
            @ss.pos -= 1
            @output << [:number, @ss.scan(NUMBER_LITERAL)]
          else
            @output << special
          end
        else
          @output << special
        end
      elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
        @ss.scan_byte
        if (next_byte = @ss.peek_byte) && (found = sub_table[next_byte])
          @output << found
          @ss.scan_byte
        elsif (single = SINGLE_COMPARISON_TABLE[peeked])
          # "<" / ">" directly followed by an operand, or at end of input.
          # The following byte (if any) is left for the next iteration.
          @output << single
        else
          raise SyntaxError, "Unexpected character #{peeked.chr}"
        end
      else
        type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked]

        if type && (t = @ss.scan(pattern))
          # Special case for "contains".
          # NOTE(review): this matches the bare identifier even when it is not
          # followed by whitespace, unlike the CONTAINS pattern above —
          # confirm that is intended.
          @output << if type == :id && t == "contains"
            COMPARISON_CONTAINS
          else
            [type, t]
          end
        else
          raise SyntaxError, "Unexpected character #{peeked.chr}"
        end
      end
    end
    # rubocop:enable Metrics/BlockNesting

    @output << EOS
  end
end

# Use the byte-oriented lexer only when the loaded strscan provides
# StringScanner#scan_byte; otherwise fall back to the regexp-based lexer.
Lexer = StringScanner.method_defined?(:scan_byte) ? Lexer2 : Lexer1
end
2 changes: 1 addition & 1 deletion lib/liquid/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def expression
str = consume
str << variable_lookups
when :open_square
str = consume
str = consume.dup
str << expression
str << consume(:close_square)
str << variable_lookups
Expand Down
5 changes: 4 additions & 1 deletion liquid.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Gem::Specification.new do |s|
s.license = "MIT"
# s.description = "A secure, non-evaling end user template engine with aesthetic markup."

s.required_ruby_version = ">= 2.7.0"
s.required_ruby_version = ">= 3.0.0"
s.required_rubygems_version = ">= 1.3.7"

s.metadata['allowed_push_host'] = 'https://rubygems.org'
Expand All @@ -28,6 +28,9 @@ Gem::Specification.new do |s|

s.require_path = "lib"

s.add_dependency("strscan")
s.add_dependency("bigdecimal")

s.add_development_dependency('rake', '~> 13.0')
s.add_development_dependency('minitest')
end
1 change: 1 addition & 0 deletions performance/benchmark.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
require 'benchmark/ips'
require_relative 'theme_runner'

RubyVM::YJIT.enable if defined?(RubyVM::YJIT)
Liquid::Template.error_mode = ARGV.first.to_sym if ARGV.first
profiler = ThemeRunner.new

Expand Down
62 changes: 62 additions & 0 deletions performance/unit/lexer_benchmark.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# frozen_string_literal: true

require "benchmark/ips"

# benchmark liquid lexing

require 'liquid'

# Enable YJIT when this Ruby build ships it and exposes a runtime switch;
# otherwise run the benchmark on the interpreter instead of crashing.
RubyVM::YJIT.enable if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enable)

# Expressions fed to both lexers, covering lookups, ranges, numbers,
# every comparison operator, strings and filters.
EXPRESSIONS = [
  "foo[1..2].baz",
  "12.0",
  "foo.bar.based",
  "21 - 62",
  "foo.bar.baz",
  "foo > 12",
  "foo < 12",
  "foo <= 12",
  "foo >= 12",
  "foo <> 12",
  "foo == 12",
  "foo != 12",
  "foo contains 12",
  "foo contains 'bar'",
  "foo != 'bar'",
  "'foo' contains 'bar'",
  '234089',
  "foo | default: -1",
].freeze

# Sanity check before timing anything: both lexers must produce identical
# token streams for every expression, otherwise the comparison is meaningless.
EXPRESSIONS.each do |expr|
  lexer_1_result = Liquid::Lexer1.new(expr).tokenize
  lexer_2_result = Liquid::Lexer2.new(expr).tokenize

  next if lexer_1_result == lexer_2_result

  warn "Lexer1 and Lexer2 results are different for expression: #{expr}"
  warn "expected: #{lexer_1_result}"
  warn "got: #{lexer_2_result}"
  abort
end

Benchmark.ips do |x|
  x.config(time: 10, warmup: 5)

  x.report("Liquid::Lexer1#tokenize") do
    EXPRESSIONS.each do |expr|
      l = Liquid::Lexer1.new(expr)
      l.tokenize
    end
  end

  x.report("Liquid::Lexer2#tokenize") do
    EXPRESSIONS.each do |expr|
      l = Liquid::Lexer2.new(expr)
      l.tokenize
    end
  end

  x.compare!
end
2 changes: 1 addition & 1 deletion test/integration/standard_filter_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def initialize(value:)
attr_reader :value

def registers
{ @value => @context.registers[@value] }
"{#{@value.inspect}=>#{@context.registers[@value].inspect}}"
end
end

Expand Down
Loading

0 comments on commit b355378

Please sign in to comment.