Skip to content

Commit

Permalink
store StringScanner in ParseContext and reuse it through parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
ggmichaelgo committed Nov 18, 2024
1 parent bd5098c commit a37bdde
Show file tree
Hide file tree
Showing 18 changed files with 124 additions and 96 deletions.
1 change: 0 additions & 1 deletion lib/liquid.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ module Liquid
require "liquid/version"
require "liquid/deprecations"
require "liquid/const"
require "liquid/string_scanner_pool"
require 'liquid/standardfilters'
require 'liquid/file_system'
require 'liquid/parser_switching'
Expand Down
6 changes: 5 additions & 1 deletion lib/liquid/context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ def initialize(environments = {}, outer_scope = {}, registers = {}, rethrow_erro
@global_filter = nil
@disabled_tags = {}

# Instead of constructing new StringScanner objects for each Expression parse,
# we recycle the same one.
@string_scanner = StringScanner.new("")

@registers.static[:cached_partials] ||= {}
@registers.static[:file_system] ||= environment.file_system
@registers.static[:template_factory] ||= Liquid::TemplateFactory.new
Expand Down Expand Up @@ -176,7 +180,7 @@ def []=(key, value)
# Example:
# products == empty #=> products.empty?
def [](expression)
evaluate(Expression.parse(expression))
evaluate(Expression.parse(expression, @string_scanner))
end

def key?(key)
Expand Down
40 changes: 19 additions & 21 deletions lib/liquid/expression.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Expression1
# malicious input as described in https://github.com/Shopify/liquid/issues/1357
RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/

def self.parse(markup)
def self.parse(markup, _ss = nil)
return nil unless markup

markup = markup.strip
Expand Down Expand Up @@ -58,7 +58,7 @@ class Expression2
'false' => false,
'blank' => '',
'empty' => '',
'-' => VariableLookup.parse("-")
'-' => VariableLookup.parse("-", nil),
}.freeze

DOT = ".".ord
Expand All @@ -72,7 +72,7 @@ class Expression2
CACHE = LruRedux::Cache.new(10_000) # most themes would have less than 2,000 unique expression

class << self
def parse(markup)
def parse(markup, ss = StringScanner.new(""))
return unless markup

markup = markup.strip # markup can be a frozen string
Expand All @@ -86,33 +86,36 @@ def parse(markup)

return CACHE[markup] if CACHE.key?(markup)

CACHE[markup] = inner_parse(markup)
CACHE[markup] = inner_parse(markup, ss)
end

def inner_parse(markup)
def inner_parse(markup, ss)
if (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX
return RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2))
return RangeLookup.parse(
Regexp.last_match(1),
Regexp.last_match(2),
ss,
)
end

if (num = parse_number(markup))
if (num = parse_number(markup, ss))
num
else
VariableLookup.parse(markup)
VariableLookup.parse(markup, ss)
end
end

def parse_number(markup)
ss = StringScannerPool.pop(markup)

is_integer = true
last_dot_pos = nil
num_end_pos = nil

def parse_number(markup, ss)
ss.string = markup
# the first byte must be a digit, a period, or a dash
byte = ss.scan_byte

return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE)

is_integer = true
last_dot_pos = nil
num_end_pos = nil

while (byte = ss.scan_byte)
return false if byte != DOT && (byte < ZERO || byte > NINE)

Expand All @@ -136,14 +139,9 @@ def parse_number(markup)
if num_end_pos
# number ends with a number "123.123"
markup.byteslice(0, num_end_pos).to_f
elsif last_dot_pos
markup.byteslice(0, last_dot_pos).to_f
else
# we should never reach this point
false
markup.byteslice(0, last_dot_pos).to_f
end
ensure
StringScannerPool.release(ss)
end
end
end
Expand Down
10 changes: 4 additions & 6 deletions lib/liquid/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ class Lexer1
WHITESPACE_OR_NOTHING = /\s*/

class << self
def tokenize(input)
ss = StringScanner.new(input)
def tokenize(input, ss = StringScanner.new(""))
ss.string = input
output = []

until ss.eos?
Expand Down Expand Up @@ -158,8 +158,8 @@ class Lexer2

# rubocop:disable Metrics/BlockNesting
class << self
def tokenize(input)
ss = StringScannerPool.pop(input)
def tokenize(input, ss)
ss.string = input
output = []

until ss.eos?
Expand Down Expand Up @@ -220,8 +220,6 @@ def tokenize(input)
end
# rubocop:enable Metrics/BlockNesting
output << EOS
ensure
StringScannerPool.release(ss)
end

def raise_syntax_error(start_pos, ss)
Expand Down
21 changes: 17 additions & 4 deletions lib/liquid/parse_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
module Liquid
class ParseContext
attr_accessor :locale, :line_number, :trim_whitespace, :depth
attr_reader :partial, :warnings, :error_mode, :environment
attr_reader :partial, :warnings, :error_mode, :environment, :string_scanner

def initialize(options = Const::EMPTY_HASH)
@environment = options.fetch(:environment, Environment.default)
Expand All @@ -12,6 +12,10 @@ def initialize(options = Const::EMPTY_HASH)
@locale = @template_options[:locale] ||= I18n.new
@warnings = []

# constructing new StringScanner in Lexer, Tokenizer, etc is expensive
# This StringScanner will be shared by all of them
@string_scanner = StringScanner.new("")

self.depth = 0
self.partial = false
end
Expand All @@ -24,12 +28,21 @@ def new_block_body
Liquid::BlockBody.new
end

def new_tokenizer(markup, start_line_number: nil, for_liquid_tag: false)
Tokenizer.new(markup, line_number: start_line_number, for_liquid_tag: for_liquid_tag)
def new_parser(input)
Parser.new(input, @string_scanner)
end

def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false)
Tokenizer.new(
source: source,
string_scanner: @string_scanner,
line_number: start_line_number,
for_liquid_tag: for_liquid_tag,
)
end

def parse_expression(markup)
Expression.parse(markup)
Expression.parse(markup, string_scanner)
end

def partial=(value)
Expand Down
4 changes: 2 additions & 2 deletions lib/liquid/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

module Liquid
class Parser
def initialize(input)
@tokens = Lexer.tokenize(input)
def initialize(input, string_scanner)
@tokens = Lexer.tokenize(input, string_scanner)
@p = 0 # pointer to current location
end

Expand Down
6 changes: 3 additions & 3 deletions lib/liquid/range_lookup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

module Liquid
class RangeLookup
def self.parse(start_markup, end_markup)
start_obj = Expression.parse(start_markup)
end_obj = Expression.parse(end_markup)
def self.parse(start_markup, end_markup, string_scanner)
start_obj = Expression.parse(start_markup, string_scanner)
end_obj = Expression.parse(end_markup, string_scanner)
if start_obj.respond_to?(:evaluate) || end_obj.respond_to?(:evaluate)
new(start_obj, end_obj)
else
Expand Down
24 changes: 0 additions & 24 deletions lib/liquid/string_scanner_pool.rb

This file was deleted.

2 changes: 1 addition & 1 deletion lib/liquid/tags/for.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def lax_parse(markup)
end

def strict_parse(markup)
p = Parser.new(markup)
p = @parse_context.new_parser(markup)
@variable_name = p.consume(:id)
raise SyntaxError, options[:locale].t("errors.syntax.for_invalid_in") unless p.id?('in')

Expand Down
2 changes: 1 addition & 1 deletion lib/liquid/tags/if.rb
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def lax_parse(markup)
end

def strict_parse(markup)
p = Parser.new(markup)
p = @parse_context.new_parser(markup)
condition = parse_binary_comparisons(p)
p.consume(:end_of_string)
condition
Expand Down
32 changes: 23 additions & 9 deletions lib/liquid/tokenizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@ module Liquid
class Tokenizer1
attr_reader :line_number, :for_liquid_tag

def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false)
def initialize(
source:,
string_scanner:, # this is not used
line_numbers: false,
line_number: nil,
for_liquid_tag: false
)
@source = source
@line_number = line_number || (line_numbers ? 1 : nil)
@for_liquid_tag = for_liquid_tag
Expand Down Expand Up @@ -56,12 +62,22 @@ class Tokenizer2
CLOSE_CURLEY = "}".ord
PERCENTAGE = "%".ord

def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false)
@line_number = line_number || (line_numbers ? 1 : nil)
def initialize(
source:,
string_scanner:,
line_numbers: false,
line_number: nil,
for_liquid_tag: false
)
@line_number = line_number || (line_numbers ? 1 : nil)
@for_liquid_tag = for_liquid_tag
@source = source
@offset = 0
@tokens = []
@source = source
@offset = 0
@tokens = []

@ss = string_scanner
@ss.string = @source

tokenize
end

Expand All @@ -85,13 +101,11 @@ def tokenize
if @for_liquid_tag
@tokens = @source.split("\n")
else
@ss = StringScannerPool.pop(@source)
@tokens << shift_normal until @ss.eos?
end

@source = nil
ensure
StringScannerPool.release(@ss) if @ss
@ss = nil
end

def shift_normal
Expand Down
2 changes: 1 addition & 1 deletion lib/liquid/variable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def lax_parse(markup)

def strict_parse(markup)
@filters = []
p = Parser.new(markup)
p = @parse_context.new_parser(markup)

return if p.look(:end_of_string)

Expand Down
16 changes: 11 additions & 5 deletions lib/liquid/variable_lookup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@ class VariableLookup

attr_reader :name, :lookups

def self.parse(markup)
new(markup)
def self.parse(markup, string_scanner)
new(markup, string_scanner)
end

def initialize(markup)
def initialize(markup, string_scanner = StringScanner.new(""))
lookups = markup.scan(VariableParser)

name = lookups.shift
if name&.start_with?('[') && name&.end_with?(']')
name = Expression.parse(name[1..-2])
name = Expression.parse(
name[1..-2],
string_scanner,
)
end
@name = name

Expand All @@ -25,7 +28,10 @@ def initialize(markup)
@lookups.each_index do |i|
lookup = lookups[i]
if lookup&.start_with?('[') && lookup&.end_with?(']')
lookups[i] = Expression.parse(lookup[1..-2])
lookups[i] = Expression.parse(
lookup[1..-2],
string_scanner,
)
elsif COMMAND_METHODS.include?(lookup)
@command_flags |= 1 << i
end
Expand Down
7 changes: 6 additions & 1 deletion performance/theme_runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,13 @@ def compile

# `tokenize` will just test the tokenizen portion of liquid without any templates
def tokenize
ss = StringScanner.new("")
@tests.each do |test_hash|
tokenizer = Liquid::Tokenizer.new(test_hash[:liquid], true)
tokenizer = Liquid::Tokenizer.new(
source: test_hash[:liquid],
string_scanner: ss,
line_numbers: true,
)
while tokenizer.shift; end
end
end
Expand Down
2 changes: 1 addition & 1 deletion test/integration/expression_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_range
end

def test_quirky_negative_sign_expression_markup
result = Expression.parse("-")
result = Expression.parse("-", nil)
assert(result.is_a?(VariableLookup))
assert_equal("-", result.name)

Expand Down
2 changes: 1 addition & 1 deletion test/unit/lexer_unit_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,6 @@ def test_tokenize_incomplete_expression
private

def tokenize(input)
Lexer.tokenize(input)
Lexer.tokenize(input, StringScanner.new(""))
end
end
Loading

0 comments on commit a37bdde

Please sign in to comment.