Skip to content

Commit

Permalink
Optimize BaseParser#unnormalize method
Browse files Browse the repository at this point in the history
## Benchmark
```
RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.3/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml
ruby 3.3.3 (2024-06-12 revision f1c7b6f435) [arm64-darwin22]
Calculating -------------------------------------
                         before       after  before(YJIT)  after(YJIT)
                 dom     17.704      18.106        34.215       33.806 i/s -     100.000 times in 5.648398s 5.523110s 2.922698s 2.958036s
                 sax     25.664      25.302        48.429       48.602 i/s -     100.000 times in 3.896488s 3.952289s 2.064859s 2.057537s
                pull     28.966      29.215        61.710       62.068 i/s -     100.000 times in 3.452275s 3.422901s 1.620480s 1.611129s
              stream     28.291      28.426        53.860       55.548 i/s -     100.000 times in 3.534716s 3.517884s 1.856667s 1.800247s

Comparison:
                              dom
        before(YJIT):        34.2 i/s
         after(YJIT):        33.8 i/s - 1.01x  slower
               after:        18.1 i/s - 1.89x  slower
              before:        17.7 i/s - 1.93x  slower

                              sax
         after(YJIT):        48.6 i/s
        before(YJIT):        48.4 i/s - 1.00x  slower
              before:        25.7 i/s - 1.89x  slower
               after:        25.3 i/s - 1.92x  slower

                             pull
         after(YJIT):        62.1 i/s
        before(YJIT):        61.7 i/s - 1.01x  slower
               after:        29.2 i/s - 2.12x  slower
              before:        29.0 i/s - 2.14x  slower

                           stream
         after(YJIT):        55.5 i/s
        before(YJIT):        53.9 i/s - 1.03x  slower
               after:        28.4 i/s - 1.95x  slower
              before:        28.3 i/s - 1.96x  slower

```

- YJIT=ON : 1.00x - 1.03x faster
- YJIT=OFF : 0.98x - 1.02x faster
  • Loading branch information
naitoh committed Jun 24, 2024
1 parent 683e156 commit 2a7f8a4
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,13 @@ module Private
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
DEFAULT_ENTITIES_PATTERNS = {}
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
default_entities.each do |term|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
end
end
private_constant :Private

Expand Down Expand Up @@ -504,10 +511,10 @@ def normalize( input, entities=nil, entity_filter=nil )

# Unescapes all possible entities
def unnormalize( string, entities=nil, filter=nil )
rv = string.gsub( /\r\n?/, "\n" )
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
rv.gsub!( Private::CHARACTER_REFERENCES ) {
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
Expand All @@ -518,15 +525,15 @@ def unnormalize( string, entities=nil, filter=nil )
unless filter and filter.include?(entity_reference)
entity_value = entity( entity_reference, entities )
if entity_value
re = /&#{entity_reference};/
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
rv.gsub!( re, entity_value )
else
er = DEFAULT_ENTITIES[entity_reference]
rv.gsub!( er[0], er[2] ) if er
end
end
end
rv.gsub!( /&/, '&' )
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
end
rv
end
Expand Down

0 comments on commit 2a7f8a4

Please sign in to comment.