From 255aeb2485096f25d59603842432676016c2a381 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Mon, 12 Jan 2026 14:21:42 +0100 Subject: [PATCH] Correctly expose ripper state It is for example used by `irb`, `rdoc`, `syntax_suggest` --- lib/prism/lex_compat.rb | 72 ++++----------------------- lib/prism/translation/ripper.rb | 25 ++++++++++ lib/prism/translation/ripper/lexer.rb | 46 +++++++++++++++++ prism.gemspec | 1 + rakelib/typecheck.rake | 1 + test/prism/ruby/ripper_test.rb | 11 ++-- 6 files changed, 88 insertions(+), 68 deletions(-) create mode 100644 lib/prism/translation/ripper/lexer.rb diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index ebfb19e56d..46f6130357 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -198,58 +198,6 @@ def deconstruct_keys(keys) "__END__": :on___end__ }.freeze - # Pretty much a 1:1 copy of Ripper::Lexer::State. We list all the available states - # to reimplement to_s without using Ripper. - class State - # Ripper-internal bitflags. - ALL = %i[ - BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM - ].map.with_index.to_h { |name, i| [2 ** i, name] } - ALL[0] = :NONE - ALL.freeze - ALL.each { |value, name| const_set(name, value) } - - # :stopdoc: - - attr_reader :to_int, :to_s - - def initialize(i) - @to_int = i - @to_s = state_name(i) - freeze - end - - def [](index) - case index - when 0, :to_int - @to_int - when 1, :to_s - @to_s - else - nil - end - end - - alias to_i to_int - alias inspect to_s - def pretty_print(q) q.text(to_s) end - def ==(i) super or to_int == i end - def &(i) self.class.new(to_int & i) end - def |(i) self.class.new(to_int | i) end - def allbits?(i) to_int.allbits?(i) end - def anybits?(i) to_int.anybits?(i) end - def nobits?(i) to_int.nobits?(i) end - - # :startdoc: - - private - - # Convert the state flags into the format exposed by ripper. - def state_name(bits) - ALL.filter_map { |flag, name| name if bits & flag != 0 }.join("|") - end - end - # When we produce tokens, we produce the same arrays that Ripper does. # However, we add a couple of convenience methods onto them to make them a # little easier to work with. We delegate all other methods to the array. @@ -300,8 +248,8 @@ def ==(other) # :nodoc: class IdentToken < Token def ==(other) # :nodoc: (self[0...-1] == other[0...-1]) && ( - (other[3] == State::LABEL | State::END) || - (other[3] & (State::ARG | State::CMDARG) != 0) + (other[3] == Translation::Ripper::EXPR_LABEL | Translation::Ripper::EXPR_END) || + (other[3] & (Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_CMDARG) != 0) ) end end @@ -312,8 +260,8 @@ class IgnoredNewlineToken < Token def ==(other) # :nodoc: return false unless self[0...-1] == other[0...-1] - if self[3] == State::ARG | State::LABELED - other[3] & State::ARG | State::LABELED != 0 + if self[3] == Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_LABELED + other[3] & Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_LABELED != 0 else self[3] == other[3] end @@ -331,8 +279,8 @@ def ==(other) # :nodoc: class ParamToken < Token def ==(other) # :nodoc: (self[0...-1] == other[0...-1]) && ( - (other[3] == State::END) || - (other[3] == State::END | State::LABEL) + (other[3] == Translation::Ripper::EXPR_END) || + (other[3] == Translation::Ripper::EXPR_END | Translation::Ripper::EXPR_LABEL) ) end end @@ -727,7 +675,7 @@ def result event = RIPPER.fetch(token.type) value = token.value - lex_state = State.new(lex_state) + lex_state = Translation::Ripper::Lexer::State.new(lex_state) token = case event @@ -741,7 +689,7 @@ def result last_heredoc_end = token.location.end_offset IgnoreStateToken.new([[lineno, column], event, value, lex_state]) when :on_ident - if lex_state == State::END + if lex_state == Translation::Ripper::EXPR_END # If we have an identifier that follows a method name like: # # def foo bar @@ -751,7 +699,7 @@ def result # yet. We do this more accurately, so we need to allow comparing # against both END and END|LABEL. ParamToken.new([[lineno, column], event, value, lex_state]) - elsif lex_state == State::END | State::LABEL + elsif lex_state == Translation::Ripper::EXPR_END | Translation::Ripper::EXPR_LABEL # In the event that we're comparing identifiers, we're going to # allow a little divergence. Ripper doesn't account for local # variables introduced through named captures in regexes, and we @@ -791,7 +739,7 @@ def result counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 end - State.new(result_value[current_index][1]) + Translation::Ripper::Lexer::State.new(result_value[current_index][1]) else previous_state end diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index 00d5f80af4..a901a72692 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -424,9 +424,34 @@ def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false) end end + autoload :Lexer, "prism/translation/ripper/lexer" autoload :SexpBuilder, "prism/translation/ripper/sexp" autoload :SexpBuilderPP, "prism/translation/ripper/sexp" + # :stopdoc: + # This is not part of the public API but used by some gems. + + # Ripper-internal bitflags. + LEX_STATE_NAMES = %i[ + BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM + ].map.with_index.to_h { |name, i| [2 ** i, name] }.freeze + private_constant :LEX_STATE_NAMES + + LEX_STATE_NAMES.each do |value, key| + const_set("EXPR_#{key}", value) + end + EXPR_NONE = 0 + EXPR_VALUE = EXPR_BEG + EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS + EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG + EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN + + def self.lex_state_name(state) + LEX_STATE_NAMES.filter_map { |flag, name| name if state & flag != 0 }.join("|") + end + + # :startdoc: + # The source that is being parsed. attr_reader :source diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb new file mode 100644 index 0000000000..ed02e96574 --- /dev/null +++ b/lib/prism/translation/ripper/lexer.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true +# :markup: markdown + +require_relative "../ripper" + +module Prism + module Translation + class Ripper + class Lexer # :nodoc: + # :stopdoc: + class State + + attr_reader :to_int, :to_s + + def initialize(i) + @to_int = i + @to_s = Ripper.lex_state_name(i) + freeze + end + + def [](index) + case index + when 0, :to_int + @to_int + when 1, :to_s + @to_s + else + nil + end + end + + alias to_i to_int + alias inspect to_s + def pretty_print(q) q.text(to_s) end + def ==(i) super or to_int == i end + def &(i) self.class.new(to_int & i) end + def |(i) self.class.new(to_int | i) end + def allbits?(i) to_int.allbits?(i) end + def anybits?(i) to_int.anybits?(i) end + def nobits?(i) to_int.nobits?(i) end + end + # :startdoc: + end + end + end +end diff --git a/prism.gemspec b/prism.gemspec index a45e0d93e7..b6d1f16719 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -108,6 +108,7 @@ Gem::Specification.new do |spec| "lib/prism/translation/parser/compiler.rb", "lib/prism/translation/parser/lexer.rb", "lib/prism/translation/ripper.rb", + "lib/prism/translation/ripper/lexer.rb", "lib/prism/translation/ripper/sexp.rb", "lib/prism/translation/ripper/shim.rb", "lib/prism/translation/ruby_parser.rb", diff --git a/rakelib/typecheck.rake b/rakelib/typecheck.rake index 4a83bad7d0..439af9a8fa 100644 --- a/rakelib/typecheck.rake +++ b/rakelib/typecheck.rake @@ -26,6 +26,7 @@ namespace :typecheck do - ./lib/prism/visitor.rb - ./lib/prism/translation/parser/lexer.rb - ./lib/prism/translation/ripper.rb + - ./lib/prism/translation/ripper/lexer.rb - ./lib/prism/translation/ripper/sexp.rb - ./lib/prism/translation/ruby_parser.rb - ./lib/prism/inspect_visitor.rb diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index 9d64c5c70c..bbd85585a9 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -65,13 +65,12 @@ class RipperTest < TestCase # Check that the hardcoded values don't change without us noticing. def test_internals - actual = LexCompat::State::ALL - expected = Ripper.constants.select { |name| name.start_with?("EXPR_") } - expected -= %i[EXPR_VALUE EXPR_BEG_ANY EXPR_ARG_ANY EXPR_END_ANY] + actual = Translation::Ripper.constants.select { |name| name.start_with?("EXPR_") }.sort + expected = Ripper.constants.select { |name| name.start_with?("EXPR_") }.sort - assert_equal(expected.size, actual.size) - expected.each do |const_name| - assert_equal(const_name.to_s.delete_prefix("EXPR_").to_sym, actual[Ripper.const_get(const_name)]) + assert_equal(expected, actual) + expected.zip(actual).each do |ripper, prism| + assert_equal(Ripper.const_get(ripper), Translation::Ripper.const_get(prism)) end end