REXML 3.1.7.4

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_5@67937 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
usa 2021-04-05 11:45:31 +00:00
parent d6d2f179b0
commit 4870620450
10 changed files with 783 additions and 147 deletions

View File

@ -7,6 +7,39 @@ require 'rexml/attlistdecl'
require 'rexml/xmltokens' require 'rexml/xmltokens'
module REXML module REXML
# Writes the reference part of a declaration -- an ID type followed by an
# optional public ID literal and an optional system literal -- onto any
# output that responds to <<. Quote characters are chosen so that they do
# not clash with a quote contained in the literal being written.
class ReferenceWriter
  # id_type::           text emitted first, preceded by a single space
  # public_id_literal:: public ID literal without quotes, or nil to omit it
  # system_literal::    system literal without quotes, or nil to omit it
  def initialize(id_type,
                 public_id_literal,
                 system_literal)
    @default_quote = "\""
    @id_type = id_type
    @public_id_literal = public_id_literal
    @system_literal = system_literal
  end

  # Appends " <id_type>" and then each literal that is present to +output+.
  def write(output)
    output << " #{@id_type}"
    if @public_id_literal
      delimiter = @public_id_literal.include?("'") ? "\"" : @default_quote
      output << " #{delimiter}#{@public_id_literal}#{delimiter}"
    end
    if @system_literal
      # A literal containing one kind of quote is wrapped in the other kind.
      delimiter =
        if @system_literal.include?("'")
          "\""
        elsif @system_literal.include?("\"")
          "'"
        else
          @default_quote
        end
      output << " #{delimiter}#{@system_literal}#{delimiter}"
    end
  end
end
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
# ... >. DOCTYPES can be used to declare the DTD of a document, as well as # ... >. DOCTYPES can be used to declare the DTD of a document, as well as
# being used to declare entities used in the document. # being used to declare entities used in the document.
@ -50,6 +83,8 @@ module REXML
super( parent ) super( parent )
@name = first.name @name = first.name
@external_id = first.external_id @external_id = first.external_id
@long_name = first.instance_variable_get(:@long_name)
@uri = first.instance_variable_get(:@uri)
elsif first.kind_of? Array elsif first.kind_of? Array
super( parent ) super( parent )
@name = first[0] @name = first[0]
@ -112,9 +147,12 @@ module REXML
output << START output << START
output << ' ' output << ' '
output << @name output << @name
output << " #@external_id" if @external_id if @external_id
output << " #{@long_name.inspect}" if @long_name reference_writer = ReferenceWriter.new(@external_id,
output << " #{@uri.inspect}" if @uri @long_name,
@uri)
reference_writer.write(output)
end
unless @children.empty? unless @children.empty?
output << ' [' output << ' ['
@children.each { |child| @children.each { |child|
@ -249,9 +287,9 @@ module REXML
end end
def to_s def to_s
notation = "<!NOTATION #{@name} #{@middle}" notation = "<!NOTATION #{@name}"
notation << " #{@public.inspect}" if @public reference_writer = ReferenceWriter.new(@middle, @public, @system)
notation << " #{@system.inspect}" if @system reference_writer.write(notation)
notation << ">" notation << ">"
notation notation
end end

View File

@ -1,4 +1,7 @@
# frozen_string_literal: false # frozen_string_literal: false
require "strscan"
require 'rexml/parseexception' require 'rexml/parseexception'
require 'rexml/undefinednamespaceexception' require 'rexml/undefinednamespaceexception'
require 'rexml/source' require 'rexml/source'
@ -32,8 +35,12 @@ module REXML
COMBININGCHAR = '' # TODO COMBININGCHAR = '' # TODO
EXTENDER = '' # TODO EXTENDER = '' # TODO
NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*" NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
QNAME = /(#{QNAME_STR})/
# Just for backward compatibility. For example, kramdown uses this.
# It's not used in REXML.
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\.:]' NAMECHAR = '[\-\w\.:]'
@ -45,8 +52,7 @@ module REXML
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_END = /\A\s*\]\s*>/um DOCTYPE_END = /\A\s*\]\s*>/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u CDATA_START = /\A<!\[CDATA\[/u
@ -56,15 +62,14 @@ module REXML
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
ENTITY_START = /\A\s*<!ENTITY/ ENTITY_START = /\A\s*<!ENTITY/
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
ELEMENTDECL_START = /\A\s*<!ELEMENT/um ELEMENTDECL_START = /\A\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
@ -78,9 +83,6 @@ module REXML
ATTDEF_RE = /#{ATTDEF}/ ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /\A\s*<!ATTLIST/um ATTLISTDECL_START = /\A\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /\A\s*<!NOTATION/um
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
TEXT_PATTERN = /\A([^<]*)/um TEXT_PATTERN = /\A([^<]*)/um
@ -98,6 +100,11 @@ module REXML
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
NOTATIONDECL_START = /\A\s*<!NOTATION/um
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
EREFERENCE = /&(?!#{NAME};)/ EREFERENCE = /&(?!#{NAME};)/
DEFAULT_ENTITIES = { DEFAULT_ENTITIES = {
@ -112,7 +119,7 @@ module REXML
# These are patterns to identify common markup errors, to make the # These are patterns to identify common markup errors, to make the
# error messages more informative. # error messages more informative.
###################################################################### ######################################################################
MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um MISSING_ATTRIBUTE_QUOTES = /^<#{QNAME_STR}\s+#{QNAME_STR}\s*=\s*[^"']/um
def initialize( source ) def initialize( source )
self.stream = source self.stream = source
@ -197,11 +204,9 @@ module REXML
return [ :end_document ] if empty? return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0 return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding #STDERR.puts @source.encoding
@source.read if @source.buffer.size<2
#STDERR.puts "BUFFER = #{@source.buffer.inspect}" #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
if @document_status == nil if @document_status == nil
#@source.consume( /^\s*/um ) word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
word = word[1] unless word.nil? word = word[1] unless word.nil?
#STDERR.puts "WORD = #{word.inspect}" #STDERR.puts "WORD = #{word.inspect}"
case word case word
@ -226,38 +231,49 @@ module REXML
when INSTRUCTION_START when INSTRUCTION_START
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true ) base_error_message = "Malformed DOCTYPE"
@source.match(DOCTYPE_START, true)
@nsstack.unshift(curr_ns=Set.new) @nsstack.unshift(curr_ns=Set.new)
identity = md[1] name = parse_name(base_error_message)
close = md[2] if @source.match(/\A\s*\[/um, true)
identity =~ IDENTITY id = [nil, nil, nil]
name = $1
raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
pub_sys = $2.nil? ? nil : $2.strip
long_name = $4.nil? ? nil : $4.strip
uri = $6.nil? ? nil : $6.strip
args = [ :start_doctype, name, pub_sys, long_name, uri ]
if close == ">"
@document_status = :after_doctype
@source.read if @source.buffer.size<2
md = @source.match(/^\s*/um, true)
@stack << [ :end_doctype ]
else
@document_status = :in_doctype @document_status = :in_doctype
elsif @source.match(/\A\s*>/um, true)
id = [nil, nil, nil]
@document_status = :after_doctype
else
id = parse_id(base_error_message,
accept_external_id: true,
accept_public_id: false)
if id[0] == "SYSTEM"
# For backward compatibility
id[1], id[2] = id[2], nil
end
if @source.match(/\A\s*\[/um, true)
@document_status = :in_doctype
elsif @source.match(/\A\s*>/um, true)
@document_status = :after_doctype
else
message = "#{base_error_message}: garbage after external ID"
raise REXML::ParseException.new(message, @source)
end
end
args = [:start_doctype, name, *id]
if @document_status == :after_doctype
@source.match(/\A\s*/um, true)
@stack << [ :end_doctype ]
end end
return args return args
when /^\s+/ when /\A\s+/
else else
@document_status = :after_doctype @document_status = :after_doctype
@source.read if @source.buffer.size<2
md = @source.match(/\s*/um, true)
if @source.encoding == "UTF-8" if @source.encoding == "UTF-8"
@source.buffer.force_encoding(::Encoding::UTF_8) @source.buffer.force_encoding(::Encoding::UTF_8)
end end
end end
end end
if @document_status == :in_doctype if @document_status == :in_doctype
md = @source.match(/\s*(.*?>)/um) md = @source.match(/\A\s*(.*?>)/um)
case md[1] case md[1]
when SYSTEMENTITY when SYSTEMENTITY
match = @source.match( SYSTEMENTITY, true )[1] match = @source.match( SYSTEMENTITY, true )[1]
@ -314,33 +330,50 @@ module REXML
end end
return [ :attlistdecl, element, pairs, contents ] return [ :attlistdecl, element, pairs, contents ]
when NOTATIONDECL_START when NOTATIONDECL_START
md = nil base_error_message = "Malformed notation declaration"
if @source.match( PUBLIC ) unless @source.match(/\A\s*<!NOTATION\s+/um, true)
md = @source.match( PUBLIC, true ) if @source.match(/\A\s*<!NOTATION\s*>/um)
vals = [md[1],md[2],md[4],md[6]] message = "#{base_error_message}: name is missing"
elsif @source.match( SYSTEM ) else
md = @source.match( SYSTEM, true ) message = "#{base_error_message}: invalid declaration name"
vals = [md[1],md[2],nil,md[4]] end
else raise REXML::ParseException.new(message, @source)
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
end end
return [ :notationdecl, *vals ] name = parse_name(base_error_message)
id = parse_id(base_error_message,
accept_external_id: true,
accept_public_id: true)
unless @source.match(/\A\s*>/um, true)
message = "#{base_error_message}: garbage before end >"
raise REXML::ParseException.new(message, @source)
end
return [:notationdecl, name, *id]
when DOCTYPE_END when DOCTYPE_END
@document_status = :after_doctype @document_status = :after_doctype
@source.match( DOCTYPE_END, true ) @source.match( DOCTYPE_END, true )
return [ :end_doctype ] return [ :end_doctype ]
end end
end end
if @document_status == :after_doctype
@source.match(/\A\s*/um, true)
end
begin begin
@source.read if @source.buffer.size<2
if @source.buffer[0] == ?< if @source.buffer[0] == ?<
if @source.buffer[1] == ?/ if @source.buffer[1] == ?/
@nsstack.shift @nsstack.shift
last_tag = @tags.pop last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH) #md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true ) md = @source.match( CLOSE_MATCH, true )
raise REXML::ParseException.new( "Missing end tag for "+ if md and !last_tag
"'#{last_tag}' (got \"#{md[1]}\")", message = "Unexpected top-level end tag (got '#{md[1]}')"
@source) unless last_tag == md[1] raise REXML::ParseException.new(message, @source)
end
if md.nil? or last_tag != md[1]
message = "Missing end tag for '#{last_tag}'"
message << " (got '#{md[1]}')" if md
raise REXML::ParseException.new(message, @source)
end
return [ :end_element, last_tag ] return [ :end_element, last_tag ]
elsif @source.buffer[1] == ?! elsif @source.buffer[1] == ?!
md = @source.match(/\A(\s*[^>]*>)/um) md = @source.match(/\A(\s*[^>]*>)/um)
@ -374,40 +407,11 @@ module REXML
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
raise REXML::ParseException.new("malformed XML: missing tag start", @source) raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end end
attributes = {} @document_status = :in_element
prefixes = Set.new prefixes = Set.new
prefixes << md[2] if md[2] prefixes << md[2] if md[2]
@nsstack.unshift(curr_ns=Set.new) @nsstack.unshift(curr_ns=Set.new)
if md[4].size > 0 attributes, closed = parse_attributes(prefixes, curr_ns)
attrs = md[4].scan( ATTRIBUTE_PATTERN )
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
attrs.each do |attr_name, prefix, local_part, quote, value|
if prefix == "xmlns"
if local_part == "xml"
if value != "http://www.w3.org/XML/1998/namespace"
msg = "The 'xml' prefix must not be bound to any other namespace "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self )
end
elsif local_part == "xmlns"
msg = "The 'xmlns' prefix must not be declared "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self)
end
curr_ns << local_part
elsif prefix
prefixes << prefix unless prefix == "xml"
end
if attributes.has_key?(attr_name)
msg = "Duplicate attribute #{attr_name.inspect}"
raise REXML::ParseException.new(msg, @source, self)
end
attributes[attr_name] = value
end
end
# Verify that all of the prefixes have been defined # Verify that all of the prefixes have been defined
for prefix in prefixes for prefix in prefixes
unless @nsstack.find{|k| k.member?(prefix)} unless @nsstack.find{|k| k.member?(prefix)}
@ -415,7 +419,7 @@ module REXML
end end
end end
if md[6] if closed
@closed = md[1] @closed = md[1]
@nsstack.shift @nsstack.shift
else else
@ -508,6 +512,169 @@ module REXML
return false if /\AUTF-16\z/i =~ xml_declaration_encoding return false if /\AUTF-16\z/i =~ xml_declaration_encoding
true true
end end
# Consumes optional leading whitespace followed by a NAME from @source and
# returns the first capture group of that match.
#
# base_error_message:: prefix used for the ParseException message
#
# Raises REXML::ParseException with "invalid name" when some
# non-whitespace input is still pending, or "name is missing" otherwise.
def parse_name(base_error_message)
  name_match = @source.match(/\A\s*#{NAME}/um, true)
  return name_match[1] if name_match

  message =
    if @source.match(/\A\s*\S/um)
      "#{base_error_message}: invalid name"
    else
      "#{base_error_message}: name is missing"
    end
  raise REXML::ParseException.new(message, @source)
end
# Consumes an external ID or a public ID from @source and returns it as
# [id_type, public_id, system_id].
#
# base_error_message:: prefix used for the ParseException message
# accept_external_id:: when true, EXTERNAL_ID_PUBLIC and
#                      EXTERNAL_ID_SYSTEM are tried
# accept_public_id::   when true, PUBLIC_ID (a public ID literal with no
#                      system literal) is tried
#
# Patterns are tried in the order EXTERNAL_ID_PUBLIC, PUBLIC_ID,
# EXTERNAL_ID_SYSTEM. Each captured literal has its first and last
# character (the surrounding quotes) dropped. Raises
# REXML::ParseException, with details supplied by
# parse_id_invalid_details, when none of the accepted forms matches.
def parse_id(base_error_message,
             accept_external_id:,
             accept_public_id:)
  unquote = lambda { |literal| literal[1..-2] if literal }

  if accept_external_id && (md = @source.match(EXTERNAL_ID_PUBLIC, true))
    return ["PUBLIC", unquote.call(md[1]), unquote.call(md[2])]
  end
  if accept_public_id && (md = @source.match(PUBLIC_ID, true))
    return ["PUBLIC", unquote.call(md[1]), nil]
  end
  if accept_external_id && (md = @source.match(EXTERNAL_ID_SYSTEM, true))
    return ["SYSTEM", nil, unquote.call(md[1])]
  end

  details = parse_id_invalid_details(accept_external_id: accept_external_id,
                                     accept_public_id: accept_public_id)
  message = "#{base_error_message}: #{details}"
  raise REXML::ParseException.new(message, @source)
end
# Inspects the pending input in @source, without consuming it, and returns
# a short phrase describing why no acceptable ID could be read.
#
# accept_external_id:: whether an external ID was acceptable to the caller
# accept_public_id::   whether a bare public ID was acceptable to the caller
#
# Returns a String such as "public ID literal is missing" or
# "invalid ID type".
def parse_id_invalid_details(accept_external_id:,
                             accept_public_id:)
  public = /\A\s*PUBLIC/um
  system = /\A\s*SYSTEM/um

  # Input starts with PUBLIC: narrow down which literal is at fault.
  if (accept_external_id || accept_public_id) && @source.match(/#{public}/um)
    if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
      return "public ID literal is missing"
    end
    return "invalid public ID literal" unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
    return "garbage after public ID literal" unless accept_public_id
    if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
      return "system ID literal is missing"
    end
    return "invalid system literal" unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
    return "garbage after system literal"
  end

  # Input starts with SYSTEM and an external ID was acceptable.
  if accept_external_id && @source.match(/#{system}/um)
    if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
      return "system literal is missing"
    end
    return "invalid system literal" unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
    return "garbage after system literal"
  end

  # Neither branch applied: the problem is the ID type keyword itself.
  return "invalid ID type" unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
  "ID type is missing"
end
# Reads the remainder of a start tag from @source and parses its attributes.
#
# prefixes:: collection (appended to with <<) that receives every attribute
#            prefix other than "xmlns" and "xml"
# curr_ns::  collection (appended to with <<) that receives the local part
#            of every "xmlns:..." attribute
#
# Returns two values: a Hash mapping the full attribute name to its value,
# and a boolean that is true when the tag ended with "/>".
#
# Raises REXML::ParseException when the tag is not terminated, when an
# attribute is malformed, when the "xml"/"xmlns" prefixes are misused, or
# when an attribute name is repeated.
def parse_attributes(prefixes, curr_ns)
attributes = {}
closed = false
# Consume everything up to the first ">"; an optional "/" right before it
# is captured separately.
match_data = @source.match(/^(.*?)(\/)?>/um, true)
if match_data.nil?
message = "Start tag isn't ended"
raise REXML::ParseException.new(message, @source)
end
raw_attributes = match_data[1]
closed = !match_data[2].nil?
return attributes, closed if raw_attributes.nil?
return attributes, closed if raw_attributes.empty?
scanner = StringScanner.new(raw_attributes)
until scanner.eos?
# Skip whitespace; stop if nothing but whitespace was left.
if scanner.scan(/\s+/)
break if scanner.eos?
end
# Remember where this attribute starts so it can be re-scanned after more
# input has been appended to the scanner.
pos = scanner.pos
loop do
# Fast path: a complete attribute matched.
break if scanner.scan(ATTRIBUTE_PATTERN)
# Slow path: scan piece by piece to report what exactly is wrong.
unless scanner.scan(QNAME)
message = "Invalid attribute name: <#{scanner.rest}>"
raise REXML::ParseException.new(message, @source)
end
name = scanner[0]
unless scanner.scan(/\s*=\s*/um)
message = "Missing attribute equal: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
quote = scanner.scan(/['"]/)
unless quote
message = "Missing attribute value start quote: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
unless scanner.scan(/.*#{Regexp.escape(quote)}/um)
# No closing quote in the text read so far: the ">" that ended the first
# read may have been inside the attribute value. Read up to the next ">"
# from @source, put back the "/" (if one was taken) and the ">" that were
# stripped, append the new text, rewind to the attribute start and retry.
match_data = @source.match(/^(.*?)(\/)?>/um, true)
if match_data
scanner << "/" if closed
scanner << ">"
scanner << match_data[1]
scanner.pos = pos
closed = !match_data[2].nil?
next
end
message =
"Missing attribute value end quote: <#{name}>: <#{quote}>"
raise REXML::ParseException.new(message, @source)
end
end
# Read the capture groups left in the scanner by the last successful scan
# (ATTRIBUTE_PATTERN when the loop was left via the fast path).
name = scanner[1]
prefix = scanner[2]
local_part = scanner[3]
# quote = scanner[4]
value = scanner[5]
if prefix == "xmlns"
# Namespace declaration: enforce the reserved "xml" and "xmlns" prefixes.
if local_part == "xml"
if value != "http://www.w3.org/XML/1998/namespace"
msg = "The 'xml' prefix must not be bound to any other namespace "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self )
end
elsif local_part == "xmlns"
msg = "The 'xmlns' prefix must not be declared "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self)
end
curr_ns << local_part
elsif prefix
# Record the prefix as used; "xml" is never recorded.
prefixes << prefix unless prefix == "xml"
end
if attributes.has_key?(name)
msg = "Duplicate attribute #{name.inspect}"
raise REXML::ParseException.new(msg, @source, self)
end
attributes[name] = value
end
return attributes, closed
end
end end
end end
end end

View File

@ -24,7 +24,7 @@
module REXML module REXML
COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>" COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
DATE = "2008/019" DATE = "2008/019"
VERSION = "3.1.7.3" VERSION = "3.1.7.4"
REVISION = %w$Revision$[1] || '' REVISION = %w$Revision$[1] || ''
Copyright = COPYRIGHT Copyright = COPYRIGHT

View File

@ -5,17 +5,187 @@ require "rexml/document"
module REXMLTests module REXMLTests
class TestParseDocumentTypeDeclaration < Test::Unit::TestCase class TestParseDocumentTypeDeclaration < Test::Unit::TestCase
private private
def xml(internal_subset) def parse(doctype)
<<-XML REXML::Document.new(<<-XML).doctype
<!DOCTYPE r SYSTEM "urn:x-rexml:test" [ #{doctype}
#{internal_subset}
]>
<r/> <r/>
XML XML
end end
def parse(internal_subset) class TestName < self
REXML::Document.new(xml(internal_subset)).doctype def test_valid
doctype = parse(<<-DOCTYPE)
<!DOCTYPE r>
DOCTYPE
assert_equal("r", doctype.name)
end
def test_garbage_plus_before_name_at_line_start
exception = assert_raise(REXML::ParseException) do
parse(<<-DOCTYPE)
<!DOCTYPE +
r SYSTEM "urn:x-rexml:test" [
]>
DOCTYPE
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed DOCTYPE: invalid name
Line: 5
Position: 51
Last 80 unconsumed characters:
+ r SYSTEM "urn:x-rexml:test" [ ]> <r/>
DETAIL
end
end
class TestExternalID < self
class TestSystem < self
def test_left_bracket_in_system_literal
doctype = parse(<<-DOCTYPE)
<!DOCTYPE r SYSTEM "urn:x-rexml:[test" [
]>
DOCTYPE
assert_equal([
"r",
"SYSTEM",
nil,
"urn:x-rexml:[test",
],
[
doctype.name,
doctype.external_id,
doctype.public,
doctype.system,
])
end
def test_greater_than_in_system_literal
doctype = parse(<<-DOCTYPE)
<!DOCTYPE r SYSTEM "urn:x-rexml:>test" [
]>
DOCTYPE
assert_equal([
"r",
"SYSTEM",
nil,
"urn:x-rexml:>test",
],
[
doctype.name,
doctype.external_id,
doctype.public,
doctype.system,
])
end
def test_no_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-DOCTYPE)
<!DOCTYPE r SYSTEM>
DOCTYPE
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed DOCTYPE: system literal is missing
Line: 3
Position: 26
Last 80 unconsumed characters:
SYSTEM> <r/>
DETAIL
end
def test_garbage_after_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-DOCTYPE)
<!DOCTYPE r SYSTEM 'r.dtd'x'>
DOCTYPE
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed DOCTYPE: garbage after external ID
Line: 3
Position: 36
Last 80 unconsumed characters:
x'> <r/>
DETAIL
end
def test_single_quote
doctype = parse(<<-DOCTYPE)
<!DOCTYPE r SYSTEM 'r".dtd'>
DOCTYPE
assert_equal("r\".dtd", doctype.system)
end
def test_double_quote
doctype = parse(<<-DOCTYPE)
<!DOCTYPE r SYSTEM "r'.dtd">
DOCTYPE
assert_equal("r'.dtd", doctype.system)
end
end
class TestPublic < self
class TestPublicIDLiteral < self
def test_content_double_quote
exception = assert_raise(REXML::ParseException) do
parse(<<-DOCTYPE)
<!DOCTYPE r PUBLIC 'double quote " is invalid' "r.dtd">
DOCTYPE
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed DOCTYPE: invalid public ID literal
Line: 3
Position: 62
Last 80 unconsumed characters:
PUBLIC 'double quote " is invalid' "r.dtd"> <r/>
DETAIL
end
def test_single_quote
doctype = parse(<<-DOCTYPE)
<!DOCTYPE r PUBLIC 'public-id-literal' "r.dtd">
DOCTYPE
assert_equal("public-id-literal", doctype.public)
end
def test_double_quote
doctype = parse(<<-DOCTYPE)
<!DOCTYPE r PUBLIC "public'-id-literal" "r.dtd">
DOCTYPE
assert_equal("public'-id-literal", doctype.public)
end
end
class TestSystemLiteral < self
def test_garbage_after_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-DOCTYPE)
<!DOCTYPE r PUBLIC 'public-id-literal' 'system-literal'x'>
DOCTYPE
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed DOCTYPE: garbage after external ID
Line: 3
Position: 65
Last 80 unconsumed characters:
x'> <r/>
DETAIL
end
def test_single_quote
doctype = parse(<<-DOCTYPE)
<!DOCTYPE r PUBLIC "public-id-literal" 'system"-literal'>
DOCTYPE
assert_equal("system\"-literal", doctype.system)
end
def test_double_quote
doctype = parse(<<-DOCTYPE)
<!DOCTYPE r PUBLIC "public-id-literal" "system'-literal">
DOCTYPE
assert_equal("system'-literal", doctype.system)
end
end
end
end end
class TestMixed < self class TestMixed < self
@ -45,6 +215,15 @@ module REXMLTests
assert_equal([REXML::NotationDecl, REXML::AttlistDecl], assert_equal([REXML::NotationDecl, REXML::AttlistDecl],
doctype.children.collect(&:class)) doctype.children.collect(&:class))
end end
private
def parse(internal_subset)
super(<<-DOCTYPE)
<!DOCTYPE r SYSTEM "urn:x-rexml:test" [
#{internal_subset}
]>
DOCTYPE
end
end end
end end
end end

View File

@ -23,10 +23,100 @@ module REXMLTests
doctype = parse("<!NOTATION name PUBLIC 'urn:public-id'>") doctype = parse("<!NOTATION name PUBLIC 'urn:public-id'>")
assert_equal("name", doctype.notation("name").name) assert_equal("name", doctype.notation("name").name)
end end
def test_no_name
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: name is missing
Line: 5
Position: 72
Last 80 unconsumed characters:
<!NOTATION> ]> <r/>
DETAIL
end
def test_invalid_name
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION '>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid name
Line: 5
Position: 74
Last 80 unconsumed characters:
'> ]> <r/>
DETAIL
end
def test_no_id_type
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid ID type
Line: 5
Position: 77
Last 80 unconsumed characters:
> ]> <r/>
DETAIL
end
def test_invalid_id_type
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name INVALID>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid ID type
Line: 5
Position: 85
Last 80 unconsumed characters:
INVALID> ]> <r/>
DETAIL
end
end end
class TestExternalID < self class TestExternalID < self
class TestSystem < self class TestSystem < self
def test_no_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name SYSTEM>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: system literal is missing
Line: 5
Position: 84
Last 80 unconsumed characters:
SYSTEM> ]> <r/>
DETAIL
end
def test_garbage_after_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name SYSTEM 'system-literal'x'>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: garbage before end >
Line: 5
Position: 103
Last 80 unconsumed characters:
x'> ]> <r/>
DETAIL
end
def test_single_quote def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET) doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name SYSTEM 'system-literal'> <!NOTATION name SYSTEM 'system-literal'>
@ -44,6 +134,21 @@ module REXMLTests
class TestPublic < self class TestPublic < self
class TestPublicIDLiteral < self class TestPublicIDLiteral < self
def test_content_double_quote
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'double quote " is invalid' "system-literal">
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid public ID literal
Line: 5
Position: 129
Last 80 unconsumed characters:
PUBLIC 'double quote " is invalid' "system-literal"> ]> <r/>
DETAIL
end
def test_single_quote def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET) doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'public-id-literal' "system-literal"> <!NOTATION name PUBLIC 'public-id-literal' "system-literal">
@ -60,6 +165,21 @@ module REXMLTests
end end
class TestSystemLiteral < self class TestSystemLiteral < self
def test_garbage_after_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'public-id-literal' 'system-literal'x'>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: garbage before end >
Line: 5
Position: 123
Last 80 unconsumed characters:
x'> ]> <r/>
DETAIL
end
def test_single_quote def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET) doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC "public-id-literal" 'system-literal'> <!NOTATION name PUBLIC "public-id-literal" 'system-literal'>
@ -96,5 +216,66 @@ module REXMLTests
end end
end end
end end
class TestPublicID < self
def test_no_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: public ID literal is missing
Line: 5
Position: 84
Last 80 unconsumed characters:
PUBLIC> ]> <r/>
DETAIL
end
def test_literal_content_double_quote
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'double quote " is invalid in PubidLiteral'>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: invalid public ID literal
Line: 5
Position: 128
Last 80 unconsumed characters:
PUBLIC 'double quote \" is invalid in PubidLiteral'> ]> <r/>
DETAIL
end
def test_garbage_after_literal
exception = assert_raise(REXML::ParseException) do
parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'public-id-literal'x'>
INTERNAL_SUBSET
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
Malformed notation declaration: garbage before end >
Line: 5
Position: 106
Last 80 unconsumed characters:
x'> ]> <r/>
DETAIL
end
def test_literal_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'public-id-literal'>
INTERNAL_SUBSET
assert_equal("public-id-literal", doctype.notation("name").public)
end
def test_literal_double_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC "public-id-literal">
INTERNAL_SUBSET
assert_equal("public-id-literal", doctype.notation("name").public)
end
end
end end
end end

View File

@ -12,7 +12,7 @@ class TestTreeParser < Test::Unit::TestCase
parse(xml) parse(xml)
end end
assert_equal(<<-MESSAGE, exception.to_s) assert_equal(<<-MESSAGE, exception.to_s)
Missing end tag for 'root' (got "not-root") Missing end tag for 'root' (got 'not-root')
Line: 1 Line: 1
Position: #{xml.bytesize} Position: #{xml.bytesize}
Last 80 unconsumed characters: Last 80 unconsumed characters:

View File

@ -16,7 +16,6 @@ class TestUltraLightParser < Test::Unit::TestCase
nil, nil,
[:entitydecl, "name", "value"] [:entitydecl, "name", "value"]
], ],
[:text, "\n"],
[:start_element, :parent, "root", {}], [:start_element, :parent, "root", {}],
[:text, "\n"], [:text, "\n"],
], ],

View File

@ -1,4 +1,4 @@
# coding: binary # coding: utf-8
# frozen_string_literal: false # frozen_string_literal: false
require_relative "rexml_test_utils" require_relative "rexml_test_utils"
@ -995,7 +995,7 @@ EOL
document.write(s) document.write(s)
## XML Doctype ## XML Doctype
str = '<!DOCTYPE foo "bar">' str = '<!DOCTYPE foo SYSTEM "bar">'
source = REXML::Source.new(str) source = REXML::Source.new(str)
doctype = REXML::DocType.new(source) doctype = REXML::DocType.new(source)
document.add(doctype) document.add(doctype)
@ -1274,14 +1274,15 @@ EOL
def test_ticket_21 def test_ticket_21
src = "<foo bar=value/>" src = "<foo bar=value/>"
assert_raise( ParseException, "invalid XML should be caught" ) { exception = assert_raise(ParseException) do
Document.new(src) Document.new(src)
}
begin
Document.new(src)
rescue
assert_match( /missing attribute quote/, $!.message )
end end
assert_equal(<<-DETAIL, exception.to_s)
Missing attribute value start quote: <bar>
Line: 1
Position: 16
Last 80 unconsumed characters:
DETAIL
end end
def test_ticket_63 def test_ticket_63

View File

@ -4,65 +4,111 @@ require 'rexml/document'
module REXMLTests module REXMLTests
class TestDocTypeAccessor < Test::Unit::TestCase class TestDocTypeAccessor < Test::Unit::TestCase
def setup def setup
@sysid = "urn:x-test:sysid1" @sysid = "urn:x-test:sysid1"
@notid1 = "urn:x-test:notation1" @notation_id1 = "urn:x-test:notation1"
@notid2 = "urn:x-test:notation2" @notation_id2 = "urn:x-test:notation2"
document_string1 = <<-"XMLEND" xml_system = <<-XML
<!DOCTYPE r SYSTEM "#{@sysid}" [ <!DOCTYPE root SYSTEM "#{@sysid}" [
<!NOTATION n1 SYSTEM "#{@notid1}"> <!NOTATION n1 SYSTEM "#{@notation_id1}">
<!NOTATION n2 SYSTEM "#{@notid2}"> <!NOTATION n2 SYSTEM "#{@notation_id2}">
]> ]>
<r/> <root/>
XMLEND XML
@doctype1 = REXML::Document.new(document_string1).doctype @doc_type_system = REXML::Document.new(xml_system).doctype
@pubid = "TEST_ID" @pubid = "TEST_ID"
document_string2 = <<-"XMLEND" xml_public_system = <<-XML
<!DOCTYPE r PUBLIC "#{@pubid}"> <!DOCTYPE root PUBLIC "#{@pubid}" "#{@sysid}">
<r/> <root/>
XMLEND XML
@doctype2 = REXML::Document.new(document_string2).doctype @doc_type_public_system = REXML::Document.new(xml_public_system).doctype
document_string3 = <<-"XMLEND"
<!DOCTYPE r PUBLIC "#{@pubid}" "#{@sysid}">
<r/>
XMLEND
@doctype3 = REXML::Document.new(document_string3).doctype
end end
def test_public def test_public
assert_equal(nil, @doctype1.public) assert_equal([
assert_equal(@pubid, @doctype2.public) nil,
assert_equal(@pubid, @doctype3.public) @pubid,
],
[
@doc_type_system.public,
@doc_type_public_system.public,
])
end
def test_to_s
assert_equal("<!DOCTYPE root PUBLIC \"#{@pubid}\" \"#{@sysid}\">",
@doc_type_public_system.to_s)
end end
def test_system def test_system
assert_equal(@sysid, @doctype1.system) assert_equal([
assert_equal(nil, @doctype2.system) @sysid,
assert_equal(@sysid, @doctype3.system) @sysid,
],
[
@doc_type_system.system,
@doc_type_public_system.system,
])
end end
def test_notation def test_notation
assert_equal(@notid1, @doctype1.notation("n1").system) assert_equal([
assert_equal(@notid2, @doctype1.notation("n2").system) @notation_id1,
@notation_id2,
],
[
@doc_type_system.notation("n1").system,
@doc_type_system.notation("n2").system,
])
end end
def test_notations def test_notations
notations = @doctype1.notations notations = @doc_type_system.notations
assert_equal(2, notations.length) assert_equal([
assert_equal(@notid1, find_notation(notations, "n1").system) @notation_id1,
assert_equal(@notid2, find_notation(notations, "n2").system) @notation_id2,
],
notations.collect(&:system))
end end
end
def find_notation(notations, name) class TestDocType < Test::Unit::TestCase
notations.find { |notation| class TestExternalID < self
name == notation.name class TestSystem < self
} class TestSystemLiteral < self
def test_to_s
doctype = REXML::DocType.new(["root", "SYSTEM", nil, "root.dtd"])
assert_equal("<!DOCTYPE root SYSTEM \"root.dtd\">",
doctype.to_s)
end
end
end
class TestPublic < self
class TestPublicIDLiteral < self
def test_to_s
doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root.dtd"])
assert_equal("<!DOCTYPE root PUBLIC \"pub\" \"root.dtd\">",
doctype.to_s)
end
end
class TestSystemLiteral < self
def test_to_s
doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root.dtd"])
assert_equal("<!DOCTYPE root PUBLIC \"pub\" \"root.dtd\">",
doctype.to_s)
end
def test_to_s_double_quote
doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root\".dtd"])
assert_equal("<!DOCTYPE root PUBLIC \"pub\" 'root\".dtd'>",
doctype.to_s)
end
end
end
end end
end end
class TestNotationDeclPublic < Test::Unit::TestCase class TestNotationDeclPublic < Test::Unit::TestCase
@ -77,11 +123,26 @@ module REXMLTests
decl(@id, nil).to_s) decl(@id, nil).to_s)
end end
def test_to_s_pubid_literal_include_apostrophe
assert_equal("<!NOTATION #{@name} PUBLIC \"#{@id}'\">",
decl("#{@id}'", nil).to_s)
end
def test_to_s_with_uri def test_to_s_with_uri
assert_equal("<!NOTATION #{@name} PUBLIC \"#{@id}\" \"#{@uri}\">", assert_equal("<!NOTATION #{@name} PUBLIC \"#{@id}\" \"#{@uri}\">",
decl(@id, @uri).to_s) decl(@id, @uri).to_s)
end end
def test_to_s_system_literal_include_apostrophe
assert_equal("<!NOTATION #{@name} PUBLIC \"#{@id}\" \"system'literal\">",
decl(@id, "system'literal").to_s)
end
def test_to_s_system_literal_include_double_quote
assert_equal("<!NOTATION #{@name} PUBLIC \"#{@id}\" 'system\"literal'>",
decl(@id, "system\"literal").to_s)
end
private private
def decl(id, uri) def decl(id, uri)
REXML::NotationDecl.new(@name, "PUBLIC", id, uri) REXML::NotationDecl.new(@name, "PUBLIC", id, uri)
@ -99,9 +160,19 @@ module REXMLTests
decl(@id).to_s) decl(@id).to_s)
end end
def test_to_s_include_apostrophe
assert_equal("<!NOTATION #{@name} SYSTEM \"#{@id}'\">",
decl("#{@id}'").to_s)
end
def test_to_s_include_double_quote
assert_equal("<!NOTATION #{@name} SYSTEM '#{@id}\"'>",
decl("#{@id}\"").to_s)
end
private private
def decl(id) def decl(id)
REXML::NotationDecl.new(@name, "SYSTEM", id, nil) REXML::NotationDecl.new(@name, "SYSTEM", nil, id)
end end
end end
end end

View File

@ -1,10 +1,10 @@
#define RUBY_VERSION "2.5.9" #define RUBY_VERSION "2.5.9"
#define RUBY_RELEASE_DATE "2021-02-16" #define RUBY_RELEASE_DATE "2021-04-05"
#define RUBY_PATCHLEVEL 228 #define RUBY_PATCHLEVEL 229
#define RUBY_RELEASE_YEAR 2021 #define RUBY_RELEASE_YEAR 2021
#define RUBY_RELEASE_MONTH 2 #define RUBY_RELEASE_MONTH 4
#define RUBY_RELEASE_DAY 16 #define RUBY_RELEASE_DAY 5
#include "ruby/version.h" #include "ruby/version.h"