From aaf76b081b5dde7d88bd3f63349bb8c5884e2083 Mon Sep 17 00:00:00 2001 From: "Davis W. Frank" Date: Thu, 14 Nov 2024 13:53:21 -0600 Subject: [PATCH 1/2] Updates Node#canonicalize for keyword args; Adds a full test; Still needs the Document#canonicalize C func to be updated --- lib/nokogiri/xml/node.rb | 2 +- test/xml/test_c14n.rb | 107 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 1 deletion(-) diff --git a/lib/nokogiri/xml/node.rb b/lib/nokogiri/xml/node.rb index 2c9d7f12d26..c90a64cabc0 100644 --- a/lib/nokogiri/xml/node.rb +++ b/lib/nokogiri/xml/node.rb @@ -1489,7 +1489,7 @@ def write_xml_to(io, options = {}) write_to(io, options) end - def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false) + def canonicalize(mode_ = XML::XML_C14N_1_0, inclusive_namespaces_ = nil, with_comments_ = false, mode: mode_, inclusive_namespaces: inclusive_namespaces_, with_comments: with_comments_) c14n_root = self document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent| tn = node.is_a?(XML::Node) ? node : parent diff --git a/test/xml/test_c14n.rb b/test/xml/test_c14n.rb index e41e051bcc7..237049ae5df 100644 --- a/test/xml/test_c14n.rb +++ b/test/xml/test_c14n.rb @@ -32,6 +32,13 @@ def test_3_1 assert_match(/Comment/, c14n) c14n = doc.canonicalize(nil, nil, false) refute_match(/Comment/, c14n) + + # with keyword args + # TODO: when Document#canonicalize is updated to keyword args, these tests should pass + # c14n = doc.canonicalize(mode: nil, inclusive_namespaces: nil, with_comments: true) + # assert_match(/Comment/, c14n) + # c14n = doc.canonicalize(mode: nil, inclusive_namespaces: nil, with_comments: false) + # refute_match(/Comment/, c14n) end def test_exclude_block_params @@ -199,6 +206,102 @@ def test_c14n_modes end end + def test_c14n_modes_with_keyword_args + # http://www.w3.org/TR/xml-exc-c14n/#sec-Enveloping + + doc1 = Nokogiri.XML(<<~EOXML) + + + + + + EOXML + node1 = doc1.at_xpath("//n1:elem2", { "n1" => "http://example.net" }) + + doc2 = Nokogiri.XML(<<~EOXML) + + + + + + + EOXML + node2 = doc2.at_xpath("//n1:elem2", { "n1" => "http://example.net" }) + + expected = <<~EOF.strip + + + + EOF + c14n = node1.canonicalize + assert_equal(expected, c14n) + + expected = <<~EOF.strip + + + + + EOF + c14n = node2.canonicalize + assert_equal(expected, c14n) + c14n = node2.canonicalize(mode: XML::XML_C14N_1_0) + assert_equal(expected, c14n) + assert_raises(RuntimeError) do + node2.canonicalize(mode: XML::XML_C14N_1_0, inclusive_namespaces: ["n2"]) + end + + expected = <<~EOF.strip + + + + EOF + c14n = node1.canonicalize(mode: XML::XML_C14N_EXCLUSIVE_1_0) + assert_equal(expected, c14n) + + expected = <<~EOF.strip + + + + + EOF + c14n = node2.canonicalize(mode: XML::XML_C14N_EXCLUSIVE_1_0) + assert_equal(expected, c14n) + + expected = <<~EOF.strip + + + + + EOF + c14n = node2.canonicalize(mode: XML::XML_C14N_EXCLUSIVE_1_0, inclusive_namespaces: ["n2"]) + assert_equal(expected, c14n) + + expected = <<~EOF.strip + + + + + EOF + c14n = node2.canonicalize(mode: XML::XML_C14N_EXCLUSIVE_1_0, inclusive_namespaces: ["n2", "n4"]) + assert_equal(expected, c14n) + + expected = <<~EOF.strip + + + + + EOF + c14n = node2.canonicalize(mode: XML::XML_C14N_1_1) + assert_equal(expected, c14n) + assert_raises(RuntimeError) do + node2.canonicalize(mode: XML::XML_C14N_1_1, inclusive_namespaces: ["n2"]) + end + end + def test_wrong_params xml = "" doc = Nokogiri.XML(xml) @@ -206,6 +309,10 @@ def test_wrong_params assert_raises(TypeError) { doc.canonicalize(:wrong_type) } assert_raises(TypeError) { doc.canonicalize(nil, :wrong_type) } doc.canonicalize(nil, nil, :wrong_type) + + # with keyword args + # assert_raises(TypeError) { doc.canonicalize(nil, inclusive_namespaces: :wrong_type) } + # doc.canonicalize(nil, inclusive_namespaces: nil, with_comments: :wrong_type) end end end From 8363add00784dbae22e15b8e36842abb2f3577ae Mon Sep 17 00:00:00 2001 From: "Davis W. Frank" Date: Thu, 14 Nov 2024 14:33:33 -0600 Subject: [PATCH 2/2] Updates Node#parse with keyword arguments; updates tests to test parsing HTML4 and XML parsing w/ positional and keyword args --- lib/nokogiri/xml/node.rb | 2 +- test/xml/test_node.rb | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/lib/nokogiri/xml/node.rb b/lib/nokogiri/xml/node.rb index c90a64cabc0..41538a80ed9 100644 --- a/lib/nokogiri/xml/node.rb +++ b/lib/nokogiri/xml/node.rb @@ -1102,7 +1102,7 @@ def fragment(tags) # Parse +string_or_io+ as a document fragment within the context of # *this* node. Returns a XML::NodeSet containing the nodes parsed from # +string_or_io+. - def parse(string_or_io, options = nil) + def parse(string_or_io, options_ = nil, options: options_) ## # When the current node is unparented and not an element node, use the # document as the parsing context instead. Otherwise, the in-context diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb index abb11161a34..af8539825bd 100644 --- a/test/xml/test_node.rb +++ b/test/xml/test_node.rb @@ -153,6 +153,36 @@ def test_node_context_parsing_of_malformed_html_fragment_without_recover_is_not_ end end + def test_node_context_parsing_of_malformed_html_fragment_without_recover_is_not_corrected_keyword + skip("libxml2 2.14.0 no longer raises this error") if Nokogiri.uses_libxml?(">= 2.14.0") + + doc = HTML4.parse("
") + context_node = doc.at_css("div") + assert_raises(Nokogiri::XML::SyntaxError) do + context_node.parse("
", options: ParseOptions.new) + end + end + + def test_node_context_parsing_of_malformed_xml_fragment_without_recover_is_not_corrected + skip("libxml2 2.14.0 no longer raises this error") if Nokogiri.uses_libxml?(">= 2.14.0") + + doc = XML.parse("
", &:strict) + end + end + + def test_node_context_parsing_of_malformed_xml_fragment_without_recover_is_not_corrected_keyword + skip("libxml2 2.14.0 no longer raises this error") if Nokogiri.uses_libxml?(">= 2.14.0") + + doc = XML.parse("
", options: ParseOptions.new) + end + end + def test_node_context_parsing_of_malformed_xml_fragment_uses_the_right_class_to_recover doc = XML.parse("
") context_node = doc.at_css("div")