Improve Utils.normalize_uri (#1719)

* Improve Utils.normalize_uri

Globally percent-encoding generally unsafe characters in a URL does not
fix an invalid authority or path, so fall back to Addressable::URI to
normalize those components when necessary.

This should fix #1701.

* Remove an unused function

* Fix the test case to make sure an IPv6 address is supported
This commit is contained in:
Akinori MUSHA 2016-10-05 14:17:04 +09:00 committed by GitHub
parent 4150b1e29c
commit 7e79d576b5
3 changed files with 33 additions and 8 deletions

View file

@ -1,5 +1,6 @@
require 'jsonpath'
require 'cgi'
require 'addressable/uri'
module Utils
def self.unindent(s)
@ -25,11 +26,29 @@ module Utils
begin
URI(uri)
rescue URI::Error
URI(uri.to_s.gsub(/[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]+/) { |unsafe|
unsafe.bytes.each_with_object(String.new) { |uc, s|
s << sprintf('%%%02X', uc)
}
}.force_encoding(Encoding::US_ASCII))
begin
URI(uri.to_s.gsub(/[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]+/) { |unsafe|
unsafe.bytes.each_with_object(String.new) { |uc, s|
s << sprintf('%%%02X', uc)
}
}.force_encoding(Encoding::US_ASCII))
rescue URI::Error => e
begin
auri = Addressable::URI.parse(uri.to_s)
rescue
# Do not leak Addressable::URI::InvalidURIError which
# callers might not expect.
raise e
else
# Addressable::URI#normalize! modifies the query and
# fragment components beyond escaping unsafe characters, so
# avoid using it. Otherwise `?a[]=%2F` would be normalized
# as `?a%5B%5D=/`, for example.
auri.site = auri.normalized_site
auri.path = auri.normalized_path
URI(auri.to_s)
end
end
end
end

View file

@ -12,6 +12,7 @@
<li><a href="https://www.google.ca/search?q=위키백과:대문">unicode param</a></li>
<li><a href="http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8">percent encoded url</a></li>
<li><a href="https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8">percent encoded param</a></li>
<li><a href="http://[::1]/path[]?query[]=foo">brackets</a></li>
</ul>
</body>
</html>
</html>

View file

@ -1105,8 +1105,8 @@ fire: hot
describe "#check" do
before do
expect { @checker.check }.to change { Event.count }.by(7)
@events = Event.last(7)
expect { @checker.check }.to change { Event.count }.by(8)
@events = Event.last(8)
end
it "should check hostname" do
@ -1143,6 +1143,11 @@ fire: hot
event = @events[6]
expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
end
it "should check url with unescaped brackets in the path component" do
event = @events[7]
expect(event.payload['url']).to eq("http://[::1]/path%5B%5D?query[]=foo")
end
end
end
end