mirror of
https://github.com/Fishwaldo/huginn.git
synced 2025-03-15 19:31:26 +00:00
Merge pull request #1813 from cantino/fix_decoding_in_rss_agent
Fix a double-decoding problem in RssAgent
This commit is contained in:
commit
b8d88aa9a3
3 changed files with 41 additions and 1 deletions
|
@ -132,7 +132,7 @@ module Agents
|
|||
begin
|
||||
response = faraday.get(url)
|
||||
if response.success?
|
||||
feed = Feedjira::Feed.parse(response.body)
|
||||
feed = Feedjira::Feed.parse(preprocessed_body(response))
|
||||
new_events.concat feed_to_events(feed)
|
||||
else
|
||||
error "Failed to fetch #{url}: #{response.inspect}"
|
||||
|
@ -170,6 +170,20 @@ module Agents
|
|||
require 'feedjira_extension'
|
||||
end
|
||||
|
||||
# Returns the body of +response+ prepared for the feed parser.
#
# When the body's encoding is already known (anything other than
# ASCII-8BIT), the +encoding+ pseudo-attribute is removed from a leading
# XML declaration so the parser does not try to decode the content again
# from that declaration.  A binary (ASCII-8BIT) body is returned untouched
# so the SAX parser can detect the encoding from the content itself.
#
# The body string is modified in place when a declaration is rewritten.
#
# @param response [#body] an object whose +body+ returns a String
# @return [String] the same body object, possibly modified
def preprocessed_body(response)
  body = response.body

  # Encoding is unknown from the Content-Type, so let the SAX parser
  # detect it from the content.
  return body if body.encoding == Encoding::ASCII_8BIT

  # Encoding is already known, so do not let the parser detect it from
  # the XML declaration in the content.
  #
  # Attribute values are matched as quoted strings that cannot contain
  # their own quote character.  Matching them with `.*?` and a
  # backreference would let the regex backtrack past the end of the
  # declaration and strip an `encoding="..."` attribute from a later
  # element on the same line.
  body.sub!(
    /(\A\u{FEFF}?\s*<\?xml(?:\s+\w+\s*=\s*(?:'[^']*'|"[^"]*"))*?)\s+encoding\s*=\s*(?:'[^']*'|"[^"]*")/,
    '\\1'
  )
  body
end
|
||||
|
||||
def feed_data(feed)
|
||||
type =
|
||||
case feed.class.name
|
||||
|
|
13
spec/data_fixtures/iso-8859-1.rss
Normal file
13
spec/data_fixtures/iso-8859-1.rss
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1" ?>
|
||||
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||
<channel>
|
||||
<title>Zeuhl</title>
|
||||
<link>http://example.net/</link>
|
||||
<item>
|
||||
<title>Mëkanïk Zaïn</title>
|
||||
<link>http://example.net/post/1</link>
|
||||
<guid>http://example.net/post/1</guid>
|
||||
<pubDate>Mon, 21 Nov 2016 17:00:10 +0100</pubDate>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -12,6 +12,7 @@ describe Agents::RssAgent do
|
|||
stub_request(:any, /SlickdealsnetFP/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/slickdeals.atom")), :status => 200)
|
||||
stub_request(:any, /onethingwell.org/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/onethingwell.rss")), status: 200)
|
||||
stub_request(:any, /bad.onethingwell.org/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/onethingwell.rss")).gsub(/(?<=<link>)[^<]*/, ''), status: 200)
|
||||
stub_request(:any, /iso-8859-1/).to_return(body: File.binread(Rails.root.join("spec/data_fixtures/iso-8859-1.rss")), headers: { 'Content-Type' => 'application/rss+xml; charset=ISO-8859-1' }, status: 200)
|
||||
end
|
||||
|
||||
let(:agent) do
|
||||
|
@ -283,6 +284,18 @@ describe Agents::RssAgent do
|
|||
expect(event.payload['links']).to eq([])
|
||||
end
|
||||
end
|
||||
|
||||
# Regression spec: the feed served for this URL carries a charset in the
# stubbed Content-Type header and an encoding in its XML declaration.
context 'with the encoding declared in both headers and the content' do
|
||||
before do
|
||||
# Matches the /iso-8859-1/ request stub, which serves the ISO-8859-1 fixture.
@valid_options['url'] = 'http://example.org/iso-8859-1.rss'
|
||||
end
|
||||
|
||||
it "decodes the content properly" do
|
||||
agent.check
|
||||
event = agent.events.first
|
||||
# The non-ASCII characters in the title must come through intact.
expect(event.payload['title']).to eq('Mëkanïk Zaïn')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe 'logging errors with the feed url' do
|
||||
|
|
Loading…
Add table
Reference in a new issue