Merge pull request #1754 from cantino/ignore_empty_author

Ignore empty author and link entries in RssAgent.

Fixes #1753.
This commit is contained in:
Akinori MUSHA 2016-10-27 13:07:21 +09:00 committed by GitHub
commit 4f93db60e7
3 changed files with 76 additions and 15 deletions

View file

@ -8,9 +8,13 @@ module FeedjiraExtension
ENCLOSURE_ATTRS = %i[url type length]
class Author < Struct.new(*AUTHOR_ATTRS)
# True when no member of this struct holds a non-nil value.
# Note that blank strings and `false` still count as values here.
def empty?
  none? { |value| !value.nil? }
end
def to_json(options = nil)
members.flat_map { |key|
if value = self[key].presence
each_pair.flat_map { |key, value|
if value.presence
case key
when :email
"<#{value}>"
@ -45,8 +49,8 @@ module FeedjiraExtension
rescue
self.name = content
else
self.name = addr.name
self.email = addr.address
self.name = addr.name rescue nil
self.email = addr.address rescue nil
end
end
@ -76,6 +80,12 @@ module FeedjiraExtension
attribute attr
end
# True when every attribute listed in LINK_ATTRS reads back as nil.
# Stops at the first attribute that has a non-nil value.
def empty?
  LINK_ATTRS.none? { |name| !__send__(name).nil? }
end
def to_json(options = nil)
LINK_ATTRS.each_with_object({}) { |key, hash|
if value = __send__(key)
@ -90,10 +100,20 @@ module FeedjiraExtension
value :href
# A link counts as empty unless its href is a String. Anything else
# (nil, or a non-string placeholder such as :no_buffer) is treated as
# "no link".
def empty?
  case href
  when String then false
  else true
  end
end
def to_json(options = nil)
{
href: href
}.to_json(options)
case href
when String
{ href: href }
else
# Ignore non-string values, because SaxMachine leaks its
# internal value :no_buffer when the content of an element
# is empty.
{}
end.to_json(options)
end
end
@ -110,10 +130,14 @@ module FeedjiraExtension
].each do |name|
sax_config.top_level_elements[name].clear
elements name, class: RssAuthor, as: :authors
elements name, class: RssAuthor, as: :_authors
end
else
elements :author, class: AtomAuthor, as: :authors
elements :author, class: AtomAuthor, as: :_authors
end
# Returns the raw `_authors` collection with every entry that reports
# itself as empty filtered out.
def authors
  _authors.reject { |author| author.empty? }
end
def alternate_link
@ -166,14 +190,18 @@ module FeedjiraExtension
when /FeedBurner/
elements :'atok10:link', class: AtomLink, as: :atom_links
def links
@links ||= [*rss_links, *atom_links]
end
def _links
[*rss_links, *atom_links]
end
else
alias_method :links, :rss_links
alias_method :_links, :rss_links
end
else
elements :link, class: AtomLink, as: :links
elements :link, class: AtomLink, as: :_links
end
# Returns the raw `_links` collection with every entry that reports
# itself as empty filtered out.
def links
  _links.reject { |link| link.empty? }
end
def alternate_link

View file

@ -15,6 +15,7 @@
<category>csv</category>
<category>crossplatform</category>
<category>utilities</category>
<dc:creator></dc:creator>
</item>
<item>
<title>Gemini</title>

View file

@ -8,8 +8,10 @@ describe Agents::RssAgent do
}
stub_request(:any, /github.com/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")), :status => 200)
stub_request(:any, /bad.github.com/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")).gsub(/<link [^>]+\/>/, '<link/>'), status: 200)
stub_request(:any, /SlickdealsnetFP/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/slickdeals.atom")), :status => 200)
stub_request(:any, /onethingwell.org/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/onethingwell.atom")), :status => 200)
stub_request(:any, /onethingwell.org/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/onethingwell.rss")), status: 200)
stub_request(:any, /bad.onethingwell.org/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/onethingwell.rss")).gsub(/(?<=<link>)[^<]*/, ''), status: 200)
end
let(:agent) do
@ -251,6 +253,36 @@ describe Agents::RssAgent do
expect(event.payload['enclosure']).to eq({ "url" => "http://c.1tw.org/images/2015/itsy.png", "type" => "image/png", "length" => "48249" })
expect(event.payload['image']).to eq("http://c.1tw.org/images/2015/itsy.png")
end
# After a check, the first event's 'authors' payload must be an empty list.
it "ignores an empty author" do
  agent.check
  expect(agent.events.first.payload['authors']).to eq([])
end
# Runs the agent against the bad.onethingwell.org URL and checks that
# the resulting 'links' payload is an empty list.
context 'with an empty link in RSS' do
  before { @valid_options['url'] = 'http://bad.onethingwell.org/rss' }

  it "does not leak :no_buffer" do
    agent.check
    expect(agent.events.first.payload['links']).to eq([])
  end
end
# Runs the agent against the bad.github.com Atom URL and checks that the
# resulting 'links' payload is an empty list. The description says "Atom"
# (not "RSS") because the URL targets an .atom feed; this also keeps the
# context name distinct from the RSS case in the same describe block.
context 'with an empty link in Atom' do
  before do
    @valid_options['url'] = "https://bad.github.com/cantino/huginn/commits/master.atom"
  end

  it "does not leak :no_buffer" do
    agent.check
    event = agent.events.first
    expect(event.payload['links']).to eq([])
  end
end
end
describe 'logging errors with the feed url' do