From cb0e8f68f93a9b354cb367e90711679217c9d671 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Thu, 27 Oct 2016 07:30:08 +0900 Subject: [PATCH 1/7] Rename onethingwell.atom to .rss because it is actually an RSS file --- spec/data_fixtures/{onethingwell.atom => onethingwell.rss} | 0 spec/models/agents/rss_agent_spec.rb | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename spec/data_fixtures/{onethingwell.atom => onethingwell.rss} (100%) diff --git a/spec/data_fixtures/onethingwell.atom b/spec/data_fixtures/onethingwell.rss similarity index 100% rename from spec/data_fixtures/onethingwell.atom rename to spec/data_fixtures/onethingwell.rss diff --git a/spec/models/agents/rss_agent_spec.rb b/spec/models/agents/rss_agent_spec.rb index a0f7b90e..4a99cf66 100644 --- a/spec/models/agents/rss_agent_spec.rb +++ b/spec/models/agents/rss_agent_spec.rb @@ -9,7 +9,7 @@ describe Agents::RssAgent do stub_request(:any, /github.com/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")), :status => 200) stub_request(:any, /SlickdealsnetFP/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/slickdeals.atom")), :status => 200) - stub_request(:any, /onethingwell.org/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/onethingwell.atom")), :status => 200) + stub_request(:any, /onethingwell.org/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/onethingwell.rss")), status: 200) end let(:agent) do From 445665ee3a36a5d0791d6870605cea80880016c3 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Thu, 27 Oct 2016 08:09:57 +0900 Subject: [PATCH 2/7] Add a failing test for #1753 --- spec/data_fixtures/onethingwell.rss | 1 + spec/models/agents/rss_agent_spec.rb | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/spec/data_fixtures/onethingwell.rss b/spec/data_fixtures/onethingwell.rss index b47e8586..ffea9a1a 100644 --- a/spec/data_fixtures/onethingwell.rss +++ b/spec/data_fixtures/onethingwell.rss @@ -15,6 +15,7 @@ csv crossplatform utilities + Gemini diff --git a/spec/models/agents/rss_agent_spec.rb b/spec/models/agents/rss_agent_spec.rb index 4a99cf66..67a4a520 100644 --- a/spec/models/agents/rss_agent_spec.rb +++ b/spec/models/agents/rss_agent_spec.rb @@ -251,6 +251,12 @@ describe Agents::RssAgent do expect(event.payload['enclosure']).to eq({ "url" => "http://c.1tw.org/images/2015/itsy.png", "type" => "image/png", "length" => "48249" }) expect(event.payload['image']).to eq("http://c.1tw.org/images/2015/itsy.png") end + + it "ignores an empty author" do + agent.check + event = agent.events.first + expect(event.payload['authors']).to eq([]) + end end describe 'logging errors with the feed url' do From 852f39d480db6aef93e14d958fb24b1921de5d26 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Thu, 27 Oct 2016 08:11:10 +0900 Subject: [PATCH 3/7] Rescue error from Mail::Address#name and #address `Mail::Address.new('')` does not raise any error but calling `name` on the created instance does. --- lib/feedjira_extension.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/feedjira_extension.rb b/lib/feedjira_extension.rb index 1ff8e189..8e86f71f 100644 --- a/lib/feedjira_extension.rb +++ b/lib/feedjira_extension.rb @@ -45,8 +45,8 @@ module FeedjiraExtension rescue self.name = content else - self.name = addr.name - self.email = addr.address + self.name = addr.name rescue nil + self.email = addr.address rescue nil end end From e5c938aa85e57516fe25e846c1baf801924f6c68 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Thu, 27 Oct 2016 08:12:48 +0900 Subject: [PATCH 4/7] Exclude empty entries from authors --- lib/feedjira_extension.rb | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/feedjira_extension.rb b/lib/feedjira_extension.rb index 8e86f71f..8e802386 100644 --- a/lib/feedjira_extension.rb +++ b/lib/feedjira_extension.rb @@ -8,6 +8,10 @@ module FeedjiraExtension ENCLOSURE_ATTRS = %i[url type length] class Author < Struct.new(*AUTHOR_ATTRS) + def empty? + all?(&:nil?) + end + def to_json(options = nil) members.flat_map { |key| if value = self[key].presence @@ -110,10 +114,14 @@ module FeedjiraExtension ].each do |name| sax_config.top_level_elements[name].clear - elements name, class: RssAuthor, as: :authors + elements name, class: RssAuthor, as: :_authors end else - elements :author, class: AtomAuthor, as: :authors + elements :author, class: AtomAuthor, as: :_authors + end + + def authors + _authors.reject(&:empty?) end def alternate_link From 5f5e24655243c22a71df315b3b9bcd36a0ce4fc3 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Thu, 27 Oct 2016 08:13:43 +0900 Subject: [PATCH 5/7] Use Struct#each_pair --- lib/feedjira_extension.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/feedjira_extension.rb b/lib/feedjira_extension.rb index 8e802386..3cc9209f 100644 --- a/lib/feedjira_extension.rb +++ b/lib/feedjira_extension.rb @@ -13,8 +13,8 @@ module FeedjiraExtension end def to_json(options = nil) - members.flat_map { |key| - if value = self[key].presence + each_pair.flat_map { |key, value| + if value.presence case key when :email "<#{value}>" From 2bb97b53bc2ded964fcf8fcc98465ae0a2775f3c Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Thu, 27 Oct 2016 09:06:00 +0900 Subject: [PATCH 6/7] Add failing specs for empty `` elements --- spec/models/agents/rss_agent_spec.rb | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/spec/models/agents/rss_agent_spec.rb b/spec/models/agents/rss_agent_spec.rb index 67a4a520..0d01cea8 100644 --- a/spec/models/agents/rss_agent_spec.rb +++ b/spec/models/agents/rss_agent_spec.rb @@ -8,8 +8,10 @@ describe Agents::RssAgent do } stub_request(:any, /github.com/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")), :status => 200) + stub_request(:any, /bad.github.com/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")).gsub(/]+\/>/, ''), status: 200) stub_request(:any, /SlickdealsnetFP/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/slickdeals.atom")), :status => 200) stub_request(:any, /onethingwell.org/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/onethingwell.rss")), status: 200) + stub_request(:any, /bad.onethingwell.org/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/onethingwell.rss")).gsub(/(?<=)[^<]*/, ''), status: 200) end let(:agent) do @@ -257,6 +259,30 @@ describe Agents::RssAgent do event = agent.events.first expect(event.payload['authors']).to eq([]) end + + context 'with an empty link in RSS' do + before do + @valid_options['url'] = 'http://bad.onethingwell.org/rss' + end + + it "does not leak :no_buffer" do + agent.check + event = agent.events.first + expect(event.payload['links']).to eq([]) + end + end + + context 'with an empty link in RSS' do + before do + @valid_options['url'] = "https://bad.github.com/cantino/huginn/commits/master.atom" + end + + it "does not leak :no_buffer" do + agent.check + event = agent.events.first + expect(event.payload['links']).to eq([]) + end + end end describe 'logging errors with the feed url' do From 4d101327098f10c8a8623e071e70d22c23d2c130 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Thu, 27 Oct 2016 09:06:57 +0900 Subject: [PATCH 7/7] Fix a bug where an empty `` is wrongly parsed Due to a problem in sax-machine's internals, an empty `` in RSS would be parsed to JSON as `{ "href": "no_buffer" }`. Now empty `` elements in RSS and ATOM are simply ignored just like other collection elements like ``. --- lib/feedjira_extension.rb | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/lib/feedjira_extension.rb b/lib/feedjira_extension.rb index 3cc9209f..a9a9ac11 100644 --- a/lib/feedjira_extension.rb +++ b/lib/feedjira_extension.rb @@ -80,6 +80,12 @@ module FeedjiraExtension attribute attr end + def empty? + LINK_ATTRS.all? { |attr| + __send__(attr).nil? + } + end + def to_json(options = nil) LINK_ATTRS.each_with_object({}) { |key, hash| if value = __send__(key) @@ -94,10 +100,20 @@ module FeedjiraExtension value :href + def empty? + !href.is_a?(String) + end + def to_json(options = nil) - { - href: href - }.to_json(options) + case href + when String + { href: href } + else + # Ignore non-string values, because SaxMachine leaks its + # internal value :no_buffer when the content of an element + # is empty. + {} + end.to_json(options) end end @@ -174,14 +190,18 @@ module FeedjiraExtension when /FeedBurner/ elements :'atok10:link', class: AtomLink, as: :atom_links - def links - @links ||= [*rss_links, *atom_links] - end + def _links + [*rss_links, *atom_links] + end else - alias_method :links, :rss_links + alias_method :_links, :rss_links end else - elements :link, class: AtomLink, as: :links + elements :link, class: AtomLink, as: :_links + end + + def links + _links.reject(&:empty?) end def alternate_link