From 276420866e42b3708e3cdfdca2237d8c716e522a Mon Sep 17 00:00:00 2001
From: Akinori MUSHA <knu@idaemons.org>
Date: Thu, 18 Jun 2015 22:28:09 +0900
Subject: [PATCH 1/4] Add a url_on_receive option to WebsiteAgent

This option lets a WebsiteAgent build the URL to scrape from an incoming
event by rendering a Liquid template against the event payload.
---
 app/models/agents/website_agent.rb | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/app/models/agents/website_agent.rb b/app/models/agents/website_agent.rb
index 9e9e7db6..02434951 100644
--- a/app/models/agents/website_agent.rb
+++ b/app/models/agents/website_agent.rb
@@ -19,7 +19,7 @@ module Agents
 
       `url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape)
 
-      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
+      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload, or if you set `url_on_receive` it is used as a Liquid template to generate the url to access. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
 
       # Supported Document Types
 
@@ -135,7 +135,8 @@ module Agents
 
     def validate_options
       # Check for required fields
-      errors.add(:base, "url and expected_update_period_in_days are required") unless options['expected_update_period_in_days'].present? && options['url'].present?
+      errors.add(:base, "either url or url_on_receive is required") unless options['url'].present? || options['url_on_receive'].present?
+      errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
       if !options['extract'].present? && extraction_type != "json"
         errors.add(:base, "extract is required for all types except json")
       end
@@ -257,7 +258,12 @@ module Agents
     def receive(incoming_events)
       incoming_events.each do |event|
         interpolate_with(event) do
-          url_to_scrape = event.payload['url']
+          url_to_scrape =
+            if url_template = options['url_on_receive'].presence
+              interpolate_string(url_template)
+            else
+              event.payload['url']
+            end
           check_url(url_to_scrape,
                     interpolated['mode'].to_s == "merge" ? event.payload : {})
         end

From 1e336f029e8c272f33f435ba10c8c73e9f7063c4 Mon Sep 17 00:00:00 2001
From: Akinori MUSHA <knu@idaemons.org>
Date: Fri, 19 Jun 2015 15:02:14 +0900
Subject: [PATCH 2/4] Add a spec for the url_on_receive option

---
 spec/models/agents/website_agent_spec.rb | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/spec/models/agents/website_agent_spec.rb b/spec/models/agents/website_agent_spec.rb
index 21ff2bf7..7fb5478c 100644
--- a/spec/models/agents/website_agent_spec.rb
+++ b/spec/models/agents/website_agent_spec.rb
@@ -633,6 +633,17 @@ fire: hot
         }.to change { Event.count }.by(1)
       end
 
+      it "should use url_on_receive as url to scrape if it exists when receiving an event" do
+        stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Fxkcd.com')
+
+        @checker.options = @valid_options.merge(
+          'url_on_receive' => 'http://example.org/?url={{url | uri_escape}}'
+        )
+        @checker.receive([@event])
+
+        expect(stub).to have_been_requested
+      end
+
       it "should interpolate values from incoming event payload" do
         expect {
           @valid_options['extract'] = {

From 130ca6c9af05c37ba69759d86d29dff71ffebec6 Mon Sep 17 00:00:00 2001
From: Akinori MUSHA <knu@idaemons.org>
Date: Fri, 19 Jun 2015 17:43:16 +0900
Subject: [PATCH 3/4] Rename the option to `url_from_event`, as suggested by
 @cantino

---
 app/models/agents/website_agent.rb       | 6 +++---
 spec/models/agents/website_agent_spec.rb | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/models/agents/website_agent.rb b/app/models/agents/website_agent.rb
index 02434951..a97b16d4 100644
--- a/app/models/agents/website_agent.rb
+++ b/app/models/agents/website_agent.rb
@@ -19,7 +19,7 @@ module Agents
 
       `url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape)
 
-      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload, or if you set `url_on_receive` it is used as a Liquid template to generate the url to access. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
+      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload, or if you set `url_from_event` it is used as a Liquid template to generate the url to access. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
 
       # Supported Document Types
 
@@ -135,7 +135,7 @@ module Agents
 
     def validate_options
       # Check for required fields
-      errors.add(:base, "either url or url_on_receive is required") unless options['url'].present? || options['url_on_receive'].present?
+      errors.add(:base, "either url or url_from_event is required") unless options['url'].present? || options['url_from_event'].present?
       errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
       if !options['extract'].present? && extraction_type != "json"
         errors.add(:base, "extract is required for all types except json")
@@ -259,7 +259,7 @@ module Agents
       incoming_events.each do |event|
         interpolate_with(event) do
           url_to_scrape =
-            if url_template = options['url_on_receive'].presence
+            if url_template = options['url_from_event'].presence
               interpolate_string(url_template)
             else
               event.payload['url']
diff --git a/spec/models/agents/website_agent_spec.rb b/spec/models/agents/website_agent_spec.rb
index 7fb5478c..077fba8d 100644
--- a/spec/models/agents/website_agent_spec.rb
+++ b/spec/models/agents/website_agent_spec.rb
@@ -633,11 +633,11 @@ fire: hot
         }.to change { Event.count }.by(1)
       end
 
-      it "should use url_on_receive as url to scrape if it exists when receiving an event" do
+      it "should use url_from_event as url to scrape if it exists when receiving an event" do
         stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Fxkcd.com')
 
         @checker.options = @valid_options.merge(
-          'url_on_receive' => 'http://example.org/?url={{url | uri_escape}}'
+          'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
         )
         @checker.receive([@event])
 

From e6670903d670b64dfe425b6a2b02565e9263a34a Mon Sep 17 00:00:00 2001
From: Akinori MUSHA <knu@idaemons.org>
Date: Fri, 19 Jun 2015 17:47:07 +0900
Subject: [PATCH 4/4] Add an entry to CHANGES.md

---
 CHANGES.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGES.md b/CHANGES.md
index 2c170157..8aca9838 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,6 @@
 # Changes
 
+* Jun 19, 2015   - Add `url_from_event` to WebsiteAgent.
 * Jun 17, 2015   - RssAgent emits events for new feed items in chronological order.
 * Jun 15, 2015   - Liquid filter `uri_expand` added.
 * Jun 12, 2015   - RSSAgent can now accept an array of URLs.