Merge pull request #866 from cantino/website_agent-url_on_receive

Add a url_from_event option to WebsiteAgent
Akinori MUSHA 2015-06-19 17:53:21 +09:00
commit caa2132b99
3 changed files with 21 additions and 3 deletions


@@ -1,5 +1,6 @@
# Changes
* Jun 19, 2015 - Add `url_from_event` to WebsiteAgent.
* Jun 17, 2015 - RssAgent emits events for new feed items in chronological order.
* Jun 15, 2015 - Liquid filter `uri_expand` added.
* Jun 12, 2015 - RSSAgent can now accept an array of URLs.


@@ -19,7 +19,7 @@ module Agents
`url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape)
The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload, or, if you set `url_from_event`, that value is used as a Liquid template to generate the url to access. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
# Supported Document Types
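
As a concrete illustration of the behaviour described above (not part of this commit; the URL and extraction rules below are made up), a receiving WebsiteAgent could be configured with options along these lines, where `url_from_event` is a Liquid template and `{{url}}` refers to the `url` key of each incoming event's payload:

# Hypothetical options for a WebsiteAgent driven by incoming events (illustrative only).
# `url_from_event` is rendered as a Liquid template against the event payload, so
# `{{url | uri_escape}}` expands to the escaped `url` value from the event.
{
  'expected_update_period_in_days' => '2',
  'url_from_event' => 'http://example.org/fetch?target={{url | uri_escape}}',
  'mode'           => 'merge',  # keep the old payload and merge in the newly extracted values
  'type'           => 'html',
  'extract'        => {
    'url'   => { 'css' => '#comic img', 'value' => '@src' },
    'title' => { 'css' => '#comic img', 'value' => '@title' }
  }
}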
@@ -135,7 +135,8 @@ module Agents
    def validate_options
      # Check for required fields
      errors.add(:base, "url and expected_update_period_in_days are required") unless options['expected_update_period_in_days'].present? && options['url'].present?
      errors.add(:base, "either url or url_from_event is required") unless options['url'].present? || options['url_from_event'].present?
      errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
      if !options['extract'].present? && extraction_type != "json"
        errors.add(:base, "extract is required for all types except json")
      end
@@ -257,7 +258,12 @@
    def receive(incoming_events)
      incoming_events.each do |event|
        interpolate_with(event) do
          url_to_scrape = event.payload['url']
          url_to_scrape =
            if url_template = options['url_from_event'].presence
              interpolate_string(url_template)
            else
              event.payload['url']
            end
          check_url(url_to_scrape,
                    interpolated['mode'].to_s == "merge" ? event.payload : {})
        end
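
For readers unfamiliar with Huginn's Liquid interpolation, the following standalone sketch (not part of this commit) mimics the new branch using the plain `liquid` gem. `uri_escape` is registered by hand here because it is a Huginn-specific filter; in the real agent, `interpolate_with(event)` and `interpolate_string` provide the event payload as the Liquid context automatically.

# Illustrative stand-in for the new URL selection logic; not Huginn code.
require 'liquid'
require 'cgi'

# Huginn ships a `uri_escape` Liquid filter; emulate it for this sketch.
module UriEscapeFilter
  def uri_escape(input)
    CGI.escape(input.to_s)
  end
end
Liquid::Template.register_filter(UriEscapeFilter)

def url_to_scrape(options, event_payload)
  if (url_template = options['url_from_event'])
    # New behaviour: render the template in the context of the event payload.
    Liquid::Template.parse(url_template).render(event_payload)
  else
    # Previous (and fallback) behaviour: use the event's own `url` key.
    event_payload['url']
  end
end

payload = { 'url' => 'http://xkcd.com' }
url_to_scrape({ 'url_from_event' => 'http://example.org/?url={{url | uri_escape}}' }, payload)
# => "http://example.org/?url=http%3A%2F%2Fxkcd.com"
url_to_scrape({}, payload)
# => "http://xkcd.com"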


@@ -633,6 +633,17 @@ fire: hot
        }.to change { Event.count }.by(1)
      end
      it "should use url_from_event as url to scrape if it exists when receiving an event" do
        stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Fxkcd.com')
        @checker.options = @valid_options.merge(
          'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
        )
        @checker.receive([@event])
        expect(stub).to have_been_requested
      end
      it "should interpolate values from incoming event payload" do
        expect {
          @valid_options['extract'] = {