mirror of
https://github.com/Fishwaldo/huginn.git
synced 2025-03-18 21:01:30 +00:00
Merge pull request #866 from cantino/website_agent-url_on_receive
Add a url_from_event option to WebsiteAgent
This commit is contained in:
commit
caa2132b99
3 changed files with 21 additions and 3 deletions
|
@ -1,5 +1,6 @@
|
|||
# Changes
|
||||
|
||||
* Jun 19, 2015 - Add `url_from_event` to WebsiteAgent.
|
||||
* Jun 17, 2015 - RssAgent emits events for new feed items in chronological order.
|
||||
* Jun 15, 2015 - Liquid filter `uri_expand` added.
|
||||
* Jun 12, 2015 - RssAgent can now accept an array of URLs.
|
||||
|
|
|
@ -19,7 +19,7 @@ module Agents
|
|||
|
||||
`url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape)
|
||||
|
||||
The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
|
||||
The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload, or, if you set `url_from_event`, that value is used as a Liquid template to generate the url to access. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
|
||||
|
||||
# Supported Document Types
|
||||
|
||||
|
@ -135,7 +135,8 @@ module Agents
|
|||
|
||||
def validate_options
|
||||
# Check for required fields
|
||||
errors.add(:base, "url and expected_update_period_in_days are required") unless options['expected_update_period_in_days'].present? && options['url'].present?
|
||||
errors.add(:base, "either url or url_from_event is required") unless options['url'].present? || options['url_from_event'].present?
|
||||
errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
|
||||
if !options['extract'].present? && extraction_type != "json"
|
||||
errors.add(:base, "extract is required for all types except json")
|
||||
end
|
||||
|
@ -257,7 +258,12 @@ module Agents
|
|||
def receive(incoming_events)
|
||||
incoming_events.each do |event|
|
||||
interpolate_with(event) do
|
||||
url_to_scrape = event.payload['url']
|
||||
url_to_scrape =
|
||||
if url_template = options['url_from_event'].presence
|
||||
interpolate_string(url_template)
|
||||
else
|
||||
event.payload['url']
|
||||
end
|
||||
check_url(url_to_scrape,
|
||||
interpolated['mode'].to_s == "merge" ? event.payload : {})
|
||||
end
|
||||
|
|
|
@ -633,6 +633,17 @@ fire: hot
|
|||
}.to change { Event.count }.by(1)
|
||||
end
|
||||
|
||||
it "should use url_from_event as url to scrape if it exists when receiving an event" do
|
||||
stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Fxkcd.com')
|
||||
|
||||
@checker.options = @valid_options.merge(
|
||||
'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
|
||||
)
|
||||
@checker.receive([@event])
|
||||
|
||||
expect(stub).to have_been_requested
|
||||
end
|
||||
|
||||
it "should interpolate values from incoming event payload" do
|
||||
expect {
|
||||
@valid_options['extract'] = {
|
||||
|
|
Loading…
Add table
Reference in a new issue