mirror of
https://github.com/Fishwaldo/huginn.git
synced 2025-03-15 19:31:26 +00:00
Add a new option template
to WebsiteAgent
If given, it is used as a Liquid template for each event created by the Agent, instead of directly emitting the results of extraction as events. An existing spec needs to be fixed because WebsiteAgent now has the `template` option, which may not be a hash of hashes.
This commit is contained in:
parent
9a3290ef40
commit
fe35df8752
3 changed files with 67 additions and 12 deletions
|
@ -111,6 +111,17 @@ module Agents
|
|||
|
||||
Set `http_success_codes` to an array of status codes (e.g., `[404, 422]`) to treat HTTP response codes beyond 200 as successes.
|
||||
|
||||
If a `template` option is given, it is used as a Liquid template for each event created by this Agent, instead of directly emitting the results of extraction as events. In the template, keys of extracted data can be interpolated, and some additional variables are also available as explained in the next section. For example:
|
||||
|
||||
"template": {
|
||||
"url": "{{ url }}",
|
||||
"title": "{{ title }}",
|
||||
"description": "{{ body_text }}",
|
||||
"last_modified": "{{ _response_.headers.Last-Modified | date: '%FT%T' }}"
|
||||
}
|
||||
|
||||
In the `on_change` mode, change is detected based on the resulted event payload after applying this option. If you want to add some keys to each event but ignore any change in them, set `mode` to `all` and put a DeDuplicationAgent downstream.
|
||||
|
||||
# Liquid Templating
|
||||
|
||||
In Liquid templating, the following variable is available:
|
||||
|
@ -127,8 +138,10 @@ module Agents
|
|||
MD
|
||||
|
||||
event_description do
|
||||
keys = options['template'].presence || options['extract'].keys
|
||||
|
||||
"Events will have the following fields:\n\n %s" % [
|
||||
Utils.pretty_print(Hash[options['extract'].keys.map { |key|
|
||||
Utils.pretty_print(Hash[keys.map { |key|
|
||||
[key, "..."]
|
||||
}])
|
||||
]
|
||||
|
@ -157,6 +170,7 @@ module Agents
|
|||
errors.add(:base, "either url, url_from_event, or data_from_event are required") unless options['url'].present? || options['url_from_event'].present? || options['data_from_event'].present?
|
||||
errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
|
||||
validate_extract_options!
|
||||
validate_template_options!
|
||||
validate_http_success_codes!
|
||||
|
||||
# Check for optional fields
|
||||
|
@ -281,6 +295,15 @@ module Agents
|
|||
end
|
||||
end
|
||||
|
||||
def validate_template_options!
|
||||
template = options['template'].presence or return
|
||||
|
||||
unless Hash === template &&
|
||||
template.each_pair.all? { |key, value| String === value }
|
||||
errors.add(:base, 'template must be a hash of strings.')
|
||||
end
|
||||
end
|
||||
|
||||
def check
|
||||
check_urls(interpolated['url'])
|
||||
end
|
||||
|
@ -343,20 +366,33 @@ module Agents
|
|||
extract_xml(doc)
|
||||
end
|
||||
|
||||
num_unique_lengths = interpolated['extract'].keys.map { |name| output[name].length }.uniq
|
||||
|
||||
if num_unique_lengths.length != 1
|
||||
if output.each_value.each_cons(2).any? { |m, n| m.size != n.size }
|
||||
raise "Got an uneven number of matches for #{interpolated['name']}: #{interpolated['extract'].inspect}"
|
||||
end
|
||||
|
||||
old_events = previous_payloads num_unique_lengths.first
|
||||
num_unique_lengths.first.times do |index|
|
||||
result = {}
|
||||
interpolated['extract'].keys.each do |name|
|
||||
result[name] = output[name][index]
|
||||
if name.to_s == 'url' && url.present?
|
||||
result[name] = (url + Utils.normalize_uri(result[name])).to_s
|
||||
num_tuples = output.each_value.first.size
|
||||
|
||||
old_events = previous_payloads num_tuples
|
||||
|
||||
template = options['template'].presence
|
||||
|
||||
num_tuples.times do |index|
|
||||
extracted = {}
|
||||
interpolated['extract'].each_key do |name|
|
||||
extracted[name] = output[name][index]
|
||||
end
|
||||
|
||||
result =
|
||||
if template
|
||||
interpolate_with(extracted) do
|
||||
interpolate_options(template)
|
||||
end
|
||||
else
|
||||
extracted
|
||||
end
|
||||
|
||||
if payload_url = result['url'].presence
|
||||
result['url'] = (url + Utils.normalize_uri(payload_url)).to_s
|
||||
end
|
||||
|
||||
if store_payload!(old_events, result)
|
||||
|
|
|
@ -739,6 +739,22 @@ describe Agents::WebsiteAgent do
|
|||
expect(event.payload['response_info']).to eq('The reponse was 200 OK.')
|
||||
end
|
||||
|
||||
it "should be formatted by template after extraction" do
|
||||
@valid_options['template'] = {
|
||||
'url' => '{{url}}',
|
||||
'title' => '{{title | upcase}}',
|
||||
'summary' => '{{title}}: {{hovertext | truncate: 20}}',
|
||||
}
|
||||
@checker.options = @valid_options
|
||||
@checker.check
|
||||
event = Event.last
|
||||
expect(event.payload).to eq({
|
||||
'title' => 'EVOLVING',
|
||||
'url' => 'http://imgs.xkcd.com/comics/evolving.png',
|
||||
'summary' => 'Evolving: Biologists play r...',
|
||||
})
|
||||
end
|
||||
|
||||
describe "XML" do
|
||||
before do
|
||||
stub_request(:any, /github_rss/).to_return(
|
||||
|
|
|
@ -130,7 +130,10 @@ shared_examples_for AgentControllerConcern do
|
|||
end
|
||||
|
||||
it "should configure targets with nested objects" do
|
||||
agent.control_targets << agents(:bob_data_output_agent)
|
||||
agent.control_targets = [
|
||||
agents(:bob_basecamp_agent), # does not support a `template` option, but anyway
|
||||
agents(:bob_data_output_agent)
|
||||
]
|
||||
agent.options['action'] = 'configure'
|
||||
agent.options['configure_options'] = {
|
||||
template: {
|
||||
|
|
Loading…
Add table
Reference in a new issue