mirror of
https://github.com/Fishwaldo/huginn.git
synced 2025-03-16 03:41:41 +00:00
In the website agent with type json, allow extract to be blank, in which case the entire JSON object will be stored as the payload
This commit is contained in:
parent
aa8cea24de
commit
43194c3c1b
2 changed files with 71 additions and 34 deletions
|
@ -61,7 +61,10 @@ module Agents
|
|||
end
|
||||
|
||||
def validate_options
|
||||
errors.add(:base, "url, expected_update_period_in_days, and extract are required") unless options[:expected_update_period_in_days].present? && options[:url].present? && options[:extract].present?
|
||||
errors.add(:base, "url and expected_update_period_in_days are required") unless options[:expected_update_period_in_days].present? && options[:url].present?
|
||||
if !options[:extract].present? && options[:type] != "json"
|
||||
errors.add(:base, "extract is required for all types except json")
|
||||
end
|
||||
end
|
||||
|
||||
def check
|
||||
|
@ -74,45 +77,54 @@ module Agents
|
|||
request.on_success do |response|
|
||||
doc = parse(response.body)
|
||||
output = {}
|
||||
options[:extract].each do |name, extraction_details|
|
||||
result = if extraction_type == "json"
|
||||
output[name] = Utils.values_at(doc, extraction_details[:path])
|
||||
else
|
||||
output[name] = doc.css(extraction_details[:css]).map { |node|
|
||||
if extraction_details[:attr]
|
||||
node.attr(extraction_details[:attr])
|
||||
elsif extraction_details[:text]
|
||||
node.text()
|
||||
else
|
||||
error ":attr or :text is required on HTML or XML extraction patterns"
|
||||
return
|
||||
end
|
||||
}
|
||||
end
|
||||
log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
|
||||
end
|
||||
|
||||
num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
|
||||
|
||||
if num_unique_lengths.length != 1
|
||||
error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
|
||||
return
|
||||
end
|
||||
|
||||
previous_payloads = events.order("id desc").limit(UNIQUENESS_LOOK_BACK).pluck(:payload).map(&:to_json) if options[:mode].to_s == "on_change"
|
||||
num_unique_lengths.first.times do |index|
|
||||
result = {}
|
||||
options[:extract].keys.each do |name|
|
||||
result[name] = output[name][index]
|
||||
if name.to_s == 'url'
|
||||
result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
|
||||
end
|
||||
end
|
||||
|
||||
if extraction_type == "json" && !options[:extract].present?
|
||||
result = doc
|
||||
if !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
|
||||
log "Storing new result for '#{name}': #{result.inspect}"
|
||||
create_event :payload => result
|
||||
end
|
||||
else
|
||||
options[:extract].each do |name, extraction_details|
|
||||
result = if extraction_type == "json"
|
||||
output[name] = Utils.values_at(doc, extraction_details[:path])
|
||||
else
|
||||
output[name] = doc.css(extraction_details[:css]).map { |node|
|
||||
if extraction_details[:attr]
|
||||
node.attr(extraction_details[:attr])
|
||||
elsif extraction_details[:text]
|
||||
node.text()
|
||||
else
|
||||
error ":attr or :text is required on HTML or XML extraction patterns"
|
||||
return
|
||||
end
|
||||
}
|
||||
end
|
||||
log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
|
||||
end
|
||||
|
||||
num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
|
||||
|
||||
if num_unique_lengths.length != 1
|
||||
error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
|
||||
return
|
||||
end
|
||||
|
||||
num_unique_lengths.first.times do |index|
|
||||
result = {}
|
||||
options[:extract].keys.each do |name|
|
||||
result[name] = output[name][index]
|
||||
if name.to_s == 'url'
|
||||
result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
|
||||
end
|
||||
end
|
||||
|
||||
if !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
|
||||
log "Storing new result for '#{name}': #{result.inspect}"
|
||||
create_event :payload => result
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
hydra.queue request
|
||||
|
|
|
@ -155,6 +155,31 @@ describe Agents::WebsiteAgent do
|
|||
event.payload[:version].should == 2
|
||||
event.payload[:title].should == "first"
|
||||
end
|
||||
|
||||
it "stores the whole object if :extract is not specified" do
|
||||
json = {
|
||||
:response => {
|
||||
:version => 2,
|
||||
:title => "hello!"
|
||||
}
|
||||
}
|
||||
stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
|
||||
site = {
|
||||
:name => "Some JSON Response",
|
||||
:expected_update_period_in_days => 2,
|
||||
:type => "json",
|
||||
:url => "http://json-site.com",
|
||||
:mode => :on_change
|
||||
}
|
||||
checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
|
||||
checker.user = users(:bob)
|
||||
checker.save!
|
||||
|
||||
checker.check
|
||||
event = Event.last
|
||||
event.payload[:response][:version].should == 2
|
||||
event.payload[:response][:title].should == "hello!"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Reference in a new issue