Merge pull request #1301 from kreuzwerker/feature/file-handling

Introduce concept to handle files
This commit is contained in:
Dominik Sander 2016-03-18 20:57:50 +01:00
commit 9a588e080b
20 changed files with 1799 additions and 31 deletions

View file

@ -63,6 +63,12 @@ gem 'haversine'
gem 'omniauth-evernote'
gem 'evernote_oauth'
# LocalFileAgent (watch functionality)
gem 'listen', '~> 3.0.5', require: false
# S3Agent
gem 'aws-sdk-core', '~> 2.2.15'
# Optional Services.
gem 'omniauth-37signals' # BasecampAgent
gem 'omniauth-wunderlist', github: 'wunderlist/omniauth-wunderlist', ref: 'd0910d0396107b9302aa1bc50e74bb140990ccb8'
@ -75,7 +81,6 @@ unless Gem::Version.new(Bundler::VERSION) >= Gem::Version.new('1.5.0')
end
gem 'protected_attributes', '~>1.0.8' # This must be loaded before some other gems, like delayed_job.
gem 'ace-rails-ap', '~> 2.0.1'
gem 'bootstrap-kaminari-views', '~> 0.0.3'
gem 'bundler', '>= 1.5.0'

View file

@ -110,6 +110,8 @@ GEM
addressable (>= 2.3.1)
extlib (>= 0.9.15)
multi_json (>= 1.0.0)
aws-sdk-core (2.2.15)
jmespath (~> 1.0)
bcrypt (3.1.10)
better_errors (1.1.0)
coderay (>= 1.0.0)
@ -279,6 +281,7 @@ GEM
hypdf (1.0.7)
httmultiparty (= 0.3.10)
i18n (0.7.0)
jmespath (1.1.3)
jquery-rails (3.1.3)
railties (>= 3.0, < 5.0)
thor (>= 0.14, < 2.0)
@ -577,6 +580,7 @@ PLATFORMS
DEPENDENCIES
ace-rails-ap (~> 2.0.1)
aws-sdk-core (~> 2.2.15)
better_errors (~> 1.1)
binding_of_caller
bootstrap-kaminari-views (~> 0.0.3)
@ -621,6 +625,7 @@ DEPENDENCIES
kramdown (~> 1.3.3)
letter_opener_web
liquid (~> 3.0.3)
listen (~> 3.0.5)
mini_magick
mqtt
multi_xml

View file

@ -0,0 +1,58 @@
# Shared concern for agents that pass files between each other via small
# "file pointer" payloads instead of embedding file contents in events.
# Emitting agents are expected to override #get_io(file) to serve the data.
module FileHandling
  extend ActiveSupport::Concern

  # Builds the payload fragment that points a consuming agent back at this
  # agent for the given file.
  def get_file_pointer(file)
    { file_pointer: { file: file, agent_id: id } }
  end

  # Resolves the file pointer carried by +event+ and returns an IO-like
  # object obtained from the emitting agent's #get_io(file) override.
  # Returns nil when the payload does not carry a complete pointer.
  def get_io(event)
    pointer = event.payload['file_pointer']
    return nil unless pointer && pointer['file'] && pointer['agent_id']
    event.user.agents.find(pointer['agent_id']).get_io(pointer['file'])
  end

  # Memoized description snippet for agents that emit file pointers.
  def emitting_file_handling_agent_description
    @emitting_file_handling_agent_description ||=
      "This agent only emits a 'file pointer', not the data inside the files, the following agents can consume the created events: `#{receiving_file_handling_agents.join('`, `')}`. Read more about the concept in the [wiki](https://github.com/cantino/huginn/wiki/How-Huginn-works-with-files)."
  end

  # Memoized description snippet for agents that consume file pointers.
  def receiving_file_handling_agent_description
    @receiving_file_handling_agent_description ||=
      "This agent can consume a 'file pointer' event from the following agents with no additional configuration: `#{emitting_file_handling_agents.join('`, `')}`. Read more about the concept in the [wiki](https://github.com/cantino/huginn/wiki/How-Huginn-works-with-files)."
  end

  private

  # Demodulized class names of all FileHandling agents that emit pointers.
  def emitting_file_handling_agents
    file_handling_agents.select(&:emits_file_pointer?).map { |klass| klass.to_s.demodulize }
  end

  # Demodulized class names of all FileHandling agents that consume pointers.
  def receiving_file_handling_agents
    file_handling_agents.select(&:consumes_file_pointer?).map { |klass| klass.to_s.demodulize }
  end

  # Every agent class that mixes in this concern, resolved via reflection.
  def file_handling_agents
    @file_handling_agents ||= Agent.types
                                   .select { |type| type.included_modules.include?(FileHandling) }
                                   .map { |type| type.name.constantize }
  end

  # Class-level flags set by the including agent class.
  module ClassMethods
    # Declares that this agent class emits file pointer events.
    def emits_file_pointer!
      @emits_file_pointer = true
    end

    def emits_file_pointer?
      !!@emits_file_pointer
    end

    # Declares that this agent class can consume file pointer events.
    def consumes_file_pointer!
      @consumes_file_pointer = true
    end

    def consumes_file_pointer?
      !!@consumes_file_pointer
    end
  end
end

View file

@ -12,4 +12,8 @@ module WorkingHelpers
def received_event_without_error?
(last_receive_at.present? && last_error_log_at.blank?) || (last_receive_at.present? && last_error_log_at.present? && last_receive_at > last_error_log_at)
end
end
# Returns true when the agent's last scheduled check succeeded: it has been
# checked at least once and no error has been logged since that check.
def checked_without_error?
(last_check_at.present? && last_error_log_at.nil?) || (last_check_at.present? && last_error_log_at.present? && last_check_at > last_error_log_at)
end
end

View file

@ -0,0 +1,195 @@
module Agents
  # Parses incoming CSV data into events, or serializes event data to CSV.
  class CsvAgent < Agent
    include FormConfigurable
    include FileHandling

    cannot_be_scheduled!
    consumes_file_pointer!

    def default_options
      {
        'mode' => 'parse',
        'separator' => ',',
        'use_fields' => '',
        'output' => 'event_per_row',
        'with_header' => 'true',
        'data_path' => '$.data',
        'data_key' => 'data'
      }
    end

    description do
      <<-MD
        The `CsvAgent` parses or serializes CSV data. When parsing, events can either be emitted for the entire CSV, or one per row.
        Set `mode` to `parse` to parse CSV from incoming event, when set to `serialize` the agent serializes the data of events to CSV.
        ### Universal options
        Specify the `separator` which is used to separate the fields from each other (default is `,`).
        `data_key` sets the key which contains the serialized CSV or parsed CSV data in emitted events.
        ### Parsing
        If `use_fields` is set to a comma separated string and the CSV file contains field headers the agent will only extract the specified fields.
        `output` determines whether one event per row is emitted or one event that includes all the rows.
        Set `with_header` to `true` if first line of the CSV file are field names.
        #{receiving_file_handling_agent_description}
        When receiving the CSV data in a regular event use [JSONPath](http://goessner.net/articles/JsonPath/) to select the path in `data_path`. `data_path` is only used when the received event does not contain a 'file pointer'.
        ### Serializing
        If `use_fields` is set to a comma separated string and the first received event has an object at the specified `data_path` the generated CSV will only include the given fields.
        Set `with_header` to `true` to include a field header in the CSV.
        Use [JSONPath](http://goessner.net/articles/JsonPath/) in `data_path` to select which part of the received events should be serialized.
      MD
    end

    event_description do
      "Events will look like this:\n\n %s" % if interpolated['mode'] == 'parse'
        rows = if boolify(interpolated['with_header'])
          [{'column' => 'row1 value1', 'column2' => 'row1 value2'}, {'column' => 'row2 value3', 'column2' => 'row2 value4'}]
        else
          [['row1 value1', 'row1 value2'], ['row2 value1', 'row2 value2']]
        end
        if interpolated['output'] == 'event_per_row'
          Utils.pretty_print(interpolated['data_key'] => rows[0])
        else
          Utils.pretty_print(interpolated['data_key'] => rows)
        end
      else
        Utils.pretty_print(interpolated['data_key'] => '"generated","csv","data"' + "\n" + '"column1","column2","column3"')
      end
    end

    form_configurable :mode, type: :array, values: %w(parse serialize)
    form_configurable :separator, type: :string
    form_configurable :data_key, type: :string
    form_configurable :with_header, type: :boolean
    form_configurable :use_fields, type: :string
    form_configurable :output, type: :array, values: %w(event_per_row event_per_file)
    form_configurable :data_path, type: :string

    def validate_options
      if options['with_header'].blank? || ![true, false].include?(boolify(options['with_header']))
        errors.add(:base, "The 'with_header' option is required and must be set to 'true' or 'false'")
      end
      if options['mode'] == 'serialize' && options['data_path'].blank?
        errors.add(:base, "When mode is set to serialize data_path has to be present.")
      end
    end

    def working?
      received_event_without_error?
    end

    # Dispatches to parsing or serializing based on the raw 'mode' option.
    # NOTE(review): uses options['mode'] rather than interpolated['mode'],
    # so Liquid templates in 'mode' are not evaluated here — confirm intended.
    def receive(incoming_events)
      case options['mode']
      when 'parse'
        parse(incoming_events)
      when 'serialize'
        serialize(incoming_events)
      end
    end

    private

    # Serializes all incoming events into a single CSV string and emits it
    # as one event under the configured data_key. Interpolation context is
    # taken from the first event.
    def serialize(incoming_events)
      mo = interpolated(incoming_events.first)
      rows = rows_from_events(incoming_events, mo)
      csv = CSV.generate(col_sep: separator(mo), force_quotes: true) do |builder|
        # Emit a header row first when configured and the rows are hash-like.
        if boolify(mo['with_header']) && rows.first.is_a?(Hash)
          if mo['use_fields'].present?
            builder << extract_options(mo)
          else
            builder << rows.first.keys
          end
        end
        rows.each do |data|
          if data.is_a?(Hash)
            if mo['use_fields'].present?
              builder << data.extract!(*extract_options(mo)).values
            else
              builder << data.values
            end
          else
            builder << data
          end
        end
      end
      create_event payload: { mo['data_key'] => csv }
    end

    # Collects the rows to serialize: each event contributes either a single
    # row or, when its data_path holds an array of rows, all of them.
    def rows_from_events(incoming_events, mo)
      [].tap do |rows|
        incoming_events.each do |event|
          data = Utils.value_at(event.payload, mo['data_path'])
          if data.is_a?(Array) && (data[0].is_a?(Array) || data[0].is_a?(Hash))
            data.each { |row| rows << row }
          else
            rows << data
          end
        end
      end
    end

    # Parses the CSV referenced by each event, emitting either one event per
    # row or one event containing every row.
    def parse(incoming_events)
      incoming_events.each do |event|
        mo = interpolated(event)
        next unless io = local_get_io(event)
        if mo['output'] == 'event_per_row'
          parse_csv(io, mo) do |payload|
            create_event payload: { mo['data_key'] => payload }
          end
        else
          create_event payload: { mo['data_key'] => parse_csv(io, mo, []) }
        end
      end
    end

    # Prefers a FileHandling 'file pointer'; falls back to the event payload
    # at data_path when no pointer is present.
    def local_get_io(event)
      if io = get_io(event)
        io
      else
        Utils.value_at(event.payload, interpolated['data_path'])
      end
    end

    # Parses CSV from +io+; yields one payload per row when a block is given,
    # otherwise appends payloads to +array+ and returns it.
    def parse_csv(io, mo, array = nil)
      CSV.new(io, col_sep: separator(mo), headers: boolify(mo['with_header'])).each do |row|
        if block_given?
          yield get_payload(row, mo)
        else
          array << get_payload(row, mo)
        end
      end
      array
    end

    # The literal two characters '\t' in the option mean a real tab separator.
    def separator(mo)
      mo['separator'] == '\\t' ? "\t" : mo['separator']
    end

    # Converts a CSV::Row to a hash (optionally restricted to use_fields)
    # when headers are enabled; otherwise returns the raw row.
    def get_payload(row, mo)
      if boolify(mo['with_header'])
        if mo['use_fields'].present?
          row.to_hash.extract!(*extract_options(mo))
        else
          row.to_hash
        end
      else
        row
      end
    end

    # Splits the comma separated use_fields option into trimmed field names.
    def extract_options(mo)
      mo['use_fields'].split(',').map(&:strip)
    end
  end
end

View file

@ -3,23 +3,42 @@ require 'time'
module Agents
class FtpsiteAgent < Agent
cannot_receive_events!
include FileHandling
default_schedule "every_12h"
gem_dependency_check { defined?(Net::FTP) && defined?(Net::FTP::List) }
description <<-MD
The FTP Site Agent checks an FTP site and creates Events based on newly uploaded files in a directory.
emits_file_pointer!
#{'## Include `net-ftp-list` in your Gemfile to use this Agent!' if dependencies_missing?}
description do
<<-MD
The Ftp Site Agent checks an FTP site and creates Events based on newly uploaded files in a directory. When receiving events it creates files on the configured FTP server.
#{'## Include `net-ftp-list` in your Gemfile to use this Agent!' if dependencies_missing?}
Specify a `url` that represents a directory of an FTP site to watch, and a list of `patterns` to match against file names.
`mode` must be present and either `read` or `write`, in `read` mode the agent checks the FTP site for changed files, with `write` it writes received events to a file on the server.
Login credentials can be included in `url` if authentication is required.
### Universal options
Only files with a last modification time later than the `after` value, if specifed, are notified.
MD
Specify a `url` that represents a directory of an FTP site to watch, and a list of `patterns` to match against file names.
Login credentials can be included in `url` if authentication is required: `ftp://username:password@ftp.example.com/path`. Liquid formatting is supported as well: `ftp://{% credential ftp_credentials %}@ftp.example.com/`
Optionally specify the encoding of the files you want to read/write in `force_encoding`, by default UTF-8 is used.
### Reading
Only files with a last modification time later than the `after` value, if specifed, are emitted as event.
### Writing
Specify the filename to use in `filename`, Liquid interpolation is possible to change the name per event.
Use [Liquid](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid) templating in `data` to specify which part of the received event should be written.
#{emitting_file_handling_agent_description}
MD
end
event_description <<-MD
Events look like this:
@ -32,42 +51,67 @@ module Agents
MD
def working?
event_created_within?(interpolated['expected_update_period_in_days']) && !recent_error_logs?
if interpolated['mode'] == 'read'
event_created_within?(interpolated['expected_update_period_in_days']) && !recent_error_logs?
else
received_event_without_error?
end
end
def default_options
{
'mode' => 'read',
'expected_update_period_in_days' => "1",
'url' => "ftp://example.org/pub/releases/",
'patterns' => [
'foo-*.tar.gz',
],
'after' => Time.now.iso8601,
'force_encoding' => '',
'filename' => '',
'data' => '{{ data }}'
}
end
def validate_options
# Check for required fields
begin
url = options['url']
String === url or raise
uri = URI(url)
URI::FTP === uri or raise
errors.add(:base, "url must end with a slash") unless uri.path.end_with?('/')
if !options['url'].include?('{{')
url = interpolated['url']
String === url or raise
uri = URI(url)
URI::FTP === uri or raise
errors.add(:base, "url must end with a slash") if uri.path.present? && !uri.path.end_with?('/')
end
rescue
errors.add(:base, "url must be a valid FTP URL")
end
patterns = options['patterns']
case patterns
when Array
if patterns.empty?
errors.add(:base, "patterns must not be empty")
options['mode'] = 'read' if options['mode'].blank? && new_record?
if options['mode'].blank? || !['read', 'write'].include?(options['mode'])
errors.add(:base, "The 'mode' option is required and must be set to 'read' or 'write'")
end
case interpolated['mode']
when 'read'
patterns = options['patterns']
case patterns
when Array
if patterns.empty?
errors.add(:base, "patterns must not be empty")
end
when nil, ''
errors.add(:base, "patterns must be specified")
else
errors.add(:base, "patterns must be an array")
end
when 'write'
if options['filename'].blank?
errors.add(:base, "filename must be specified in 'write' mode")
end
if options['data'].blank?
errors.add(:base, "data must be specified in 'write' mode")
end
when nil, ''
errors.add(:base, "patterns must be specified")
else
errors.add(:base, "patterns must be an array")
end
# Check for optional fields
@ -85,6 +129,7 @@ module Agents
end
def check
return if interpolated['mode'] != 'read'
saving_entries do |found|
each_entry { |filename, mtime|
found[filename, mtime]
@ -92,6 +137,17 @@ module Agents
end
end
# Writes incoming event data to the FTP server; only active in 'write' mode.
# For each event the interpolated 'data' is uploaded as the interpolated
# 'filename' via a binary STOR transfer. When 'force_encoding' is present the
# data is transcoded first, replacing invalid/undefined byte sequences.
# NOTE(review): opens a fresh FTP connection per event — confirm acceptable
# for high event volumes.
def receive(incoming_events)
return if interpolated['mode'] != 'write'
incoming_events.each do |event|
mo = interpolated(event)
mo['data'].encode!(interpolated['force_encoding'], invalid: :replace, undef: :replace) if interpolated['force_encoding'].present?
open_ftp(base_uri) do |ftp|
ftp.storbinary("STOR #{mo['filename']}", StringIO.new(mo['data']), Net::FTP::DEFAULT_BLOCKSIZE)
end
end
end
def each_entry
patterns = interpolated['patterns']
@ -147,9 +203,10 @@ module Agents
ftp.passive = true
path = uri.path.chomp('/')
log "Changing directory to #{path}"
ftp.chdir(path)
if (path = uri.path.chomp('/')).present?
log "Changing directory to #{path}"
ftp.chdir(path)
end
yield ftp
ensure
@ -176,17 +233,28 @@ module Agents
new_files.sort_by { |filename|
found_entries[filename]
}.each { |filename|
create_event payload: {
create_event payload: get_file_pointer(filename).merge({
'url' => (base_uri + uri_path_escape(filename)).to_s,
'filename' => filename,
'timestamp' => found_entries[filename],
}
})
}
memory['known_entries'] = found_entries
save!
end
# FileHandling hook: downloads +file+ from the FTP server and returns its
# contents as a rewound StringIO, ready for a consuming agent to read.
# Each chunk is tagged with the configured 'force_encoding' (UTF-8 by
# default) via force_encoding — bytes are relabeled, not transcoded.
def get_io(file)
data = StringIO.new
open_ftp(base_uri) do |ftp|
ftp.getbinaryfile(file, nil) do |chunk|
data.write chunk.force_encoding(options['force_encoding'].presence || 'UTF-8')
end
end
data.rewind
data
end
private
def is_positive_integer?(value)

View file

@ -0,0 +1,190 @@
module Agents
  # Watches local files/directories for changes (read mode) or writes
  # received event data to disk (write mode). Disabled unless the
  # ENABLE_INSECURE_AGENTS environment flag is set.
  class LocalFileAgent < Agent
    include LongRunnable
    include FormConfigurable
    include FileHandling

    emits_file_pointer!
    default_schedule 'every_1h'

    # Gatekeeper for this insecure agent; controlled via the environment.
    def self.should_run?
      ENV['ENABLE_INSECURE_AGENTS'] == "true"
    end

    description do
      <<-MD
        The LocalFileAgent can watch a file/directory for changes or emit an event for every file in that directory. When receiving an event it writes the received data into a file.
        `mode` determines if the agent is emitting events for (changed) files or writing received event data to disk.
        ### Reading
        When `watch` is set to `true` the LocalFileAgent will watch the specified `path` for changes, the schedule is ignored and the file system is watched continuously. An event will be emitted for every detected change.
        When `watch` is set to `false` the agent will emit an event for every file in the directory on each scheduled run.
        #{emitting_file_handling_agent_description}
        ### Writing
        Every event will be written into a file at `path`, Liquid interpolation is possible to change the path per event.
        When `append` is true the received data will be appended to the file.
        Use [Liquid](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid) templating in `data` to specify which part of the received event should be written.
        *Warning*: This type of Agent can read and write any file the user that runs the Huginn server has access to, and is #{Agents::LocalFileAgent.should_run? ? "**currently enabled**" : "**currently disabled**"}.
        Only enable this Agent if you trust everyone using your Huginn installation.
        You can enable this Agent in your .env file by setting `ENABLE_INSECURE_AGENTS` to `true`.
      MD
    end

    event_description do
      "Events will look like this:\n\n %s" % if boolify(interpolated['watch'])
        Utils.pretty_print(
          "file_pointer" => {
            "file" => "/tmp/test/filename",
            "agent_id" => id
          },
          "event_type" => "modified/added/removed"
        )
      else
        Utils.pretty_print(
          "file_pointer" => {
            "file" => "/tmp/test/filename",
            "agent_id" => id
          }
        )
      end
    end

    def default_options
      {
        'mode' => 'read',
        'watch' => 'true',
        'append' => 'false',
        'path' => "",
        'data' => '{{ data }}'
      }
    end

    form_configurable :mode, type: :array, values: %w(read write)
    form_configurable :watch, type: :array, values: %w(true false)
    form_configurable :path, type: :string
    form_configurable :append, type: :boolean
    form_configurable :data, type: :string

    def validate_options
      if options['mode'].blank? || !['read', 'write'].include?(options['mode'])
        errors.add(:base, "The 'mode' option is required and must be set to 'read' or 'write'")
      end
      if options['watch'].blank? || ![true, false].include?(boolify(options['watch']))
        errors.add(:base, "The 'watch' option is required and must be set to 'true' or 'false'")
      end
      if options['append'].blank? || ![true, false].include?(boolify(options['append']))
        errors.add(:base, "The 'append' option is required and must be set to 'true' or 'false'")
      end
      if options['path'].blank?
        errors.add(:base, "The 'path' option is required.")
      end
    end

    def working?
      should_run?(false) && ((interpolated['mode'] == 'read' && check_path_existance && checked_without_error?) ||
                             (interpolated['mode'] == 'write' && received_event_without_error?))
    end

    # Scheduled run (read mode, watch disabled): emits a file pointer for
    # every file at the path, or for the path itself when it is a file.
    def check
      return if interpolated['mode'] != 'read' || boolify(interpolated['watch']) || !should_run?
      return unless check_path_existance(true)
      if File.directory?(expanded_path)
        Dir.glob(File.join(expanded_path, '*')).select { |f| File.file?(f) }
      else
        [expanded_path]
      end.each do |file|
        create_event payload: get_file_pointer(file)
      end
    end

    # Write mode: writes (or appends) the interpolated 'data' of each event
    # to the interpolated 'path'.
    def receive(incoming_events)
      return if interpolated['mode'] != 'write' || !should_run?
      incoming_events.each do |event|
        mo = interpolated(event)
        File.open(File.expand_path(mo['path']), boolify(mo['append']) ? 'a' : 'w') do |file|
          file.write(mo['data'])
        end
      end
    end

    # The long running Worker is only started when continuously watching.
    def start_worker?
      interpolated['mode'] == 'read' && boolify(interpolated['watch']) && should_run? && check_path_existance
    end

    # Logs (optionally) and returns whether the configured path exists.
    # NOTE: method name keeps the original spelling for compatibility.
    def check_path_existance(log = true)
      if !File.exist?(expanded_path)
        error("File or directory '#{expanded_path}' does not exist") if log
        return false
      end
      true
    end

    # FileHandling hook: serves the watched file to consuming agents.
    def get_io(file)
      File.open(file, 'r')
    end

    def expanded_path
      @expanded_path ||= File.expand_path(interpolated['path'])
    end

    private

    # Returns whether insecure agents are enabled, logging an error when not.
    def should_run?(log = true)
      if self.class.should_run?
        true
      else
        error("Unable to run because insecure agents are not enabled. Set ENABLE_INSECURE_AGENTS to true in the Huginn .env configuration.") if log
        false
      end
    end

    # Long running worker that watches the file system via the listen gem.
    class Worker < LongRunnable::Worker
      def setup
        require 'listen'
        @listener = Listen.to(*listen_options, &method(:callback))
      end

      # Blocks forever; sleeps without listening when the path is missing so
      # the runner does not spin on a misconfigured agent.
      def run
        sleep unless agent.check_path_existance(true)
        @listener.start
        sleep
      end

      def stop
        @listener.stop
      end

      private

      # listen yields three arrays (modified, added, removed); emit one event
      # per changed file tagged with its change type.
      def callback(*changes)
        AgentRunner.with_connection do
          changes.zip([:modified, :added, :removed]).each do |files, event_type|
            files.each do |file|
              agent.create_event payload: agent.get_file_pointer(file).merge(event_type: event_type)
            end
          end
          agent.touch(:last_check_at)
        end
      end

      # Watches the directory itself, or the parent directory restricted to
      # the single file's basename when the path is a file.
      def listen_options
        if File.directory?(agent.expanded_path)
          [agent.expanded_path, ignore!: [] ]
        else
          [File.dirname(agent.expanded_path), { ignore!: [], only: /\A#{Regexp.escape(File.basename(agent.expanded_path))}\z/ } ]
        end
      end
    end
  end
end

View file

@ -0,0 +1,50 @@
module Agents
  # Consumes 'file pointer' events, reads the referenced file, and emits
  # its full contents as a string.
  class ReadFileAgent < Agent
    include FormConfigurable
    include FileHandling

    cannot_be_scheduled!
    consumes_file_pointer!

    def default_options
      {
        'data_key' => 'data'
      }
    end

    description do
      <<-MD
        The ReadFileAgent takes events from `FileHandling` agents, reads the file, and emits the contents as a string.
        `data_key` specifies the key of the emitted event which contains the file contents.
        #{receiving_file_handling_agent_description}
      MD
    end

    event_description <<-MD
      {
        "data" => '...'
      }
    MD

    form_configurable :data_key, type: :string

    # The output key is mandatory.
    def validate_options
      if options['data_key'].blank?
        errors.add(:base, "The 'data_key' option is required.")
      end
    end

    def working?
      received_event_without_error?
    end

    # Reads the file behind each event's file pointer and emits its contents
    # under data_key. Events without a resolvable pointer are skipped.
    def receive(incoming_events)
      incoming_events.each do |event|
        next unless io = get_io(event)
        create_event payload: { interpolated['data_key'] => io.read }
      end
    end
  end
end

View file

@ -0,0 +1,206 @@
module Agents
  # Watches an S3 bucket for changes / lists its files (read mode), or
  # writes received event data to objects in the bucket (write mode).
  class S3Agent < Agent
    include FormConfigurable
    include FileHandling

    emits_file_pointer!
    no_bulk_receive!
    default_schedule 'every_1h'

    gem_dependency_check { defined?(Aws::S3) }

    description do
      <<-MD
        The S3Agent can watch a bucket for changes or emit an event for every file in that bucket. When receiving events, it writes the data into a file on S3.
        #{'## Include `aws-sdk-core` in your Gemfile to use this Agent!' if dependencies_missing?}
        `mode` must be present and either `read` or `write`, in `read` mode the agent checks the S3 bucket for changed files, with `write` it writes received events to a file in the bucket.
        ### Universal options
        To use credentials for the `access_key` and `access_key_secret` use the liquid `credential` tag like so `{% credential name-of-credential %}`
        Select the `region` in which the bucket was created.
        ### Reading
        When `watch` is set to `true` the S3Agent will watch the specified `bucket` for changes. An event will be emitted for every detected change.
        When `watch` is set to `false` the agent will emit an event for every file in the bucket on each scheduled run.
        #{emitting_file_handling_agent_description}
        ### Writing
        Specify the filename to use in `filename`, Liquid interpolation is possible to change the name per event.
        Use [Liquid](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid) templating in `data` to specify which part of the received event should be written.
      MD
    end

    event_description do
      "Events will look like this:\n\n %s" % if boolify(interpolated['watch'])
        Utils.pretty_print({
          "file_pointer" => {
            "file" => "filename",
            "agent_id" => id
          },
          "event_type" => "modified/added/removed"
        })
      else
        Utils.pretty_print({
          "file_pointer" => {
            "file" => "filename",
            "agent_id" => id
          }
        })
      end
    end

    def default_options
      {
        'mode' => 'read',
        'access_key_id' => '',
        'access_key_secret' => '',
        'watch' => 'true',
        'bucket' => "",
        'data' => '{{ data }}'
      }
    end

    form_configurable :mode, type: :array, values: %w(read write)
    form_configurable :access_key_id, roles: :validatable
    form_configurable :access_key_secret, roles: :validatable
    form_configurable :region, type: :array, values: %w(us-east-1 us-west-1 us-west-2 eu-west-1 eu-central-1 ap-southeast-1 ap-southeast-2 ap-northeast-1 ap-northeast-2 sa-east-1)
    form_configurable :watch, type: :array, values: %w(true false)
    form_configurable :bucket, roles: :completable
    form_configurable :filename
    form_configurable :data

    def validate_options
      if options['mode'].blank? || !['read', 'write'].include?(options['mode'])
        errors.add(:base, "The 'mode' option is required and must be set to 'read' or 'write'")
      end
      if options['bucket'].blank?
        errors.add(:base, "The 'bucket' option is required.")
      end
      if options['region'].blank?
        errors.add(:base, "The 'region' option is required.")
      end
      case interpolated['mode']
      when 'read'
        if options['watch'].blank? || ![true, false].include?(boolify(options['watch']))
          errors.add(:base, "The 'watch' option is required and must be set to 'true' or 'false'")
        end
      when 'write'
        if options['filename'].blank?
          errors.add(:base, "filename must be specified in 'write' mode")
        end
        if options['data'].blank?
          errors.add(:base, "data must be specified in 'write' mode")
        end
      end
    end

    # Form validation hooks: the credentials are valid when we can list buckets.
    def validate_access_key_id
      !!buckets
    end

    def validate_access_key_secret
      !!buckets
    end

    # Provides completion values for the 'bucket' form field.
    def complete_bucket
      (buckets || []).collect { |bucket| {text: bucket.name, id: bucket.name} }
    end

    def working?
      checked_without_error?
    end

    # Scheduled run (read mode): either diffs the bucket against the last
    # seen state, or emits one file pointer per object.
    def check
      return if interpolated['mode'] != 'read'
      contents = safely do
        get_bucket_contents
      end
      if boolify(interpolated['watch'])
        watch(contents)
      else
        contents.each do |key, _|
          create_event payload: get_file_pointer(key)
        end
      end
    end

    # FileHandling hook: returns the S3 object body (an IO-like) for +file+.
    def get_io(file)
      client.get_object(bucket: interpolated['bucket'], key: file).body
    end

    # Write mode: stores the interpolated 'data' of each event under the
    # interpolated 'filename' in the bucket.
    def receive(incoming_events)
      return if interpolated['mode'] != 'write'
      incoming_events.each do |event|
        safely do
          mo = interpolated(event)
          client.put_object(bucket: mo['bucket'], key: mo['filename'], body: mo['data'])
        end
      end
    end

    private

    # Runs the block, converting S3 service errors into agent error logs
    # instead of letting them abort the run.
    def safely
      yield
    rescue Aws::S3::Errors::AccessDenied => e
      error("Could not access '#{interpolated['bucket']}' #{e.class} #{e.message}")
    rescue Aws::S3::Errors::ServiceError => e
      error("#{e.class}: #{e.message}")
    end

    # Diffs +contents+ (key => etag) against the previously seen state and
    # emits removed/modified/added events. The first run only seeds memory.
    def watch(contents)
      if last_check_at.nil?
        self.memory['seen_contents'] = contents
        return
      end

      new_memory = contents.dup
      memory['seen_contents'].each do |key, etag|
        if contents[key].blank?
          create_event payload: get_file_pointer(key).merge(event_type: :removed)
        elsif contents[key] != etag
          create_event payload: get_file_pointer(key).merge(event_type: :modified)
        end
        contents.delete(key)
      end
      # Whatever was not seen before must be new.
      contents.each do |key, _etag|
        create_event payload: get_file_pointer(key).merge(event_type: :added)
      end

      self.memory['seen_contents'] = new_memory
    end

    # Returns a hash of object key => etag for the whole (paginated) bucket.
    def get_bucket_contents
      contents = {}
      client.list_objects(bucket: interpolated['bucket']).each do |response|
        response.contents.each do |file|
          contents[file.key] = file.etag
        end
      end
      contents
    end

    def client
      @client ||= Aws::S3::Client.new(credentials: Aws::Credentials.new(interpolated['access_key_id'], interpolated['access_key_secret']),
                                      region: interpolated['region'])
    end

    # Lists buckets for validation/completion; returns false when the
    # service rejects the credentials. The +log+ flag is currently unused
    # but kept for interface compatibility.
    def buckets(log = false)
      @buckets ||= client.list_buckets.buckets
    rescue Aws::S3::Errors::ServiceError
      false
    end
  end
end

View file

@ -0,0 +1,15 @@
# Backfills the new required 'mode' option on existing FtpsiteAgents so
# pre-existing agents keep their old read-only behavior.
class AddModeOptionToFtpsiteAgents < ActiveRecord::Migration
  def up
    Agents::FtpsiteAgent.find_each do |ftpsite_agent|
      # Existing agents could only read, so 'read' is the safe default.
      ftpsite_agent.options['mode'] = 'read'
      # Skip validations: old records may not satisfy the new option checks.
      ftpsite_agent.save!(validate: false)
    end
  end

  def down
    Agents::FtpsiteAgent.find_each do |ftpsite_agent|
      ftpsite_agent.options.delete 'mode'
      ftpsite_agent.save!(validate: false)
    end
  end
end

View file

@ -118,5 +118,6 @@ end
require 'agents/twitter_stream_agent'
require 'agents/jabber_agent'
require 'agents/local_file_agent'
require 'huginn_scheduler'
require 'delayed_job_worker'

View file

@ -15,7 +15,7 @@ module Utils
def self.pretty_print(struct, indent = true)
output = JSON.pretty_generate(struct)
if indent
output.gsub(/\n/i, "\n ").tap { |a| p a }
output.gsub(/\n/i, "\n ")
else
output
end

View file

@ -12,3 +12,4 @@ EVERNOTE_OAUTH_KEY=evernoteoauthkey
EVERNOTE_OAUTH_SECRET=evernoteoauthsecret
FAILED_JOBS_TO_KEEP=2
REQUIRE_CONFIRMED_EMAIL=false
ENABLE_INSECURE_AGENTS=true

View file

@ -0,0 +1,244 @@
require 'rails_helper'
describe Agents::CsvAgent do
before(:each) do
@valid_params = {
'mode' => 'parse',
'separator' => ',',
'use_fields' => '',
'output' => 'event_per_row',
'with_header' => 'true',
'data_path' => '$.data',
'data_key' => 'data'
}
@checker = Agents::CsvAgent.new(:name => 'somename', :options => @valid_params)
@checker.user = users(:jane)
@checker.save!
@lfa = Agents::LocalFileAgent.new(name: 'local', options: {path: '{{}}', watch: 'false', append: 'false', mode: 'read'})
@lfa.user = users(:jane)
@lfa.save!
end
it_behaves_like 'FileHandlingConsumer'
context '#validate_options' do
it 'is valid with the given options' do
expect(@checker).to be_valid
end
it "requires with_header to be either 'true' or 'false'" do
@checker.options['with_header'] = 'true'
expect(@checker).to be_valid
@checker.options['with_header'] = 'false'
expect(@checker).to be_valid
@checker.options['with_header'] = 'test'
expect(@checker).not_to be_valid
end
it "data_path has to be set in serialize mode" do
@checker.options['mode'] = 'serialize'
@checker.options['data_path'] = ''
expect(@checker).not_to be_valid
end
end
context '#working' do
it 'is not working without having received an event' do
expect(@checker).not_to be_working
end
it 'is working after receiving an event without error' do
@checker.last_receive_at = Time.now
expect(@checker).to be_working
end
end
context '#receive' do
after(:all) do
FileUtils.rm(File.join(Rails.root, 'tmp', 'csv'))
end
def event_with_contents(contents)
path = File.join(Rails.root, 'tmp', 'csv')
File.open(path, 'w') do |f|
f.write(contents)
end
Event.new(payload: { 'file_pointer' => {'agent_id' => @lfa.id, 'file' => path } }, user_id: @checker.user_id)
end
context "agent options" do
let(:with_headers) { event_with_contents("one,two\n1,2\n2,3") }
let(:without_headers) { event_with_contents("1,2\n2,3") }
context "output" do
it "creates one event per row" do
@checker.options['output'] = 'event_per_row'
expect { @checker.receive([with_headers]) }.to change(Event, :count).by(2)
expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
end
it "creates one event per file" do
@checker.options['output'] = 'event_per_file'
expect { @checker.receive([with_headers]) }.to change(Event, :count).by(1)
expect(Event.last.payload).to eq(@checker.options['data_key'] => [{"one"=>"1", "two"=>"2"}, {"one"=>"2", "two"=>"3"}])
end
end
context "with_header" do
it "works without headers" do
@checker.options['with_header'] = 'false'
expect { @checker.receive([without_headers]) }.to change(Event, :count).by(2)
expect(Event.last.payload).to eq({@checker.options['data_key']=>["2", "3"]})
end
it "works without headers and event_per_file" do
@checker.options['with_header'] = 'false'
@checker.options['output'] = 'event_per_file'
expect { @checker.receive([without_headers]) }.to change(Event, :count).by(1)
expect(Event.last.payload).to eq({@checker.options['data_key']=>[['1', '2'], ["2", "3"]]})
end
end
context "use_fields" do
it "extracts the specified columns" do
@checker.options['use_fields'] = 'one'
expect { @checker.receive([with_headers]) }.to change(Event, :count).by(2)
expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2'})
end
end
context "data_path" do
it "can receive the CSV via a regular event" do
@checker.options['data_path'] = '$.data'
event = Event.new(payload: {'data' => "one,two\r\n1,2\r\n2,3"})
expect { @checker.receive([event]) }.to change(Event, :count).by(2)
expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
end
end
end
context "handling different CSV formats" do
it "works with windows line endings" do
event = event_with_contents("one,two\r\n1,2\r\n2,3")
expect { @checker.receive([event]) }.to change(Event, :count).by(2)
expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
end
it "works with OSX line endings" do
event = event_with_contents("one,two\r1,2\r2,3")
expect { @checker.receive([event]) }.to change(Event, :count).by(2)
expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
end
it "handles quotes correctly" do
event = event_with_contents("\"one\",\"two\"\n1,2\n\"\"2, two\",3")
expect { @checker.receive([event]) }.to change(Event, :count).by(2)
expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '"2, two', 'two' => '3'})
end
it "works with tab seperated csv" do
event = event_with_contents("one\ttwo\r\n1\t2\r\n2\t3")
@checker.options['separator'] = '\\t'
expect { @checker.receive([event]) }.to change(Event, :count).by(2)
expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
end
end
context "serializing" do
before(:each) do
@checker.options['mode'] = 'serialize'
@checker.options['data_path'] = '$.data'
@checker.options['data_key'] = 'data'
end
it "writes headers when with_header is true" do
event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} })
expect { @checker.receive([event])}.to change(Event, :count).by(1)
expect(Event.last.payload).to eq('data' => "\"key\",\"key2\",\"key3\"\n\"value\",\"value2\",\"value3\"\n")
end
it "writes one row per received event" do
event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} })
event2 = Event.new(payload: { 'data' => {'key' => '2value', 'key2' => '2value2', 'key3' => '2value3'} })
expect { @checker.receive([event, event2])}.to change(Event, :count).by(1)
expect(Event.last.payload).to eq('data' => "\"key\",\"key2\",\"key3\"\n\"value\",\"value2\",\"value3\"\n\"2value\",\"2value2\",\"2value3\"\n")
end
it "accepts multiple rows per event" do
event = Event.new(payload: { 'data' => [{'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'}, {'key' => '2value', 'key2' => '2value2', 'key3' => '2value3'}] })
expect { @checker.receive([event])}.to change(Event, :count).by(1)
expect(Event.last.payload).to eq('data' => "\"key\",\"key2\",\"key3\"\n\"value\",\"value2\",\"value3\"\n\"2value\",\"2value2\",\"2value3\"\n")
end
it "does not write the headers when with_header is false" do
@checker.options['with_header'] = 'false'
event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} })
expect { @checker.receive([event])}.to change(Event, :count).by(1)
expect(Event.last.payload).to eq('data' => "\"value\",\"value2\",\"value3\"\n")
end
it "only serialize the keys specified in use_fields" do
@checker.options['use_fields'] = 'key2, key3'
event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} })
expect { @checker.receive([event])}.to change(Event, :count).by(1)
expect(Event.last.payload).to eq('data' => "\"key2\",\"key3\"\n\"value2\",\"value3\"\n")
end
it "respects the order of use_fields" do
@checker.options['use_fields'] = 'key3, key'
event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} })
expect { @checker.receive([event])}.to change(Event, :count).by(1)
expect(Event.last.payload).to eq('data' => "\"key3\",\"key\"\n\"value3\",\"value\"\n")
end
it "respects use_fields and writes no header" do
@checker.options['with_header'] = 'false'
@checker.options['use_fields'] = 'key2, key3'
event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} })
expect { @checker.receive([event])}.to change(Event, :count).by(1)
expect(Event.last.payload).to eq('data' => "\"value2\",\"value3\"\n")
end
context "arrays" do
it "does not write a header" do
@checker.options['with_header'] = 'false'
event = Event.new(payload: { 'data' => ['value1', 'value2'] })
event2 = Event.new(payload: { 'data' => ['value3', 'value4'] })
expect { @checker.receive([event, event2])}.to change(Event, :count).by(1)
expect(Event.last.payload).to eq('data' => "\"value1\",\"value2\"\n\"value3\",\"value4\"\n")
end
it "handles nested arrays" do
event = Event.new(payload: { 'data' => [['value1', 'value2'], ['value3', 'value4']] })
expect { @checker.receive([event])}.to change(Event, :count).by(1)
expect(Event.last.payload).to eq('data' => "\"value1\",\"value2\"\n\"value3\",\"value4\"\n")
end
end
end
end
# The event description shown in the UI should reflect the configured
# output shape (hash vs. array) and mode.
context '#event_description' do
  it "works with event_per_row and headers" do
    @checker.options['output'] = 'event_per_row'
    @checker.options['with_header'] = 'true'
    description = @checker.event_description
    # With headers the sample payload is a hash, not an array.
    expect(description).not_to match(/\n\s+\[\n/)
    expect(description).to include(": {\n")
  end

  it "works with event_per_file and without headers" do
    @checker.options['output'] = 'event_per_file'
    @checker.options['with_header'] = 'false'
    description = @checker.event_description
    # Without headers the sample payload is an array of rows.
    expect(description).to match(/\n\s+\[\n/)
    expect(description).not_to include(": {\n")
  end

  it "shows dummy CSV when in serialize mode" do
    @checker.options['mode'] = 'serialize'
    description = @checker.event_description
    expect(description).to include('"generated\",\"csv')
  end
end
end

View file

@ -8,12 +8,74 @@ describe Agents::FtpsiteAgent do
'expected_update_period_in_days' => 1,
'url' => "ftp://ftp.example.org/pub/releases/",
'patterns' => ["example*.tar.gz"],
'mode' => 'read',
'filename' => 'test',
'data' => '{{ data }}'
}
@checker = Agents::FtpsiteAgent.new(:name => "Example", :options => @site, :keep_events_for => 2.days)
@checker.user = users(:bob)
@checker.save!
end
context "#validate_options" do
it "requires url to be a valid URI" do
@checker.options['url'] = 'not_valid'
expect(@checker).not_to be_valid
end
it "allows an URI without a path" do
@checker.options['url'] = 'ftp://ftp.example.org'
expect(@checker).to be_valid
end
it "does not check the url when liquid output markup is used" do
@checker.options['url'] = 'ftp://{{ ftp_host }}'
expect(@checker).to be_valid
end
it "requires patterns to be present and not empty array" do
@checker.options['patterns'] = ''
expect(@checker).not_to be_valid
@checker.options['patterns'] = 'not an array'
expect(@checker).not_to be_valid
@checker.options['patterns'] = []
expect(@checker).not_to be_valid
end
it "when present timestamp must be parsable into a Time object instance" do
@checker.options['timestamp'] = '2015-01-01 00:00:01'
expect(@checker).to be_valid
@checker.options['timestamp'] = 'error'
expect(@checker).not_to be_valid
end
it "requires mode to be set to 'read' or 'write'" do
@checker.options['mode'] = 'write'
expect(@checker).to be_valid
@checker.options['mode'] = ''
expect(@checker).not_to be_valid
end
it 'automatically sets mode to read when the agent is a new record' do
checker = Agents::FtpsiteAgent.new(name: 'test', options: @site.except('mode'))
checker.user = users(:bob)
expect(checker).to be_valid
expect(checker.options['mode']).to eq('read')
end
it "requires 'filename' in 'write' mode" do
@checker.options['mode'] = 'write'
@checker.options['filename'] = ''
expect(@checker).not_to be_valid
end
it "requires 'data' in 'write' mode" do
@checker.options['mode'] = 'write'
@checker.options['data'] = ''
expect(@checker).not_to be_valid
end
end
describe "#check" do
before do
@ -42,6 +104,7 @@ describe Agents::FtpsiteAgent do
}
expect(Event.last(2).first.payload).to eq({
'file_pointer' => { 'file' => 'example-1.1.tar.gz', 'agent_id' => @checker.id },
'url' => 'ftp://ftp.example.org/pub/releases/example-1.1.tar.gz',
'filename' => 'example-1.1.tar.gz',
'timestamp' => '2014-04-01T10:00:00Z',
@ -71,12 +134,14 @@ describe Agents::FtpsiteAgent do
}
expect(Event.last(2).first.payload).to eq({
'file_pointer' => { 'file' => 'example-1.2.tar.gz', 'agent_id' => @checker.id },
'url' => 'ftp://ftp.example.org/pub/releases/example-1.2.tar.gz',
'filename' => 'example-1.2.tar.gz',
'timestamp' => '2014-04-02T10:00:00Z',
})
expect(Event.last.payload).to eq({
'file_pointer' => { 'file' => 'example latest.tar.gz', 'agent_id' => @checker.id },
'url' => 'ftp://ftp.example.org/pub/releases/example%20latest.tar.gz',
'filename' => 'example latest.tar.gz',
'timestamp' => '2014-04-02T10:00:01Z',
@ -113,5 +178,83 @@ describe Agents::FtpsiteAgent do
end
end
context "#open_ftp" do
before(:each) do
@ftp_mock = mock()
mock(@ftp_mock).close
mock(@ftp_mock).connect('ftp.example.org', 21)
mock(@ftp_mock).passive=(true)
mock(Net::FTP).new { @ftp_mock }
end
context 'with_path' do
before(:each) { mock(@ftp_mock).chdir('pub/releases') }
it "logs in as anonymous when no user and password are given" do
mock(@ftp_mock).login('anonymous', 'anonymous@')
expect { |b| @checker.open_ftp(@checker.base_uri, &b) }.to yield_with_args(@ftp_mock)
end
it "passes the provided user and password" do
@checker.options['url'] = "ftp://user:password@ftp.example.org/pub/releases/"
mock(@ftp_mock).login('user', 'password')
expect { |b| @checker.open_ftp(@checker.base_uri, &b) }.to yield_with_args(@ftp_mock)
end
end
it "does not call chdir when no path is given" do
@checker.options['url'] = "ftp://ftp.example.org/"
mock(@ftp_mock).login('anonymous', 'anonymous@')
expect { |b| @checker.open_ftp(@checker.base_uri, &b) }.to yield_with_args(@ftp_mock)
end
end
context "#get_io" do
it "returns the contents of the file" do
ftp_mock= mock()
mock(ftp_mock).getbinaryfile('file', nil).yields('data')
mock(@checker).open_ftp(@checker.base_uri).yields(ftp_mock)
expect(@checker.get_io('file').read).to eq('data')
end
it "uses the encoding specified in force_encoding to convert the data to UTF-8" do
ftp_mock= mock()
mock(ftp_mock).getbinaryfile('file', nil).yields('ümlaut'.force_encoding('ISO-8859-15'))
mock(@checker).open_ftp(@checker.base_uri).yields(ftp_mock)
expect(@checker.get_io('file').read).to eq('ümlaut')
end
it "returns an empty StringIO instance when no data was read" do
ftp_mock= mock()
mock(ftp_mock).getbinaryfile('file', nil)
mock(@checker).open_ftp(@checker.base_uri).yields(ftp_mock)
expect(@checker.get_io('file').length).to eq(0)
end
end
context "#receive" do
before(:each) do
@checker.options['mode'] = 'write'
@checker.options['filename'] = 'file.txt'
@checker.options['data'] = '{{ data }}'
@ftp_mock= mock()
@stringio = StringIO.new()
mock(@checker).open_ftp(@checker.base_uri).yields(@ftp_mock)
end
it "writes the data at data into a file" do
mock(StringIO).new('hello world🔥') { @stringio }
mock(@ftp_mock).storbinary('STOR file.txt', @stringio, Net::FTP::DEFAULT_BLOCKSIZE)
event = Event.new(payload: {'data' => 'hello world🔥'})
@checker.receive([event])
end
it "converts the string encoding when force_encoding is specified" do
@checker.options['force_encoding'] = 'ISO-8859-1'
mock(StringIO).new('hello world?') { @stringio }
mock(@ftp_mock).storbinary('STOR file.txt', @stringio, Net::FTP::DEFAULT_BLOCKSIZE)
event = Event.new(payload: {'data' => 'hello world🔥'})
@checker.receive([event])
end
end
end
end

View file

@ -0,0 +1,276 @@
require 'rails_helper'
describe Agents::LocalFileAgent do
before(:each) do
@valid_params = {
'mode' => 'read',
'watch' => 'false',
'append' => 'false',
'path' => File.join(Rails.root, 'tmp', 'spec')
}
FileUtils.mkdir_p File.join(Rails.root, 'tmp', 'spec')
@checker = Agents::LocalFileAgent.new(:name => "somename", :options => @valid_params)
@checker.user = users(:jane)
@checker.save!
end
after(:all) do
FileUtils.rm_r File.join(Rails.root, 'tmp', 'spec')
end
describe "#validate_options" do
it "is valid with the given options" do
expect(@checker).to be_valid
end
it "requires mode to be either 'read' or 'write'" do
@checker.options['mode'] = 'write'
expect(@checker).to be_valid
@checker.options['mode'] = 'write'
expect(@checker).to be_valid
@checker.options['mode'] = 'test'
expect(@checker).not_to be_valid
end
it "requires the path to be set" do
@checker.options['path'] = ''
expect(@checker).not_to be_valid
end
it "requires watch to be present" do
@checker.options['watch'] = ''
expect(@checker).not_to be_valid
end
it "requires watch to be either 'true' or 'false'" do
@checker.options['watch'] = 'true'
expect(@checker).to be_valid
@checker.options['watch'] = 'false'
expect(@checker).to be_valid
@checker.options['watch'] = 'test'
expect(@checker).not_to be_valid
end
it "requires append to be either 'true' or 'false'" do
@checker.options['append'] = 'true'
expect(@checker).to be_valid
@checker.options['append'] = 'false'
expect(@checker).to be_valid
@checker.options['append'] = 'test'
expect(@checker).not_to be_valid
end
end
context "#working" do
it "is working with no recent errors in read mode" do
@checker.last_check_at = Time.now
expect(@checker).to be_working
end
it "is working with no recent errors in write mode" do
@checker.options['mode'] = 'write'
@checker.last_receive_at = Time.now
expect(@checker).to be_working
end
end
context "#check_path_existance" do
it "is truethy when the path exists" do
expect(@checker.check_path_existance).to be_truthy
end
it "is falsy when the path does not exist" do
@checker.options['path'] = '/doesnotexist'
expect(@checker.check_path_existance).to be_falsy
end
it "create a log entry" do
@checker.options['path'] = '/doesnotexist'
expect { @checker.check_path_existance(true) }.to change(AgentLog, :count).by(1)
end
it "works with non-expanded paths" do
@checker.options['path'] = '~'
expect(@checker.check_path_existance).to be_truthy
end
end
# Test helper: creates every given file, runs the block, then deletes them.
# Cleanup is deliberately not in an `ensure` — if the block raises, the
# files are left behind (matches the original behavior).
def with_files(*files)
  FileUtils.touch(files)
  yield
  FileUtils.rm(files)
end
context "#check" do
it "does not create events when the directory is empty" do
expect { @checker.check }.to change(Event, :count).by(0)
end
it "creates an event for every file in the directory" do
with_files(File.join(Rails.root, 'tmp', 'spec', 'one'), File.join(Rails.root, 'tmp', 'spec', 'two')) do
expect { @checker.check }.to change(Event, :count).by(2)
expect(Event.last.payload.has_key?('file_pointer')).to be_truthy
end
end
it "creates an event if the configured file exists" do
@checker.options['path'] = File.join(Rails.root, 'tmp', 'spec', 'one')
with_files(File.join(Rails.root, 'tmp', 'spec', 'one'), File.join(Rails.root, 'tmp', 'spec', 'two')) do
expect { @checker.check }.to change(Event, :count).by(1)
payload = Event.last.payload
expect(payload.has_key?('file_pointer')).to be_truthy
expect(payload['file_pointer']['file']).to eq(@checker.options['path'])
end
end
it "does not run when ENABLE_INSECURE_AGENTS is not set to true" do
ENV['ENABLE_INSECURE_AGENTS'] = 'false'
expect { @checker.check }.to change(AgentLog, :count).by(1)
ENV['ENABLE_INSECURE_AGENTS'] = 'true'
end
end
context "#event_description" do
it "should include event_type when watch is set to true" do
@checker.options['watch'] = 'true'
expect(@checker.event_description).to include('event_type')
end
it "should not include event_type when watch is set to false" do
@checker.options['watch'] = 'false'
expect(@checker.event_description).not_to include('event_type')
end
end
it "get_io opens the file" do
mock(File).open('test', 'r')
@checker.get_io('test')
end
context "#start_worker?" do
it "reeturns true when watch is true" do
@checker.options['watch'] = 'true'
expect(@checker.start_worker?).to be_truthy
end
it "returns false when watch is false" do
@checker.options['watch'] = 'false'
expect(@checker.start_worker?).to be_falsy
end
end
context "#receive" do
before(:each) do
@checker.options['mode'] = 'write'
@checker.options['data'] = '{{ data }}'
@file_mock = mock()
end
it "writes the data at data into a file" do
mock(@file_mock).write('hello world')
event = Event.new(payload: {'data' => 'hello world'})
mock(File).open(File.join(Rails.root, 'tmp', 'spec'), 'w').yields @file_mock
@checker.receive([event])
end
it "appends the data at data onto a file" do
mock(@file_mock).write('hello world')
@checker.options['append'] = 'true'
event = Event.new(payload: {'data' => 'hello world'})
mock(File).open(File.join(Rails.root, 'tmp', 'spec'), 'a').yields @file_mock
@checker.receive([event])
end
it "does not receive when ENABLE_INSECURE_AGENTS is not set to true" do
ENV['ENABLE_INSECURE_AGENTS'] = 'false'
expect { @checker.receive([]) }.to change(AgentLog, :count).by(1)
ENV['ENABLE_INSECURE_AGENTS'] = 'true'
end
end
# Tests for the long-running worker that watches the path via the listen gem.
# Fixes: the original wrapped the example group in a duplicated
# `describe describe ...`, and three #callback examples all carried the same
# description "creates an event for modifies files" although they cover the
# modified, added and removed cases respectively.
describe Agents::LocalFileAgent::Worker do
  require 'listen'

  before(:each) do
    # NOTE(review): the sibling examples use the string 'true' for this
    # option; the boolean appears to work here but is inconsistent — confirm
    # against the agent's watch validation.
    @checker.options['watch'] = true
    @checker.save
    @worker = Agents::LocalFileAgent::Worker.new(agent: @checker)
    @listen_mock = mock()
  end

  context "#setup" do
    it "initializes the listen gem" do
      mock(Listen).to(@checker.options['path'], ignore!: [])
      @worker.setup
    end
  end

  context "#run" do
    before(:each) do
      stub(Listen).to { @listen_mock }
      @worker.setup
    end

    it "starts to listen to changes in the directory when the path is present" do
      mock(@worker).sleep
      mock(@listen_mock).start
      @worker.run
    end

    it "does nothing when the path does not exist" do
      mock(@worker.agent).check_path_existance(true) { false }
      dont_allow(@listen_mock).start
      # Raise out of the sleep loop so the example terminates.
      mock(@worker).sleep { raise "Sleeping" }
      expect { @worker.run }.to raise_exception(RuntimeError, 'Sleeping')
    end
  end

  context "#stop" do
    it "stops the listen gem" do
      stub(Listen).to { @listen_mock }
      @worker.setup
      mock(@listen_mock).stop
      @worker.stop
    end
  end

  # The listen gem invokes the callback with (modified, added, removed).
  context "#callback" do
    let(:file) { File.join(Rails.root, 'tmp', 'one') }
    let(:file2) { File.join(Rails.root, 'tmp', 'one2') }

    it "creates an event for modified files" do
      expect { @worker.send(:callback, [file], [], [])}.to change(Event, :count).by(1)
      payload = Event.last.payload
      expect(payload['event_type']).to eq('modified')
    end

    it "creates an event for added files" do
      expect { @worker.send(:callback, [], [file], [])}.to change(Event, :count).by(1)
      payload = Event.last.payload
      expect(payload['event_type']).to eq('added')
    end

    it "creates an event for removed files" do
      expect { @worker.send(:callback, [], [], [file])}.to change(Event, :count).by(1)
      payload = Event.last.payload
      expect(payload['event_type']).to eq('removed')
    end

    it "creates an event for each changed file" do
      expect { @worker.send(:callback, [], [file], [file2])}.to change(Event, :count).by(2)
    end
  end

  context "#listen_options" do
    it "returns the path when a directory is given" do
      expect(@worker.send(:listen_options)).to eq([File.join(Rails.root, 'tmp', 'spec'), ignore!: []])
    end

    it "restricts to only the specified filename" do
      # When 'path' is a file, listen watches its directory restricted to
      # that exact basename.
      @worker.agent.options['path'] = File.join(Rails.root, 'tmp', 'one')
      expect(@worker.send(:listen_options)).to eq([File.join(Rails.root, 'tmp'), { only: /\Aone\z/, ignore!: [] } ])
    end
  end
end
end

View file

@ -0,0 +1,47 @@
require 'rails_helper'

# ReadFileAgent consumes a file_pointer event, reads the file via the
# emitting agent's get_io, and re-emits the contents under data_key.
describe Agents::ReadFileAgent do
  before(:each) do
    @valid_params = {
      'data_key' => 'data',
    }

    @checker = Agents::ReadFileAgent.new(:name => 'somename', :options => @valid_params)
    @checker.user = users(:jane)
    @checker.save!
  end

  it_behaves_like 'FileHandlingConsumer'

  context '#validate_options' do
    it 'is valid with the given options' do
      expect(@checker).to be_valid
    end

    it "requires data_key to be present" do
      @checker.options['data_key'] = ''
      expect(@checker).not_to be_valid
    end
  end

  context '#working' do
    it 'is not working without having received an event' do
      expect(@checker).not_to be_working
    end

    it 'is working after receiving an event without error' do
      @checker.last_receive_at = Time.now
      expect(@checker).to be_working
    end
  end

  context '#receive' do
    it "emits an event with the contents of the received files" do
      event = Event.new(payload: {file_pointer: {agent_id: 111, file: 'test'}})
      # Fix: removed an unused rr double (`io_mock = mock()`) that was never
      # wired into any expectation; get_io is stubbed directly instead.
      mock(@checker).get_io(event) { StringIO.new("testdata") }
      expect { @checker.receive([event]) }.to change(Event, :count).by(1)
      expect(Event.last.payload).to eq('data' => 'testdata')
    end
  end
end

View file

@ -0,0 +1,220 @@
require 'rails_helper'
describe Agents::S3Agent do
before(:each) do
@valid_params = {
'mode' => 'read',
'access_key_id' => '32343242',
'access_key_secret' => '1231312',
'watch' => 'false',
'bucket' => 'testbucket',
'region' => 'us-east-1',
'filename' => 'test.txt',
'data' => '{{ data }}'
}
@checker = Agents::S3Agent.new(:name => "somename", :options => @valid_params)
@checker.user = users(:jane)
@checker.save!
end
describe "#validate_options" do
it "requires the bucket to be set" do
@checker.options['bucket'] = ''
expect(@checker).not_to be_valid
end
it "requires watch to be present" do
@checker.options['watch'] = ''
expect(@checker).not_to be_valid
end
it "requires watch to be either 'true' or 'false'" do
@checker.options['watch'] = 'true'
expect(@checker).to be_valid
@checker.options['watch'] = 'false'
expect(@checker).to be_valid
@checker.options['watch'] = 'test'
expect(@checker).not_to be_valid
end
it "requires region to be present" do
@checker.options['region'] = ''
expect(@checker).not_to be_valid
end
it "requires mode to be set to 'read' or 'write'" do
@checker.options['mode'] = 'write'
expect(@checker).to be_valid
@checker.options['mode'] = ''
expect(@checker).not_to be_valid
end
it "requires 'filename' in 'write' mode" do
@checker.options['mode'] = 'write'
@checker.options['filename'] = ''
expect(@checker).not_to be_valid
end
it "requires 'data' in 'write' mode" do
@checker.options['mode'] = 'write'
@checker.options['data'] = ''
expect(@checker).not_to be_valid
end
end
describe "#validating" do
it "validates the key" do
mock(@checker).client { raise Aws::S3::Errors::SignatureDoesNotMatch.new('', '') }
expect(@checker.validate_access_key_id).to be_falsy
end
it "validates the secret" do
mock(@checker).buckets { true }
expect(@checker.validate_access_key_secret).to be_truthy
end
end
it "completes the buckets" do
mock(@checker).buckets { [OpenStruct.new(name: 'test'), OpenStruct.new(name: 'test2')]}
expect(@checker.complete_bucket).to eq([{text: 'test', id: 'test'}, {text: 'test2', id: 'test2'}])
end
context "#working" do
it "is working with no recent errors" do
@checker.last_check_at = Time.now
expect(@checker).to be_working
end
end
context "#check" do
context "not watching" do
it "emits an event for every file" do
mock(@checker).get_bucket_contents { {"test"=>"231232", "test2"=>"4564545"} }
expect { @checker.check }.to change(Event, :count).by(2)
expect(Event.last.payload).to eq({"file_pointer" => {"file"=>"test2", "agent_id"=> @checker.id}})
end
end
context "watching" do
before(:each) do
@checker.options['watch'] = 'true'
end
it "does not emit any events on the first run" do
contents = {"test"=>"231232", "test2"=>"4564545"}
mock(@checker).get_bucket_contents { contents }
expect { @checker.check }.not_to change(Event, :count)
expect(@checker.memory).to eq('seen_contents' => contents)
end
context "detecting changes" do
before(:each) do
contents = {"test"=>"231232", "test2"=>"4564545"}
mock(@checker).get_bucket_contents { contents }
expect { @checker.check }.not_to change(Event, :count)
@checker.last_check_at = Time.now
end
it "emits events for removed files" do
contents = {"test"=>"231232"}
mock(@checker).get_bucket_contents { contents }
expect { @checker.check }.to change(Event, :count).by(1)
expect(Event.last.payload).to eq({"file_pointer" => {"file" => "test2", "agent_id"=> @checker.id}, "event_type" => "removed"})
end
it "emits events for modified files" do
contents = {"test"=>"231232", "test2"=>"changed"}
mock(@checker).get_bucket_contents { contents }
expect { @checker.check }.to change(Event, :count).by(1)
expect(Event.last.payload).to eq({"file_pointer" => {"file" => "test2", "agent_id"=> @checker.id}, "event_type" => "modified"})
end
it "emits events for added files" do
contents = {"test"=>"231232", "test2"=>"4564545", "test3" => "31231231"}
mock(@checker).get_bucket_contents { contents }
expect { @checker.check }.to change(Event, :count).by(1)
expect(Event.last.payload).to eq({"file_pointer" => {"file" => "test3", "agent_id"=> @checker.id}, "event_type" => "added"})
end
end
context "error handling" do
it "handles AccessDenied exceptions" do
mock(@checker).get_bucket_contents { raise Aws::S3::Errors::AccessDenied.new('', '') }
expect { @checker.check }.to change(AgentLog, :count).by(1)
expect(AgentLog.last.message).to eq("Could not access 'testbucket' Aws::S3::Errors::AccessDenied ")
end
it "handles generic S3 exceptions" do
mock(@checker).get_bucket_contents { raise Aws::S3::Errors::PermanentRedirect.new('', 'error') }
expect { @checker.check }.to change(AgentLog, :count).by(1)
expect(AgentLog.last.message).to eq("Aws::S3::Errors::PermanentRedirect: error")
end
end
end
end
it "get_io returns a StringIO object" do
stringio =StringIO.new
mock_response = mock()
mock(mock_response).body { stringio }
mock_client = mock()
mock(mock_client).get_object(bucket: 'testbucket', key: 'testfile') { mock_response }
mock(@checker).client { mock_client }
@checker.get_io('testfile')
end
context "#get_bucket_contents" do
it "returns a hash with the contents of the bucket" do
mock_response = mock()
mock(mock_response).contents { [OpenStruct.new(key: 'test', etag: '231232'), OpenStruct.new(key: 'test2', etag: '4564545')] }
mock_client = mock()
mock(mock_client).list_objects(bucket: 'testbucket') { [mock_response] }
mock(@checker).client { mock_client }
expect(@checker.send(:get_bucket_contents)).to eq({"test"=>"231232", "test2"=>"4564545"})
end
end
context "#client" do
it "initializes the S3 client correctly" do
mock_credential = mock()
mock(Aws::Credentials).new('32343242', '1231312') { mock_credential }
mock(Aws::S3::Client).new(credentials: mock_credential,
region: 'us-east-1')
@checker.send(:client)
end
end
context "#event_description" do
it "should include event_type when watch is set to true" do
@checker.options['watch'] = 'true'
expect(@checker.event_description).to include('event_type')
end
it "should not include event_type when watch is set to false" do
@checker.options['watch'] = 'false'
expect(@checker.event_description).not_to include('event_type')
end
end
context "#receive" do
before(:each) do
@checker.options['mode'] = 'write'
@checker.options['filename'] = 'file.txt'
@checker.options['data'] = '{{ data }}'
end
it "writes the data at data into a file" do
client_mock = mock()
mock(client_mock).put_object(bucket: @checker.options['bucket'], key: @checker.options['filename'], body: 'hello world!')
mock(@checker).client { client_mock }
event = Event.new(payload: {'data' => 'hello world!'})
@checker.receive([event])
end
it "does nothing when mode is set to 'read'" do
@checker.options['mode'] = 'read'
event = Event.new(payload: {'data' => 'hello world!'})
@checker.receive([event])
end
end
end

View file

@ -0,0 +1,16 @@
require 'rails_helper'

# Shared examples for agents mixing in the FileHandling concern.
# Expects the including spec to provide a saved agent in @checker.
shared_examples_for 'FileHandlingConsumer' do
  it 'returns a file pointer' do
    expect(@checker.get_file_pointer('testfile')).to eq(file_pointer: { file: "testfile", agent_id: @checker.id})
  end

  it 'get_io raises an exception when trying to access an agent of a different user' do
    @checker2 = @checker.dup
    @checker2.user = users(:bob)
    @checker2.save!
    expect(@checker2.user.id).not_to eq(@checker.user.id)
    # The lookup is scoped to event.user.agents, so a foreign agent_id must
    # raise RecordNotFound instead of leaking another user's files.
    event = Event.new(user: @checker.user, payload: {'file_pointer' => {'file' => 'test', 'agent_id' => @checker2.id}})
    expect { @checker.get_io(event) }.to raise_error(ActiveRecord::RecordNotFound)
  end
end

View file

@ -50,4 +50,28 @@ shared_examples_for WorkingHelpers do
expect(@agent.received_event_without_error?).to eq(true)
end
end
describe "checked_without_error?" do
before do
@agent = described_class.new
end
it "should return false until the first time check ran" do
expect(@agent.checked_without_error?).to eq(false)
@agent.last_check_at = Time.now
expect(@agent.checked_without_error?).to eq(true)
end
it "should return false when the last error occured after the check" do
@agent.last_check_at = Time.now - 1.minute
@agent.last_error_log_at = Time.now
expect(@agent.checked_without_error?).to eq(false)
end
it "should return true when the last check occured after the last error" do
@agent.last_check_at = Time.now
@agent.last_error_log_at = Time.now - 1.minute
expect(@agent.checked_without_error?).to eq(true)
end
end
end