diff --git a/Gemfile b/Gemfile index f4143523..4065a39c 100644 --- a/Gemfile +++ b/Gemfile @@ -63,6 +63,12 @@ gem 'haversine' gem 'omniauth-evernote' gem 'evernote_oauth' +# LocalFileAgent (watch functionality) +gem 'listen', '~> 3.0.5', require: false + +# S3Agent +gem 'aws-sdk-core', '~> 2.2.15' + # Optional Services. gem 'omniauth-37signals' # BasecampAgent gem 'omniauth-wunderlist', github: 'wunderlist/omniauth-wunderlist', ref: 'd0910d0396107b9302aa1bc50e74bb140990ccb8' @@ -75,7 +81,6 @@ unless Gem::Version.new(Bundler::VERSION) >= Gem::Version.new('1.5.0') end gem 'protected_attributes', '~>1.0.8' # This must be loaded before some other gems, like delayed_job. - gem 'ace-rails-ap', '~> 2.0.1' gem 'bootstrap-kaminari-views', '~> 0.0.3' gem 'bundler', '>= 1.5.0' diff --git a/Gemfile.lock b/Gemfile.lock index 7908df81..239fa74c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -110,6 +110,8 @@ GEM addressable (>= 2.3.1) extlib (>= 0.9.15) multi_json (>= 1.0.0) + aws-sdk-core (2.2.15) + jmespath (~> 1.0) bcrypt (3.1.10) better_errors (1.1.0) coderay (>= 1.0.0) @@ -279,6 +281,7 @@ GEM hypdf (1.0.7) httmultiparty (= 0.3.10) i18n (0.7.0) + jmespath (1.1.3) jquery-rails (3.1.3) railties (>= 3.0, < 5.0) thor (>= 0.14, < 2.0) @@ -577,6 +580,7 @@ PLATFORMS DEPENDENCIES ace-rails-ap (~> 2.0.1) + aws-sdk-core (~> 2.2.15) better_errors (~> 1.1) binding_of_caller bootstrap-kaminari-views (~> 0.0.3) @@ -621,6 +625,7 @@ DEPENDENCIES kramdown (~> 1.3.3) letter_opener_web liquid (~> 3.0.3) + listen (~> 3.0.5) mini_magick mqtt multi_xml diff --git a/app/concerns/file_handling.rb b/app/concerns/file_handling.rb new file mode 100644 index 00000000..261e1285 --- /dev/null +++ b/app/concerns/file_handling.rb @@ -0,0 +1,58 @@ +module FileHandling + extend ActiveSupport::Concern + + def get_file_pointer(file) + { file_pointer: { file: file, agent_id: id } } + end + + def get_io(event) + return nil unless event.payload['file_pointer'] && + event.payload['file_pointer']['file'] && + 
event.payload['file_pointer']['agent_id'] + event.user.agents.find(event.payload['file_pointer']['agent_id']).get_io(event.payload['file_pointer']['file']) + end + + def emitting_file_handling_agent_description + @emitting_file_handling_agent_description ||= + "This agent only emits a 'file pointer', not the data inside the files, the following agents can consume the created events: `#{receiving_file_handling_agents.join('`, `')}`. Read more about the concept in the [wiki](https://github.com/cantino/huginn/wiki/How-Huginn-works-with-files)." + end + + def receiving_file_handling_agent_description + @receiving_file_handling_agent_description ||= + "This agent can consume a 'file pointer' event from the following agents with no additional configuration: `#{emitting_file_handling_agents.join('`, `')}`. Read more about the concept in the [wiki](https://github.com/cantino/huginn/wiki/How-Huginn-works-with-files)." + end + + private + + def emitting_file_handling_agents + emitting_file_handling_agents = file_handling_agents.select { |a| a.emits_file_pointer? } + emitting_file_handling_agents.map { |a| a.to_s.demodulize } + end + + def receiving_file_handling_agents + receiving_file_handling_agents = file_handling_agents.select { |a| a.consumes_file_pointer? } + receiving_file_handling_agents.map { |a| a.to_s.demodulize } + end + + def file_handling_agents + @file_handling_agents ||= Agent.types.select{ |c| c.included_modules.include?(FileHandling) }.map { |d| d.name.constantize } + end + + module ClassMethods + def emits_file_pointer! + @emits_file_pointer = true + end + + def emits_file_pointer? + !!@emits_file_pointer + end + + def consumes_file_pointer! + @consumes_file_pointer = true + end + + def consumes_file_pointer? 
+ !!@consumes_file_pointer + end + end +end diff --git a/app/concerns/working_helpers.rb b/app/concerns/working_helpers.rb index 166f2366..a6319ca9 100644 --- a/app/concerns/working_helpers.rb +++ b/app/concerns/working_helpers.rb @@ -12,4 +12,8 @@ module WorkingHelpers def received_event_without_error? (last_receive_at.present? && last_error_log_at.blank?) || (last_receive_at.present? && last_error_log_at.present? && last_receive_at > last_error_log_at) end -end \ No newline at end of file + + def checked_without_error? + (last_check_at.present? && last_error_log_at.nil?) || (last_check_at.present? && last_error_log_at.present? && last_check_at > last_error_log_at) + end +end diff --git a/app/models/agents/csv_agent.rb b/app/models/agents/csv_agent.rb new file mode 100644 index 00000000..5246ff02 --- /dev/null +++ b/app/models/agents/csv_agent.rb @@ -0,0 +1,195 @@ +module Agents + class CsvAgent < Agent + include FormConfigurable + include FileHandling + + cannot_be_scheduled! + consumes_file_pointer! + + def default_options + { + 'mode' => 'parse', + 'separator' => ',', + 'use_fields' => '', + 'output' => 'event_per_row', + 'with_header' => 'true', + 'data_path' => '$.data', + 'data_key' => 'data' + } + end + + description do + <<-MD + The `CsvAgent` parses or serializes CSV data. When parsing, events can either be emitted for the entire CSV, or one per row. + + Set `mode` to `parse` to parse CSV from incoming event, when set to `serialize` the agent serializes the data of events to CSV. + + ### Universal options + + Specify the `separator` which is used to separate the fields from each other (default is `,`). + + `data_key` sets the key which contains the serialized CSV or parsed CSV data in emitted events. + + ### Parsing + + If `use_fields` is set to a comma separated string and the CSV file contains field headers the agent will only extract the specified fields.
+ + `output` determines whether one event per row is emitted or one event that includes all the rows. + + Set `with_header` to `true` if the first line of the CSV file contains field names. + + #{receiving_file_handling_agent_description} + + When receiving the CSV data in a regular event use [JSONPath](http://goessner.net/articles/JsonPath/) to select the path in `data_path`. `data_path` is only used when the received event does not contain a 'file pointer'. + + ### Serializing + + If `use_fields` is set to a comma separated string and the first received event has an object at the specified `data_path` the generated CSV will only include the given fields. + + Set `with_header` to `true` to include a field header in the CSV. + + Use [JSONPath](http://goessner.net/articles/JsonPath/) in `data_path` to select which part of the received events should be serialized. + MD + end + + event_description do + "Events will looks like this:\n\n %s" % if interpolated['mode'] == 'parse' + rows = if boolify(interpolated['with_header']) + [{'column' => 'row1 value1', 'column2' => 'row1 value2'}, {'column' => 'row2 value3', 'column2' => 'row2 value4'}] + else + [['row1 value1', 'row1 value2'], ['row2 value1', 'row2 value2']] + end + if interpolated['output'] == 'event_per_row' + Utils.pretty_print(interpolated['data_key'] => rows[0]) + else + Utils.pretty_print(interpolated['data_key'] => rows) + end + else + Utils.pretty_print(interpolated['data_key'] => '"generated","csv","data"' + "\n" + '"column1","column2","column3"') + end + end + + form_configurable :mode, type: :array, values: %w(parse serialize) + form_configurable :separator, type: :string + form_configurable :data_key, type: :string + form_configurable :with_header, type: :boolean + form_configurable :use_fields, type: :string + form_configurable :output, type: :array, values: %w(event_per_row event_per_file) + form_configurable :data_path, type: :string + + def validate_options + if options['with_header'].blank?
|| ![true, false].include?(boolify(options['with_header'])) + errors.add(:base, "The 'with_header' options is required and must be set to 'true' or 'false'") + end + if options['mode'] == 'serialize' && options['data_path'].blank? + errors.add(:base, "When mode is set to serialize data_path has to be present.") + end + end + + def working? + received_event_without_error? + end + + def receive(incoming_events) + case options['mode'] + when 'parse' + parse(incoming_events) + when 'serialize' + serialize(incoming_events) + end + end + + private + def serialize(incoming_events) + mo = interpolated(incoming_events.first) + rows = rows_from_events(incoming_events, mo) + csv = CSV.generate(col_sep: separator(mo), force_quotes: true ) do |csv| + if boolify(mo['with_header']) && rows.first.is_a?(Hash) + if mo['use_fields'].present? + csv << extract_options(mo) + else + csv << rows.first.keys + end + end + rows.each do |data| + if data.is_a?(Hash) + if mo['use_fields'].present? + csv << data.extract!(*extract_options(mo)).values + else + csv << data.values + end + else + csv << data + end + end + end + create_event payload: { mo['data_key'] => csv } + end + + def rows_from_events(incoming_events, mo) + [].tap do |rows| + incoming_events.each do |event| + data = Utils.value_at(event.payload, mo['data_path']) + if data.is_a?(Array) && (data[0].is_a?(Array) || data[0].is_a?(Hash)) + data.each { |row| rows << row } + else + rows << data + end + end + end + end + + def parse(incoming_events) + incoming_events.each do |event| + mo = interpolated(event) + next unless io = local_get_io(event) + if mo['output'] == 'event_per_row' + parse_csv(io, mo) do |payload| + create_event payload: { mo['data_key'] => payload } + end + else + create_event payload: { mo['data_key'] => parse_csv(io, mo, []) } + end + end + end + + def local_get_io(event) + if io = get_io(event) + io + else + Utils.value_at(event.payload, interpolated['data_path']) + end + end + + def parse_csv(io, mo, array = nil) 
+ CSV.new(io, col_sep: separator(mo), headers: boolify(mo['with_header'])).each do |row| + if block_given? + yield get_payload(row, mo) + else + array << get_payload(row, mo) + end + end + array + end + + def separator(mo) + mo['separator'] == '\\t' ? "\t" : mo['separator'] + end + + def get_payload(row, mo) + if boolify(mo['with_header']) + if mo['use_fields'].present? + row.to_hash.extract!(*extract_options(mo)) + else + row.to_hash + end + else + row + end + end + + def extract_options(mo) + mo['use_fields'].split(',').map(&:strip) + end + end +end diff --git a/app/models/agents/ftpsite_agent.rb b/app/models/agents/ftpsite_agent.rb index 316764c6..a0d6585d 100644 --- a/app/models/agents/ftpsite_agent.rb +++ b/app/models/agents/ftpsite_agent.rb @@ -3,23 +3,42 @@ require 'time' module Agents class FtpsiteAgent < Agent - cannot_receive_events! + include FileHandling default_schedule "every_12h" gem_dependency_check { defined?(Net::FTP) && defined?(Net::FTP::List) } - description <<-MD - The FTP Site Agent checks an FTP site and creates Events based on newly uploaded files in a directory. + emits_file_pointer! - #{'## Include `net-ftp-list` in your Gemfile to use this Agent!' if dependencies_missing?} + description do + <<-MD + The Ftp Site Agent checks an FTP site and creates Events based on newly uploaded files in a directory. When receiving events it creates files on the configured FTP server. + #{'## Include `net-ftp-list` in your Gemfile to use this Agent!' if dependencies_missing?} - Specify a `url` that represents a directory of an FTP site to watch, and a list of `patterns` to match against file names. + `mode` must be present and either `read` or `write`, in `read` mode the agent checks the FTP site for changed files, with `write` it writes received events to a file on the server. - Login credentials can be included in `url` if authentication is required. 
+ ### Universal options - Specify a `url` that represents a directory of an FTP site to watch, and a list of `patterns` to match against file names. + Login credentials can be included in `url` if authentication is required. 
+ end end def default_options { + 'mode' => 'read', 'expected_update_period_in_days' => "1", 'url' => "ftp://example.org/pub/releases/", 'patterns' => [ 'foo-*.tar.gz', ], 'after' => Time.now.iso8601, + 'force_encoding' => '', + 'filename' => '', + 'data' => '{{ data }}' } end def validate_options # Check for required fields begin - url = options['url'] - String === url or raise - uri = URI(url) - URI::FTP === uri or raise - errors.add(:base, "url must end with a slash") unless uri.path.end_with?('/') + if !options['url'].include?('{{') + url = interpolated['url'] + String === url or raise + uri = URI(url) + URI::FTP === uri or raise + errors.add(:base, "url must end with a slash") if uri.path.present? && !uri.path.end_with?('/') + end rescue errors.add(:base, "url must be a valid FTP URL") end - patterns = options['patterns'] - case patterns - when Array - if patterns.empty? - errors.add(:base, "patterns must not be empty") + options['mode'] = 'read' if options['mode'].blank? && new_record? + if options['mode'].blank? || !['read', 'write'].include?(options['mode']) + errors.add(:base, "The 'mode' option is required and must be set to 'read' or 'write'") + end + + case interpolated['mode'] + when 'read' + patterns = options['patterns'] + case patterns + when Array + if patterns.empty? + errors.add(:base, "patterns must not be empty") + end + when nil, '' + errors.add(:base, "patterns must be specified") + else + errors.add(:base, "patterns must be an array") + end + when 'write' + if options['filename'].blank? + errors.add(:base, "filename must be specified in 'write' mode") + end + if options['data'].blank? 
+ errors.add(:base, "data must be specified in 'write' mode") end - when nil, '' - errors.add(:base, "patterns must be specified") - else - errors.add(:base, "patterns must be an array") end # Check for optional fields @@ -85,6 +129,7 @@ module Agents end def check + return if interpolated['mode'] != 'read' saving_entries do |found| each_entry { |filename, mtime| found[filename, mtime] @@ -92,6 +137,17 @@ module Agents end end + def receive(incoming_events) + return if interpolated['mode'] != 'write' + incoming_events.each do |event| + mo = interpolated(event) + mo['data'].encode!(interpolated['force_encoding'], invalid: :replace, undef: :replace) if interpolated['force_encoding'].present? + open_ftp(base_uri) do |ftp| + ftp.storbinary("STOR #{mo['filename']}", StringIO.new(mo['data']), Net::FTP::DEFAULT_BLOCKSIZE) + end + end + end + def each_entry patterns = interpolated['patterns'] @@ -147,9 +203,10 @@ module Agents ftp.passive = true - path = uri.path.chomp('/') - log "Changing directory to #{path}" - ftp.chdir(path) + if (path = uri.path.chomp('/')).present? + log "Changing directory to #{path}" + ftp.chdir(path) + end yield ftp ensure @@ -176,17 +233,28 @@ module Agents new_files.sort_by { |filename| found_entries[filename] }.each { |filename| - create_event payload: { + create_event payload: get_file_pointer(filename).merge({ 'url' => (base_uri + uri_path_escape(filename)).to_s, 'filename' => filename, 'timestamp' => found_entries[filename], - } + }) } memory['known_entries'] = found_entries save! 
end + def get_io(file) + data = StringIO.new + open_ftp(base_uri) do |ftp| + ftp.getbinaryfile(file, nil) do |chunk| + data.write chunk.force_encoding(options['force_encoding'].presence || 'UTF-8') + end + end + data.rewind + data + end + private def is_positive_integer?(value) diff --git a/app/models/agents/local_file_agent.rb b/app/models/agents/local_file_agent.rb new file mode 100644 index 00000000..a3701880 --- /dev/null +++ b/app/models/agents/local_file_agent.rb @@ -0,0 +1,190 @@ +module Agents + class LocalFileAgent < Agent + include LongRunnable + include FormConfigurable + include FileHandling + + emits_file_pointer! + + default_schedule 'every_1h' + + def self.should_run? + ENV['ENABLE_INSECURE_AGENTS'] == "true" + end + + description do + <<-MD + The LocalFileAgent can watch a file/directory for changes or emit an event for every file in that directory. When receiving an event it writes the received data into a file. + + `mode` determines if the agent is emitting events for (changed) files or writing received event data to disk. + + ### Reading + + When `watch` is set to `true` the LocalFileAgent will watch the specified `path` for changes, the schedule is ignored and the file system is watched continuously. An event will be emitted for every detected change. + + When `watch` is set to `false` the agent will emit an event for every file in the directory on each scheduled run. + + #{emitting_file_handling_agent_description} + + ### Writing + + Every event will be written into a file at `path`, Liquid interpolation is possible to change the path per event. + + When `append` is true the received data will be appended to the file. + + Use [Liquid](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid) templating in `data` to specify which part of the received event should be written. + + *Warning*: This type of Agent can read and write any file the user that runs the Huginn server has access to, and is #{Agents::LocalFileAgent.should_run? ?
"**currently enabled**" : "**currently disabled**"}. + Only enable this Agent if you trust everyone using your Huginn installation. + You can enable this Agent in your .env file by setting `ENABLE_INSECURE_AGENTS` to `true`. + MD + end + + event_description do + "Events will looks like this:\n\n %s" % if boolify(interpolated['watch']) + Utils.pretty_print( + "file_pointer" => { + "file" => "/tmp/test/filename", + "agent_id" => id + }, + "event_type" => "modified/added/removed" + ) + else + Utils.pretty_print( + "file_pointer" => { + "file" => "/tmp/test/filename", + "agent_id" => id + } + ) + end + end + + def default_options + { + 'mode' => 'read', + 'watch' => 'true', + 'append' => 'false', + 'path' => "", + 'data' => '{{ data }}' + } + end + + form_configurable :mode, type: :array, values: %w(read write) + form_configurable :watch, type: :array, values: %w(true false) + form_configurable :path, type: :string + form_configurable :append, type: :boolean + form_configurable :data, type: :string + + def validate_options + if options['mode'].blank? || !['read', 'write'].include?(options['mode']) + errors.add(:base, "The 'mode' option is required and must be set to 'read' or 'write'") + end + if options['watch'].blank? || ![true, false].include?(boolify(options['watch'])) + errors.add(:base, "The 'watch' option is required and must be set to 'true' or 'false'") + end + if options['append'].blank? || ![true, false].include?(boolify(options['append'])) + errors.add(:base, "The 'append' option is required and must be set to 'true' or 'false'") + end + if options['path'].blank? + errors.add(:base, "The 'path' option is required.") + end + end + + def working? + should_run?(false) && ((interpolated['mode'] == 'read' && check_path_existance && checked_without_error?) || + (interpolated['mode'] == 'write' && received_event_without_error?)) + end + + def check + return if interpolated['mode'] != 'read' || boolify(interpolated['watch']) || !should_run? 
+ return unless check_path_existance(true) + if File.directory?(expanded_path) + Dir.glob(File.join(expanded_path, '*')).select { |f| File.file?(f) } + else + [expanded_path] + end.each do |file| + create_event payload: get_file_pointer(file) + end + end + + def receive(incoming_events) + return if interpolated['mode'] != 'write' || !should_run? + incoming_events.each do |event| + mo = interpolated(event) + File.open(File.expand_path(mo['path']), boolify(mo['append']) ? 'a' : 'w') do |file| + file.write(mo['data']) + end + end + end + + def start_worker? + interpolated['mode'] == 'read' && boolify(interpolated['watch']) && should_run? && check_path_existance + end + + def check_path_existance(log = true) + if !File.exist?(expanded_path) + error("File or directory '#{expanded_path}' does not exist") if log + return false + end + true + end + + def get_io(file) + File.open(file, 'r') + end + + def expanded_path + @expanded_path ||= File.expand_path(interpolated['path']) + end + + private + + def should_run?(log = true) + if self.class.should_run? + true + else + error("Unable to run because insecure agents are not enabled. 
Set ENABLE_INSECURE_AGENTS to true in the Huginn .env configuration.") if log + false + end + end + + class Worker < LongRunnable::Worker + def setup + require 'listen' + @listener = Listen.to(*listen_options, &method(:callback)) + end + + def run + sleep unless agent.check_path_existance(true) + + @listener.start + sleep + end + + def stop + @listener.stop + end + + private + + def callback(*changes) + AgentRunner.with_connection do + changes.zip([:modified, :added, :removed]).each do |files, event_type| + files.each do |file| + agent.create_event payload: agent.get_file_pointer(file).merge(event_type: event_type) + end + end + agent.touch(:last_check_at) + end + end + + def listen_options + if File.directory?(agent.expanded_path) + [agent.expanded_path, ignore!: [] ] + else + [File.dirname(agent.expanded_path), { ignore!: [], only: /\A#{Regexp.escape(File.basename(agent.expanded_path))}\z/ } ] + end + end + end + end +end diff --git a/app/models/agents/read_file_agent.rb b/app/models/agents/read_file_agent.rb new file mode 100644 index 00000000..a7903408 --- /dev/null +++ b/app/models/agents/read_file_agent.rb @@ -0,0 +1,50 @@ +module Agents + class ReadFileAgent < Agent + include FormConfigurable + include FileHandling + + cannot_be_scheduled! + consumes_file_pointer! + + def default_options + { + 'data_key' => 'data' + } + end + + description do + <<-MD + The ReadFileAgent takes events from `FileHandling` agents, reads the file, and emits the contents as a string. + + `data_key` specifies the key of the emitted event which contains the file contents. + + #{receiving_file_handling_agent_description} + MD + end + + event_description <<-MD + { + "data" => '...' + } + MD + + form_configurable :data_key, type: :string + + def validate_options + if options['data_key'].blank? + errors.add(:base, "The 'data_key' options is required.") + end + end + + def working? + received_event_without_error? 
+ end + + def receive(incoming_events) + incoming_events.each do |event| + next unless io = get_io(event) + create_event payload: { interpolated['data_key'] => io.read } + end + end + end +end diff --git a/app/models/agents/s3_agent.rb b/app/models/agents/s3_agent.rb new file mode 100644 index 00000000..850ed1c7 --- /dev/null +++ b/app/models/agents/s3_agent.rb @@ -0,0 +1,206 @@ +module Agents + class S3Agent < Agent + include FormConfigurable + include FileHandling + + emits_file_pointer! + no_bulk_receive! + + default_schedule 'every_1h' + + gem_dependency_check { defined?(Aws::S3) } + + description do + <<-MD + The S3Agent can watch a bucket for changes or emit an event for every file in that bucket. When receiving events, it writes the data into a file on S3. + + #{'## Include `aws-sdk-core` in your Gemfile to use this Agent!' if dependencies_missing?} + + `mode` must be present and either `read` or `write`, in `read` mode the agent checks the S3 bucket for changed files, with `write` it writes received events to a file in the bucket. + + ### Universal options + + To use credentials for the `access_key` and `access_key_secret` use the liquid `credential` tag like so `{% credential name-of-credential %}` + + Select the `region` in which the bucket was created. + + ### Reading + + When `watch` is set to `true` the S3Agent will watch the specified `bucket` for changes. An event will be emitted for every detected change. + + When `watch` is set to `false` the agent will emit an event for every file in the bucket on each scheduled run. + + #{emitting_file_handling_agent_description} + + ### Writing + + Specify the filename to use in `filename`, Liquid interpolation is possible to change the name per event. + + Use [Liquid](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid) templating in `data` to specify which part of the received event should be written.
+ MD + end + + event_description do + "Events will looks like this:\n\n %s" % if boolify(interpolated['watch']) + Utils.pretty_print({ + "file_pointer" => { + "file" => "filename", + "agent_id" => id + }, + "event_type" => "modified/added/removed" + }) + else + Utils.pretty_print({ + "file_pointer" => { + "file" => "filename", + "agent_id" => id + } + }) + end + end + + def default_options + { + 'mode' => 'read', + 'access_key_id' => '', + 'access_key_secret' => '', + 'watch' => 'true', + 'bucket' => "", + 'data' => '{{ data }}' + } + end + + form_configurable :mode, type: :array, values: %w(read write) + form_configurable :access_key_id, roles: :validatable + form_configurable :access_key_secret, roles: :validatable + form_configurable :region, type: :array, values: %w(us-east-1 us-west-1 us-west-2 eu-west-1 eu-central-1 ap-southeast-1 ap-southeast-2 ap-northeast-1 ap-northeast-2 sa-east-1) + form_configurable :watch, type: :array, values: %w(true false) + form_configurable :bucket, roles: :completable + form_configurable :filename + form_configurable :data + + def validate_options + if options['mode'].blank? || !['read', 'write'].include?(options['mode']) + errors.add(:base, "The 'mode' option is required and must be set to 'read' or 'write'") + end + if options['bucket'].blank? + errors.add(:base, "The 'bucket' option is required.") + end + if options['region'].blank? + errors.add(:base, "The 'region' option is required.") + end + + case interpolated['mode'] + when 'read' + if options['watch'].blank? || ![true, false].include?(boolify(options['watch'])) + errors.add(:base, "The 'watch' option is required and must be set to 'true' or 'false'") + end + when 'write' + if options['filename'].blank? + errors.add(:base, "filename must be specified in 'write' mode") + end + if options['data'].blank? 
+ errors.add(:base, "data must be specified in 'write' mode") + end + end + end + + def validate_access_key_id + !!buckets + end + + def validate_access_key_secret + !!buckets + end + + def complete_bucket + (buckets || []).collect { |room| {text: room.name, id: room.name} } + end + + def working? + checked_without_error? + end + + def check + return if interpolated['mode'] != 'read' + contents = safely do + get_bucket_contents + end + if boolify(interpolated['watch']) + watch(contents) + else + contents.each do |key, _| + create_event payload: get_file_pointer(key) + end + end + end + + def get_io(file) + client.get_object(bucket: interpolated['bucket'], key: file).body + end + + def receive(incoming_events) + return if interpolated['mode'] != 'write' + incoming_events.each do |event| + safely do + mo = interpolated(event) + client.put_object(bucket: mo['bucket'], key: mo['filename'], body: mo['data']) + end + end + end + + private + + def safely + yield + rescue Aws::S3::Errors::AccessDenied => e + error("Could not access '#{interpolated['bucket']}' #{e.class} #{e.message}") + rescue Aws::S3::Errors::ServiceError =>e + error("#{e.class}: #{e.message}") + end + + def watch(contents) + if last_check_at.nil? + self.memory['seen_contents'] = contents + return + end + + new_memory = contents.dup + + memory['seen_contents'].each do |key, etag| + if contents[key].blank? 
+ create_event payload: get_file_pointer(key).merge(event_type: :removed) + elsif contents[key] != etag + create_event payload: get_file_pointer(key).merge(event_type: :modified) + end + contents.delete(key) + end + contents.each do |key, etag| + create_event payload: get_file_pointer(key).merge(event_type: :added) + end + + self.memory['seen_contents'] = new_memory + end + + def get_bucket_contents + contents = {} + client.list_objects(bucket: interpolated['bucket']).each do |response| + response.contents.each do |file| + contents[file.key] = file.etag + end + end + contents + end + + def client + @client ||= Aws::S3::Client.new(credentials: Aws::Credentials.new(interpolated['access_key_id'], interpolated['access_key_secret']), + region: interpolated['region']) + end + + def buckets(log = false) + @buckets ||= client.list_buckets.buckets + rescue Aws::S3::Errors::ServiceError => e + false + end + end +end diff --git a/db/migrate/20160224120316_add_mode_option_to_ftpsite_agents.rb b/db/migrate/20160224120316_add_mode_option_to_ftpsite_agents.rb new file mode 100644 index 00000000..0061bf4a --- /dev/null +++ b/db/migrate/20160224120316_add_mode_option_to_ftpsite_agents.rb @@ -0,0 +1,15 @@ +class AddModeOptionToFtpsiteAgents < ActiveRecord::Migration + def up + Agents::FtpsiteAgent.find_each do |agent| + agent.options['mode'] = 'read' + agent.save!(validate: false) + end + end + + def down + Agents::FtpsiteAgent.find_each do |agent| + agent.options.delete 'mode' + agent.save!(validate: false) + end + end +end diff --git a/lib/agent_runner.rb b/lib/agent_runner.rb index ff99ea80..815f0692 100644 --- a/lib/agent_runner.rb +++ b/lib/agent_runner.rb @@ -118,5 +118,6 @@ end require 'agents/twitter_stream_agent' require 'agents/jabber_agent' +require 'agents/local_file_agent' require 'huginn_scheduler' require 'delayed_job_worker' diff --git a/lib/utils.rb b/lib/utils.rb index 797e6857..84a1a40d 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -15,7 +15,7 @@ module Utils 
def self.pretty_print(struct, indent = true) output = JSON.pretty_generate(struct) if indent - output.gsub(/\n/i, "\n ").tap { |a| p a } + output.gsub(/\n/i, "\n ") else output end diff --git a/spec/env.test b/spec/env.test index c723aab9..914e9bb6 100644 --- a/spec/env.test +++ b/spec/env.test @@ -12,3 +12,4 @@ EVERNOTE_OAUTH_KEY=evernoteoauthkey EVERNOTE_OAUTH_SECRET=evernoteoauthsecret FAILED_JOBS_TO_KEEP=2 REQUIRE_CONFIRMED_EMAIL=false +ENABLE_INSECURE_AGENTS=true diff --git a/spec/models/agents/csv_agent_spec.rb b/spec/models/agents/csv_agent_spec.rb new file mode 100644 index 00000000..322f1801 --- /dev/null +++ b/spec/models/agents/csv_agent_spec.rb @@ -0,0 +1,244 @@ +require 'rails_helper' + +describe Agents::CsvAgent do + before(:each) do + @valid_params = { + 'mode' => 'parse', + 'separator' => ',', + 'use_fields' => '', + 'output' => 'event_per_row', + 'with_header' => 'true', + 'data_path' => '$.data', + 'data_key' => 'data' + } + + @checker = Agents::CsvAgent.new(:name => 'somename', :options => @valid_params) + @checker.user = users(:jane) + @checker.save! + @lfa = Agents::LocalFileAgent.new(name: 'local', options: {path: '{{}}', watch: 'false', append: 'false', mode: 'read'}) + @lfa.user = users(:jane) + @lfa.save! 
+ end + + it_behaves_like 'FileHandlingConsumer' + + context '#validate_options' do + it 'is valid with the given options' do + expect(@checker).to be_valid + end + + it "requires with_header to be either 'true' or 'false'" do + @checker.options['with_header'] = 'true' + expect(@checker).to be_valid + @checker.options['with_header'] = 'false' + expect(@checker).to be_valid + @checker.options['with_header'] = 'test' + expect(@checker).not_to be_valid + end + + it "data_path has to be set in serialize mode" do + @checker.options['mode'] = 'serialize' + @checker.options['data_path'] = '' + expect(@checker).not_to be_valid + end + end + + context '#working' do + it 'is not working without having received an event' do + expect(@checker).not_to be_working + end + + it 'is working after receiving an event without error' do + @checker.last_receive_at = Time.now + expect(@checker).to be_working + end + end + + context '#receive' do + after(:all) do + FileUtils.rm(File.join(Rails.root, 'tmp', 'csv')) + end + + def event_with_contents(contents) + path = File.join(Rails.root, 'tmp', 'csv') + File.open(path, 'w') do |f| + f.write(contents) + end + Event.new(payload: { 'file_pointer' => {'agent_id' => @lfa.id, 'file' => path } }, user_id: @checker.user_id) + end + + context "agent options" do + let(:with_headers) { event_with_contents("one,two\n1,2\n2,3") } + let(:without_headers) { event_with_contents("1,2\n2,3") } + + context "output" do + it "creates one event per row" do + @checker.options['output'] = 'event_per_row' + expect { @checker.receive([with_headers]) }.to change(Event, :count).by(2) + expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'}) + end + + it "creates one event per file" do + @checker.options['output'] = 'event_per_file' + expect { @checker.receive([with_headers]) }.to change(Event, :count).by(1) + expect(Event.last.payload).to eq(@checker.options['data_key'] => [{"one"=>"1", "two"=>"2"}, {"one"=>"2", "two"=>"3"}]) + 
end + end + + context "with_header" do + it "works without headers" do + @checker.options['with_header'] = 'false' + expect { @checker.receive([without_headers]) }.to change(Event, :count).by(2) + expect(Event.last.payload).to eq({@checker.options['data_key']=>["2", "3"]}) + end + + it "works without headers and event_per_file" do + @checker.options['with_header'] = 'false' + @checker.options['output'] = 'event_per_file' + expect { @checker.receive([without_headers]) }.to change(Event, :count).by(1) + expect(Event.last.payload).to eq({@checker.options['data_key']=>[['1', '2'], ["2", "3"]]}) + end + end + + context "use_fields" do + it "extracts the specified columns" do + @checker.options['use_fields'] = 'one' + expect { @checker.receive([with_headers]) }.to change(Event, :count).by(2) + expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2'}) + end + end + + context "data_path" do + it "can receive the CSV via a regular event" do + @checker.options['data_path'] = '$.data' + event = Event.new(payload: {'data' => "one,two\r\n1,2\r\n2,3"}) + expect { @checker.receive([event]) }.to change(Event, :count).by(2) + expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'}) + end + end + end + + context "handling different CSV formats" do + it "works with windows line endings" do + event = event_with_contents("one,two\r\n1,2\r\n2,3") + expect { @checker.receive([event]) }.to change(Event, :count).by(2) + expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'}) + end + + it "works with OSX line endings" do + event = event_with_contents("one,two\r1,2\r2,3") + expect { @checker.receive([event]) }.to change(Event, :count).by(2) + expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'}) + end + + it "handles quotes correctly" do + event = event_with_contents("\"one\",\"two\"\n1,2\n\"\"2, two\",3") + expect { @checker.receive([event]) }.to 
change(Event, :count).by(2)
+ expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '"2, two', 'two' => '3'})
+ end
+
+ it "works with tab separated csv" do
+ event = event_with_contents("one\ttwo\r\n1\t2\r\n2\t3")
+ @checker.options['separator'] = '\\t'
+ expect { @checker.receive([event]) }.to change(Event, :count).by(2)
+ expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
+ end
+ end
+
+ context "serializing" do
+ before(:each) do
+ @checker.options['mode'] = 'serialize'
+ @checker.options['data_path'] = '$.data'
+ @checker.options['data_key'] = 'data'
+ end
+
+ it "writes headers when with_header is true" do
+ event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} })
+ expect { @checker.receive([event])}.to change(Event, :count).by(1)
+ expect(Event.last.payload).to eq('data' => "\"key\",\"key2\",\"key3\"\n\"value\",\"value2\",\"value3\"\n")
+ end
+
+ it "writes one row per received event" do
+ event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} })
+ event2 = Event.new(payload: { 'data' => {'key' => '2value', 'key2' => '2value2', 'key3' => '2value3'} })
+ expect { @checker.receive([event, event2])}.to change(Event, :count).by(1)
+ expect(Event.last.payload).to eq('data' => "\"key\",\"key2\",\"key3\"\n\"value\",\"value2\",\"value3\"\n\"2value\",\"2value2\",\"2value3\"\n")
+ end
+
+ it "accepts multiple rows per event" do
+ event = Event.new(payload: { 'data' => [{'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'}, {'key' => '2value', 'key2' => '2value2', 'key3' => '2value3'}] })
+ expect { @checker.receive([event])}.to change(Event, :count).by(1)
+ expect(Event.last.payload).to eq('data' => "\"key\",\"key2\",\"key3\"\n\"value\",\"value2\",\"value3\"\n\"2value\",\"2value2\",\"2value3\"\n")
+ end
+
+ it "does not write the headers when with_header is false" do
+ @checker.options['with_header'] = 'false'
+
event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} }) + expect { @checker.receive([event])}.to change(Event, :count).by(1) + expect(Event.last.payload).to eq('data' => "\"value\",\"value2\",\"value3\"\n") + end + + it "only serialize the keys specified in use_fields" do + @checker.options['use_fields'] = 'key2, key3' + event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} }) + expect { @checker.receive([event])}.to change(Event, :count).by(1) + expect(Event.last.payload).to eq('data' => "\"key2\",\"key3\"\n\"value2\",\"value3\"\n") + end + + it "respects the order of use_fields" do + @checker.options['use_fields'] = 'key3, key' + event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} }) + expect { @checker.receive([event])}.to change(Event, :count).by(1) + expect(Event.last.payload).to eq('data' => "\"key3\",\"key\"\n\"value3\",\"value\"\n") + end + + it "respects use_fields and writes no header" do + @checker.options['with_header'] = 'false' + @checker.options['use_fields'] = 'key2, key3' + event = Event.new(payload: { 'data' => {'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'} }) + expect { @checker.receive([event])}.to change(Event, :count).by(1) + expect(Event.last.payload).to eq('data' => "\"value2\",\"value3\"\n") + end + + context "arrays" do + it "does not write a header" do + @checker.options['with_header'] = 'false' + event = Event.new(payload: { 'data' => ['value1', 'value2'] }) + event2 = Event.new(payload: { 'data' => ['value3', 'value4'] }) + expect { @checker.receive([event, event2])}.to change(Event, :count).by(1) + expect(Event.last.payload).to eq('data' => "\"value1\",\"value2\"\n\"value3\",\"value4\"\n") + end + + it "handles nested arrays" do + event = Event.new(payload: { 'data' => [['value1', 'value2'], ['value3', 'value4']] }) + expect { @checker.receive([event])}.to change(Event, :count).by(1) + 
expect(Event.last.payload).to eq('data' => "\"value1\",\"value2\"\n\"value3\",\"value4\"\n") + end + end + end + end + + context '#event_description' do + it "works with event_per_row and headers" do + @checker.options['output'] = 'event_per_row' + @checker.options['with_header'] = 'true' + description = @checker.event_description + expect(description).not_to match(/\n\s+\[\n/) + expect(description).to include(": {\n") + end + + it "works with event_per_file and without headers" do + @checker.options['output'] = 'event_per_file' + @checker.options['with_header'] = 'false' + description = @checker.event_description + expect(description).to match(/\n\s+\[\n/) + expect(description).not_to include(": {\n") + end + + it "shows dummy CSV when in serialize mode" do + @checker.options['mode'] = 'serialize' + description = @checker.event_description + expect(description).to include('"generated\",\"csv') + end + end +end diff --git a/spec/models/agents/ftpsite_agent_spec.rb b/spec/models/agents/ftpsite_agent_spec.rb index 0609a6f3..cf0e2a5e 100644 --- a/spec/models/agents/ftpsite_agent_spec.rb +++ b/spec/models/agents/ftpsite_agent_spec.rb @@ -8,12 +8,74 @@ describe Agents::FtpsiteAgent do 'expected_update_period_in_days' => 1, 'url' => "ftp://ftp.example.org/pub/releases/", 'patterns' => ["example*.tar.gz"], + 'mode' => 'read', + 'filename' => 'test', + 'data' => '{{ data }}' } @checker = Agents::FtpsiteAgent.new(:name => "Example", :options => @site, :keep_events_for => 2.days) @checker.user = users(:bob) @checker.save! 
end + context "#validate_options" do + it "requires url to be a valid URI" do + @checker.options['url'] = 'not_valid' + expect(@checker).not_to be_valid + end + + it "allows an URI without a path" do + @checker.options['url'] = 'ftp://ftp.example.org' + expect(@checker).to be_valid + end + + it "does not check the url when liquid output markup is used" do + @checker.options['url'] = 'ftp://{{ ftp_host }}' + expect(@checker).to be_valid + end + + it "requires patterns to be present and not empty array" do + @checker.options['patterns'] = '' + expect(@checker).not_to be_valid + @checker.options['patterns'] = 'not an array' + expect(@checker).not_to be_valid + @checker.options['patterns'] = [] + expect(@checker).not_to be_valid + end + + it "when present timestamp must be parsable into a Time object instance" do + @checker.options['timestamp'] = '2015-01-01 00:00:01' + expect(@checker).to be_valid + @checker.options['timestamp'] = 'error' + expect(@checker).not_to be_valid + end + + it "requires mode to be set to 'read' or 'write'" do + @checker.options['mode'] = 'write' + expect(@checker).to be_valid + @checker.options['mode'] = '' + expect(@checker).not_to be_valid + end + + it 'automatically sets mode to read when the agent is a new record' do + checker = Agents::FtpsiteAgent.new(name: 'test', options: @site.except('mode')) + checker.user = users(:bob) + expect(checker).to be_valid + expect(checker.options['mode']).to eq('read') + end + + it "requires 'filename' in 'write' mode" do + @checker.options['mode'] = 'write' + @checker.options['filename'] = '' + expect(@checker).not_to be_valid + end + + it "requires 'data' in 'write' mode" do + @checker.options['mode'] = 'write' + @checker.options['data'] = '' + expect(@checker).not_to be_valid + end + end + describe "#check" do before do @@ -42,6 +104,7 @@ describe Agents::FtpsiteAgent do } expect(Event.last(2).first.payload).to eq({ + 'file_pointer' => { 'file' => 'example-1.1.tar.gz', 'agent_id' => @checker.id }, 
'url' => 'ftp://ftp.example.org/pub/releases/example-1.1.tar.gz', 'filename' => 'example-1.1.tar.gz', 'timestamp' => '2014-04-01T10:00:00Z', @@ -71,12 +134,14 @@ describe Agents::FtpsiteAgent do } expect(Event.last(2).first.payload).to eq({ + 'file_pointer' => { 'file' => 'example-1.2.tar.gz', 'agent_id' => @checker.id }, 'url' => 'ftp://ftp.example.org/pub/releases/example-1.2.tar.gz', 'filename' => 'example-1.2.tar.gz', 'timestamp' => '2014-04-02T10:00:00Z', }) expect(Event.last.payload).to eq({ + 'file_pointer' => { 'file' => 'example latest.tar.gz', 'agent_id' => @checker.id }, 'url' => 'ftp://ftp.example.org/pub/releases/example%20latest.tar.gz', 'filename' => 'example latest.tar.gz', 'timestamp' => '2014-04-02T10:00:01Z', @@ -113,5 +178,83 @@ describe Agents::FtpsiteAgent do end end + context "#open_ftp" do + before(:each) do + @ftp_mock = mock() + mock(@ftp_mock).close + mock(@ftp_mock).connect('ftp.example.org', 21) + mock(@ftp_mock).passive=(true) + mock(Net::FTP).new { @ftp_mock } + end + context 'with_path' do + before(:each) { mock(@ftp_mock).chdir('pub/releases') } + + it "logs in as anonymous when no user and password are given" do + mock(@ftp_mock).login('anonymous', 'anonymous@') + expect { |b| @checker.open_ftp(@checker.base_uri, &b) }.to yield_with_args(@ftp_mock) + end + + it "passes the provided user and password" do + @checker.options['url'] = "ftp://user:password@ftp.example.org/pub/releases/" + mock(@ftp_mock).login('user', 'password') + expect { |b| @checker.open_ftp(@checker.base_uri, &b) }.to yield_with_args(@ftp_mock) + end + end + + it "does not call chdir when no path is given" do + @checker.options['url'] = "ftp://ftp.example.org/" + mock(@ftp_mock).login('anonymous', 'anonymous@') + expect { |b| @checker.open_ftp(@checker.base_uri, &b) }.to yield_with_args(@ftp_mock) + end + end + + context "#get_io" do + it "returns the contents of the file" do + ftp_mock= mock() + mock(ftp_mock).getbinaryfile('file', nil).yields('data') + 
mock(@checker).open_ftp(@checker.base_uri).yields(ftp_mock) + expect(@checker.get_io('file').read).to eq('data') + end + + it "uses the encoding specified in force_encoding to convert the data to UTF-8" do + ftp_mock= mock() + mock(ftp_mock).getbinaryfile('file', nil).yields('ümlaut'.force_encoding('ISO-8859-15')) + mock(@checker).open_ftp(@checker.base_uri).yields(ftp_mock) + expect(@checker.get_io('file').read).to eq('ümlaut') + end + + it "returns an empty StringIO instance when no data was read" do + ftp_mock= mock() + mock(ftp_mock).getbinaryfile('file', nil) + mock(@checker).open_ftp(@checker.base_uri).yields(ftp_mock) + expect(@checker.get_io('file').length).to eq(0) + end + end + + context "#receive" do + before(:each) do + @checker.options['mode'] = 'write' + @checker.options['filename'] = 'file.txt' + @checker.options['data'] = '{{ data }}' + @ftp_mock= mock() + @stringio = StringIO.new() + mock(@checker).open_ftp(@checker.base_uri).yields(@ftp_mock) + end + + it "writes the data at data into a file" do + mock(StringIO).new('hello world🔥') { @stringio } + mock(@ftp_mock).storbinary('STOR file.txt', @stringio, Net::FTP::DEFAULT_BLOCKSIZE) + event = Event.new(payload: {'data' => 'hello world🔥'}) + @checker.receive([event]) + end + + it "converts the string encoding when force_encoding is specified" do + @checker.options['force_encoding'] = 'ISO-8859-1' + mock(StringIO).new('hello world?') { @stringio } + mock(@ftp_mock).storbinary('STOR file.txt', @stringio, Net::FTP::DEFAULT_BLOCKSIZE) + event = Event.new(payload: {'data' => 'hello world🔥'}) + @checker.receive([event]) + end + end end end diff --git a/spec/models/agents/local_file_agent_spec.rb b/spec/models/agents/local_file_agent_spec.rb new file mode 100644 index 00000000..f059b2cb --- /dev/null +++ b/spec/models/agents/local_file_agent_spec.rb @@ -0,0 +1,276 @@ +require 'rails_helper' + +describe Agents::LocalFileAgent do + before(:each) do + @valid_params = { + 'mode' => 'read', + 'watch' => 'false', 
+ 'append' => 'false',
+ 'path' => File.join(Rails.root, 'tmp', 'spec')
+ }
+ FileUtils.mkdir_p File.join(Rails.root, 'tmp', 'spec')
+
+ @checker = Agents::LocalFileAgent.new(:name => "somename", :options => @valid_params)
+ @checker.user = users(:jane)
+ @checker.save!
+ end
+
+ after(:all) do
+ FileUtils.rm_r File.join(Rails.root, 'tmp', 'spec')
+ end
+
+ describe "#validate_options" do
+ it "is valid with the given options" do
+ expect(@checker).to be_valid
+ end
+
+ it "requires mode to be either 'read' or 'write'" do
+ @checker.options['mode'] = 'read'
+ expect(@checker).to be_valid
+ @checker.options['mode'] = 'write'
+ expect(@checker).to be_valid
+ @checker.options['mode'] = 'test'
+ expect(@checker).not_to be_valid
+ end
+
+ it "requires the path to be set" do
+ @checker.options['path'] = ''
+ expect(@checker).not_to be_valid
+ end
+
+ it "requires watch to be present" do
+ @checker.options['watch'] = ''
+ expect(@checker).not_to be_valid
+ end
+
+ it "requires watch to be either 'true' or 'false'" do
+ @checker.options['watch'] = 'true'
+ expect(@checker).to be_valid
+ @checker.options['watch'] = 'false'
+ expect(@checker).to be_valid
+ @checker.options['watch'] = 'test'
+ expect(@checker).not_to be_valid
+ end
+
+ it "requires append to be either 'true' or 'false'" do
+ @checker.options['append'] = 'true'
+ expect(@checker).to be_valid
+ @checker.options['append'] = 'false'
+ expect(@checker).to be_valid
+ @checker.options['append'] = 'test'
+ expect(@checker).not_to be_valid
+ end
+ end
+
+ context "#working" do
+ it "is working with no recent errors in read mode" do
+ @checker.last_check_at = Time.now
+ expect(@checker).to be_working
+ end
+
+ it "is working with no recent errors in write mode" do
+ @checker.options['mode'] = 'write'
+ @checker.last_receive_at = Time.now
+ expect(@checker).to be_working
+ end
+ end
+
+ context "#check_path_existance" do
+ it "is truthy when the path exists" do
+ expect(@checker.check_path_existance).to be_truthy
+
end + + it "is falsy when the path does not exist" do + @checker.options['path'] = '/doesnotexist' + expect(@checker.check_path_existance).to be_falsy + end + + it "create a log entry" do + @checker.options['path'] = '/doesnotexist' + expect { @checker.check_path_existance(true) }.to change(AgentLog, :count).by(1) + end + + it "works with non-expanded paths" do + @checker.options['path'] = '~' + expect(@checker.check_path_existance).to be_truthy + end + end + + def with_files(*files) + files.each { |f| FileUtils.touch(f) } + yield + files.each { |f| FileUtils.rm(f) } + end + + context "#check" do + it "does not create events when the directory is empty" do + expect { @checker.check }.to change(Event, :count).by(0) + end + + it "creates an event for every file in the directory" do + with_files(File.join(Rails.root, 'tmp', 'spec', 'one'), File.join(Rails.root, 'tmp', 'spec', 'two')) do + expect { @checker.check }.to change(Event, :count).by(2) + expect(Event.last.payload.has_key?('file_pointer')).to be_truthy + end + end + + it "creates an event if the configured file exists" do + @checker.options['path'] = File.join(Rails.root, 'tmp', 'spec', 'one') + with_files(File.join(Rails.root, 'tmp', 'spec', 'one'), File.join(Rails.root, 'tmp', 'spec', 'two')) do + expect { @checker.check }.to change(Event, :count).by(1) + payload = Event.last.payload + expect(payload.has_key?('file_pointer')).to be_truthy + expect(payload['file_pointer']['file']).to eq(@checker.options['path']) + end + end + + it "does not run when ENABLE_INSECURE_AGENTS is not set to true" do + ENV['ENABLE_INSECURE_AGENTS'] = 'false' + expect { @checker.check }.to change(AgentLog, :count).by(1) + ENV['ENABLE_INSECURE_AGENTS'] = 'true' + end + end + + context "#event_description" do + it "should include event_type when watch is set to true" do + @checker.options['watch'] = 'true' + expect(@checker.event_description).to include('event_type') + end + + it "should not include event_type when watch is set to 
false" do
+ @checker.options['watch'] = 'false'
+ expect(@checker.event_description).not_to include('event_type')
+ end
+ end
+
+ it "get_io opens the file" do
+ mock(File).open('test', 'r')
+ @checker.get_io('test')
+ end
+
+ context "#start_worker?" do
+ it "returns true when watch is true" do
+ @checker.options['watch'] = 'true'
+ expect(@checker.start_worker?).to be_truthy
+ end
+
+ it "returns false when watch is false" do
+ @checker.options['watch'] = 'false'
+ expect(@checker.start_worker?).to be_falsy
+ end
+ end
+
+ context "#receive" do
+ before(:each) do
+ @checker.options['mode'] = 'write'
+ @checker.options['data'] = '{{ data }}'
+ @file_mock = mock()
+ end
+
+ it "writes the data at data into a file" do
+ mock(@file_mock).write('hello world')
+ event = Event.new(payload: {'data' => 'hello world'})
+ mock(File).open(File.join(Rails.root, 'tmp', 'spec'), 'w').yields @file_mock
+ @checker.receive([event])
+ end
+
+ it "appends the data at data onto a file" do
+ mock(@file_mock).write('hello world')
+ @checker.options['append'] = 'true'
+ event = Event.new(payload: {'data' => 'hello world'})
+ mock(File).open(File.join(Rails.root, 'tmp', 'spec'), 'a').yields @file_mock
+ @checker.receive([event])
+ end
+
+ it "does not receive when ENABLE_INSECURE_AGENTS is not set to true" do
+ ENV['ENABLE_INSECURE_AGENTS'] = 'false'
+ expect { @checker.receive([]) }.to change(AgentLog, :count).by(1)
+ ENV['ENABLE_INSECURE_AGENTS'] = 'true'
+ end
+ end
+
+ describe Agents::LocalFileAgent::Worker do
+ require 'listen'
+
+ before(:each) do
+ @checker.options['watch'] = true
+ @checker.save
+ @worker = Agents::LocalFileAgent::Worker.new(agent: @checker)
+ @listen_mock = mock()
+ end
+
+ context "#setup" do
+ it "initializes the listen gem" do
+ mock(Listen).to(@checker.options['path'], ignore!: [])
+ @worker.setup
+ end
+ end
+
+ context "#run" do
+ before(:each) do
+ stub(Listen).to { @listen_mock }
+ @worker.setup
+ end
+
+ it "starts to listen to changes in the
directory when the path is present" do
+ mock(@worker).sleep
+ mock(@listen_mock).start
+ @worker.run
+ end
+
+ it "does nothing when the path does not exist" do
+ mock(@worker.agent).check_path_existance(true) { false }
+ dont_allow(@listen_mock).start
+ mock(@worker).sleep { raise "Sleeping" }
+ expect { @worker.run }.to raise_exception(RuntimeError, 'Sleeping')
+ end
+ end
+
+ context "#stop" do
+ it "stops the listen gem" do
+ stub(Listen).to { @listen_mock }
+ @worker.setup
+ mock(@listen_mock).stop
+ @worker.stop
+ end
+ end
+
+ context "#callback" do
+ let(:file) { File.join(Rails.root, 'tmp', 'one') }
+ let(:file2) { File.join(Rails.root, 'tmp', 'one2') }
+
+ it "creates an event for modified files" do
+ expect { @worker.send(:callback, [file], [], [])}.to change(Event, :count).by(1)
+ payload = Event.last.payload
+ expect(payload['event_type']).to eq('modified')
+ end
+
+ it "creates an event for added files" do
+ expect { @worker.send(:callback, [], [file], [])}.to change(Event, :count).by(1)
+ payload = Event.last.payload
+ expect(payload['event_type']).to eq('added')
+ end
+
+ it "creates an event for removed files" do
+ expect { @worker.send(:callback, [], [], [file])}.to change(Event, :count).by(1)
+ payload = Event.last.payload
+ expect(payload['event_type']).to eq('removed')
+ end
+
+ it "creates an event for each changed file" do
+ expect { @worker.send(:callback, [], [file], [file2])}.to change(Event, :count).by(2)
+ end
+ end
+
+ context "#listen_options" do
+ it "returns the path when a directory is given" do
+ expect(@worker.send(:listen_options)).to eq([File.join(Rails.root, 'tmp', 'spec'), ignore!: []])
+ end
+
+ it "restricts to only the specified filename" do
+ @worker.agent.options['path'] = File.join(Rails.root, 'tmp', 'one')
+ expect(@worker.send(:listen_options)).to eq([File.join(Rails.root, 'tmp'), { only: /\Aone\z/, ignore!: [] } ])
+ end
+ end
+ end
+end
diff --git a/spec/models/agents/read_file_agent_spec.rb
b/spec/models/agents/read_file_agent_spec.rb new file mode 100644 index 00000000..678003ab --- /dev/null +++ b/spec/models/agents/read_file_agent_spec.rb @@ -0,0 +1,47 @@ +require 'rails_helper' + +describe Agents::ReadFileAgent do + before(:each) do + @valid_params = { + 'data_key' => 'data', + } + + @checker = Agents::ReadFileAgent.new(:name => 'somename', :options => @valid_params) + @checker.user = users(:jane) + @checker.save! + end + + it_behaves_like 'FileHandlingConsumer' + + context '#validate_options' do + it 'is valid with the given options' do + expect(@checker).to be_valid + end + + it "requires data_key to be present" do + @checker.options['data_key'] = '' + expect(@checker).not_to be_valid + end + end + + context '#working' do + it 'is not working without having received an event' do + expect(@checker).not_to be_working + end + + it 'is working after receiving an event without error' do + @checker.last_receive_at = Time.now + expect(@checker).to be_working + end + end + + context '#receive' do + it "emits an event with the contents of the receives files" do + event = Event.new(payload: {file_pointer: {agent_id: 111, file: 'test'}}) + io_mock = mock() + mock(@checker).get_io(event) { StringIO.new("testdata") } + expect { @checker.receive([event]) }.to change(Event, :count).by(1) + expect(Event.last.payload).to eq('data' => 'testdata') + end + end +end diff --git a/spec/models/agents/s3_agent_spec.rb b/spec/models/agents/s3_agent_spec.rb new file mode 100644 index 00000000..4fecf59a --- /dev/null +++ b/spec/models/agents/s3_agent_spec.rb @@ -0,0 +1,220 @@ +require 'rails_helper' + +describe Agents::S3Agent do + before(:each) do + @valid_params = { + 'mode' => 'read', + 'access_key_id' => '32343242', + 'access_key_secret' => '1231312', + 'watch' => 'false', + 'bucket' => 'testbucket', + 'region' => 'us-east-1', + 'filename' => 'test.txt', + 'data' => '{{ data }}' + } + + @checker = Agents::S3Agent.new(:name => "somename", :options => @valid_params) + 
@checker.user = users(:jane) + @checker.save! + end + + describe "#validate_options" do + it "requires the bucket to be set" do + @checker.options['bucket'] = '' + expect(@checker).not_to be_valid + end + + it "requires watch to be present" do + @checker.options['watch'] = '' + expect(@checker).not_to be_valid + end + + it "requires watch to be either 'true' or 'false'" do + @checker.options['watch'] = 'true' + expect(@checker).to be_valid + @checker.options['watch'] = 'false' + expect(@checker).to be_valid + @checker.options['watch'] = 'test' + expect(@checker).not_to be_valid + end + + it "requires region to be present" do + @checker.options['region'] = '' + expect(@checker).not_to be_valid + end + + it "requires mode to be set to 'read' or 'write'" do + @checker.options['mode'] = 'write' + expect(@checker).to be_valid + @checker.options['mode'] = '' + expect(@checker).not_to be_valid + end + + it "requires 'filename' in 'write' mode" do + @checker.options['mode'] = 'write' + @checker.options['filename'] = '' + expect(@checker).not_to be_valid + end + + it "requires 'data' in 'write' mode" do + @checker.options['mode'] = 'write' + @checker.options['data'] = '' + expect(@checker).not_to be_valid + end + end + + describe "#validating" do + it "validates the key" do + mock(@checker).client { raise Aws::S3::Errors::SignatureDoesNotMatch.new('', '') } + expect(@checker.validate_access_key_id).to be_falsy + end + + it "validates the secret" do + mock(@checker).buckets { true } + expect(@checker.validate_access_key_secret).to be_truthy + end + end + + it "completes the buckets" do + mock(@checker).buckets { [OpenStruct.new(name: 'test'), OpenStruct.new(name: 'test2')]} + expect(@checker.complete_bucket).to eq([{text: 'test', id: 'test'}, {text: 'test2', id: 'test2'}]) + end + + context "#working" do + it "is working with no recent errors" do + @checker.last_check_at = Time.now + expect(@checker).to be_working + end + end + + context "#check" do + context "not watching" 
do + it "emits an event for every file" do + mock(@checker).get_bucket_contents { {"test"=>"231232", "test2"=>"4564545"} } + expect { @checker.check }.to change(Event, :count).by(2) + expect(Event.last.payload).to eq({"file_pointer" => {"file"=>"test2", "agent_id"=> @checker.id}}) + end + end + + context "watching" do + before(:each) do + @checker.options['watch'] = 'true' + end + + it "does not emit any events on the first run" do + contents = {"test"=>"231232", "test2"=>"4564545"} + mock(@checker).get_bucket_contents { contents } + expect { @checker.check }.not_to change(Event, :count) + expect(@checker.memory).to eq('seen_contents' => contents) + end + + context "detecting changes" do + before(:each) do + contents = {"test"=>"231232", "test2"=>"4564545"} + mock(@checker).get_bucket_contents { contents } + expect { @checker.check }.not_to change(Event, :count) + @checker.last_check_at = Time.now + end + + it "emits events for removed files" do + contents = {"test"=>"231232"} + mock(@checker).get_bucket_contents { contents } + expect { @checker.check }.to change(Event, :count).by(1) + expect(Event.last.payload).to eq({"file_pointer" => {"file" => "test2", "agent_id"=> @checker.id}, "event_type" => "removed"}) + end + + it "emits events for modified files" do + contents = {"test"=>"231232", "test2"=>"changed"} + mock(@checker).get_bucket_contents { contents } + expect { @checker.check }.to change(Event, :count).by(1) + expect(Event.last.payload).to eq({"file_pointer" => {"file" => "test2", "agent_id"=> @checker.id}, "event_type" => "modified"}) + end + it "emits events for added files" do + contents = {"test"=>"231232", "test2"=>"4564545", "test3" => "31231231"} + mock(@checker).get_bucket_contents { contents } + expect { @checker.check }.to change(Event, :count).by(1) + expect(Event.last.payload).to eq({"file_pointer" => {"file" => "test3", "agent_id"=> @checker.id}, "event_type" => "added"}) + end + end + + context "error handling" do + it "handles AccessDenied 
exceptions" do + mock(@checker).get_bucket_contents { raise Aws::S3::Errors::AccessDenied.new('', '') } + expect { @checker.check }.to change(AgentLog, :count).by(1) + expect(AgentLog.last.message).to eq("Could not access 'testbucket' Aws::S3::Errors::AccessDenied ") + end + + it "handles generic S3 exceptions" do + mock(@checker).get_bucket_contents { raise Aws::S3::Errors::PermanentRedirect.new('', 'error') } + expect { @checker.check }.to change(AgentLog, :count).by(1) + expect(AgentLog.last.message).to eq("Aws::S3::Errors::PermanentRedirect: error") + end + end + end + end + + it "get_io returns a StringIO object" do + stringio =StringIO.new + mock_response = mock() + mock(mock_response).body { stringio } + mock_client = mock() + mock(mock_client).get_object(bucket: 'testbucket', key: 'testfile') { mock_response } + mock(@checker).client { mock_client } + @checker.get_io('testfile') + end + + context "#get_bucket_contents" do + it "returns a hash with the contents of the bucket" do + mock_response = mock() + mock(mock_response).contents { [OpenStruct.new(key: 'test', etag: '231232'), OpenStruct.new(key: 'test2', etag: '4564545')] } + mock_client = mock() + mock(mock_client).list_objects(bucket: 'testbucket') { [mock_response] } + mock(@checker).client { mock_client } + expect(@checker.send(:get_bucket_contents)).to eq({"test"=>"231232", "test2"=>"4564545"}) + end + end + + context "#client" do + it "initializes the S3 client correctly" do + mock_credential = mock() + mock(Aws::Credentials).new('32343242', '1231312') { mock_credential } + mock(Aws::S3::Client).new(credentials: mock_credential, + region: 'us-east-1') + @checker.send(:client) + end + end + + context "#event_description" do + it "should include event_type when watch is set to true" do + @checker.options['watch'] = 'true' + expect(@checker.event_description).to include('event_type') + end + + it "should not include event_type when watch is set to false" do + @checker.options['watch'] = 'false' + 
expect(@checker.event_description).not_to include('event_type') + end + end + + context "#receive" do + before(:each) do + @checker.options['mode'] = 'write' + @checker.options['filename'] = 'file.txt' + @checker.options['data'] = '{{ data }}' + end + + it "writes the data at data into a file" do + client_mock = mock() + mock(client_mock).put_object(bucket: @checker.options['bucket'], key: @checker.options['filename'], body: 'hello world!') + mock(@checker).client { client_mock } + event = Event.new(payload: {'data' => 'hello world!'}) + @checker.receive([event]) + end + + it "does nothing when mode is set to 'read'" do + @checker.options['mode'] = 'read' + event = Event.new(payload: {'data' => 'hello world!'}) + @checker.receive([event]) + end + end +end diff --git a/spec/support/shared_examples/file_handling_consumer.rb b/spec/support/shared_examples/file_handling_consumer.rb new file mode 100644 index 00000000..a864bb2e --- /dev/null +++ b/spec/support/shared_examples/file_handling_consumer.rb @@ -0,0 +1,16 @@ +require 'rails_helper' + +shared_examples_for 'FileHandlingConsumer' do + it 'returns a file pointer' do + expect(@checker.get_file_pointer('testfile')).to eq(file_pointer: { file: "testfile", agent_id: @checker.id}) + end + + it 'get_io raises an exception when trying to access an agent of a different user' do + @checker2 = @checker.dup + @checker2.user = users(:bob) + @checker2.save! 
+ expect(@checker2.user.id).not_to eq(@checker.user.id) + event = Event.new(user: @checker.user, payload: {'file_pointer' => {'file' => 'test', 'agent_id' => @checker2.id}}) + expect { @checker.get_io(event) }.to raise_error(ActiveRecord::RecordNotFound) + end +end \ No newline at end of file diff --git a/spec/support/shared_examples/working_helpers.rb b/spec/support/shared_examples/working_helpers.rb index c15cd7ca..2df1cf72 100644 --- a/spec/support/shared_examples/working_helpers.rb +++ b/spec/support/shared_examples/working_helpers.rb @@ -50,4 +50,28 @@ shared_examples_for WorkingHelpers do expect(@agent.received_event_without_error?).to eq(true) end end + + describe "checked_without_error?" do + before do + @agent = described_class.new + end + + it "should return false until the first time check ran" do + expect(@agent.checked_without_error?).to eq(false) + @agent.last_check_at = Time.now + expect(@agent.checked_without_error?).to eq(true) + end + + it "should return false when the last error occured after the check" do + @agent.last_check_at = Time.now - 1.minute + @agent.last_error_log_at = Time.now + expect(@agent.checked_without_error?).to eq(false) + end + + it "should return true when the last check occured after the last error" do + @agent.last_check_at = Time.now + @agent.last_error_log_at = Time.now - 1.minute + expect(@agent.checked_without_error?).to eq(true) + end + end end