From 0eb956e110a33359efece38da97ff3187207b986 Mon Sep 17 00:00:00 2001 From: Andrew Cantino Date: Fri, 4 Oct 2013 22:24:21 -0700 Subject: [PATCH 1/7] HumanTaskAgent can now accept `combination_mode` and `poll_options` which are used to generate a poll from the HITs answers. The poll asks additional Mechanical Turk users to rate the responses, and then the best one is returned. This allows for an additional step of validation on user generated content / data. --- app/models/agents/human_task_agent.rb | 236 ++++++++++++++++---- spec/models/agents/human_task_agent_spec.rb | 189 ++++++++++++++-- 2 files changed, 365 insertions(+), 60 deletions(-) diff --git a/app/models/agents/human_task_agent.rb b/app/models/agents/human_task_agent.rb index 65a73c5f..0c861e1d 100644 --- a/app/models/agents/human_task_agent.rb +++ b/app/models/agents/human_task_agent.rb @@ -9,9 +9,13 @@ module Agents HITs can be created in response to events, or on a schedule. Set `trigger_on` to either `schedule` or `event`. + # Schedule + The schedule of this Agent is how often it should check for completed HITs, __NOT__ how often to submit one. To configure how often a new HIT should be submitted when in `schedule` mode, set `submission_period` to a number of hours. + # Example + If created with an event, all HIT fields can contain interpolated values via [JSONPaths](http://goessner.net/articles/JsonPath/) placed between < and > characters. For example, if the incoming event was a Twitter event, you could make a HITT to rate its sentiment like this: @@ -58,8 +62,50 @@ module Agents which contain `key` and `text`. For _free\\_text_, the special configuration options are all optional, and are `default`, `min_length`, and `max_length`. - If all of the `questions` are of `type` _selection_, you can set `take_majority` to _true_ at the top level to - automatically select the majority vote for each question across all `assignments`. 
If all selections are numeric, an `average_answer` will also be generated. + # Combining answers + + There are a couple of ways to combine HITs that have multiple `assignments`, all of which involve setting `combination_mode` at the top level. + + ## Taking the majority + + Option 1: if all of your `questions` are of `type` _selection_, you can set `combination_mode` to `take_majority`. + This will cause the Agent to automatically select the majority vote for each question across all `assignments` and return it as `majority_answer`. + If all selections are numeric, an `average_answer` will also be generated. + + Option 2: you can have the Agent ask additional human workers to rank the `assignments` and return the most highly ranked answer. + To do this, set `combination_mode` to `poll` and provide a `poll_options` object. Here is an example: + + { + "trigger_on": "schedule", + "submission_period": 12, + "combination_mode": "poll", + "poll_options": { + "title": "Take a poll about some jokes", + "instructions": "Please rank these jokes from most funny (5) to least funny (1)", + "assignments": 3, + "row_template": "<$.joke>" + }, + "hit": { + "assignments": 5, + "title": "Tell a joke", + "description": "Please tell me a joke", + "reward": 0.05, + "lifetime_in_seconds": "3600", + "questions": [ + { + "type": "free_text", + "key": "joke", + "name": "Your joke", + "required": "true", + "question": "Joke", + "min_length": "2", + "max_length": "2000" + } + ] + } + } + + # Other settings `lifetime_in_seconds` is the number of seconds a HIT is left on Amazon before it's automatically closed. The default is 1 day. 
@@ -70,6 +116,12 @@ module Agents Events look like: { + "answers": [ + { + "feedback": "Hello!", + "sentiment": "happy" + } + ] } MD @@ -97,9 +149,13 @@ module Agents errors.add(:base, "all questions of type 'selection' must have a selections array with selections that set 'key' and 'name'") end - if options[:take_majority] == "true" && options[:hit][:questions].any? { |question| question[:type] != "selection" } + if take_majority? && options[:hit][:questions].any? { |question| question[:type] != "selection" } errors.add(:base, "all questions must be of type 'selection' to use the 'take_majority' option") end + + if create_poll? + errors.add(:base, "poll_options is required when combination_mode is set to 'poll' and must have the keys 'title', 'instructions', 'row_template', and 'assignments'") unless options[:poll_options].is_a?(Hash) && options[:poll_options][:title].present? && options[:poll_options][:instructions].present? && options[:poll_options][:row_template].present? && options[:poll_options][:assignments].to_i > 0 + end end def default_options @@ -152,70 +208,153 @@ module Agents if options[:trigger_on] == "schedule" && (memory[:last_schedule] || 0) <= Time.now.to_i - options[:submission_period].to_i * 60 * 60 memory[:last_schedule] = Time.now.to_i - create_hit + create_basic_hit end end def receive(incoming_events) if options[:trigger_on] == "event" incoming_events.each do |event| - create_hit event + create_basic_hit event end end end protected + def take_majority? + options[:combination_mode] == "take_majority" || options[:take_majority] == "true" + end + + def create_poll? + options[:combination_mode] == "poll" + end + + def event_for_hit(hit_id) + if memory[:hits][hit_id.to_sym].is_a?(Hash) + Event.find_by_id(memory[:hits][hit_id.to_sym][:event_id]) + else + nil + end + end + + def hit_type(hit_id) + # Fix this: the Ruby process will slowly run out of RAM by symbolizing these unique keys. 
+ if memory[:hits][hit_id.to_sym].is_a?(Hash) && memory[:hits][hit_id.to_sym][:type] + memory[:hits][hit_id.to_sym][:type].to_sym + else + :user + end + end + def review_hits reviewable_hit_ids = RTurk::GetReviewableHITs.create.hit_ids my_reviewed_hit_ids = reviewable_hit_ids & (memory[:hits] || {}).keys.map(&:to_s) if reviewable_hit_ids.length > 0 log "MTurk reports #{reviewable_hit_ids.length} HITs, of which I own [#{my_reviewed_hit_ids.to_sentence}]" end + my_reviewed_hit_ids.each do |hit_id| hit = RTurk::Hit.new(hit_id) assignments = hit.assignments log "Looking at HIT #{hit_id}. I found #{assignments.length} assignments#{" with the statuses: #{assignments.map(&:status).to_sentence}" if assignments.length > 0}" if assignments.length == hit.max_assignments && assignments.all? { |assignment| assignment.status == "Submitted" } - payload = { :answers => assignments.map(&:answers) } + inbound_event = event_for_hit(hit_id) - if options[:take_majority] == "true" - counts = {} - options[:hit][:questions].each do |question| - question_counts = question[:selections].inject({}) { |memo, selection| memo[selection[:key]] = 0; memo } - assignments.each do |assignment| - answers = ActiveSupport::HashWithIndifferentAccess.new(assignment.answers) - answer = answers[question[:key]] - question_counts[answer] += 1 + if hit_type(hit_id) == :poll + # handle completed polls + + log "Handling a poll: #{hit_id}" + + scores = {} + assignments.each do |assignment| + assignment.answers.each do |index, rating| + scores[index] ||= 0 + scores[index] += rating.to_i end - counts[question[:key]] = question_counts end - payload[:counts] = counts - majority_answer = counts.inject({}) do |memo, (key, question_counts)| - memo[key] = question_counts.to_a.sort {|a, b| a.last <=> b.last }.last.first - memo - end - payload[:majority_answer] = majority_answer + top_answer = scores.to_a.sort {|b, a| a.last <=> b.last }.first.first - if all_questions_are_numeric? 
- average_answer = counts.inject({}) do |memo, (key, question_counts)| - sum = divisor = 0 - question_counts.to_a.each do |num, count| - sum += num.to_s.to_f * count - divisor += count + payload = { + :answers => memory[:hits][hit_id.to_sym][:answers], + :poll => assignments.map(&:answers), + :best_answer => memory[:hits][hit_id.to_sym][:answers][top_answer.to_i - 1] + } + + event = create_event :payload => payload + log "Event emitted with answer(s) for poll", :outbound_event => event, :inbound_event => inbound_event + else + # handle normal completed HITs + payload = { :answers => assignments.map(&:answers) } + + if take_majority? + counts = {} + options[:hit][:questions].each do |question| + question_counts = question[:selections].inject({}) { |memo, selection| memo[selection[:key]] = 0; memo } + assignments.each do |assignment| + answers = ActiveSupport::HashWithIndifferentAccess.new(assignment.answers) + answer = answers[question[:key]] + question_counts[answer] += 1 end - memo[key] = sum / divisor.to_f + counts[question[:key]] = question_counts + end + payload[:counts] = counts + + majority_answer = counts.inject({}) do |memo, (key, question_counts)| + memo[key] = question_counts.to_a.sort {|a, b| a.last <=> b.last }.last.first memo end - payload[:average_answer] = average_answer + payload[:majority_answer] = majority_answer + + if all_questions_are_numeric? + average_answer = counts.inject({}) do |memo, (key, question_counts)| + sum = divisor = 0 + question_counts.to_a.each do |num, count| + sum += num.to_s.to_f * count + divisor += count + end + memo[key] = sum / divisor.to_f + memo + end + payload[:average_answer] = average_answer + end + end + + if create_poll? 
+ questions = [] + selections = 5.times.map { |i| { :key => i+1, :text => i+1 } }.reverse + assignments.length.times do |index| + questions << { + :type => "selection", + :name => "Item #{index + 1}", + :key => index, + :required => "true", + :question => Utils.interpolate_jsonpaths(options[:poll_options][:row_template], assignments[index].answers), + :selections => selections + } + end + + poll_hit = create_hit :title => options[:poll_options][:title], + :description => options[:poll_options][:instructions], + :questions => questions, + :assignments => options[:poll_options][:assignments], + :lifetime_in_seconds => options[:poll_options][:lifetime_in_seconds], + :reward => options[:poll_options][:reward], + :payload => inbound_event && inbound_event.payload, + :metadata => { :type => :poll, + :original_hit => hit_id, + :answers => assignments.map(&:answers), + :event_id => inbound_event && inbound_event.id } + + log "Poll HIT created with ID #{poll_hit.id} and URL #{poll_hit.url}. Original HIT: #{hit_id}", :inbound_event => inbound_event + else + event = create_event :payload => payload + log "Event emitted with answer(s)", :outbound_event => event, :inbound_event => inbound_event end end - event = create_event :payload => payload - log "Event emitted with answer(s)", :outbound_event => event, :inbound_event => Event.find_by_id(memory[:hits][hit_id.to_sym]) - assignments.each(&:approve!) hit.dispose! @@ -232,22 +371,35 @@ module Agents end end - def create_hit(event = nil) - payload = event ? 
event.payload : {} - title = Utils.interpolate_jsonpaths(options[:hit][:title], payload).strip - description = Utils.interpolate_jsonpaths(options[:hit][:description], payload).strip - questions = Utils.recursively_interpolate_jsonpaths(options[:hit][:questions], payload) + def create_basic_hit(event = nil) + hit = create_hit :title => options[:hit][:title], + :description => options[:hit][:description], + :questions => options[:hit][:questions], + :assignments => options[:hit][:assignments], + :lifetime_in_seconds => options[:hit][:lifetime_in_seconds], + :reward => options[:hit][:reward], + :payload => event && event.payload, + :metadata => { :event_id => event && event.id } + + log "HIT created with ID #{hit.id} and URL #{hit.url}", :inbound_event => event + end + + def create_hit(opts = {}) + payload = opts[:payload] || {} + title = Utils.interpolate_jsonpaths(opts[:title], payload).strip + description = Utils.interpolate_jsonpaths(opts[:description], payload).strip + questions = Utils.recursively_interpolate_jsonpaths(opts[:questions], payload) hit = RTurk::Hit.create(:title => title) do |hit| - hit.max_assignments = (options[:hit][:assignments] || 1).to_i + hit.max_assignments = (opts[:assignments] || 1).to_i hit.description = description - hit.lifetime = (options[:hit][:lifetime_in_seconds] || 24 * 60 * 60).to_i + hit.lifetime = (opts[:lifetime_in_seconds] || 24 * 60 * 60).to_i hit.question_form AgentQuestionForm.new(:title => title, :description => description, :questions => questions) - hit.reward = (options[:hit][:reward] || 0.05).to_f + hit.reward = (opts[:reward] || 0.05).to_f #hit.qualifications.add :approval_rate, { :gt => 80 } end memory[:hits] ||= {} - memory[:hits][hit.id] = event && event.id - log "HIT created with ID #{hit.id} and URL #{hit.url}", :inbound_event => event + memory[:hits][hit.id] = opts[:metadata] || {} + hit end # RTurk Question Form diff --git a/spec/models/agents/human_task_agent_spec.rb 
b/spec/models/agents/human_task_agent_spec.rb index 38cb6932..75aa3c57 100644 --- a/spec/models/agents/human_task_agent_spec.rb +++ b/spec/models/agents/human_task_agent_spec.rb @@ -108,7 +108,43 @@ describe Agents::HumanTaskAgent do @checker.should_not be_valid end - it "requires that all questions be of type 'selection' when `take_majority` is `true`" do + it "requires that 'poll_options' be present and populated when 'combination_mode' is set to 'poll'" do + @checker.options[:combination_mode] = "poll" + @checker.should_not be_valid + @checker.options[:poll_options] = {} + @checker.should_not be_valid + @checker.options[:poll_options] = { :title => "Take a poll about jokes", + :instructions => "Rank these by how funny they are", + :assignments => 3, + :row_template => "<$.joke>" } + @checker.should be_valid + @checker.options[:poll_options] = { :instructions => "Rank these by how funny they are", + :assignments => 3, + :row_template => "<$.joke>" } + @checker.should_not be_valid + @checker.options[:poll_options] = { :title => "Take a poll about jokes", + :assignments => 3, + :row_template => "<$.joke>" } + @checker.should_not be_valid + @checker.options[:poll_options] = { :title => "Take a poll about jokes", + :instructions => "Rank these by how funny they are", + :row_template => "<$.joke>" } + @checker.should_not be_valid + @checker.options[:poll_options] = { :title => "Take a poll about jokes", + :instructions => "Rank these by how funny they are", + :assignments => 3} + @checker.should_not be_valid + end + + it "requires that all questions be of type 'selection' when 'combination_mode' is 'take_majority'" do + @checker.options[:combination_mode] = "take_majority" + @checker.should_not be_valid + @checker.options[:hit][:questions][1][:type] = "selection" + @checker.options[:hit][:questions][1][:selections] = @checker.options[:hit][:questions][0][:selections] + @checker.should be_valid + end + + it "accepts 'take_majority': 'true' for legacy support" do 
@checker.options[:take_majority] = "true" @checker.should_not be_valid @checker.options[:hit][:questions][1][:type] = "selection" @@ -126,7 +162,7 @@ describe Agents::HumanTaskAgent do it "should check for reviewable HITs frequently" do mock(@checker).review_hits.twice - mock(@checker).create_hit.once + mock(@checker).create_basic_hit.once @checker.check @checker.check end @@ -135,7 +171,7 @@ describe Agents::HumanTaskAgent do now = Time.now stub(Time).now { now } mock(@checker).review_hits.times(3) - mock(@checker).create_hit.twice + mock(@checker).create_basic_hit.twice @checker.check now += 1 * 60 * 60 @checker.check @@ -144,7 +180,7 @@ describe Agents::HumanTaskAgent do end it "should ignore events" do - mock(@checker).create_hit(anything).times(0) + mock(@checker).create_basic_hit(anything).times(0) @checker.receive([events(:bob_website_agent_event)]) end end @@ -155,7 +191,7 @@ describe Agents::HumanTaskAgent do now = Time.now stub(Time).now { now } mock(@checker).review_hits.times(3) - mock(@checker).create_hit.times(0) + mock(@checker).create_basic_hit.times(0) @checker.check now += 1 * 60 * 60 @checker.check @@ -164,7 +200,7 @@ describe Agents::HumanTaskAgent do end it "should create HITs based on events" do - mock(@checker).create_hit(events(:bob_website_agent_event)).times(1) + mock(@checker).create_basic_hit(events(:bob_website_agent_event)).times(1) @checker.receive([events(:bob_website_agent_event)]) end end @@ -181,7 +217,7 @@ describe Agents::HumanTaskAgent do mock(hitInterface).question_form(instance_of Agents::HumanTaskAgent::AgentQuestionForm) { |agent_question_form_instance| question_form = agent_question_form_instance } mock(RTurk::Hit).create(:title => "Hi Joe").yields(hitInterface) { hitInterface } - @checker.send :create_hit, @event + @checker.send :create_basic_hit, @event hitInterface.max_assignments.should == @checker.options[:hit][:assignments] hitInterface.reward.should == @checker.options[:hit][:reward] @@ -192,7 +228,7 @@ describe 
Agents::HumanTaskAgent do xml.should include("Make something for Joe") xml.should include("Joe Question 1") - @checker.memory[:hits][123].should == @event.id + @checker.memory[:hits][123][:event_id].should == @event.id end it "works without an event too" do @@ -201,7 +237,7 @@ describe Agents::HumanTaskAgent do hitInterface.id = 123 mock(hitInterface).question_form(instance_of Agents::HumanTaskAgent::AgentQuestionForm) mock(RTurk::Hit).create(:title => "Hi").yields(hitInterface) { hitInterface } - @checker.send :create_hit + @checker.send :create_basic_hit hitInterface.max_assignments.should == @checker.options[:hit][:assignments] hitInterface.reward.should == @checker.options[:hit][:reward] end @@ -259,8 +295,8 @@ describe Agents::HumanTaskAgent do # It knows about two HITs from two different events. @checker.memory[:hits] = {} - @checker.memory[:hits][:"JH3132836336DHG"] = @event.id - @checker.memory[:hits][:"JH39AA63836DHG"] = event2.id + @checker.memory[:hits][:"JH3132836336DHG"] = { :event_id => @event.id } + @checker.memory[:hits][:"JH39AA63836DHG"] = { :event_id => event2.id } hit_ids = %w[JH3132836336DHG JH39AA63836DHG JH39AA63836DH12345] mock(RTurk::GetReviewableHITs).create { mock!.hit_ids { hit_ids } } # It sees 3 HITs. @@ -273,7 +309,7 @@ describe Agents::HumanTaskAgent do end it "shouldn't do anything if an assignment isn't ready" do - @checker.memory[:hits] = { :"JH3132836336DHG" => @event.id } + @checker.memory[:hits] = { :"JH3132836336DHG" => { :event_id => @event.id } } mock(RTurk::GetReviewableHITs).create { mock!.hit_ids { %w[JH3132836336DHG JH39AA63836DHG JH39AA63836DH12345] } } assignments = [ FakeAssignment.new(:status => "Accepted", :answers => {}), @@ -288,11 +324,11 @@ describe Agents::HumanTaskAgent do @checker.send :review_hits assignments.all? 
{|a| a.approved == true }.should be_false - @checker.memory[:hits].should == { :"JH3132836336DHG" => @event.id } + @checker.memory[:hits].should == { :"JH3132836336DHG" => { :event_id => @event.id } } end it "shouldn't do anything if an assignment is missing" do - @checker.memory[:hits] = { :"JH3132836336DHG" => @event.id } + @checker.memory[:hits] = { :"JH3132836336DHG" => { :event_id => @event.id } } mock(RTurk::GetReviewableHITs).create { mock!.hit_ids { %w[JH3132836336DHG JH39AA63836DHG JH39AA63836DH12345] } } assignments = [ FakeAssignment.new(:status => "Submitted", :answers => {"sentiment"=>"happy", "feedback"=>"Take 2"}) @@ -306,11 +342,11 @@ describe Agents::HumanTaskAgent do @checker.send :review_hits assignments.all? {|a| a.approved == true }.should be_false - @checker.memory[:hits].should == { :"JH3132836336DHG" => @event.id } + @checker.memory[:hits].should == { :"JH3132836336DHG" => { :event_id => @event.id } } end it "should create events when all assignments are ready" do - @checker.memory[:hits] = { :"JH3132836336DHG" => @event.id } + @checker.memory[:hits] = { :"JH3132836336DHG" => { :event_id => @event.id } } mock(RTurk::GetReviewableHITs).create { mock!.hit_ids { %w[JH3132836336DHG JH39AA63836DHG JH39AA63836DH12345] } } assignments = [ FakeAssignment.new(:status => "Submitted", :answers => {"sentiment"=>"neutral", "feedback"=>""}), @@ -337,8 +373,8 @@ describe Agents::HumanTaskAgent do describe "taking majority votes" do before do - @checker.options[:take_majority] = "true" - @checker.memory[:hits] = { :"JH3132836336DHG" => @event.id } + @checker.options[:combination_mode] = "take_majority" + @checker.memory[:hits] = { :"JH3132836336DHG" => { :event_id => @event.id } } mock(RTurk::GetReviewableHITs).create { mock!.hit_ids { %w[JH3132836336DHG JH39AA63836DHG JH39AA63836DH12345] } } end @@ -386,6 +422,10 @@ describe Agents::HumanTaskAgent do end it "should also provide an average answer when all questions are numeric" do + # it should accept 
'take_majority': 'true' as well for legacy support. Demonstrating that here. + @checker.options.delete :combination_mode + @checker.options[:take_majority] = "true" + @checker.options[:hit][:questions] = [ { :type => "selection", @@ -435,5 +475,118 @@ describe Agents::HumanTaskAgent do @checker.memory[:hits].should == {} end end + + describe "creating and reviewing polls" do + before do + @checker.options[:combination_mode] = "poll" + @checker.options[:poll_options] = { + :title => "Hi!", + :instructions => "hello!", + :assignments => 2, + :row_template => "This is <.sentiment>" + } + @event.save! + mock(RTurk::GetReviewableHITs).create { mock!.hit_ids { %w[JH3132836336DHG JH39AA63836DHG JH39AA63836DH12345] } } + end + + it "creates a poll using the row_template, message, and correct number of assignments" do + @checker.memory[:hits] = { :"JH3132836336DHG" => { :event_id => @event.id } } + + # Mock out the HIT's submitted assignments. + assignments = [ + FakeAssignment.new(:status => "Submitted", :answers => {"sentiment"=>"sad", "feedback"=>"This is my feedback 1"}), + FakeAssignment.new(:status => "Submitted", :answers => {"sentiment"=>"neutral", "feedback"=>"This is my feedback 2"}), + FakeAssignment.new(:status => "Submitted", :answers => {"sentiment"=>"happy", "feedback"=>"This is my feedback 3"}), + FakeAssignment.new(:status => "Submitted", :answers => {"sentiment"=>"happy", "feedback"=>"This is my feedback 4"}) + ] + hit = FakeHit.new(:max_assignments => 4, :assignments => assignments) + mock(RTurk::Hit).new("JH3132836336DHG") { hit } + + @checker.memory[:hits][:"JH3132836336DHG"].should be_present + + # Setup mocks for HIT creation + + question_form = nil + hitInterface = OpenStruct.new + hitInterface.id = "JH39AA63836DH12345" + mock(hitInterface).question_form(instance_of Agents::HumanTaskAgent::AgentQuestionForm) { |agent_question_form_instance| question_form = agent_question_form_instance } + mock(RTurk::Hit).create(:title => "Hi!").yields(hitInterface) 
{ hitInterface } + + # And finally, the test. + + lambda { + @checker.send :review_hits + }.should change { Event.count }.by(0) # it does not emit an event until all poll results are in + + # it approves the existing assignments + + assignments.all? {|a| a.approved == true }.should be_true + hit.should be_disposed + + # it creates a new HIT for the poll + + hitInterface.max_assignments.should == @checker.options[:poll_options][:assignments] + hitInterface.description.should == @checker.options[:poll_options][:instructions] + + xml = question_form.to_xml + xml.should include("This is happy") + xml.should include("This is neutral") + xml.should include("This is sad") + + @checker.save + @checker.reload + @checker.memory[:hits][:"JH3132836336DHG"].should_not be_present + @checker.memory[:hits][:"JH39AA63836DH12345"].should be_present + @checker.memory[:hits][:"JH39AA63836DH12345"][:event_id].should == @event.id + @checker.memory[:hits][:"JH39AA63836DH12345"][:type].should == :poll + @checker.memory[:hits][:"JH39AA63836DH12345"][:original_hit].should == "JH3132836336DHG" + @checker.memory[:hits][:"JH39AA63836DH12345"][:answers].length.should == 4 + end + + it "emits an event when all poll results are in, containing the data from the best answer, plus all others" do + original_answers = [ + {:sentiment => "sad", :feedback => "This is my feedback 1"}, + {:sentiment => "neutral", :feedback => "This is my feedback 2"}, + {:sentiment => "happy", :feedback => "This is my feedback 3"}, + {:sentiment => "happy", :feedback => "This is my feedback 4"} + ] + + @checker.memory[:hits] = { + :JH39AA63836DH12345 => { + :type => :poll, + :original_hit => "JH3132836336DHG", + :answers => original_answers, + :event_id => 345 + } + } + + # Mock out the HIT's submitted assignments. 
+ assignments = [ + FakeAssignment.new(:status => "Submitted", :answers => {"1" => "2", "2" => "5", "3" => "3", "4" => "2"}), + FakeAssignment.new(:status => "Submitted", :answers => {"1" => "3", "2" => "4", "3" => "1", "4" => "4"}) + ] + hit = FakeHit.new(:max_assignments => 2, :assignments => assignments) + mock(RTurk::Hit).new("JH39AA63836DH12345") { hit } + + @checker.memory[:hits][:"JH39AA63836DH12345"].should be_present + + lambda { + @checker.send :review_hits + }.should change { Event.count }.by(1) + + # It emits an event + + @checker.events.last.payload[:answers].should == original_answers + @checker.events.last.payload[:poll].should == [{:"1" => "2", :"2" => "5", :"3" => "3", :"4" => "2"}, {:"1" => "3", :"2" => "4", :"3" => "1", :"4" => "4"}] + @checker.events.last.payload[:best_answer].should == {:sentiment => "neutral", :feedback => "This is my feedback 2"} + + # it approves the existing assignments + + assignments.all? {|a| a.approved == true }.should be_true + hit.should be_disposed + + @checker.memory[:hits].should be_empty + end + end end end \ No newline at end of file From b32cef09c908522915ff5167e222465640f5b4a6 Mon Sep 17 00:00:00 2001 From: Andrew Cantino Date: Sat, 5 Oct 2013 11:33:33 -0700 Subject: [PATCH 2/7] doc --- app/models/agents/human_task_agent.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/models/agents/human_task_agent.rb b/app/models/agents/human_task_agent.rb index 0c861e1d..66197b39 100644 --- a/app/models/agents/human_task_agent.rb +++ b/app/models/agents/human_task_agent.rb @@ -105,6 +105,8 @@ module Agents } } + Resulting events will have the original `answers`, as well as the `poll` results, and a field called `best_answer` that contains the best answer as determined by the poll. + # Other settings `lifetime_in_seconds` is the number of seconds a HIT is left on Amazon before it's automatically closed. The default is 1 day. 
From 66054f92c07c1c94a45635ad582b06df643652e7 Mon Sep 17 00:00:00 2001 From: j-wilkins Date: Wed, 18 Dec 2013 21:03:34 -1000 Subject: [PATCH 3/7] a webhook agent to create events from webhooks. --- app/models/agents/webhook_agent.rb | 35 ++++++++++++++++++++++++ spec/models/agents/webhook_agent_spec.rb | 31 +++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 app/models/agents/webhook_agent.rb create mode 100644 spec/models/agents/webhook_agent_spec.rb diff --git a/app/models/agents/webhook_agent.rb b/app/models/agents/webhook_agent.rb new file mode 100644 index 00000000..02cbc2f3 --- /dev/null +++ b/app/models/agents/webhook_agent.rb @@ -0,0 +1,35 @@ +module Agents + class WebhookAgent < Agent + cannot_be_scheduled! + + description <<-MD + Use this Agent to create events by receiving webhooks from any source. + + Options: + + * `secret` - A token that the host will provide for authentication. + MD + + def default_options + { "secret" => "supersecretstring", } + end + + def receive_webhook(params) + return ["Not Authorized", 401] unless params[:secret] == options[:secret] + + create_event(:payload => params[:payload]) + + ['Event Created', 201] + end + + def working? + true + end + + def validate_options + unless options[:secret].present? + errors.add(:base, "Must specify a :secret for 'Authenticating' requests") + end + end + end +end diff --git a/spec/models/agents/webhook_agent_spec.rb b/spec/models/agents/webhook_agent_spec.rb new file mode 100644 index 00000000..f5173cd3 --- /dev/null +++ b/spec/models/agents/webhook_agent_spec.rb @@ -0,0 +1,31 @@ +require 'spec_helper' + +describe Agents::WebhookAgent do + let(:agent) do + _agent = Agents::WebhookAgent.new(:name => 'webhook', + :options => {:secret => :foobar}) + _agent.user = users(:bob) + _agent.save! 
+ _agent + end + + after { agent.destroy } + + describe 'receive_webhook' do + it 'should create event if secret matches' do + out = nil + lambda { + out = agent.receive_webhook({:secret => :foobar, :payload => {:some => :info}}) + }.should change { Event.count }.by(1) + out.should eq(['Event Created', 201]) + end + + it 'should not create event if secrets dont match' do + out = nil + lambda { + out = agent.receive_webhook({:secret => :bazbat, :payload => {:some => :info}}) + }.should change { Event.count }.by(0) + out.should eq(['Not Authorized', 401]) + end + end +end From dff2bc68f4bf52958e3905873c8e9f0e88cd84f8 Mon Sep 17 00:00:00 2001 From: j-wilkins Date: Fri, 20 Dec 2013 22:41:54 -1000 Subject: [PATCH 4/7] configurable webhook payload , update description, improve tests --- app/models/agents/webhook_agent.rb | 45 +++++++++++++++++++----- spec/models/agents/webhook_agent_spec.rb | 9 +++-- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/app/models/agents/webhook_agent.rb b/app/models/agents/webhook_agent.rb index 02cbc2f3..ce103d13 100644 --- a/app/models/agents/webhook_agent.rb +++ b/app/models/agents/webhook_agent.rb @@ -2,28 +2,51 @@ module Agents class WebhookAgent < Agent cannot_be_scheduled! - description <<-MD - Use this Agent to create events by receiving webhooks from any source. + description do + <<-MD + Use this Agent to create events by receiving webhooks from any source. - Options: + In order to create events with this agent, make a POST request to: + ``` + https://#{ENV['DOMAIN']}/users/#{user.id}/webhooks/#{id || ''}/:secret + ``` where `:secret` is specified in your options. - * `secret` - A token that the host will provide for authentication. - MD + The + + Options: + + * `secret` - A token that the host will provide for authentication. + * `expected_receive_period_in_days` - How often you expect to receive + events this way. Used to determine if the agent is working. 
+ * `payload_path` - JSONPath of the attribute of the POST body to be + used as the Event payload. + MD + end + + event_description do + <<-MD + The event payload is based on the value of the `payload_path` option, + which is set to `#{options[:payload_path]}`. + MD + end def default_options - { "secret" => "supersecretstring", } + { "secret" => "supersecretstring", + "expected_receive_period_in_days" => 1, + "payload_path" => "payload"} end def receive_webhook(params) - return ["Not Authorized", 401] unless params[:secret] == options[:secret] + secret = params.delete(:secret) + return ["Not Authorized", 401] unless secret == options[:secret] - create_event(:payload => params[:payload]) + create_event(:payload => payload_for(params)) ['Event Created', 201] end def working? - true + event_created_within(options[:expected_receive_period_in_days]) && !recent_error_logs? end def validate_options @@ -31,5 +54,9 @@ module Agents errors.add(:base, "Must specify a :secret for 'Authenticating' requests") end end + + def payload_for(params) + Utils.values_at(params, options[:payload_path]) || {} + end end end diff --git a/spec/models/agents/webhook_agent_spec.rb b/spec/models/agents/webhook_agent_spec.rb index f5173cd3..823b6029 100644 --- a/spec/models/agents/webhook_agent_spec.rb +++ b/spec/models/agents/webhook_agent_spec.rb @@ -3,11 +3,12 @@ require 'spec_helper' describe Agents::WebhookAgent do let(:agent) do _agent = Agents::WebhookAgent.new(:name => 'webhook', - :options => {:secret => :foobar}) + :options => {:secret => :foobar, :payload_path => '$'}) _agent.user = users(:bob) _agent.save! 
_agent end + let(:payload) { {:some => :info} } after { agent.destroy } @@ -15,17 +16,19 @@ describe Agents::WebhookAgent do it 'should create event if secret matches' do out = nil lambda { - out = agent.receive_webhook({:secret => :foobar, :payload => {:some => :info}}) + out = agent.receive_webhook({:secret => :foobar, :payload => payload}) }.should change { Event.count }.by(1) out.should eq(['Event Created', 201]) + Event.last.last.payload.should eq([{'payload' => payload}]) end it 'should not create event if secrets dont match' do out = nil lambda { - out = agent.receive_webhook({:secret => :bazbat, :payload => {:some => :info}}) + out = agent.receive_webhook({:secret => :bazbat, :payload => payload}) }.should change { Event.count }.by(0) out.should eq(['Not Authorized', 401]) + Event.last.last.payload.should eq([{'payload' => payload}]) end end end From d881792ed2bdfbaa62ebaec1b291af133afa59ef Mon Sep 17 00:00:00 2001 From: j-wilkins Date: Thu, 26 Dec 2013 10:58:57 -0600 Subject: [PATCH 5/7] fix spec typo --- spec/models/agents/webhook_agent_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/models/agents/webhook_agent_spec.rb b/spec/models/agents/webhook_agent_spec.rb index 823b6029..fb6e0ead 100644 --- a/spec/models/agents/webhook_agent_spec.rb +++ b/spec/models/agents/webhook_agent_spec.rb @@ -19,7 +19,7 @@ describe Agents::WebhookAgent do out = agent.receive_webhook({:secret => :foobar, :payload => payload}) }.should change { Event.count }.by(1) out.should eq(['Event Created', 201]) - Event.last.last.payload.should eq([{'payload' => payload}]) + Event.last.payload.should eq([{'payload' => payload}]) end it 'should not create event if secrets dont match' do @@ -28,7 +28,7 @@ describe Agents::WebhookAgent do out = agent.receive_webhook({:secret => :bazbat, :payload => payload}) }.should change { Event.count }.by(0) out.should eq(['Not Authorized', 401]) - Event.last.last.payload.should eq([{'payload' => payload}]) + 
Event.last.payload.should eq([{'payload' => payload}]) end end end From 8ea70518870a8ab3173758d5fdf2db04e81fc968 Mon Sep 17 00:00:00 2001 From: j-wilkins Date: Thu, 26 Dec 2013 11:09:25 -0600 Subject: [PATCH 6/7] fix test logic :grin: --- spec/models/agents/webhook_agent_spec.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spec/models/agents/webhook_agent_spec.rb b/spec/models/agents/webhook_agent_spec.rb index fb6e0ead..2e4c08b7 100644 --- a/spec/models/agents/webhook_agent_spec.rb +++ b/spec/models/agents/webhook_agent_spec.rb @@ -8,7 +8,7 @@ describe Agents::WebhookAgent do _agent.save! _agent end - let(:payload) { {:some => :info} } + let(:payload) { {'some' => 'info'} } after { agent.destroy } @@ -28,7 +28,6 @@ describe Agents::WebhookAgent do out = agent.receive_webhook({:secret => :bazbat, :payload => payload}) }.should change { Event.count }.by(0) out.should eq(['Not Authorized', 401]) - Event.last.payload.should eq([{'payload' => payload}]) end end end From 0b0d809ede34968bd5189dc78ccce9edbdd5603d Mon Sep 17 00:00:00 2001 From: Andrew Cantino Date: Thu, 26 Dec 2013 12:17:18 -0500 Subject: [PATCH 7/7] update dotenv-rails and attempt to make cap sync:db:down work with remote .env files --- Gemfile.lock | 6 +++--- lib/capistrano/sync.rb | 31 ++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index fb574e34..78072bd3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -63,9 +63,9 @@ GEM railties (>= 3.2.6, < 5) warden (~> 1.2.3) diff-lcs (1.2.4) - dotenv (0.8.0) - dotenv-rails (0.8.0) - dotenv (= 0.8.0) + dotenv (0.9.0) + dotenv-rails (0.9.0) + dotenv (= 0.9.0) em-http-request (1.0.3) addressable (>= 2.2.3) cookiejar diff --git a/lib/capistrano/sync.rb b/lib/capistrano/sync.rb index bb19d859..8af33f93 100644 --- a/lib/capistrano/sync.rb +++ b/lib/capistrano/sync.rb @@ -1,5 +1,6 @@ require 'yaml' require 'pathname' +require 'dotenv' # Edited by Andrew Cantino. 
Based on: https://gist.github.com/339471 @@ -99,6 +100,28 @@ namespace :sync do return database["#{db}"]['username'], database["#{db}"]['password'], database["#{db}"]['database'], database["#{db}"]['host'] end + # Used by remote_database_config to parse the remote .env file. Depends on the dotenv-rails gem. + class RemoteEnvLoader < Dotenv::Environment + def initialize(data) + @data = data + load + end + + def with_loaded_env + begin + saved_env = ENV.to_hash.dup + ENV.update(self) + yield + ensure + ENV.replace(saved_env) + end + end + + def read + @data.split("\n") + end + end + # # Reads the database credentials from the remote config/database.yml file # +db+ the name of the environment to get the credentials for @@ -106,7 +129,13 @@ namespace :sync do # def remote_database_config(db) remote_config = capture("cat #{current_path}/config/database.yml") - database = YAML::load(remote_config) + remote_env = capture("cat #{current_path}/.env") + + database = nil + RemoteEnvLoader.new(remote_env).with_loaded_env do + database = YAML::load(ERB.new(remote_config).result) + end + return database["#{db}"]['username'], database["#{db}"]['password'], database["#{db}"]['database'], database["#{db}"]['host'] end