From fe50730033bb061f0ee3ad45ae9e49cea6048dd1 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Tue, 19 Apr 2016 23:25:52 +0900 Subject: [PATCH 1/7] DataOutputAgent should limit events after ordering This should fix #1044. --- app/models/agents/data_output_agent.rb | 42 +++++++++++++++++++- spec/models/agents/data_output_agent_spec.rb | 6 +++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/app/models/agents/data_output_agent.rb b/app/models/agents/data_output_agent.rb index 29ac95d8..ca2d4aaf 100644 --- a/app/models/agents/data_output_agent.rb +++ b/app/models/agents/data_output_agent.rb @@ -175,6 +175,43 @@ module Agents interpolated['push_hubs'].presence || [] end + def sorted_events(reload = false) + events = + if (event_ids = memory[:event_ids]) && + memory[:events_order] == events_order && + memory[:events_to_show] >= events_to_show + received_events.where(id: event_ids).to_a + else + memory[:last_event_id] = nil + reload = true + [] + end + + if reload + new_events = + if last_event_id = memory[:last_event_id] + received_events.order(id: :desc).where(Event.arel_table[:id].gt(last_event_id)) + else + # dig at least twice as many events as the number of + # `events_to_show` + received_events.order(id: :desc).limit([source_ids.count, 2].max * events_to_show) + end.to_a + events = new_events.concat(events) + memory[:events_order] = events_order + memory[:events_to_show] = events_to_show + memory[:last_event_id] = events.first.try!(:id) + end + + events = sort_events(events).last(events_to_show) + + if reload + memory[:event_ids] = events.map(&:id) + save + end + + events + end + def receive_web_request(params, method, format) unless interpolated['secrets'].include?(params['secret']) if format =~ /json/ @@ -184,7 +221,7 @@ module Agents end end - source_events = sort_events(received_events.order(id: :desc).limit(events_to_show).to_a) + source_events = sorted_events() interpolation_context.stack do interpolation_context['events'] = source_events @@ -251,6 +288,9 @@ module Agents def receive(incoming_events) url = feed_url(secret: interpolated['secrets'].first, format: :xml) + # Reload new events and update cache + sorted_events(true) + push_hubs.each do |hub| push_to_hub(hub, url) end diff --git a/spec/models/agents/data_output_agent_spec.rb b/spec/models/agents/data_output_agent_spec.rb index 5b9e3f5c..19487f57 100644 --- a/spec/models/agents/data_output_agent_spec.rb +++ b/spec/models/agents/data_output_agent_spec.rb @@ -248,6 +248,12 @@ describe Agents::DataOutputAgent do end it 'can reorder the events_to_show last events based on a Liquid expression' do + # Check that ordering takes place before limiting, not after + agent.options['events_to_show'] = 2 + asc_content, _status, _content_type = agent.receive_web_request({ 'secret' => 'secret2' }, 'get', 'application/json') + expect(asc_content['items'].map {|i| i["title"] }).to eq(["Evolving again", "Evolving yet again with a past date"]) + + agent.options['events_to_show'] = 40 asc_content, _status, _content_type = agent.receive_web_request({ 'secret' => 'secret2' }, 'get', 'application/json') expect(asc_content['items'].map {|i| i["title"] }).to eq(["Evolving", "Evolving again", "Evolving yet again with a past date"]) From 9b96a6fe6381cff9657758830008be40ff0e15cf Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Mon, 6 Jun 2016 19:10:45 +0900 Subject: [PATCH 2/7] Improve the way DataOutputAgent digs past events In the first run, it now checks `2 * events_to_show` events for each source. This also fixes the problem where older events are selected when `events_order` is not specified by sorting events by the `id`. --- app/models/agents/data_output_agent.rb | 28 +++++++++++++++++--------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/app/models/agents/data_output_agent.rb b/app/models/agents/data_output_agent.rb index ca2d4aaf..1edc9306 100644 --- a/app/models/agents/data_output_agent.rb +++ b/app/models/agents/data_output_agent.rb @@ -188,18 +188,26 @@ module Agents end if reload - new_events = - if last_event_id = memory[:last_event_id] - received_events.order(id: :desc).where(Event.arel_table[:id].gt(last_event_id)) - else - # dig at least twice as many events as the number of - # `events_to_show` - received_events.order(id: :desc).limit([source_ids.count, 2].max * events_to_show) - end.to_a - events = new_events.concat(events) memory[:events_order] = events_order memory[:events_to_show] = events_to_show - memory[:last_event_id] = events.first.try!(:id) + + new_events = + if last_event_id = memory[:last_event_id] + received_events.where(Event.arel_table[:id].gt(last_event_id)). + order(id: :asc).to_a + else + source_ids.flat_map { |source_id| + # dig twice as many events as the number of + # `events_to_show` + received_events.where(agent_id: source_id). + last(2 * events_to_show) + }.sort_by(&:id) + end + + unless new_events.empty? + memory[:last_event_id] = new_events.last.id + events.concat(new_events) + end end events = sort_events(events).last(events_to_show) From adab96bb9750f8e17c6e42132ce2660a7082556a Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Tue, 7 Jun 2016 14:11:19 +0900 Subject: [PATCH 3/7] Allow SortableEvents to use keys other than `events_order` --- app/concerns/sortable_events.rb | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/app/concerns/sortable_events.rb b/app/concerns/sortable_events.rb index 2637b426..1bbb62cf 100644 --- a/app/concerns/sortable_events.rb +++ b/app/concerns/sortable_events.rb @@ -5,6 +5,8 @@ module SortableEvents validate :validate_events_order end + EVENTS_ORDER_KEY = 'events_order'.freeze + def description_events_order(*args) self.class.description_events_order(*args) end @@ -23,9 +25,9 @@ module SortableEvents !can_order_created_events? end - def description_events_order(events = 'events created in each run') + def description_events_order(events = 'events created in each run', events_order_key = EVENTS_ORDER_KEY) <<-MD.lstrip - To specify the order of #{events}, set `events_order` to an array of sort keys, each of which looks like either `expression` or `[expression, type, descending]`, as described as follows: + To specify the order of #{events}, set `#{events_order_key}` to an array of sort keys, each of which looks like either `expression` or `[expression, type, descending]`, as described as follows: * _expression_ is a Liquid template to generate a string to be used as sort key. @@ -48,8 +50,8 @@ module SortableEvents self.class.cannot_order_created_events? end - def events_order - options['events_order'] + def events_order(key = EVENTS_ORDER_KEY) + options[key] end module AutomaticSorter @@ -102,8 +104,8 @@ module SortableEvents } EXPRESSION_TYPES = EXPRESSION_PARSER.keys.freeze - def validate_events_order - case order_by = events_order + def validate_events_order(events_order_key = EVENTS_ORDER_KEY) + case order_by = events_order(events_order_key) when nil when Array # Each tuple may be either [expression, type, desc] or just @@ -113,29 +115,29 @@ module SortableEvents when String # ok else - errors.add(:base, "first element of each events_order tuple must be a Liquid template") + errors.add(:base, "first element of each #{events_order_key} tuple must be a Liquid template") break end case type when nil, *EXPRESSION_TYPES # ok else - errors.add(:base, "second element of each events_order tuple must be #{EXPRESSION_TYPES.to_sentence(last_word_connector: ' or ')}") + errors.add(:base, "second element of each #{events_order_key} tuple must be #{EXPRESSION_TYPES.to_sentence(last_word_connector: ' or ')}") break end if !desc.nil? && boolify(desc).nil? - errors.add(:base, "third element of each events_order tuple must be a boolean value") + errors.add(:base, "third element of each #{events_order_key} tuple must be a boolean value") break end end else - errors.add(:base, "events_order must be an array of arrays") + errors.add(:base, "#{events_order_key} must be an array of arrays") end end # Sort given events in order specified by the "events_order" option - def sort_events(events) - order_by = events_order.presence or + def sort_events(events, events_order_key = EVENTS_ORDER_KEY) + order_by = events_order(events_order_key).presence or return events orders = order_by.map { |_, _, desc = false| boolify(desc) } From 978ec061999167aa519428fc38dd40a63c2ffa33 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Wed, 20 Apr 2016 14:54:17 +0900 Subject: [PATCH 4/7] Rename events_order to events_list_order and readd events_order `events_order` determines how to select events for outputting, whereas `events_list_order` determines how to list selected events in the output. --- app/models/agents/data_output_agent.rb | 24 ++++++++++++++---- ...hange_events_order_to_events_list_order.rb | 25 +++++++++++++++++++ spec/models/agents/data_output_agent_spec.rb | 14 +++++------ 3 files changed, 51 insertions(+), 12 deletions(-) create mode 100644 db/migrate/20160607055850_change_events_order_to_events_list_order.rb diff --git a/app/models/agents/data_output_agent.rb b/app/models/agents/data_output_agent.rb index 1edc9306..ecf2ba07 100644 --- a/app/models/agents/data_output_agent.rb +++ b/app/models/agents/data_output_agent.rb @@ -45,9 +45,14 @@ module Agents "_contents": "tag contents (can be an object for nesting)" } - # Ordering events in the output + # Ordering events - #{description_events_order('events in the output')} + #{description_events_order('events')} + + DataOutputAgent will select the last `events_to_show` entries of its received events sorted in the order specified by `events_order`, which is defaulted to the event creation time. + So, if you have multiple source agents that may create many events in a run, you may want to either increase `events_to_show` to have a larger "window", or specify the `events_order` option to an appropriate value (like `date_published`) so events from various sources are properly mixed in the resulted feed. + + There is also an option `events_list_order` to control the order of events listed in the output, with the same format as `events_order`. It is defaulted to `#{Utils.jsonify(DEFAULT_EVENTS_ORDER['events_list_order'])}` so the latest entry is listed first. # Liquid Templating @@ -175,7 +180,16 @@ module Agents interpolated['push_hubs'].presence || [] end - def sorted_events(reload = false) + DEFAULT_EVENTS_ORDER = { + 'events_order' => nil, + 'events_list_order' => [["{{_index_}}", "number", true]], + } + + def events_order(key = SortableEvents::EVENTS_ORDER_KEY) + super || DEFAULT_EVENTS_ORDER[key] + end + + def latest_events(reload = false) events = if (event_ids = memory[:event_ids]) && memory[:events_order] == events_order && @@ -229,7 +243,7 @@ module Agents end end - source_events = sorted_events() + source_events = sort_events(latest_events(), 'events_list_order') interpolation_context.stack do interpolation_context['events'] = source_events @@ -297,7 +311,7 @@ module Agents url = feed_url(secret: interpolated['secrets'].first, format: :xml) # Reload new events and update cache - sorted_events(true) + latest_events(true) push_hubs.each do |hub| push_to_hub(hub, url) diff --git a/db/migrate/20160607055850_change_events_order_to_events_list_order.rb b/db/migrate/20160607055850_change_events_order_to_events_list_order.rb new file mode 100644 index 00000000..b1313ba9 --- /dev/null +++ b/db/migrate/20160607055850_change_events_order_to_events_list_order.rb @@ -0,0 +1,25 @@ +class ChangeEventsOrderToEventsListOrder < ActiveRecord::Migration + def up + Agents::DataOutputAgent.find_each do |agent| + if value = agent.options.delete('events_order') + agent.options['events_list_order'] = value + agent.save!(validate: false) + end + end + end + + def down + Agents::DataOutputAgent.transaction do + Agents::DataOutputAgent.find_each do |agent| + if agent.options['events_order'] + raise ActiveRecord::IrreversibleMigration, "Cannot revert migration because events_order is configured" + end + + if value = agent.options.delete('events_list_order') + agent.options['events_order'] = value + agent.save!(validate: false) + end + end + end + end +end diff --git a/spec/models/agents/data_output_agent_spec.rb b/spec/models/agents/data_output_agent_spec.rb index 19487f57..bfce75bf 100644 --- a/spec/models/agents/data_output_agent_spec.rb +++ b/spec/models/agents/data_output_agent_spec.rb @@ -142,7 +142,7 @@ describe Agents::DataOutputAgent do "url" => "http://imgs.xkcd.com/comics/evolving0.png", "title" => "Evolving yet again with a past date", "date" => '2014/05/05', - "hovertext" => "Something else" + "hovertext" => "A small text" } end @@ -166,7 +166,7 @@ describe Agents::DataOutputAgent do Evolving yet again with a past date - Secret hovertext: Something else + Secret hovertext: A small text http://imgs.xkcd.com/comics/evolving0.png #{Time.zone.parse(event3.payload['date']).rfc2822} #{event3.id} @@ -216,7 +216,7 @@ describe Agents::DataOutputAgent do 'items' => [ { 'title' => 'Evolving yet again with a past date', - 'description' => 'Secret hovertext: Something else', + 'description' => 'Secret hovertext: A small text', 'link' => 'http://imgs.xkcd.com/comics/evolving0.png', 'guid' => {"contents" => event3.id, "isPermaLink" => "false"}, 'pubDate' => Time.zone.parse(event3.payload['date']).rfc2822, @@ -244,20 +244,20 @@ describe Agents::DataOutputAgent do describe 'ordering' do before do - agent.options['events_order'] = ['{{title}}'] + agent.options['events_order'] = ['{{hovertext}}'] + agent.options['events_list_order'] = ['{{title}}'] end it 'can reorder the events_to_show last events based on a Liquid expression' do - # Check that ordering takes place before limiting, not after agent.options['events_to_show'] = 2 asc_content, _status, _content_type = agent.receive_web_request({ 'secret' => 'secret2' }, 'get', 'application/json') - expect(asc_content['items'].map {|i| i["title"] }).to eq(["Evolving again", "Evolving yet again with a past date"]) + expect(asc_content['items'].map {|i| i["title"] }).to eq(["Evolving", "Evolving again"]) agent.options['events_to_show'] = 40 asc_content, _status, _content_type = agent.receive_web_request({ 'secret' => 'secret2' }, 'get', 'application/json') expect(asc_content['items'].map {|i| i["title"] }).to eq(["Evolving", "Evolving again", "Evolving yet again with a past date"]) - agent.options['events_order'] = [['{{title}}', 'string', true]] + agent.options['events_list_order'] = [['{{title}}', 'string', true]] desc_content, _status, _content_type = agent.receive_web_request({ 'secret' => 'secret2' }, 'get', 'application/json') expect(desc_content['items']).to eq(asc_content['items'].reverse) From 63c35c0832e116bee7e90f5d9d72d97eb0cfaff9 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Tue, 7 Jun 2016 19:20:05 +0900 Subject: [PATCH 5/7] Update RssAgent#events_order --- app/models/agents/rss_agent.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/app/models/agents/rss_agent.rb b/app/models/agents/rss_agent.rb index 8a038277..7386cee2 100644 --- a/app/models/agents/rss_agent.rb +++ b/app/models/agents/rss_agent.rb @@ -82,8 +82,12 @@ module Agents validate_events_order end - def events_order - super.presence || DEFAULT_EVENTS_ORDER + def events_order(key = SortableEvents::EVENTS_ORDER_KEY) + if key == SortableEvents::EVENTS_ORDER_KEY + super.presence || DEFAULT_EVENTS_ORDER + else + raise ArgumentError, "unsupported key: #{key}" + end end def check From cc8e1bb9ccaff50a4b3d5cb933abe386f6799bf2 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Mon, 25 Jul 2016 17:46:00 +0900 Subject: [PATCH 6/7] Remove an unnecessary call to save() --- app/models/agents/data_output_agent.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/app/models/agents/data_output_agent.rb b/app/models/agents/data_output_agent.rb index ecf2ba07..6ba7ca60 100644 --- a/app/models/agents/data_output_agent.rb +++ b/app/models/agents/data_output_agent.rb @@ -228,7 +228,6 @@ module Agents if reload memory[:event_ids] = events.map(&:id) - save end events From 26a11afb2410d1394cb5eb6ba788ea70ae2e69a0 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Tue, 26 Jul 2016 21:11:29 +0900 Subject: [PATCH 7/7] Rephrase the description of `events_list_order` as suggested by @cantino --- app/models/agents/data_output_agent.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/agents/data_output_agent.rb b/app/models/agents/data_output_agent.rb index 6ba7ca60..e7027b7e 100644 --- a/app/models/agents/data_output_agent.rb +++ b/app/models/agents/data_output_agent.rb @@ -52,7 +52,7 @@ module Agents DataOutputAgent will select the last `events_to_show` entries of its received events sorted in the order specified by `events_order`, which is defaulted to the event creation time. So, if you have multiple source agents that may create many events in a run, you may want to either increase `events_to_show` to have a larger "window", or specify the `events_order` option to an appropriate value (like `date_published`) so events from various sources are properly mixed in the resulted feed. - There is also an option `events_list_order` to control the order of events listed in the output, with the same format as `events_order`. It is defaulted to `#{Utils.jsonify(DEFAULT_EVENTS_ORDER['events_list_order'])}` so the latest entry is listed first. + There is also an option `events_list_order` that only controls the order of events listed in the final output, without attempting to maintain a total order of received events. It has the same format as `events_order` and is defaulted to `#{Utils.jsonify(DEFAULT_EVENTS_ORDER['events_list_order'])}` so the selected events are listed in reverse order like most popular RSS feeds list their articles. # Liquid Templating