mirror of
https://github.com/Fishwaldo/huginn.git
synced 2025-03-15 19:31:26 +00:00
Rss agent dynamic cleanup (#1733)
* Remove fixed limit of 500 IDs used to figure out which entries are new. Instead each ID that is checked against the seen_ids gets moved to the top of the list. IDs that are no longer used in the RSS Feed will end up at the bottom of the seen_ids list and will be removed. * remove uncommented code line * fix undefined method `count' for nil:NilClass, in case the first fetch of an rss feed failed or fetched empty rss feed. changed spec to verify the deletion of old ids by keeping all current ones, not by fixed limit * revert changes for dynamic cleanup. Instead made the limit of stored ids configurable. (key: max_ids) Default limit will remain 500. * fixed error message. replaced repeated function call with single call specifying amount. using options['max_ids'] instead of interpolated['max_ids'] to retrieve setting. * Rename to remembered_id_count and fix spec * Update rss_agent.rb fixed indentation
This commit is contained in:
parent
81324ebad4
commit
e3f79bf84b
2 changed files with 38 additions and 2 deletions
|
@ -31,6 +31,7 @@ module Agents
|
|||
* `force_encoding` - Set `force_encoding` to an encoding name if the website is known to respond with a missing, invalid or wrong charset in the Content-Type header. Note that a text content without a charset is taken as encoded in UTF-8 (not ISO-8859-1).
|
||||
* `user_agent` - A custom User-Agent name (default: "Faraday v#{Faraday::VERSION}").
|
||||
* `max_events_per_run` - Limit number of events created (items parsed) per run for feed.
|
||||
* `remembered_id_count` - Number of IDs to keep track of and avoid re-emitting (default: 500).
|
||||
|
||||
# Ordering Events
|
||||
|
||||
|
@ -133,6 +134,10 @@ module Agents
|
|||
errors.add(:base, "Please provide 'expected_update_period_in_days' to indicate how many days can pass without an update before this Agent is considered to not be working")
|
||||
end
|
||||
|
||||
if options['remembered_id_count'].present? && options['remembered_id_count'].to_i < 1
|
||||
errors.add(:base, "Please provide 'remembered_id_count' as a number bigger than 0 indicating how many IDs should be saved to distinguish between new and old IDs in RSS feeds. Delete option to use default (500).")
|
||||
end
|
||||
|
||||
validate_web_request_options!
|
||||
validate_events_order
|
||||
end
|
||||
|
@ -177,13 +182,17 @@ module Agents
|
|||
log "Fetched #{urls.to_sentence} and created #{events.size} event(s)."
|
||||
end
|
||||
|
||||
# Returns the 'remembered_id_count' option as an Integer.
#
# Falls back to 500 when the option is absent or blank.
def remembered_id_count
  configured = options['remembered_id_count']
  limit = configured.present? ? configured : 500
  limit.to_i
end
|
||||
|
||||
# Records +entry_id+ in memory['seen_ids'] and reports whether it was new.
#
# New IDs are pushed onto the front of the list; once the list grows past
# +remembered_id_count+ the surplus entries are dropped from the tail in a
# single call. The limit comes solely from +remembered_id_count+ -- there is
# no additional hard-coded cap of 500, which would otherwise override any
# configured limit larger than 500.
#
# Returns true when +entry_id+ had not been seen before (and is now
# tracked), false when it was already present.
def check_and_track(entry_id)
  memory['seen_ids'] ||= []
  seen_ids = memory['seen_ids']
  return false if seen_ids.include?(entry_id)

  seen_ids.unshift(entry_id)
  # Also shrinks an over-long list after the configured limit was lowered.
  surplus = seen_ids.length - remembered_id_count
  seen_ids.pop(surplus) if surplus > 0
  true
end
|
||||
|
|
|
@ -176,11 +176,38 @@ describe Agents::RssAgent do
|
|||
expect(agent.memory['seen_ids'][0]).to eq(newest_id)
|
||||
end
|
||||
|
||||
it "should truncate the seen_ids in memory at 500 items" do
|
||||
it "should truncate the seen_ids in memory at 500 items per default" do
|
||||
agent.memory['seen_ids'] = ['x'] * 490
|
||||
agent.check
|
||||
expect(agent.memory['seen_ids'].length).to eq(500)
|
||||
end
|
||||
|
||||
it "should truncate the seen_ids in memory at amount of items configured in options" do
|
||||
agent.options['remembered_id_count'] = "600"
|
||||
agent.memory['seen_ids'] = ['x'] * 590
|
||||
agent.check
|
||||
expect(agent.memory['seen_ids'].length).to eq(600)
|
||||
end
|
||||
|
||||
it "should truncate the seen_ids after configuring a lower limit of items when check is executed" do
|
||||
agent.memory['seen_ids'] = ['x'] * 600
|
||||
agent.options['remembered_id_count'] = "400"
|
||||
expect(agent.memory['seen_ids'].length).to eq(600)
|
||||
agent.check
|
||||
expect(agent.memory['seen_ids'].length).to eq(400)
|
||||
end
|
||||
|
||||
it "should truncate the seen_ids at default after removing custom limit" do
|
||||
agent.options['remembered_id_count'] = "600"
|
||||
agent.memory['seen_ids'] = ['x'] * 590
|
||||
agent.check
|
||||
expect(agent.memory['seen_ids'].length).to eq(600)
|
||||
|
||||
agent.options.delete('remembered_id_count')
|
||||
agent.memory['seen_ids'] = ['x'] * 590
|
||||
agent.check
|
||||
expect(agent.memory['seen_ids'].length).to eq(500)
|
||||
end
|
||||
|
||||
it "should support an array of URLs" do
|
||||
agent.options['url'] = ["https://github.com/cantino/huginn/commits/master.atom", "http://feeds.feedburner.com/SlickdealsnetFP?format=atom"]
|
||||
|
|
Loading…
Add table
Reference in a new issue