Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use device detector to filter bots #277

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
language: ruby
before_install:
- gem install bundler
- gem install bundler -v 1.17.3
before_script:
- cd tests/test_app
- bundle exec rails g impressionist -f
Expand Down
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ platforms :jruby do
end

platforms :ruby, :mswin, :mingw do
gem 'sqlite3'
gem 'sqlite3', '~> 1.3.13'
end

group :test do
Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ reporting yet.. this thingy just creates the data.

What about bots?
----------------
They are ignored. 1200 known bots have been added to the ignore list as of
February 1, 2011. Impressionist uses this list:
http://www.user-agents.org/allagents.xml
They are ignored, using the user agent database from [Device Detector](https://rubygems.org/gems/device_detector).

https://github.com/podigee/device_detector/blob/develop/regexes/bots.yml

You can also print the list of identified bots by running `rake impressionist:bots`.

Installation
------------
Expand Down
2 changes: 1 addition & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ task :default => [:test, :test_app]
namespace :impressionist do
require File.dirname(__FILE__) + "/lib/impressionist/bots"

desc "output the list of bots from http://www.user-agents.org/"
desc "output the list of bots from https://github.com/podigee/device_detector"
task :bots do
p Impressionist::Bots.consume
end
Expand Down
1,462 changes: 3 additions & 1,459 deletions app/models/impressionist/bots.rb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion gemfiles/rails32.gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ platforms :jruby do
end

platforms :ruby, :mswin, :mingw do
gem 'sqlite3'
gem 'sqlite3', '~> 1.3.13'
end

group :test do
Expand Down
2 changes: 1 addition & 1 deletion gemfiles/rails40.gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ platforms :jruby do
end

platforms :ruby, :mswin, :mingw do
gem 'sqlite3'
gem 'sqlite3', '~> 1.3.13'
end

group :test do
Expand Down
2 changes: 1 addition & 1 deletion impressionist.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ Gem::Specification.new do |s|
s.require_path = 'lib'
s.required_rubygems_version = Gem::Requirement.new('>= 1.3.6') if s.respond_to? :required_rubygems_version=

s.add_dependency 'nokogiri', RUBY_VERSION < '2.1.0' ? '~> 1.6.0' : '~> 1'
s.add_dependency 'device_detector', '~> 1.0'
s.add_development_dependency 'bundler', '~> 1.0'
end
23 changes: 14 additions & 9 deletions lib/impressionist/bots.rb
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
require 'timeout'
require 'net/http'
require 'nokogiri'
require 'yaml'

module Impressionist
module Bots
LIST_URL = "http://www.user-agents.org/allagents.xml"
LIST_URL = "https://raw.githubusercontent.com/podigee/device_detector/develop/regexes/bots.yml"
def self.consume
Timeout.timeout(4) do
response = Net::HTTP.get(URI.parse(LIST_URL))
doc = Nokogiri::XML(response)
list = []
doc.xpath('//user-agent').each do |agent|
type = agent.xpath("Type").text
list << agent.xpath("String").text.gsub("&lt;","<") if ["R","S"].include?(type) #gsub hack for badly formatted data
end
list
list = YAML.safe_load(response)
puts list.map { |entry| entry_details(entry) }
end
end

# Builds the printable description of a single bot entry.
#
# Reads the 'name', 'regex' and (optional) 'producer' keys of +entry+.
# The returned text always contains the "Agent:" and "Regex:" lines; a
# "Producer:" line (without a trailing newline) is appended only when the
# entry has a truthy 'producer', whose 'name' and 'url' keys are then read.
#
# @param entry [Hash] one parsed list entry, keyed by strings
# @return [String] the formatted description
def self.entry_details(entry)
  maker = entry['producer']
  pieces = ["\nAgent: #{entry['name']}\n", "Regex: #{entry['regex']}\n"]
  pieces << "Producer: #{maker['name']} - #{maker['url']}" if maker
  pieces.join
end
end
end
2 changes: 1 addition & 1 deletion tests/test_app/Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ end

platforms :ruby, :mswin, :mingw do
gem 'pg'
gem 'sqlite3'
gem 'sqlite3', '~> 1.3.13'
gem 'mysql2'
gem 'mongoid', '~> 3.1'
gem 'bson_ext'
Expand Down
7 changes: 4 additions & 3 deletions tests/test_app/spec/models/bots_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

describe "self.bot?" do
it "is true if user_agent is matches wild card" do
Impressionist::Bots.bot?("google.com bot").should be_true
Impressionist::Bots.bot?("bot").should be_true
Impressionist::Bots.bot?("crawler").should be_true
end

it "is true if user_agent is on bot list" do
Impressionist::Bots.bot?("A-Online Search").should be_true
Impressionist::Bots.bot?("acoonbot").should be_true
end

it "is false if user_agent is blank" do
Expand All @@ -24,4 +25,4 @@
Impressionist::Bots.bot?.should be_false
end
end
end
end