-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfarewellquotes-crawler.rb
executable file
·124 lines (102 loc) · 3.21 KB
/
farewellquotes-crawler.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env ruby
require "rubygems"
require "net/http"
require "uri"
require 'nokogiri'
require 'open-uri'
require 'optparse'
# Pool of contributor names; SosMessageClient picks one at random for every
# message it posts. Frozen so the shared list cannot be mutated at runtime.
CONTRIBUTORS = %w[
  Brandon Brenda Kelly Steve Andrea Dylan David Scott
  Donna Cindy Jim Nat Jesse Clare Valerie Ray
  Carly Noah Janet Matt Gina Michael Billy Jane
  Alison Jake Amanda Kev Allan Ajoy Chris
].freeze
# Scrapes candidate messages from a web page.
#
# The page at +url+ is fetched and parsed with Nokogiri. The stripped text of
# every element matching MESSAGE_SELECTOR is a candidate; candidates are then
# filtered by length and by their first character.
class MessageParser
  # Candidates with fewer characters than this are discarded.
  MIN_CHARACTERS = 10

  # CSS selector of the elements whose text is collected.
  MESSAGE_SELECTOR = '.entry-content p, .entry-content h3'

  attr_accessor :url, :max_characters

  # @param url [String] address of the page to scrape
  # @param max_characters [Integer] longest message (inclusive) that is kept
  def initialize(url, max_characters)
    @url = url
    @max_characters = max_characters
  end

  # Downloads the page and returns the messages that pass the filters.
  # Progress is reported on standard output.
  #
  # @return [Array<String>] the kept messages, in document order
  def parse
    puts "Retrieving messages from..."
    # URI.open rather than Kernel#open: since Ruby 3.0 Kernel#open no longer
    # fetches URLs (it would look for a local file), and it would also run a
    # command if the argument started with "|".
    doc = Nokogiri::HTML(URI.open(url))
    messages = doc.css(MESSAGE_SELECTOR)
                  .map { |elt| elt.content.strip }
                  .select { |text| keep?(text) }
    puts "#{messages.length} messages found."
    messages
  end

  # @return [String] one-line description of the parser settings
  def to_s
    "URL: #{@url}, max characters: #{@max_characters}"
  end

  private

  # A candidate is kept when its length lies within
  # MIN_CHARACTERS..max_characters and it does not start with "~".
  def keep?(text)
    text.length.between?(MIN_CHARACTERS, @max_characters) && !text.start_with?('~')
  end
end
# Posts messages to one category of a SosMessage API server.
class SosMessageClient
  attr_accessor :sosmessage_url, :category_id, :post_url

  # @param sosmessage_url [String] base URL of the API; it is concatenated
  #   as-is, so a trailing slash produces a double slash in the endpoint
  # @param category_id [String] id of the category that receives the messages
  def initialize(sosmessage_url, category_id)
    @sosmessage_url = sosmessage_url
    @category_id = category_id
    # Computed once here; later writes to sosmessage_url or category_id do
    # not update it.
    @post_url = "#{@sosmessage_url}/api/v2/categories/#{@category_id}/message"
  end

  # Sends each message as a form POST to the category endpoint, signing it
  # with a random name from CONTRIBUTORS, and prints how many were accepted.
  #
  # @param messages [Array<String>] texts to post
  # @return [nil]
  def postMessages(messages)
    puts "Posting #{messages.length} messages to #{@post_url} ..."
    uri = URI.parse(@post_url)
    messages_posted = messages.count do |message|
      form = { "text" => message, "contributorName" => CONTRIBUTORS.sample }
      # Any 2xx reply counts as posted; a create endpoint may legitimately
      # answer 201 or 204 rather than exactly 200.
      Net::HTTP.post_form(uri, form).is_a?(Net::HTTPSuccess)
    end
    puts "#{messages_posted} messages successfully posted."
  end
  # Ruby-style name; the camelCase original is kept for existing callers.
  alias post_messages postMessages

  # @return [String] one-line description of the endpoint in use
  def to_s
    "SosMessage API URL: #{@post_url}"
  end
end
# Command-line entry point: parse the options, scrape the messages page, then
# either print the messages (--dry-run) or post them to the SosMessage API.

# Defaults for every option; overwritten by the handlers below.
options = {
  categoryid: nil,
  sosmessageurl: 'http://localhost:3000',
  messagesurl: nil,
  maxcharacters: 5000,
  dryrun: false
}

optparse = OptionParser.new do |opts|
  opts.banner = 'Usage: farewellquotes-crawler.rb [options]'

  opts.on('-c', '--category-id CATEGORY_ID', 'The category id where to post the quotes') do |category|
    options[:categoryid] = category
  end

  opts.on('-s', '--sosmessage-url URL', 'The SosMessage API url') do |s|
    options[:sosmessageurl] = s
  end

  opts.on('-u', '--messages-url URL', 'The farewell-quotes category url') do |u|
    options[:messagesurl] = u
  end

  opts.on('-m', '--max-characters MAX', Integer, 'MAX characters of the quote') do |max_characters|
    options[:maxcharacters] = max_characters
  end

  opts.on('-n', '--dry-run', "Don't actually post the messages, only display them") do |dry_run|
    options[:dryrun] = dry_run
  end

  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end

# Report bad or incomplete options with the usage text instead of a backtrace.
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  warn e.message
  warn optparse.to_s
  exit 1
end

# Without a page to scrape there is nothing to do: show the usage and stop.
unless options[:messagesurl]
  puts optparse
  exit
end

# Previously this combination fell through both branches and the script ended
# silently without scraping or posting anything.
unless options[:dryrun] || options[:categoryid]
  warn 'Nothing to do: pass --category-id to post the messages or --dry-run to display them.'
  warn optparse.to_s
  exit 1
end

messages = MessageParser.new(options[:messagesurl], options[:maxcharacters]).parse

if options[:dryrun]
  # --dry-run wins over --category-id: only display the messages.
  messages.each do |message|
    puts message
    puts ""
    puts "=========="
    puts ""
  end
else
  client = SosMessageClient.new(options[:sosmessageurl], options[:categoryid])
  client.postMessages(messages)
end