From 1b977204681f49d042cddf55d95e97497f0963cd Mon Sep 17 00:00:00 2001 From: Peter Kieltyka Date: Thu, 10 Nov 2011 16:04:05 -0500 Subject: [PATCH] Add new http cache support, and some fixes --- Gemfile | 2 +- Gemfile.lock | 18 ++--- examples/basic.rb | 4 +- examples/basic_fibered.rb | 4 +- examples/concurrent.rb | 81 +++++++++++++++++++ lib/uber-s3/object.rb | 6 +- lib/uber-s3/operation/object/cache_control.rb | 20 ----- lib/uber-s3/operation/object/expires.rb | 20 ----- lib/uber-s3/operation/object/http_cache.rb | 37 +++++++++ lib/uber-s3/response.rb | 4 +- lib/uber-s3/version.rb | 2 +- research/fiber-http.rb | 54 +++++++++++++ research/fiber-test.rb | 20 +++++ research/iterate/Gemfile | 4 +- research/iterate/Gemfile.lock | 22 ++--- research/iterate/iterate.rb | 49 +++++++---- research/iterate/oiterate.rb | 72 +++++++++++++++++ .../{settings.yml => settings.yml.sample} | 0 uber-s3.gemspec | 2 +- 19 files changed, 335 insertions(+), 86 deletions(-) create mode 100644 examples/concurrent.rb delete mode 100644 lib/uber-s3/operation/object/cache_control.rb delete mode 100644 lib/uber-s3/operation/object/expires.rb create mode 100644 lib/uber-s3/operation/object/http_cache.rb create mode 100644 research/fiber-http.rb create mode 100644 research/fiber-test.rb create mode 100644 research/iterate/oiterate.rb rename spec/config/{settings.yml => settings.yml.sample} (100%) diff --git a/Gemfile b/Gemfile index 19e8037..6caacc4 100644 --- a/Gemfile +++ b/Gemfile @@ -6,7 +6,7 @@ group :development, :test do gem 'ruby-debug', :platforms => :mri_18 gem 'ruby-debug19', :platforms => :mri_19 - gem 'eventmachine', '1.0.0.beta.3' + gem 'eventmachine', '1.0.0.beta.4' gem 'em-http-request', :git => 'git://github.com/igrigorik/em-http-request.git' gem 'em-synchrony', :git => 'git://github.com/igrigorik/em-synchrony.git' diff --git a/Gemfile.lock b/Gemfile.lock index 41c53af..da922e7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: 
git://github.com/igrigorik/em-http-request.git - revision: a1623d48a4edb57c58e76369123c7b9bd517a29a + revision: 1a4123d36a298e8043482ad7b20cb18dfbc2616b specs: em-http-request (1.0.0) addressable (>= 2.2.3) @@ -11,7 +11,7 @@ GIT GIT remote: git://github.com/igrigorik/em-synchrony.git - revision: 39954edecdca6078cb18cea890558339122bd2e1 + revision: f07b2a139ba05a1eaf3559541f3584c73afe05b2 specs: em-synchrony (1.0.0) eventmachine (>= 1.0.0.beta.1) @@ -19,8 +19,8 @@ GIT PATH remote: . specs: - uber-s3 (0.1.3) - mime-types (~> 1.16) + uber-s3 (0.1.5) + mime-types (~> 1.17) GEM remote: http://rubygems.org/ @@ -32,16 +32,16 @@ GEM diff-lcs (1.1.3) em-socksify (0.1.0) eventmachine - eventmachine (1.0.0.beta.3) - eventmachine (1.0.0.beta.3-java) + eventmachine (1.0.0.beta.4) + eventmachine (1.0.0.beta.4-java) http_parser.rb (0.5.3) http_parser.rb (0.5.3-java) linecache (0.46) rbx-require-relative (> 0.0.4) linecache19 (0.5.12) ruby_core_source (>= 0.1.4) - mime-types (1.16) - rake (0.9.2) + mime-types (1.17.2) + rake (0.9.2.2) rbx-require-relative (0.0.5) rspec (2.7.0) rspec-core (~> 2.7.0) @@ -74,7 +74,7 @@ PLATFORMS DEPENDENCIES em-http-request! em-synchrony! 
- eventmachine (= 1.0.0.beta.3) + eventmachine (= 1.0.0.beta.4) rake rspec (~> 2.7.0) ruby-debug diff --git a/examples/basic.rb b/examples/basic.rb index 907958c..8318ded 100644 --- a/examples/basic.rb +++ b/examples/basic.rb @@ -7,8 +7,8 @@ require 'uber-s3' s3 = UberS3.new({ - :access_key => 'AKIAIIG7K3KMY5BPW7TA', - :secret_access_key => 'BnWNYdKSyEkkrhlkFgPvQ+bVW8J3XEDj3+6ocWCm', + :access_key => 'x', + :secret_access_key => 'y', :bucket => 'nutestbucket', :adapter => :net_http }) diff --git a/examples/basic_fibered.rb b/examples/basic_fibered.rb index 60f93fc..32a2b9d 100644 --- a/examples/basic_fibered.rb +++ b/examples/basic_fibered.rb @@ -11,8 +11,8 @@ require 'uber-s3' s3 = UberS3.new({ - :access_key => 'AKIAIIG7K3KMY5BPW7TA', - :secret_access_key => 'BnWNYdKSyEkkrhlkFgPvQ+bVW8J3XEDj3+6ocWCm', + :access_key => 'x', + :secret_access_key => 'y', :bucket => 'nutestbucket', :adapter => :em_http_fibered }) diff --git a/examples/concurrent.rb b/examples/concurrent.rb new file mode 100644 index 0000000..629bac8 --- /dev/null +++ b/examples/concurrent.rb @@ -0,0 +1,81 @@ +#!/usr/bin/env ruby +$:<< '../lib' << 'lib' +############################################################################### +# +############################################################################### + +# require 'uber-s3' +# +# s3 = UberS3.new({ +# :access_key => 'x', +# :secret_access_key => 'y', +# :bucket => 'nutestbucket', +# :adapter => :em_http_fibered, +# :concurrency => 100 +# }) + +require 'eventmachine' +require 'em-http' +require 'em-synchrony' +require 'em-synchrony/em-http' + +require 'ruby-debug' + +CONCURRENCY = 10 + +EM.run do + pool = EM::Synchrony::ConnectionPool.new(:size => CONCURRENCY) do + EM::HttpRequest.new('http://www.google.ca/') + end + + p = { :keepalive => true } + num = 40 + counter = num + + num.times do + Fiber.new { + x = pool.get(p) + puts "GOT IT #{x.response.length}" + counter -= 1 + + EM.stop if counter <= 0 + }.resume + end + +end + + + +__END__ 
+############################################################################### +# This example demonstrates the combined power of eventmachine+fibers. +# Notice how the example code is exactly the same as basic.rb +# except instead it's using a non-blocking http client.. boom. +# +# Btw, make sure to bundle up first... +############################################################################### + +require 'uber-s3' + +s3 = UberS3.new({ + :access_key => 'x', + :secret_access_key => 'y', + :bucket => 'nutestbucket', + :adapter => :em_http_fibered +}) + +x = Proc.new do + # Traverse all objects in the bucket -- beware :) + # This will only load the keys and basic info of the object, not the data + s3.objects('/').each do |obj| + puts obj + # puts obj.value # this will actually fetch the data on demand + end +end + +EM.run do + Fiber.new { + x.call + EM.stop + }.resume +end diff --git a/lib/uber-s3/object.rb b/lib/uber-s3/object.rb index c78e2a2..7c06321 100644 --- a/lib/uber-s3/object.rb +++ b/lib/uber-s3/object.rb @@ -1,12 +1,11 @@ class UberS3 class Object include Operation::Object::AccessPolicy - include Operation::Object::CacheControl include Operation::Object::ContentDisposition include Operation::Object::ContentEncoding include Operation::Object::ContentMd5 include Operation::Object::ContentType - include Operation::Object::Expires + include Operation::Object::HttpCache include Operation::Object::Meta include Operation::Object::StorageClass @@ -44,12 +43,13 @@ def save gzip_content! # Standard pass through values - headers['Cache-Control'] = cache_control headers['Content-Disposition'] = content_disposition headers['Content-Encoding'] = content_encoding headers['Content-Length'] = size.to_s headers['Content-Type'] = content_type + headers['Cache-Control'] = cache_control headers['Expires'] = expires + headers['Pragma'] = pragma headers.each {|k,v| headers.delete(k) if v.nil? || v.empty? 
} diff --git a/lib/uber-s3/operation/object/cache_control.rb b/lib/uber-s3/operation/object/cache_control.rb deleted file mode 100644 index 59f000d..0000000 --- a/lib/uber-s3/operation/object/cache_control.rb +++ /dev/null @@ -1,20 +0,0 @@ -module UberS3::Operation::Object - module CacheControl - - def self.included(base) - base.send :extend, ClassMethods - base.send :include, InstanceMethods - - base.instance_eval do - attr_accessor :cache_control - end - end - - module ClassMethods - end - - module InstanceMethods - end - - end -end \ No newline at end of file diff --git a/lib/uber-s3/operation/object/expires.rb b/lib/uber-s3/operation/object/expires.rb deleted file mode 100644 index 05a5606..0000000 --- a/lib/uber-s3/operation/object/expires.rb +++ /dev/null @@ -1,20 +0,0 @@ -module UberS3::Operation::Object - module Expires - - def self.included(base) - base.send :extend, ClassMethods - base.send :include, InstanceMethods - - base.instance_eval do - attr_accessor :expires - end - end - - module ClassMethods - end - - module InstanceMethods - end - - end -end \ No newline at end of file diff --git a/lib/uber-s3/operation/object/http_cache.rb b/lib/uber-s3/operation/object/http_cache.rb new file mode 100644 index 0000000..eb0775b --- /dev/null +++ b/lib/uber-s3/operation/object/http_cache.rb @@ -0,0 +1,37 @@ +module UberS3::Operation::Object + module HttpCache + + def self.included(base) + base.send :extend, ClassMethods + base.send :include, InstanceMethods + + base.instance_eval do + attr_accessor :cache_control, :pragma # expires/ttl writers live in InstanceMethods; defining them here would shadow them + end + end + + module ClassMethods + end + + module InstanceMethods + attr_reader :expires, :ttl # readers only; the custom writers are defined below + # Helper method that will set the max-age for cache-control + def ttl=(seconds) + @ttl = seconds + self.cache_control = "public,max-age=#{seconds}" + end + + # Expires takes a Time (stored as an RFC 1123 GMT date string) or any preformatted value (stored as-is) + def expires=(val) + if val.is_a?(Time) + # RFC 1123 format: HTTP dates are always GMT; getutc avoids mutating the caller's Time + @expires = val.getutc.strftime("%a, %d %b %Y %H:%M:%S GMT") + else + @expires = val 
+ end + end + + end + + end +end diff --git a/lib/uber-s3/response.rb b/lib/uber-s3/response.rb index 35eb135..7f424ea 100644 --- a/lib/uber-s3/response.rb +++ b/lib/uber-s3/response.rb @@ -13,14 +13,14 @@ def initialize(options={}) self.body = options[:body] self.raw = options[:raw] - check_for_errors! + success? end # TODO: can/should we normalize the keys..? downcase.. etc.? # def header=(header) # end - def check_for_errors! + def success? return if status < 400 || body.to_s.empty? # Errors are XML diff --git a/lib/uber-s3/version.rb b/lib/uber-s3/version.rb index 6d1ad26..f3f2f26 100644 --- a/lib/uber-s3/version.rb +++ b/lib/uber-s3/version.rb @@ -1,3 +1,3 @@ class UberS3 - VERSION = '0.1.4' + VERSION = '0.1.5' end diff --git a/research/fiber-http.rb b/research/fiber-http.rb new file mode 100644 index 0000000..8a3623c --- /dev/null +++ b/research/fiber-http.rb @@ -0,0 +1,54 @@ +require 'bundler' +Bundler.setup + +require 'ruby-debug' +require 'eventmachine' +require 'em-http-request' +require 'fiber' + + +def http_get(url) + f = Fiber.current + + req = EM::HttpRequest.new(url).get + req.callback { f.resume(req) } + req.errback { f.resume(req) } + + Fiber.yield +end + +EM.run do + urls = [ + ['http://nulayer.com/', 'http://twitter.com/#!/nupeter/'], + ['http://google.ca/', 'http://twitter.com/#!/jeffbrenner/'], + ['http://facebook.com/', 'http://twitter.com/#!/nulayer/'], + ['http://data.crowdreel.com.s3.amazonaws.com/2011/05/17-02/1f55685cc1e9c91d80079a49485c718b1f53d97f.jpg', 'http://twitter.com/#!/jack/'], + ['http://data.crowdreel.com.s3.amazonaws.com/2011/05/17-02/54acc4f94b81268982aab94e0d273693962741ab.jpg', 'http://twitter.com/#!/ev/'], + ['http://data.crowdreel.com.s3.amazonaws.com/2011/05/17-02/66ca6cb906f1b517cc576477b3d69e155dc21284.jpg', 'http://twitter.com/#!/presslyapp/'], + ['http://data.crowdreel.com.s3.amazonaws.com/2011/05/17-02/cfeb066a4f0ba75a2081898a6c750d371aafd7e9.jpg', 'http://twitter.com/#!/crpwdreel/'] + ] + + puts "init" + + 
Fiber.new { + # url = 'http://nulayer.com/' + # ret = http_get(url) + # puts ret.response_header.status.to_s+" from #{url}" + + urls.each do |url| + Fiber.new { + puts "A start: #{url[0]}" + ret = http_get(url[0]) + puts "A done: #{url[0]} -- #{ret.response_header.status.to_s}" + + Fiber.new { + puts "B start: #{url[1]}" + ret = http_get(url[1]) + puts "B done: #{url[1]} -- #{ret.response_header.status.to_s}" + }.resume + }.resume + end + }.resume + + puts "eof" +end diff --git a/research/fiber-test.rb b/research/fiber-test.rb new file mode 100644 index 0000000..63206a0 --- /dev/null +++ b/research/fiber-test.rb @@ -0,0 +1,20 @@ +require 'fiber' + +def woot + x = nil + + f = Fiber.new { |k| + puts k + y = Fiber.yield 1 + puts y + x = "hello" + 2 + } + + puts f.resume(5) + puts f.resume(6) + puts x + +end + +woot diff --git a/research/iterate/Gemfile b/research/iterate/Gemfile index 2b7d923..f406478 100644 --- a/research/iterate/Gemfile +++ b/research/iterate/Gemfile @@ -1,6 +1,8 @@ source :rubygems -gem 'eventmachine', '1.0.0.beta.3' +gem 'mime-types' + +gem 'eventmachine', '1.0.0.beta.4' gem 'em-http-request', :git => 'git://github.com/igrigorik/em-http-request.git' gem 'em-synchrony', :git => 'git://github.com/igrigorik/em-synchrony.git' diff --git a/research/iterate/Gemfile.lock b/research/iterate/Gemfile.lock index ffbcd88..272ac49 100644 --- a/research/iterate/Gemfile.lock +++ b/research/iterate/Gemfile.lock @@ -1,18 +1,19 @@ GIT remote: git://github.com/igrigorik/em-http-request.git - revision: c7848ff95a603c0d97b465ed1f2058178b3f78c0 + revision: a1623d48a4edb57c58e76369123c7b9bd517a29a specs: - em-http-request (1.0.0.beta.4) + em-http-request (1.0.0) addressable (>= 2.2.3) + cookiejar em-socksify eventmachine (>= 1.0.0.beta.3) - http_parser.rb (>= 0.5.1) + http_parser.rb (>= 0.5.3) GIT remote: git://github.com/igrigorik/em-synchrony.git - revision: 5c2aa0ec22509102a1e2fda659c1d2cea5cb6d5e + revision: 39954edecdca6078cb18cea890558339122bd2e1 specs: - 
em-synchrony (0.3.0.beta.1) + em-synchrony (1.0.0) eventmachine (>= 1.0.0.beta.1) GEM @@ -20,13 +21,15 @@ GEM specs: addressable (2.2.6) archive-tar-minitar (0.5.2) - columnize (0.3.3) + columnize (0.3.4) + cookiejar (0.3.0) em-socksify (0.1.0) eventmachine - eventmachine (1.0.0.beta.3) - http_parser.rb (0.5.1) + eventmachine (1.0.0.beta.4) + http_parser.rb (0.5.3) linecache19 (0.5.12) ruby_core_source (>= 0.1.4) + mime-types (1.16) ruby-debug-base19 (0.11.25) columnize (>= 0.3.1) linecache19 (>= 0.5.11) @@ -44,5 +47,6 @@ PLATFORMS DEPENDENCIES em-http-request! em-synchrony! - eventmachine (= 1.0.0.beta.3) + eventmachine (= 1.0.0.beta.4) + mime-types ruby-debug19 diff --git a/research/iterate/iterate.rb b/research/iterate/iterate.rb index 25366c5..9ba8588 100644 --- a/research/iterate/iterate.rb +++ b/research/iterate/iterate.rb @@ -10,9 +10,10 @@ require 'em-synchrony' require 'em-synchrony/em-http' -require 'fiber_pool' +# require 'fiber_pool' require 'uber-s3' +CONCURRENCY_LEVEL = 100 # ************* Main issue: # At a pool size of 50 or above .. this thing will crash within 15 seconds @@ -20,14 +21,15 @@ # no idea why .. perhaps my connection is tapped, and the queue increases # or file descriptor limit.. etc. eventually the EM deferred_status will # be :failed -@fiber_pool = FiberPool.new(100) +# @fiber_pool = FiberPool.new(CONCURRENCY_LEVEL) + EM.run do s3 = UberS3.new({ :access_key => 'x', :secret_access_key => 'y', - :bucket => 'bucket-goes-here', + :bucket => 'data.crowdreel.com', :adapter => :em_http_fibered }) @@ -50,23 +52,40 @@ end # We duplicate the list to make sure we have enough objects to iterate + counter = 0 (list*10000).each do |obj| - @fiber_pool.spawn do - x = obj.bucket.connection.head(obj.key) - - if x[:status] == 0 - puts "ERROR: we got 0 status.. weird.. 
here's the raw response" - # For more details, throw a debugger in here and look at x[:raw] closer - puts x[:raw].inspect - - exit - end + + if counter < CONCURRENCY_LEVEL + + Fiber.new { + # @fiber_pool.spawn do + x = obj.bucket.connection.head(obj.key) - puts obj.to_s + " -- #{x[:status]}" - @num += 1 if x + if x.status == 0 + puts "ERROR: we got 0 status.. weird.. here's the raw response" + # For more details, throw a debugger in here and look at x.raw closer + puts x.raw.inspect + + exit + end + + debugger + a = 1 + + puts obj.to_s + " -- #{x.status}" + @num += 1 if x + # end + }.resume end + + counter += 1 end }.resume end + +class FibeConcurrency + +end + diff --git a/research/iterate/oiterate.rb b/research/iterate/oiterate.rb new file mode 100644 index 0000000..0666b35 --- /dev/null +++ b/research/iterate/oiterate.rb @@ -0,0 +1,72 @@ +$:.unshift "." +$:.unshift "../../lib" + +require 'bundler' +Bundler.setup + +require 'ruby-debug' +require 'eventmachine' +require 'em-http' +require 'em-synchrony' +require 'em-synchrony/em-http' + +require 'fiber_pool' +require 'uber-s3' + + +# ************* Main issue: +# At a pool size of 50 or above .. this thing will crash within 15 seconds +# .. but at pool of 10 .. it will keep going and going.. +# no idea why .. perhaps my connection is tapped, and the queue increases +# or file descriptor limit.. etc. 
eventually the EM deferred_status will +# be :failed +@fiber_pool = FiberPool.new(50) + +EM.run do + + s3 = UberS3.new({ + :access_key => 'x', + :secret_access_key => 'y', + :bucket => 'data.crowdreel.com', + :adapter => :em_http_fibered + }) + + @num = 0 + @start = Time.now + + trap(:INT) do + finish = Time.now + puts "I'm outta here -- #{@num} -- #{finish - @start}" + exit + end + + Fiber.new { + + # Grab up to 50 objects (stop as soon as the 50th has been collected) + list = [] + s3.objects('/').each do |obj| + list << obj + break if list.length >= 50 + end + + # We duplicate the list to make sure we have enough objects to iterate + (list*10000).each do |obj| + @fiber_pool.spawn do + x = obj.bucket.connection.head(obj.key) + + if x.status == 0 + puts "ERROR: we got 0 status.. weird.. here's the raw response" + # For more details, throw a debugger in here and look at x.raw closer + puts x.raw.inspect + + exit + end + + puts obj.to_s + " -- #{x.status}" + @num += 1 if x + end + end + + }.resume + +end diff --git a/spec/config/settings.yml b/spec/config/settings.yml.sample similarity index 100% rename from spec/config/settings.yml rename to spec/config/settings.yml.sample diff --git a/uber-s3.gemspec b/uber-s3.gemspec index 8c69ad0..0622c51 100644 --- a/uber-s3.gemspec +++ b/uber-s3.gemspec @@ -18,7 +18,7 @@ Gem::Specification.new do |s| s.files = Dir['README.md', 'lib/**/*'] s.require_path = 'lib' - s.add_dependency('mime-types', ['~> 1.16']) + s.add_dependency('mime-types', ['~> 1.17']) s.add_development_dependency('rake') s.add_development_dependency('rspec', ['~> 2.7.0'])