diff --git a/lib/httpclient.rb b/lib/httpclient.rb index 4f4b4297..0897e99b 100644 --- a/lib/httpclient.rb +++ b/lib/httpclient.rb @@ -651,8 +651,8 @@ def redirect_uri_callback=(redirect_uri_callback) # use get method. get returns HTTP::Message as a response and you need to # follow HTTP redirect by yourself if you need. def get_content(uri, *args, &block) - query, header = keyword_argument(args, :query, :header) - success_content(follow_redirect(:get, uri, query, nil, header || {}, &block)) + query, header, to = keyword_argument(args, :query, :header, :to) + success_content(follow_redirect(:get, uri, query, nil, header || {}, to, &block)) end # Posts a content. @@ -995,7 +995,7 @@ def hashy_argument_has_keys(args, *key) key.all? { |e| args[0].key?(e) } end - def do_request(method, uri, query, body, header, &block) + def do_request(method, uri, query, body, header, to = nil, &block) res = nil if HTTP::Message.file?(body) pos = body.pos rescue nil @@ -1016,7 +1016,7 @@ def do_request(method, uri, query, body, header, &block) # We want to delete Connection usage in do_get_block but Newrelic gem depends on it. # https://github.com/newrelic/rpm/blob/master/lib/new_relic/agent/instrumentation/httpclient.rb#L34-L36 conn = Connection.new - res = do_get_block(req, proxy, conn, &block) + res = do_get_block(req, proxy, conn, to, &block) # Webmock's do_get_block returns ConditionVariable if !res.respond_to?(:previous) res = conn.pop @@ -1085,7 +1085,7 @@ def adapt_block(&block) proc { |r, str| block.call(str) } end - def follow_redirect(method, uri, query, body, header, &block) + def follow_redirect(method, uri, query, body, header, to = nil, &block) uri = to_resource_url(uri) if block b = adapt_block(&block) @@ -1101,7 +1101,7 @@ def follow_redirect(method, uri, query, body, header, &block) request_query = query while retry_number < @follow_redirect_count body.pos = pos if pos - res = do_request(method, uri, request_query, body, header, &filtered_block) + res = do_request(method, uri, request_query, body, header, to, &filtered_block) res.previous = previous if res.redirect? if res.header['location'].empty? @@ -1226,7 +1226,7 @@ def no_proxy?(uri) # !! CAUTION !! # Method 'do_get*' runs under MT conditon. Be careful to change. - def do_get_block(req, proxy, conn, &block) + def do_get_block(req, proxy, conn, to = nil, &block) @request_filter.each do |filter| filter.filter_request(req) end @@ -1244,7 +1244,8 @@ def do_get_block(req, proxy, conn, &block) @debug_dev << "\n\n= Response\n\n" if @debug_dev do_get_header(req, res, sess) conn.push(res) - sess.get_body do |part| + + sess.get_body(to) do |part| set_encoding(part, res.body_encoding) if block block.call(res, part.dup) diff --git a/lib/httpclient/session.rb b/lib/httpclient/session.rb index 67e2c3ba..44959994 100644 --- a/lib/httpclient/session.rb +++ b/lib/httpclient/session.rb @@ -29,8 +29,6 @@ class HTTPClient - - # Represents a Site: protocol scheme, host String and port Number. class Site # Protocol scheme. @@ -574,29 +572,31 @@ def eof? end end - def get_body(&block) - begin - read_header if @state == :META - return nil if @state != :DATA - if @transparent_gzip_decompression - block = content_inflater_block(@content_encoding, block) - end - if @chunked - read_body_chunked(&block) - elsif @content_length - read_body_length(&block) - else - read_body_rest(&block) - end - rescue - close - raise - end - if eof? - if @next_connection - @state = :WAIT - else + def get_body(to = nil, &block) + cast_to_io(to, block) do |io| + begin + read_header if @state == :META + return nil if @state != :DATA + if @transparent_gzip_decompression + io = content_inflater(@content_encoding, io) + end + if @chunked + read_body_chunked(io) + elsif @content_length + read_body_length(io) + else + read_body_rest(io) + end + rescue close + raise + end + if eof? + if @next_connection + @state = :WAIT + else + close + end end end nil @@ -697,7 +697,7 @@ def first_inflate(body) end end - def content_inflater_block(content_encoding, block) + def content_inflater(content_encoding, io) case content_encoding when 'gzip', 'x-gzip' # zlib itself has a functionality to decompress gzip stream. @@ -706,15 +706,12 @@ def content_inflater_block(content_encoding, block) # > windowBits can also be greater than 15 for optional gzip decoding. Add 32 to # > windowBits to enable zlib and gzip decoding with automatic header detection, # > or add 16 to decode only the gzip format - inflate_stream = Zlib::Inflate.new(Zlib::MAX_WBITS + 32) + IOInflater.new(io, Zlib::Inflate.new(Zlib::MAX_WBITS + 32)) when 'deflate' - inflate_stream = LenientInflater.new + IOInflater.new(io, LenientInflater.new) else - return block + io end - Proc.new { |buf| - block.call(inflate_stream.inflate(buf)) - } end def set_header(req) @@ -872,35 +869,20 @@ def parse_content_header(key, value) end end - def read_body_length(&block) - return nil if @content_length == 0 - while true - buf = empty_bin_str - maxbytes = @read_block_size - maxbytes = @content_length if maxbytes > @content_length && @content_length > 0 - ::Timeout.timeout(@receive_timeout, ReceiveTimeoutError) do - begin - @socket.readpartial(maxbytes, buf) - rescue EOFError - close - buf = nil - if @strict_response_size_check - raise BadResponseError.new("EOF while reading rest #{@content_length} bytes") - end - end - end - if buf && buf.bytesize > 0 - @content_length -= buf.bytesize - yield buf - else - @content_length = 0 - end - return if @content_length == 0 + def read_body_length(io) + ::Timeout.timeout(@receive_timeout, ReceiveTimeoutError) do + @content_length -= IO.copy_stream(@socket, io, @content_length) + end + + if @strict_response_size_check && @content_length > 0 + raise BadResponseError.new("EOF while reading rest #{@content_length} bytes") + else + @content_length = 0 end end RS = "\r\n" - def read_body_chunked(&block) + def read_body_chunked(io) buf = empty_bin_str while true ::Timeout.timeout(@receive_timeout, ReceiveTimeoutError) do @@ -919,14 +901,14 @@ def read_body_chunked(&block) @socket.read(2) end unless buf.empty? - yield buf + io.write(buf) end end end - def read_body_rest + def read_body_rest(io) if @readbuf and @readbuf.bytesize > 0 - yield @readbuf + io.write(@readbuf) @readbuf = nil end while true @@ -942,19 +924,52 @@ def read_body_rest end end if buf && buf.bytesize > 0 - yield buf + io.write(buf) else return end end end + def cast_to_io(to, block) + if to.respond_to?(:write) + yield to + elsif to + File.open(to, 'w+') do |file| + yield file + end + else + yield IOBlockAdapter.new(block) + end + end + + class IOInflater + def initialize(io, inflater) + @io = io + @inflater = inflater + end + + def write(chunk) + @io.write(@inflater.inflate(chunk)) + chunk.bytesize + end + end + + class IOBlockAdapter + def initialize(block) + @block = block + end + + def write(chunk) + @block.call(chunk) + chunk.bytesize + end + end + def empty_bin_str str = '' str.force_encoding('BINARY') if str.respond_to?(:force_encoding) str end end - - end diff --git a/test/helper.rb b/test/helper.rb index 26bc4f9b..29ae50d4 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -15,7 +15,7 @@ require 'stringio' require 'cgi' require 'webrick/httputils' - +require 'tmpdir' module Helper Port = 17171 diff --git a/test/test_httpclient.rb b/test/test_httpclient.rb index c8e5330c..fdecc5c4 100644 --- a/test/test_httpclient.rb +++ b/test/test_httpclient.rb @@ -629,6 +629,41 @@ def test_get_content_with_block end end + def test_get_content_with_path + with_tmp_path do |path| + @client.get_content(serverurl + 'hello', to: path) + assert_equal('hello', File.read(path)) + end + end + + def test_get_content_with_io + with_tmp_path do |path| + File.open(path, 'w+') do |io| + @client.get_content(serverurl + 'hello', to: io) + end + assert_equal('hello', File.read(path)) + end + end + + def test_get_gzipped_content_with_io + @client.transparent_gzip_decompression = true + + with_tmp_path do |path| + @client.get_content(serverurl + 'compressed?enc=gzip', to: path) + assert_equal('hello', File.read(path)) + end + + with_tmp_path do |path| + @client.get_content(serverurl + 'compressed?enc=deflate', to: path) + assert_equal('hello', File.read(path)) + end + + with_tmp_path do |path| + @client.get_content(serverurl + 'compressed?enc=deflate_noheader', to: path) + assert_equal('hello', File.read(path)) + end + end + def test_post_content assert_equal('hello', @client.post_content(serverurl + 'hello')) assert_equal('hello', @client.post_content(serverurl + 'redirect1')) @@ -829,16 +864,6 @@ def test_get_with_block_arity_2_and_redirects assert_nil(res.content) end - def test_get_with_block_string_recycle - @client.read_block_size = 2 - body = [] - _res = @client.get(serverurl + 'servlet') { |str| - body << str - } - assert_equal(2, body.size) - assert_equal("get", body.join) # Was "tt" by String object recycle... - end - def test_get_with_block_chunked_string_recycle server = TCPServer.open('localhost', 0) server_thread = Thread.new { @@ -1922,6 +1947,14 @@ def test_tcp_keepalive private + def with_tmp_path + path = File.join(Dir.tmpdir, 'http-client-test') + File.delete(path) if File.exists?(path) + yield path + ensure + File.delete(path) if File.exists?(path) + end + def check_query_get(query) WEBrick::HTTPUtils.parse_query( @client.get(serverurl + 'servlet', query).header["x-query"][0]