diff --git a/.gitignore b/.gitignore index e3200e0..9db21c5 100644 --- a/.gitignore +++ b/.gitignore @@ -45,7 +45,9 @@ build-iPhoneSimulator/ # for a library or gem, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: -# Gemfile.lock +Gemfile.lock +# Note: We include both .ruby-version and .ruby-gemset in source control as we set up CI based on the version specified there. +# We _should_ modify the CI process so gem builds can go against an arbitrary Ruby version in the future. # .ruby-version # .ruby-gemset @@ -54,3 +56,9 @@ build-iPhoneSimulator/ # Used by RuboCop. Remote config files pulled in from inherit_from directive. # .rubocop-https?--* + +# Ignore for macOS systems +.DS_Store + +# Jetbrains IDEs +.idea/ diff --git a/.rspec b/.rspec new file mode 100644 index 0000000..b169316 --- /dev/null +++ b/.rspec @@ -0,0 +1,4 @@ +--format documentation +--color +--order random +--require spec_helper.rb diff --git a/.ruby-gemset b/.ruby-gemset new file mode 100644 index 0000000..5fe6b5c --- /dev/null +++ b/.ruby-gemset @@ -0,0 +1 @@ +yesware diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 0000000..c0013a8 --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +ruby-2.7.3 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..a132e03 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,9 @@ +### 1.0.0 / 2021-04-22 [Initial Release] + +* Handle parsing Snowflake values for the following types: + * Numeric data types + * String data types + * Booleans + * Dates +* Support insertion of multiple rows using the `VALUES` syntax. +* Support creating tables with `String` columns with maximum varchar size (16777216).
diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..8ff5a84 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +# Specify your gem's dependencies in sequel-snowflake.gemspec +gemspec diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..d10c633 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1 @@ +Copyright (c) 2021 Yesware, Inc. All rights reserved. diff --git a/README.md b/README.md index 501bf08..f974f39 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,55 @@ # sequel-snowflake -Sequel adapter for Snowflake + +An adapter to connect to Snowflake databases using [Sequel](http://sequel.jeremyevans.net/). +This provides proper types for returned values, as opposed to the ODBC adapter. + +## Installation + +Add this line to your application's Gemfile: + + gem 'sequel-snowflake' + +And then execute: + + $ bundle install + +Or install it yourself as: + + $ gem install sequel-snowflake + +## Usage + +When establishing the connection, specify `:snowflake` as the adapter to use. + +```ruby +DB = Sequel.connect(adapter: :snowflake, + drvconnect: conn_str) +``` + +## Testing + +In order to run specs, you'll need a Snowflake account. A connection string should be +provided as an environment variable `SNOWFLAKE_CONN_STR`. For example, on macOS, +our connection string would resemble: + +```bash +DRIVER=/opt/snowflake/snowflakeodbc/lib/universal/libSnowflake.dylib; +SERVER=<account>.<region>.snowflakecomputing.com; +DATABASE=<database>; +WAREHOUSE=<warehouse>; +SCHEMA=<schema>; +UID=<user>; +PWD=<password>; +CLIENT_SESSION_KEEP_ALIVE=true; +``` + +The test will create a temporary table on the specified database to run tests on, and this will +be taken down either via the `after(:each)` blocks or when the connection is closed. + +## Contributing + +1. Fork it ( https://github.com/Yesware/sequel-snowflake/fork ) +2. Create your feature branch (`git checkout -b my-new-feature`) +3. Commit your changes (`git commit -am 'Add some feature'`) +4.
Push to the branch (`git push origin my-new-feature`)
+5. Create a new Pull Request
diff --git a/Rakefile b/Rakefile
new file mode 100644
index 0000000..44c6012
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,13 @@
+#!/usr/bin/env rake
+require 'bundler/gem_tasks'
+
+require 'rspec/core/rake_task'
+
+RSpec::Core::RakeTask.new
+
+desc 'Run specs'
+task :test => :spec
+task :default => :spec
+
+desc 'All-in-one target for CI servers to run.'
+task :ci => ['spec']
diff --git a/lib/sequel-snowflake.rb b/lib/sequel-snowflake.rb
new file mode 100644
index 0000000..ce696a1
--- /dev/null
+++ b/lib/sequel-snowflake.rb
@@ -0,0 +1,7 @@
+require 'sequel-snowflake/version'
+require 'sequel/adapters/snowflake'
+
+# Register our Snowflake adapter to Sequel's map.
+# This allows us to specify the adapter on database connection:
+#   DB = Sequel.connect(adapter: :snowflake, ...)
+Sequel::ADAPTER_MAP[:snowflake] = Sequel::Snowflake::Database
diff --git a/lib/sequel-snowflake/version.rb b/lib/sequel-snowflake/version.rb
new file mode 100644
index 0000000..e88c459
--- /dev/null
+++ b/lib/sequel-snowflake/version.rb
@@ -0,0 +1,6 @@
+module Sequel
+  module Snowflake
+    # sequel-snowflake version
+    VERSION = "1.0.0".freeze
+  end
+end
diff --git a/lib/sequel/adapters/snowflake.rb b/lib/sequel/adapters/snowflake.rb
new file mode 100644
index 0000000..5a98f23
--- /dev/null
+++ b/lib/sequel/adapters/snowflake.rb
@@ -0,0 +1,96 @@
+require 'sequel'
+require 'sequel/adapters/odbc'
+
+# A lightweight adapter providing Snowflake support for the `sequel` gem.
+# The only difference between this and the Sequel-provided ODBC adapter is
+# how we interpret the response data, which is handled by the Dataset class.
+module Sequel
+  module Snowflake
+    # Snowflake flavour of the ODBC Database; everything not overridden here is inherited.
+    class Database < Sequel::ODBC::Database
+      # Register this class under the :snowflake scheme, so the adapter is usable as soon as
+      # this file is loaded (e.g. when Sequel itself requires 'sequel/adapters/snowflake').
+      set_adapter_scheme :snowflake
+
+      # Default varchar size is the maximum (https://docs.snowflake.com/en/sql-reference/data-types-text.html#varchar)
+      def default_string_column_size
+        16_777_216
+      end
+
+      # Datasets built from this database use the Snowflake-aware Dataset class below.
+      def dataset_class_default
+        Sequel::Snowflake::Dataset
+      end
+      private :dataset_class_default
+    end
+
+    # A custom Sequel Dataset class crafted specifically to handle Snowflake results.
+    class Dataset < Sequel::ODBC::Dataset
+      # Runs the query and yields every row as a Hash keyed by output identifier,
+      # with each value passed through #convert_snowflake_value.
+      #
+      # @param sql [String] The SQL to execute
+      # @yield [Hash] One converted row at a time
+      # @return [Dataset] self
+      def fetch_rows(sql)
+        execute(sql) do |s|
+          # One [identifier, ODBC type, scale] triple per column, in result-set order.
+          cols = s.columns(true).map { |c| [output_identifier(c.name), c.type, c.scale] }
+          self.columns = cols.map(&:first)
+          s.each do |row|
+            hash = {}
+            cols.each_with_index do |(name, type, scale), idx|
+              hash[name] = convert_snowflake_value(row[idx], type, scale)
+            end
+            yield hash
+          end
+        end
+        self
+      end
+
+      # This is similar to the ODBC adapter's Dataset#convert_odbc_value, except for some special casing
+      # around Snowflake numerics, which come in through ODBC as Strings instead of Numbers.
+      # In those cases, we need to examine the column type as well as the scale,
+      # to properly convert Integers and Doubles.
+      # Partially inspired by https://github.com/instacart/odbc_adapter.
+      #
+      # @param value The actual value to be converted
+      # @param column_type The type assigned to that value's column
+      # @param scale [Integer, nil] The number of digits to the right of the decimal point, if this is a
+      #   SQL_DECIMAL value.
+      # @return The converted value, or nil when +value+ is nil
+      def convert_snowflake_value(value, column_type, scale)
+        return nil if value.nil? # Null values need no conversion.
+
+        case value
+        when ::ODBC::TimeStamp
+          db.to_application_timestamp(
+            [value.year, value.month, value.day, value.hour, value.minute, value.second, value.fraction]
+          )
+        when ::ODBC::Time
+          Sequel::SQLTime.create(value.hour, value.minute, value.second)
+        when ::ODBC::Date
+          Date.new(value.year, value.month, value.day)
+        else
+          if column_type == ::ODBC::SQL_BIT
+            value == 1
+          elsif column_type == ::ODBC::SQL_DECIMAL
+            # Only a scale known to be zero yields an Integer. A nil scale (not reported)
+            # falls back to Float instead of raising NoMethodError or truncating digits.
+            scale&.zero? ? value.to_i : value.to_f
+          else
+            # Ensure strings are in UTF-8: https://stackoverflow.com/q/65946886
+            value.is_a?(String) ? value.force_encoding('UTF-8') : value
+          end
+        end
+      end
+      private :convert_snowflake_value
+
+      # Snowflake can insert multiple rows using VALUES (https://stackoverflow.com/q/64578007)
+      def multi_insert_sql_strategy
+        :values
+      end
+      private :multi_insert_sql_strategy
+    end
+  end
+end
diff --git a/sequel-snowflake.gemspec b/sequel-snowflake.gemspec
new file mode 100644
index 0000000..773c3c5
--- /dev/null
+++ b/sequel-snowflake.gemspec
@@ -0,0 +1,27 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'sequel-snowflake/version'
+
+Gem::Specification.new do |spec|
+  spec.name = "sequel-snowflake"
+  spec.version = Sequel::Snowflake::VERSION
+  spec.authors = ["Yesware, Inc"]
+  spec.email = ["engineering@yesware.com"]
+  spec.summary = %q{Sequel adapter for Snowflake}
+  spec.description = spec.summary
+  spec.homepage = "https://github.com/Yesware/sequel-snowflake"
+
+  spec.files = `git ls-files -z`.split("\x0")
+  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+
+
+  spec.add_runtime_dependency 'sequel'
+  spec.add_runtime_dependency 'ruby-odbc'
+
+  spec.add_development_dependency 'rake'
+  spec.add_development_dependency 'rspec'
+  spec.add_development_dependency 'simplecov'
+end
diff --git a/spec/sequel/adapters/snowflake_spec.rb b/spec/sequel/adapters/snowflake_spec.rb
new file mode 100644
index 0000000..32c327a
--- /dev/null
+++ b/spec/sequel/adapters/snowflake_spec.rb
@@ -0,0 +1,74 @@
+require 'securerandom'
+
+describe Sequel::Snowflake::Dataset do
+  describe 'Converting Snowflake data types' do
+    # Create a test table with a reasonably-random suffix
+    let!(:test_table) { "SEQUEL_SNOWFLAKE_SPECS_#{SecureRandom.hex(10)}".to_sym }
+    let!(:db) { Sequel.connect(adapter: :snowflake, drvconnect: ENV['SNOWFLAKE_CONN_STR']) }
+
+    before(:each) do
+      # Set timezone for parsing timestamps. This gives us a consistent timezone to test against below.
+      Sequel.default_timezone = :utc
+
+      db.create_table(test_table, :temp => true) do
+        Numeric :n
+        BigDecimal :d, size: [38, 5]
+        Float :f
+        DateTime :t
+        TrueClass :b
+        String :str
+        String :str2
+      end
+    end
+
+    after(:each) do
+      db.drop_table(test_table)
+      # Every example opens its own connection through let!(:db); release it so they don't pile up.
+      db.disconnect
+    end
+
+    it 'converts Snowflake data types into equivalent Ruby types' do
+      db[test_table].insert(
+        { n: 17, d: 42.035, f: 1.2247, t: '2020-03-12 01:02:03.123456789', b: true, str: 'hi', str2: nil }
+      )
+
+      res = db[test_table].select(
+        :n, :d, :f, :t, :b,
+        Sequel.as(Sequel.function(:to_time, :t), :time),
+        Sequel.as(Sequel.function(:to_date, :t), :date),
+        :str, :str2
+      ).first
+
+      expect(res).to include(
+        n: 17,
+        d: a_value_within(0.0001).of(42.035),
+        f: a_value_within(0.00001).of(1.2247),
+        b: true,
+        str: 'hi',
+        str2: nil
+      )
+
+      expect(res[:t]).to be_a(Time)
+      expect(res[:t].iso8601).to eq('2020-03-12T01:02:03Z')
+
+      expect(res[:time]).to be_a(Time)
+      expect(res[:time].to_s).to eq('01:02:03')
+
+      expect(res[:date]).to be_a(Date)
+      expect(res[:date].to_s).to eq('2020-03-12')
+    end
+
+    it 'inserts multiple records successfully using the VALUE syntax' do
+      db[test_table].multi_insert(
+        [
+          { n: 17, d: 42.035, f: 1.2247, t: '2020-03-12 01:02:03.123456789', b: true, str: 'hi', str2: nil },
+          { n: 18, d: 837.5, f: 3.09, t: '2020-03-15 11:22:33.12345', b: false, str: 'beware the ides', str2: 'of march' }
+        ]
+      )
+
+      expect(db[test_table].count).to eq(2)
+      # Order explicitly: without an ORDER BY the order of returned rows is not guaranteed.
+      expect(db[test_table].order(:n).select(:n).all).to eq([{ n: 17 }, { n: 18 }])
+    end
+  end
+end
diff --git a/spec/snowflake_spec.rb b/spec/snowflake_spec.rb
new file mode 100644
index 0000000..e166fbf
--- /dev/null
+++ b/spec/snowflake_spec.rb
@@ -0,0 +1,8 @@
+require 'spec_helper'
+require 'sequel-snowflake'
+
+describe Sequel::Snowflake do
+  it "should have a VERSION constant" do
+    expect(subject.const_get('VERSION')).to_not be_empty
+  end
+end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
new file mode 100644
index 0000000..a8c71a9
--- /dev/null
+++ b/spec/spec_helper.rb
@@ -0,0 +1,10 @@
+$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
+
+require 'simplecov'
+
+SimpleCov.start do
+  add_filter 'spec'
+end
+SimpleCov.minimum_coverage(100)
+
+require 'sequel-snowflake'