seanharr11 · musashiXXX · Oct 30, 2018 · Oct 30, 2018 · Oct 30, 2018 · Oct 31, 2018
diff --git a/etlalchemy/ETLAlchemySource.py b/etlalchemy/ETLAlchemySource.py
@@ -21,7 +21,7 @@
 from sqlalchemy.inspection import inspect
 from sqlalchemy.exc import NoSuchTableError
 from sqlalchemy.types import Text, Numeric, BigInteger, Integer, DateTime, Date, TIMESTAMP, String, BINARY, LargeBinary
-from sqlalchemy.dialects.postgresql import BYTEA
+from sqlalchemy.dialects.postgresql import BYTEA, UUID
 import inspect as ins
 import re
 import csv
@@ -50,7 +50,8 @@ def __init__(self,
                  skip_table_if_empty=False,
                  skip_column_if_empty=False,
                  compress_varchar=False,
-                 log_file=None):
+                 log_file=None,
+                 per_table_buffers={}):
         # TODO: Store unique columns in here, and ADD the unique constraints
         # after data has been migrated, rather than before
         self.unique_columns = []
@@ -59,6 +60,9 @@ def __init__(self,
         self.logger = logging.getLogger("ETLAlchemySource")
         self.logger.propagate = False
 
+        # Allow the buffer size to be set per table when fetching source rows
+        self.per_table_buffers = per_table_buffers
+
         for h in list(self.logger.handlers):
             # Clean up any old loggers...(useful during testing w/ multiple
             # log_files)
@@ -205,6 +209,16 @@ def standardize_column_type(self, column, raw_rows):
             # Get the VARCHAR size of the column...
             ########################################
             varchar_length = column.type.length
+            # If varchar_length is 'max' or exceeds the maximum size for our
+            # target database, reset it to 0 (to convert VARCHAR -> TEXT)
+            if self.dst_engine.dialect.name.lower() == "postgresql":
+                if varchar_length == 'max' or varchar_length > 10485760:
+                    varchar_length = 0
+            elif self.dst_engine.dialect.name.lower() == "mssql":
+                if varchar_length == 'max' or varchar_length > 65532:
+                    # Note: not always the case; with utf8 the limit is 21844.
+                    # NOTE(review): these look like MySQL limits; confirm "mssql".
+                    varchar_length = 0
             ##################################
             # Strip collation here ...
             ##################################
@@ -401,6 +415,11 @@ def standardize_column_type(self, column, raw_rows):
                 "coercing to Boolean'")
             column_copy.type.__class__ = sqlalchemy.types.Boolean
         elif "TYPEENGINE" in base_classes:
+            if self.dst_engine.dialect.name.lower() == "postgresql"\
+                and column.type.__class__.__name__ == "UNIQUEIDENTIFIER":
+                column_copy.type = UUID()
+                self.logger.warning("Found column of type 'UNIQUEIDENTIFIER' -> " +
+                    "coercing to 'UUID'")
             for r in raw_rows:
                 if r[idx] is not None:
                     null = False
@@ -1005,6 +1024,9 @@ def migrate(
                 self.logger.info("Loading all rows into memory...")
                 rows = []
 
+                if T_src.name in self.per_table_buffers:
+                    buffer_size = self.per_table_buffers.get(T_src.name)
+
                 for i in range(1, (cnt / buffer_size) + 1):
                     self.logger.info(
                         "Fetched {0} rows".format(str(i * buffer_size)))
@@ -1206,9 +1228,9 @@ def add_indexes(self, destination_database_url):
                 .get(table_name)
             column_transformer = self.schema_transformer.column_transformations\
                 .get(table_name)
-            if table_transform and table_transform.newTable not in ["", None]:
+            if table_transform and table_transform.new_table not in ["", None]:
                 # Update the table_name
-                table_name = table_transform.newTable
+                table_name = table_transform.new_table
             this_idx_count = 0
             self.logger.info("Creating indexes for '" + table_name + "'...")
             for i in indexes:
@@ -1383,9 +1405,9 @@ def add_fks(self, destination_database_url):
             ####################################
             table_transform = self.schema_transformer.table_transformations.get(
                 table_name)
-            if table_transform and table_transform.newTable not in ["", None]:
+            if table_transform and table_transform.new_table not in ["", None]:
                 # Update the table_name
-                table_name = table_transform.newTable
+                table_name = table_transform.new_table
             self.logger.info(
                 "Adding FKs to table '{0}' (previously {1})".format(
                     table_name, pre_transformed_table_name))
@@ -1456,10 +1478,10 @@ def add_fks(self, destination_database_url):
                 ref_column_transformer = \
                     self.schema_transformer.column_transformations.get(
                                   ref_table)
-                if table_transform and table_transform.newTable not in [
+                if table_transform and table_transform.new_table not in [
                         "", None]:
                     # Update the table_name
-                    ref_table = table_transform.newTable
+                    ref_table = table_transform.new_table
                 T_ref = Table(ref_table, dst_meta)
                 ############################
                 # Check that referenced table

diff --git a/etlalchemy/literal_value_generator.py b/etlalchemy/literal_value_generator.py
@@ -1,6 +1,7 @@
 import shutil
 import decimal
 import datetime
+
 # Find the best implementation available on this platform
 try:
     from cStringIO import StringIO

diff --git a/etlalchemy/schema_transformer.py b/etlalchemy/schema_transformer.py
@@ -81,7 +81,7 @@ def __init__(self, column_transform_file,
 
     # Returns False if deleted...
     def transform_table(self, table):
-        thisTableTT = self.table_transformations.get(table.name.lower())
+        thisTableTT = self.table_transformations.get(table.name)
         # Update table name
         if thisTableTT:
             if thisTableTT.delete:

diff --git a/requirements.txt b/requirements.txt
@@ -11,4 +11,4 @@ py==1.4.31
 six==1.9.0
 SQLAlchemy==1.0.13
 sqlalchemy-migrate==0.9.7
-SQLAlchemy-Utils==0.30.9
+SQLAlchemy-Utils==0.33.6