seanharr11 · musashiXXX · Oct 30, 2018 · Oct 30, 2018 · Oct 30, 2018 · Oct 31, 2018
diff --git a/etlalchemy/ETLAlchemySource.py b/etlalchemy/ETLAlchemySource.py
@@ -21,7 +21,7 @@
 from sqlalchemy.inspection import inspect
 from sqlalchemy.exc import NoSuchTableError
 from sqlalchemy.types import Text, Numeric, BigInteger, Integer, DateTime, Date, TIMESTAMP, String, BINARY, LargeBinary
-from sqlalchemy.dialects.postgresql import BYTEA
+from sqlalchemy.dialects.postgresql import BYTEA, UUID
 import inspect as ins
 import re
 import csv
@@ -50,7 +50,8 @@ def __init__(self,
                  skip_table_if_empty=False,
                  skip_column_if_empty=False,
                  compress_varchar=False,
-                 log_file=None):
+                 log_file=None,
+                 per_table_buffers={}):
         # TODO: Store unique columns in here, and ADD the unique constraints
         # after data has been migrated, rather than before
         self.unique_columns = []
@@ -59,6 +60,9 @@ def __init__(self,
         self.logger = logging.getLogger("ETLAlchemySource")
         self.logger.propagate = False
 
+        # Allow the buffer size to be specified per table (keyed by table name) when fetching rows from the source
+        self.per_table_buffers = per_table_buffers
+
         for h in list(self.logger.handlers):
             # Clean up any old loggers...(useful during testing w/ multiple
             # log_files)
@@ -205,6 +209,16 @@ def standardize_column_type(self, column, raw_rows):
             # Get the VARCHAR size of the column...
             ########################################
             varchar_length = column.type.length
+            # If varchar_length exceeds the maximum size for our target
+            # database, then convert VARCHAR -> TEXT  
+            if self.dst_engine.dialect.name.lower() == "postgresql":
+                if varchar_length == 'max' or varchar_length > 10485760:
+                    varchar_length = 0
+            elif self.dst_engine.dialect.name.lower() == "mssql":
+                if varchar_length == 'max' or varchar_length > 65532:
+                    # NOTE(review): 65532 (and 21844 chars with utf8) look like
+                    # MySQL VARCHAR limits, not mssql ones -- confirm the dialect.
+                    varchar_length = 0
             ##################################
             # Strip collation here ...
             ##################################
@@ -249,8 +263,20 @@ def standardize_column_type(self, column, raw_rows):
             # Get the VARCHAR size of the column...
             ########################################
             varchar_length = column.type.length
-            column_copy.type = String()
-            column_copy.type.length = varchar_length
+            if varchar_length == 'max':
+                varchar_length = 0
+                column_copy.type = Text()
+            elif self.dst_engine.dialect.name.lower() == "postgresql" and varchar_length > 10485760:
+                    varchar_length = 0
+                    column_copy.type = Text()
+            elif self.dst_engine.dialect.name.lower() == "mssql" and varchar_length > 65532:
+                    # NOTE(review): 65532 (and 21844 chars with utf8) look like
+                    # MySQL VARCHAR limits, not mssql ones -- confirm the dialect.
+                    varchar_length = 0
+                    column_copy.type = Text()
+            else:
+                column_copy.type = String()
+            	column_copy.type.length = varchar_length
             ##################################
             # Strip collation here ...
             ##################################
@@ -327,6 +353,7 @@ def standardize_column_type(self, column, raw_rows):
                     null = False
                 if data.__class__.__name__ == 'Decimal' or\
                    data.__class__.__name__ == 'float':
+                    continue # TODO. chamilton 22 April 2019: Skip this part entirely. Not ready to modify/remove/etc. just yet.
                     splt = str(data).split(".")
                     if len(splt) == 1:
                         intCount += 1
@@ -380,7 +407,7 @@ def standardize_column_type(self, column, raw_rows):
                     column.name +
                     "' is of type 'Decimal', but contains no mantissas " +
                     "> 0. (i.e. 3.00, 2.00, etc..)\n ")
-                if maxDigit > 4294967295:
+                if maxDigit > 4294967295: # TODO. chamilton 22 April 2019: Not sure if this is necessary. 
                     self.logger.warning("Coercing to 'BigInteger'")
                     column_copy.type = BigInteger()
                     # Do conversion...
@@ -401,6 +428,11 @@ def standardize_column_type(self, column, raw_rows):
                 "coercing to Boolean'")
             column_copy.type.__class__ = sqlalchemy.types.Boolean
         elif "TYPEENGINE" in base_classes:
+            if self.dst_engine.dialect.name.lower() == "postgresql"\
+                and column.type.__class__.__name__ == "UNIQUEIDENTIFIER":
+                column_copy.type = UUID()
+                self.logger.warning("Found column of type 'UNIQUEIDENTIFIER' -> " +
+                    "coercing to 'UUID'")
             for r in raw_rows:
                 if r[idx] is not None:
                     null = False
@@ -1005,6 +1037,9 @@ def migrate(
                 self.logger.info("Loading all rows into memory...")
                 rows = []
 
+                if T_src.name in self.per_table_buffers:
+                    buffer_size = self.per_table_buffers.get(T_src.name)
+
                 for i in range(1, (cnt / buffer_size) + 1):
                     self.logger.info(
                         "Fetched {0} rows".format(str(i * buffer_size)))
@@ -1206,9 +1241,9 @@ def add_indexes(self, destination_database_url):
                 .get(table_name)
             column_transformer = self.schema_transformer.column_transformations\
                 .get(table_name)
-            if table_transform and table_transform.newTable not in ["", None]:
+            if table_transform and table_transform.new_table not in ["", None]:
                 # Update the table_name
-                table_name = table_transform.newTable
+                table_name = table_transform.new_table
             this_idx_count = 0
             self.logger.info("Creating indexes for '" + table_name + "'...")
             for i in indexes:
@@ -1383,9 +1418,9 @@ def add_fks(self, destination_database_url):
             ####################################
             table_transform = self.schema_transformer.table_transformations.get(
                 table_name)
-            if table_transform and table_transform.newTable not in ["", None]:
+            if table_transform and table_transform.new_table not in ["", None]:
                 # Update the table_name
-                table_name = table_transform.newTable
+                table_name = table_transform.new_table
             self.logger.info(
                 "Adding FKs to table '{0}' (previously {1})".format(
                     table_name, pre_transformed_table_name))
@@ -1456,10 +1491,10 @@ def add_fks(self, destination_database_url):
                 ref_column_transformer = \
                     self.schema_transformer.column_transformations.get(
                                   ref_table)
-                if table_transform and table_transform.newTable not in [
+                if table_transform and table_transform.new_table not in [
                         "", None]:
                     # Update the table_name
-                    ref_table = table_transform.newTable
+                    ref_table = table_transform.new_table
                 T_ref = Table(ref_table, dst_meta)
                 ############################
                 # Check that referenced table

diff --git a/etlalchemy/literal_value_generator.py b/etlalchemy/literal_value_generator.py
@@ -1,6 +1,7 @@
 import shutil
 import decimal
 import datetime
+
 # Find the best implementation available on this platform
 try:
     from cStringIO import StringIO

diff --git a/etlalchemy/schema_transformer.py b/etlalchemy/schema_transformer.py
@@ -81,7 +81,7 @@ def __init__(self, column_transform_file,
 
     # Returns False if deleted...
     def transform_table(self, table):
-        thisTableTT = self.table_transformations.get(table.name.lower())
+        thisTableTT = self.table_transformations.get(table.name)
         # Update table name
         if thisTableTT:
             if thisTableTT.delete:

diff --git a/requirements.txt b/requirements.txt
@@ -11,4 +11,4 @@ py==1.4.31
 six==1.9.0
 SQLAlchemy==1.0.13
 sqlalchemy-migrate==0.9.7
-SQLAlchemy-Utils==0.30.9
+SQLAlchemy-Utils==0.33.6