From 28bf901d9034c3ba500fb98821c2efe81d4da98e Mon Sep 17 00:00:00 2001
From: Edward Ribeiro
Date: Thu, 1 Sep 2016 15:01:17 -0300
Subject: [PATCH] Mais refactorings

---
NOTE(review): the line breaks of this patch were restored from a copy whose
whitespace had been collapsed. Java indentation and the exact position of blank
context lines are reconstructed, not recovered; run "git apply --check" (or
"git apply --ignore-whitespace") against the base commit before applying.

 .../unb/cassandra/etl/CassandraBulkLoader.java | 18 ++++++++++++++++++
 .../cassandra/etl/CassandraClientLoader.java   |  6 +++---
 src/main/resources/NOTES.txt                   |  8 ++++++++
 3 files changed, 29 insertions(+), 3 deletions(-)
 create mode 100644 src/main/resources/NOTES.txt

diff --git a/src/main/java/br/unb/cassandra/etl/CassandraBulkLoader.java b/src/main/java/br/unb/cassandra/etl/CassandraBulkLoader.java
index 931d3ad..ed95275 100644
--- a/src/main/java/br/unb/cassandra/etl/CassandraBulkLoader.java
+++ b/src/main/java/br/unb/cassandra/etl/CassandraBulkLoader.java
@@ -2,4 +2,22 @@
 
 public class CassandraBulkLoader {
 
+    public static final String SCHEMA = "CREATE TABLE %s.%s (ID TIMEUUID,\n" +
+            " UF TEXT,\n" +
+            " CODIGO_MUNICIPIO TEXT,\n" +
+            " NOME_MUNICIPIO TEXT,\n" +
+            " NOME_BENEFICIARIO TEXT,\n" +
+            " VALOR_PAGO FLOAT,\n" +
+            " MES_ANO TEXT,\n" +
+            " PRIMARY KEY (ID, VALOR_PAGO))";
+
+    public static final String INSERT = "INSERT INTO %s.%s (ID, UF, CODIGO_MUNICIPIO, NOME_MUNICIPIO, NOME_BENEFICIARIO, VALOR_PAGO, MES_ANO) " +
+            " VALUES (?, ?, ?, ?, ?, ?, ?)";
+
+
+    public static void main(String[] args) {
+
+    }
+
+
 }
diff --git a/src/main/java/br/unb/cassandra/etl/CassandraClientLoader.java b/src/main/java/br/unb/cassandra/etl/CassandraClientLoader.java
index 98f71df..2d630e3 100644
--- a/src/main/java/br/unb/cassandra/etl/CassandraClientLoader.java
+++ b/src/main/java/br/unb/cassandra/etl/CassandraClientLoader.java
@@ -11,8 +11,8 @@ public class CassandraClientLoader {
     public static final String KEYSPACE = "bolsafamilia";
     public static final String TABLE = "bf";
 
-    private String insertCmd = "INSERT INTO %s.%s (ID, UF, CODIGO_MUNICIPIO, NOME_MUNICIPIO, NOME_BENEFICIARIO, VALOR_PAGO, MES_ANO) " +
-            " VALUES (?, ?, ?, ?, ?, ?, ?)";
+    public static final String INSERT = "INSERT INTO %s.%s (ID, UF, CODIGO_MUNICIPIO, NOME_MUNICIPIO, NOME_BENEFICIARIO, VALOR_PAGO, MES_ANO) " +
+            " VALUES (?, ?, ?, ?, ?, ?, ?)";
 
     public boolean connect() {
         cluster = Cluster.builder().addContactPoint(host).build();
@@ -29,7 +29,7 @@ public void close() {
 
     // TODO: use BATCH insert to speedup?
     public void insert(Registro registro) {
-        PreparedStatement pstmt = session.prepare(String.format(insertCmd, KEYSPACE, TABLE));
+        PreparedStatement pstmt = session.prepare(String.format(INSERT, KEYSPACE, TABLE));
 
         BoundStatement bstmt = pstmt.bind(UUIDs.timeBased(),
                 registro.getUf(),
diff --git a/src/main/resources/NOTES.txt b/src/main/resources/NOTES.txt
new file mode 100644
index 0000000..848bc87
--- /dev/null
+++ b/src/main/resources/NOTES.txt
@@ -0,0 +1,8 @@
+wc -l ~/Downloads/201606_BolsaFamiliaFolhaPagamento.csv
+13849867 /home/eribeiro/Downloads/201606_BolsaFamiliaFolhaPagamento.csv
+
+
+* Adicionar indices de baixa cardinalidade para UF
+* Colocar VALOR_PAGO como CLUSTERING KEY da chave primária
+* Usar SASI indexes para NOME_BENEFICIARIO? NOME_MUNICIPIO? CODIGO_MUNICIPIO?
+