Skip to content

Latest commit

 

History

History
90 lines (58 loc) · 1.62 KB

extract-table.markdown

File metadata and controls

90 lines (58 loc) · 1.62 KB

Extracting a table

Let's normalize a little.

Actors gonna act

SELECT * FROM type_count('events', 'actor');

SELECT COUNT(DISTINCT actor) FROM events;

SELECT * FROM key_count('events', 'actor');

Give them their own place

CREATE TABLE actors (
  actor_id SERIAL PRIMARY KEY,
  payload JSONB UNIQUE NOT NULL
);

INSERT INTO actors (payload)
SELECT DISTINCT actor FROM events;

But keep them connected

ALTER TABLE events
  ADD COLUMN actor_id INTEGER
  REFERENCES actors (actor_id);

UPDATE events
  SET actor_id =
    (SELECT actor_id FROM actors WHERE actor = payload);

Note: without that UNIQUE constraint, this update would have taken far longer.

Finish the move

ALTER TABLE events
  ALTER COLUMN actor_id SET NOT NULL,
  DROP COLUMN actor;

Now we can start working on the actors table

SELECT explode_json_column('actors', 'payload');

SELECT * FROM type_count('actors', 'avatar_url');
SELECT set_concrete_type('actors', 'avatar_url', 'TEXT');

SELECT * FROM type_count('actors', 'id');
SELECT set_concrete_type('actors', 'id', 'BIGINT');
SELECT * FROM type_count('actors', 'login');
SELECT set_concrete_type('actors', 'login', 'TEXT');

SELECT * FROM type_count('actors', 'display_login');
SELECT set_concrete_type('actors', 'display_login', 'TEXT');
SELECT * FROM type_count('actors', 'url');
SELECT set_concrete_type('actors', 'url', 'TEXT');

SELECT * FROM type_count('actors', 'gravatar_id');
SELECT set_concrete_type('actors', 'gravatar_id', 'TEXT');

Finish up with actors

ALTER TABLE actors DROP COLUMN payload;