Skip to content

Commit

Permalink
Removed instances of inserting into file_tag with NULL tags (Closes #33)
Browse files Browse the repository at this point in the history
- This change reduces database file sizes by as much as half by not
  storing the many duplicate rows that resulted from having a NULL-able
  primary key in the file_tag table.
- Added data and schema migration for removal of NULL tags. Incremented
  DB_VERSION.
- Moved files_by_id, and the loading from the sqlite database, back into tagdb.
- Stopped test_tagdb from creating log files.
  • Loading branch information
mwatts15 committed Jan 24, 2015
1 parent f60b5eb commit b8f9878
Show file tree
Hide file tree
Showing 10 changed files with 199 additions and 122 deletions.
111 changes: 41 additions & 70 deletions file_cabinet.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ enum {INSERT,
TAGUNI,
RMTAGU,
RMDRWR,
INSUNT,
RALLTU,
RTUDWR,
LOOKUP,
Expand All @@ -31,24 +30,36 @@ enum {INSERT,
/* Yields a pointer to the semaphore stored at index _i of (_db)->stmt_semas */
#define STMT_SEM(_db,_i) (&((_db)->stmt_semas[(_i)]))

/* Bundles an in-memory index of files with the sqlite handle, the prepared
 * statements run against it, and one semaphore per prepared statement. */
struct FileCabinet {
/* An index on files. Usually provided to us by tagdb.
 * Entries are inserted under the file's id with the File pointer as value. */
GHashTable *files;
/* Indicates whether we created the FILES ourselves */
gboolean own_files;
/* The sqlite database */
sqlite3 *sqlitedb;
/* The sql prepared statements that we use, one slot per statement code */
sqlite3_stmt *stmts[NUMBER_OF_STMTS];
/* Semaphores to protect prepared statements from being reset while they are executing */
sem_t stmt_semas[NUMBER_OF_STMTS];
/* NOTE(review): appears to track the largest file id seen while loading
 * files from the database -- confirm against the code that assigns it */
file_id_t max_id;
};

FileCabinet *file_cabinet_new (sqlite3 *db)
/* Allocates a zeroed FileCabinet that uses DB as its sqlite handle and FILES
 * as its file index, then hands it to file_cabinet_init.
 *
 * Because the structure is zero-allocated, own_files starts out FALSE.
 *
 * Returns whatever file_cabinet_init returns, or NULL when the allocation
 * itself fails. */
FileCabinet *file_cabinet_new0 (sqlite3 *db, GHashTable *files)
{
    FileCabinet *res = calloc(1, sizeof *res);
    if (!res)
    {
        /* Out of memory: there is nothing to initialize */
        return NULL;
    }

    res->sqlitedb = db;
    res->files = files;
    return file_cabinet_init(res);
}

void _file_cabinet_init_files(FileCabinet*);
/* Creates a FileCabinet on DB together with a fresh, empty file index.
 *
 * The index is a direct-hash/direct-equal table with no key or value destroy
 * notifiers. The returned cabinet has own_files set to TRUE to record that
 * the index was created here.
 *
 * Returns NULL (after destroying the index again) if no cabinet was produced. */
FileCabinet *file_cabinet_new (sqlite3 *db)
{
    GHashTable *files = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
    FileCabinet *fc = file_cabinet_new0(db, files);
    if (!fc)
    {
        /* Nothing else refers to the table yet, so release it here */
        g_hash_table_destroy(files);
        return NULL;
    }

    fc->own_files = TRUE;
    return fc;
}

FileCabinet *file_cabinet_init (FileCabinet *res)
{
res->files = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
sqlite3 *db = res->sqlitedb;
assert(db);
for (int i = 0; i < NUMBER_OF_STMTS; i++)
Expand All @@ -58,8 +69,6 @@ FileCabinet *file_cabinet_init (FileCabinet *res)

/* insert statement */
sql_prepare(db, "insert into file_tag(file, tag) values(?,?)", STMT(res, INSERT));
/* insert statement */
sql_prepare(db, "insert into file_tag(file, tag) values(?,NULL)", STMT(res, INSUNT));
/* insert into tag union */
sql_prepare(db, "insert into tag_union(tag, assoc, file) values(?,?,?)", STMT(res, TAGUNI));
/* remove from tag union */
Expand All @@ -71,53 +80,19 @@ FileCabinet *file_cabinet_init (FileCabinet *res)
/* remove statement */
sql_prepare(db, "delete from file_tag where file=? and tag is ?", STMT(res, REMOVE));
/* remove statement */
sql_prepare(db, "delete from file_tag where file=? and tag is NULL", STMT(res, REMNUL));
/* remove statement */
sql_prepare(db, "delete from file_tag where tag is ?", STMT(res, RMDRWR));
/* files-with-tag statement */
sql_prepare(db, "select distinct file from file_tag where tag is ?", STMT(res, GETFIL));
sql_prepare(db, "select distinct file from file_tag where tag is NULL", STMT(res, GETUNT));
sql_prepare(db, "select distinct id from file where id not in (select file from file_tag)", STMT(res, GETUNT));
sql_prepare(db, "select distinct F.id from file_tag Z,file F where Z.tag is ? and Z.file=F.id and F.name=?", STMT(res, LOOKUP));
sql_prepare(db, "select distinct F.id from file_tag Z,file F where Z.tag is NULL and Z.file=F.id and F.name=?", STMT(res, LOOKUT));
sql_prepare(db, "select distinct F.id"
" from file F"
" where F.name=?"
" and F.id not in (select file from file_tag)", STMT(res, LOOKUT));
sql_prepare(db, "select distinct assoc from tag_union where tag=?", STMT(res, TAGUNL));
_file_cabinet_init_files(res);
return res;
}

/* Reads the files and their tags from the sqlite database into fc->files.
 *
 * Pass 1: each row of the `file` table is turned into a File (column 0 is
 * read as the id, column 1 as the name), inserted into fc->files under its
 * id, and fc->max_id is raised to the largest id encountered.
 *
 * Pass 2: each row of `file_tag` (column 0 read as the file id, column 1 as
 * the tag id) adds that tag, with an empty string value, to the File found
 * in fc->files. Rows whose file id is not in fc->files are skipped. */
void _file_cabinet_init_files(FileCabinet *fc)
{
    sqlite3_stmt *stmt = NULL;

    /* Pass 1: load the files themselves */
    sql_prepare(fc->sqlitedb, "select distinct * from file", stmt);
    sqlite3_reset(stmt);
    while (sql_next_row(stmt) == SQLITE_ROW)
    {
        file_id_t id = sqlite3_column_int64(stmt, 0);
        const unsigned char* name = sqlite3_column_text(stmt, 1);
        File *f = new_file((const char*)name);
        file_id(f) = id;
        if (fc->max_id < id)
            fc->max_id = id;
        g_hash_table_insert(fc->files, TO_SP(file_id(f)), f);
    }
    sqlite3_finalize(stmt);
    stmt = NULL;

    /* Pass 2: attach tags to the files loaded above */
    sql_prepare(fc->sqlitedb, "select distinct * from file_tag order by file", stmt);
    sqlite3_reset(stmt);
    while (sql_next_row(stmt) == SQLITE_ROW)
    {
        file_id_t file_id = sqlite3_column_int64(stmt, 0);
        file_id_t tag_id = sqlite3_column_int64(stmt, 1);
        File *f = g_hash_table_lookup(fc->files, TO_SP(file_id));
        if (!f)
        {
            /* The row names a file that was not loaded in pass 1;
             * there is no File to attach the tag to */
            continue;
        }
        file_add_tag(f, tag_id, g_strdup(""));
    }
    sqlite3_finalize(stmt);
}

/* Reports the max_id value currently stored in FC */
file_id_t file_cabinet_max_id (FileCabinet *fc)
{
    file_id_t largest = fc->max_id;
    return largest;
}

void file_cabinet_destroy (FileCabinet *fc)
{
if (fc)
Expand All @@ -128,15 +103,15 @@ void file_cabinet_destroy (FileCabinet *fc)
sem_destroy(&(fc->stmt_semas[i]));
}

/* Delete the files */
if(fc->files)
if (fc->own_files && fc->files)
{
HL(fc->files, it, k, v)
HL (fc->files, it, k, v)
{
file_destroy_unsafe((File*) v);
} HL_END;
g_hash_table_destroy(fc->files);
}
g_hash_table_destroy(fc->files);

free(fc);
}
}
Expand All @@ -148,7 +123,7 @@ GList *file_cabinet_get_drawer_l (FileCabinet *fc, file_id_t slot_id)
return res;
}

FilesIter _sqlite_lookup_stmt (FileCabinet *fc, tagdb_key_t key, char *name)
FilesIter _sqlite_lookup_stmt (FileCabinet *fc, tagdb_key_t key, const char *name)
{
sqlite3_stmt *stmt = NULL;
if (key_is_empty(key))
Expand All @@ -169,7 +144,7 @@ FilesIter _sqlite_lookup_stmt (FileCabinet *fc, tagdb_key_t key, char *name)
return fi;
}

File *find_file(FileCabinet *fc, tagdb_key_t key, char *name)
File *_find_file(FileCabinet *fc, tagdb_key_t key, const char *name)
{
FilesIter it = _sqlite_lookup_stmt(fc, key, name);
FILES_LOOP(it, f)
Expand Down Expand Up @@ -227,7 +202,7 @@ GList *_sqlite_getfile_stmt(FileCabinet *fc, file_id_t key)
stmt_code = GETUNT;
}

sem_wait(STMT_SEM(fc,stmt_code));
sem_wait(STMT_SEM(fc, stmt_code));
sqlite3_reset(stmt);

if (key)
Expand Down Expand Up @@ -350,14 +325,8 @@ void _sqlite_ins_stmt (FileCabinet *fc, File *f, file_id_t key)
sqlite3_reset(stmt);
sqlite3_bind_int(stmt, 1, file_id(f));
sqlite3_bind_int(stmt, 2, key);
sql_step(stmt);
}
else
{
stmt = STMT(fc, INSUNT);
sqlite3_reset(stmt);
sqlite3_bind_int(stmt, 1, file_id(f));
}
sql_step(stmt);
}

void file_cabinet_remove (FileCabinet *fc, file_id_t key, File *f)
Expand All @@ -381,11 +350,15 @@ void file_cabinet_remove_v (FileCabinet *fc, tagdb_key_t key, File *f)
/* Removes F from the cabinet under UNTAGGED and under the key extracted
 * from F itself. The extracted key is destroyed before returning. */
void file_cabinet_remove_all (FileCabinet *fc, File *f)
{
    tagdb_key_t file_tags = file_extract_key(f);

    file_cabinet_remove(fc, UNTAGGED, f);
    file_cabinet_remove_v(fc, file_tags, f);

    key_destroy(file_tags);
}

/* Returns the list produced by the get-file statement for key 0,
 * i.e. the files without any tags */
GList *file_cabinet_get_untagged_files (FileCabinet *fc)
{
    GList *untagged = _sqlite_getfile_stmt(fc, 0);
    return untagged;
}

void file_cabinet_delete_file(FileCabinet *fc, File *f)
{
int rem = g_hash_table_remove(fc->files, TO_SP(file_id(f)));
Expand All @@ -398,8 +371,11 @@ void file_cabinet_delete_file(FileCabinet *fc, File *f)

void file_cabinet_insert (FileCabinet *fc, file_id_t key, File *f)
{
/* XXX: Consider not doing this on every insert */
g_hash_table_insert(fc->files, TO_SP(file_id(f)), f);

if (G_UNLIKELY(fc->own_files))
{
g_hash_table_insert(fc->files, TO_SP(file_id(f)), f);
}
_sqlite_ins_stmt(fc,f,key);

tagdb_key_t fkey = file_extract_key(f);
Expand Down Expand Up @@ -429,14 +405,9 @@ gulong file_cabinet_size (FileCabinet *fc)
}

/* Lookup a file with the given name and tags */
File *file_cabinet_lookup_file (FileCabinet *fc, tagdb_key_t key, char *name)
{
return find_file(fc, key, name);
}

File *file_cabinet_get_file_by_id(FileCabinet *fc, file_id_t id)
File *file_cabinet_lookup_file (FileCabinet *fc, tagdb_key_t key, const char *name)
{
return g_hash_table_lookup(fc->files, TO_P(id));
return _find_file(fc, key, name);
}

GList *file_cabinet_get_drawer_tags (FileCabinet *fc, file_id_t slot_id)
Expand Down
8 changes: 4 additions & 4 deletions file_cabinet.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ typedef struct FilesIter
}\
}

FileCabinet *file_cabinet_new0 (sqlite3 *db, GHashTable *files);
FileCabinet *file_cabinet_new (sqlite3 *db);
FileCabinet *file_cabinet_init (FileCabinet *res);
void file_cabinet_destroy (FileCabinet *fc);
Expand All @@ -50,6 +51,8 @@ void file_cabinet_delete_file(FileCabinet *fc, File *f);
/* Returns the keyed file slot as a GList */
GList *file_cabinet_get_drawer_l (FileCabinet *fc, file_id_t slot_id);
GList *file_cabinet_get_drawer_tags (FileCabinet *fc, file_id_t slot_id);
/* Returns files without any tags */
GList *file_cabinet_get_untagged_files (FileCabinet *fc);

int file_cabinet_drawer_size (FileCabinet *fc, file_id_t key);

Expand All @@ -58,9 +61,6 @@ void file_cabinet_remove_drawer (FileCabinet *fc, file_id_t slot_id);
/* returns the number of drawers */
gulong file_cabinet_size (FileCabinet *fc);

File *file_cabinet_lookup_file (FileCabinet *fc, tagdb_key_t tag_id, char *name);
File *file_cabinet_get_file_by_id(FileCabinet *fc, file_id_t id);
/* Gets the tags shared in the tag unions of every drawer named by `key' */
file_id_t file_cabinet_max_id (FileCabinet *fc);
File *file_cabinet_lookup_file (FileCabinet *fc, tagdb_key_t tag_id, const char *name);

#endif /* FILE_CABINET_H */
14 changes: 12 additions & 2 deletions sql.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,21 @@ char *upgrade_list [] =

"drop table file_tag_old;"
,
"alter table file_tag rename to file_tag_old;"
"create table file_tag(file integer not null, tag integer not null, value blob,"
" primary key (file,tag),"
" foreign key (file) references file(id),"
" foreign key (tag) references tag(id));"

"insert into file_tag"
" select file, tag, value from file_tag_old where tag is not null;"

"drop table file_tag_old;"
};

char *tables =
/* a table associating tags to files */
"create table IF NOT EXISTS file_tag(file integer, tag integer, value blob,"
"create table IF NOT EXISTS file_tag(file integer not null, tag integer not null, value blob,"
" primary key (file,tag),"
" foreign key (file) references file(id),"
" foreign key (tag) references tag(id));"
Expand Down Expand Up @@ -98,7 +108,7 @@ int _sql_step (sqlite3_stmt *stmt, const char *file, int line_number)
{
sqlite3 *db = sqlite3_db_handle(stmt);
const char *msg = sqlite3_errmsg(db);
log_msg1(ERROR, file, line_number, "sqlite3_step:We couldn't complete the statement: %s(%d)", msg, status);
log_msg1(ERROR, file, line_number, "sqlite3_step: We couldn't complete the statement: %s(%d)", msg, status);
return status;
}
else
Expand Down
2 changes: 1 addition & 1 deletion sql.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ int try_upgrade_db0 (sqlite3 *db, int target_version);
* tables because it's being managed differently, even if the schema remains the same, the DB_VERSION must be
* incremented.
*/
#define DB_VERSION 2
#define DB_VERSION 3
/* The string version of DB_VERSION */
#define xstr(s) str(s)
#define str(s) #s
Expand Down
Loading

0 comments on commit b8f9878

Please sign in to comment.