2 files changed, 28 insertions, 11 deletions
diff --git a/initialize.sql b/initialize.sql
index 0f43af5..63632c2 100644
--- a/initialize.sql
+++ b/initialize.sql
@@ -6,7 +6,7 @@ CREATE TABLE IF NOT EXISTS image(
 	PRIMARY KEY (sha1)
 ) STRICT;
 
-CREATE INDEX IF NOT EXISTS image_dhash ON image(dhash, sha1);
+CREATE INDEX IF NOT EXISTS image_dhash_idx ON image(dhash, sha1);
 
 --
 
@@ -17,9 +17,13 @@ CREATE TABLE IF NOT EXISTS directory(
 	PRIMARY KEY (id)
 ) STRICT;
 
-CREATE UNIQUE INDEX IF NOT EXISTS directory_parent ON directory(parent, name);
+CREATE UNIQUE INDEX IF NOT EXISTS directory_parent_idx
+ON directory(parent, name);
 
 CREATE TABLE IF NOT EXISTS entry(
+	-- FIXME: I want a nullable parent, but PK columns can't be NULL.
+	--  - Perhaps have an INTEGER for the PK, and use a UNIQUE INDEX.
+	--  - Alternatively, create a directory record for the root.
 	parent   INTEGER REFERENCES directory(id),
 	name     TEXT NOT NULL,     -- last FS path component
 	mtime    INTEGER NOT NULL,  -- Unix time of last modification in seconds
@@ -27,22 +31,31 @@ CREATE TABLE IF NOT EXISTS entry(
 	PRIMARY KEY (parent, name)
 ) STRICT;
 
-CREATE INDEX IF NOT EXISTS entry_sha1 ON entry(sha1, parent, name);
+CREATE INDEX IF NOT EXISTS entry_sha1_idx ON entry(sha1, parent, name);
 
 --
 
-CREATE TABLE IF NOT EXISTS image_tag(
-	sha1     TEXT NOT NULL REFERENCES image(sha1),
-	tag      TEXT NOT NULL,
-	PRIMARY KEY (sha1)
+-- In the future, these could also contain a description.
+CREATE TABLE IF NOT EXISTS tag_space(
+	id       INTEGER NOT NULL,
+	name     TEXT NOT NULL,
+	PRIMARY KEY (id)
 ) STRICT;
 
--- XXX: Perhaps this should be more like namespaces.
-CREATE TABLE IF NOT EXISTS image_autotag(
+CREATE UNIQUE INDEX IF NOT EXISTS tag_space_name_idx ON tag_space(name);
+
+CREATE TABLE IF NOT EXISTS tag(
 	sha1     TEXT NOT NULL REFERENCES image(sha1),
+	-- FIXME: I want a nullable tag space, but PK columns can't be NULL.
+	--  - Perhaps have an INTEGER for the PK, and use a UNIQUE INDEX.
+	--  - (tag, space) pairs could generally use a separate table,
+	--    so that the TEXT column is deduplicated (or rather compressed).
+	--    That table just needs garbage collection.
+	space    INTEGER REFERENCES tag_space(id),
 	tag      TEXT NOT NULL,
 	weight   REAL NOT NULL,     -- 0..1 normalized weight assigned to tag
-	PRIMARY KEY (sha1, tag)
+	PRIMARY KEY (sha1, space, tag)
 ) STRICT;
 
-CREATE INDEX IF NOT EXISTS image_autotag_tag ON image_autotag(tag, sha1);
+CREATE INDEX IF NOT EXISTS tag_space_tag_idx ON tag(space, tag);
+CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);
diff --git a/main.go b/main.go
index 45a5b51..598c5c5 100644
--- a/main.go
+++ b/main.go
@@ -456,6 +456,8 @@ func (i *importer) Import(path string) error {
 		return err
 	}
 
+	// FIXME: This disallows any entries directly in the root.
+	// TODO: Turn this into an upsert statement.
 	_, err = tx.Exec(`INSERT INTO entry(parent, name, mtime, sha1)
 		VALUES (?, ?, ?, ?)`, dbParent, dbBasename, s.ModTime().Unix(), hexSHA1)
 	if err != nil {
@@ -705,6 +707,8 @@ func cmdDhash(args []string) error {
 	defer pb.Stop()
 
 	// TODO: Also run the hasher in parallel, once it becomes a problem.
+	// And/or run it in batches, since the start-up time of the hasher
+	// poses considerable overhead with large numbers of images.
 	for _, sha1 := range hexSHA1 {
 		pathThumb := thumbPath(sha1)
 		hash, err := makeDhash(hasher, pathThumb)