diff options
-rw-r--r-- | initialize.sql | 35 | ||||
-rw-r--r-- | main.go | 4 |
2 files changed, 28 insertions, 11 deletions
diff --git a/initialize.sql b/initialize.sql index 0f43af5..63632c2 100644 --- a/initialize.sql +++ b/initialize.sql @@ -6,7 +6,7 @@ CREATE TABLE IF NOT EXISTS image( PRIMARY KEY (sha1) ) STRICT; -CREATE INDEX IF NOT EXISTS image_dhash ON image(dhash, sha1); +CREATE INDEX IF NOT EXISTS image_dhash_idx ON image(dhash, sha1); -- @@ -17,9 +17,13 @@ CREATE TABLE IF NOT EXISTS directory( PRIMARY KEY (id) ) STRICT; -CREATE UNIQUE INDEX IF NOT EXISTS directory_parent ON directory(parent, name); +CREATE UNIQUE INDEX IF NOT EXISTS directory_parent_idx +ON directory(parent, name); CREATE TABLE IF NOT EXISTS entry( + -- FIXME: I want a nullable parent, but that can't be a primary key. + -- - Perhaps have an INTEGER for the PK, and use a UNIQUE INDEX. + -- - Alternatively, create a directory record for the root. parent INTEGER REFERENCES directory(id), name TEXT NOT NULL, -- last FS path component mtime INTEGER NOT NULL, -- Unix time of last modification in seconds @@ -27,22 +31,31 @@ CREATE TABLE IF NOT EXISTS entry( PRIMARY KEY (parent, name) ) STRICT; -CREATE INDEX IF NOT EXISTS entry_sha1 ON entry(sha1, parent, name); +CREATE INDEX IF NOT EXISTS entry_sha1_idx ON entry(sha1, parent, name); -- -CREATE TABLE IF NOT EXISTS image_tag( - sha1 TEXT NOT NULL REFERENCES image(sha1), - tag TEXT NOT NULL, - PRIMARY KEY (sha1) +-- These could also contain a description. In the future. +CREATE TABLE IF NOT EXISTS tag_space( + id INTEGER NOT NULL, + name TEXT NOT NULL, + PRIMARY KEY (id) ) STRICT; --- XXX: Perhaps this should be more like namespaces. -CREATE TABLE IF NOT EXISTS image_autotag( +CREATE UNIQUE INDEX IF NOT EXISTS tag_space_name_idx ON tag_space(name); + +CREATE TABLE IF NOT EXISTS tag( sha1 TEXT NOT NULL REFERENCES image(sha1), + -- FIXME: I want a nullable tag space, but that can't be a primary key. + -- - Perhaps have an INTEGER for the PK, and use a UNIQUE INDEX. + -- - (tag, space) pairs could generally use a separate table, + -- so that the TEXT column is deduplicated (or rather compressed). + -- That table just needs garbage collection. + space INTEGER REFERENCES tag_space(id), tag TEXT NOT NULL, weight REAL NOT NULL, -- 0..1 normalized weight assigned to tag - PRIMARY KEY (sha1, tag) + PRIMARY KEY (sha1, space, tag) ) STRICT; -CREATE INDEX IF NOT EXISTS image_autotag_tag ON image_autotag(tag, sha1); +CREATE INDEX IF NOT EXISTS tag_space_tag_idx ON tag(space, tag); +CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag); @@ -456,6 +456,8 @@ func (i *importer) Import(path string) error { return err } + // FIXME: This disallows any entries directly in the root. + // TODO: Turn this into an upsert statement. _, err = tx.Exec(`INSERT INTO entry(parent, name, mtime, sha1) VALUES (?, ?, ?, ?)`, dbParent, dbBasename, s.ModTime().Unix(), hexSHA1) if err != nil { @@ -705,6 +707,8 @@ func cmdDhash(args []string) error { defer pb.Stop() // TODO: Also run the hasher in parallel, once it becomes a problem. + // And/or run it in batches, since start-up time of the hasher + // poses considerable overhead with large amounts of images. for _, sha1 := range hexSHA1 { pathThumb := thumbPath(sha1) hash, err := makeDhash(hasher, pathThumb) |