about summary refs log tree commit diff
diff options
context:
space:
mode:
author Přemysl Eric Janouch <p@janouch.name> 2023-12-10 10:44:17 +0100
committer Přemysl Eric Janouch <p@janouch.name> 2023-12-10 10:44:17 +0100
commit 1f571a903dcf9dee07a1b1c464ff4e472ccc7abe (patch)
tree 29d2e9660dce79a5572e6b8e7c623cfe3798b2b9
parent c71cf11fe495b250b6d057963fa0416e706e963b (diff)
download gallery-1f571a903dcf9dee07a1b1c464ff4e472ccc7abe.tar.gz
gallery-1f571a903dcf9dee07a1b1c464ff4e472ccc7abe.tar.xz
gallery-1f571a903dcf9dee07a1b1c464ff4e472ccc7abe.zip
Deep thought
-rw-r--r-- initialize.sql 35
-rw-r--r-- main.go 4
2 files changed, 28 insertions, 11 deletions
diff --git a/initialize.sql b/initialize.sql
index 0f43af5..63632c2 100644
--- a/initialize.sql
+++ b/initialize.sql
@@ -6,7 +6,7 @@ CREATE TABLE IF NOT EXISTS image(
PRIMARY KEY (sha1)
) STRICT;
-CREATE INDEX IF NOT EXISTS image_dhash ON image(dhash, sha1);
+CREATE INDEX IF NOT EXISTS image_dhash_idx ON image(dhash, sha1);
--
@@ -17,9 +17,13 @@ CREATE TABLE IF NOT EXISTS directory(
PRIMARY KEY (id)
) STRICT;
-CREATE UNIQUE INDEX IF NOT EXISTS directory_parent ON directory(parent, name);
+CREATE UNIQUE INDEX IF NOT EXISTS directory_parent_idx
+ON directory(parent, name);
CREATE TABLE IF NOT EXISTS entry(
+ -- FIXME: I want a nullable parent, but that can't be a primary key.
+ -- - Perhaps have an INTEGER for the PK, and use a UNIQUE INDEX.
+ -- - Alternatively, create a directory record for the root.
parent INTEGER REFERENCES directory(id),
name TEXT NOT NULL, -- last FS path component
mtime INTEGER NOT NULL, -- Unix time of last modification in seconds
@@ -27,22 +31,31 @@ CREATE TABLE IF NOT EXISTS entry(
PRIMARY KEY (parent, name)
) STRICT;
-CREATE INDEX IF NOT EXISTS entry_sha1 ON entry(sha1, parent, name);
+CREATE INDEX IF NOT EXISTS entry_sha1_idx ON entry(sha1, parent, name);
--
-CREATE TABLE IF NOT EXISTS image_tag(
- sha1 TEXT NOT NULL REFERENCES image(sha1),
- tag TEXT NOT NULL,
- PRIMARY KEY (sha1)
+-- These could also contain a description. In the future.
+CREATE TABLE IF NOT EXISTS tag_space(
+ id INTEGER NOT NULL,
+ name TEXT NOT NULL,
+ PRIMARY KEY (id)
) STRICT;
--- XXX: Perhaps this should be more like namespaces.
-CREATE TABLE IF NOT EXISTS image_autotag(
+CREATE UNIQUE INDEX IF NOT EXISTS tag_space_name_idx ON tag_space(name);
+
+CREATE TABLE IF NOT EXISTS tag(
sha1 TEXT NOT NULL REFERENCES image(sha1),
+ -- FIXME: I want a nullable tag space, but that can't be a primary key.
+ -- - Perhaps have an INTEGER for the PK, and use a UNIQUE INDEX.
+ -- - (tag, space) pairs could generally use a separate table,
+ -- so that the TEXT column is deduplicated (or rather compressed).
+ -- That table just needs garbage collection.
+ space INTEGER REFERENCES tag_space(id),
tag TEXT NOT NULL,
weight REAL NOT NULL, -- 0..1 normalized weight assigned to tag
- PRIMARY KEY (sha1, tag)
+ PRIMARY KEY (sha1, space, tag)
) STRICT;
-CREATE INDEX IF NOT EXISTS image_autotag_tag ON image_autotag(tag, sha1);
+CREATE INDEX IF NOT EXISTS tag_space_tag_idx ON tag(space, tag);
+CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);
diff --git a/main.go b/main.go
index 45a5b51..598c5c5 100644
--- a/main.go
+++ b/main.go
@@ -456,6 +456,8 @@ func (i *importer) Import(path string) error {
return err
}
+ // FIXME: This disallows any entries directly in the root.
+ // TODO: Turn this into an upsert statement.
_, err = tx.Exec(`INSERT INTO entry(parent, name, mtime, sha1)
VALUES (?, ?, ?, ?)`, dbParent, dbBasename, s.ModTime().Unix(), hexSHA1)
if err != nil {
@@ -705,6 +707,8 @@ func cmdDhash(args []string) error {
defer pb.Stop()
// TODO: Also run the hasher in parallel, once it becomes a problem.
+ // And/or run it in batches, since start-up time of the hasher
+ // poses considerable overhead with large amounts of images.
for _, sha1 := range hexSHA1 {
pathThumb := thumbPath(sha1)
hash, err := makeDhash(hasher, pathThumb)