diff --git a/sql/clear_tables.sql b/sql/clear_tables.sql new file mode 100644 index 0000000..9829ca0 --- /dev/null +++ b/sql/clear_tables.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE PROCEDURE clear_tables() AS $$ + DELETE FROM artist_website; + DELETE FROM website; + DELETE FROM artist; +$$ LANGUAGE SQL; \ No newline at end of file diff --git a/sql/convert_artist_name.sql b/sql/convert_artist_name.sql new file mode 100644 index 0000000..223bf24 --- /dev/null +++ b/sql/convert_artist_name.sql @@ -0,0 +1,29 @@ +CREATE OR REPLACE FUNCTION convert_artist_name(artist_name text) RETURNS text AS $$ +BEGIN + artist_name = trim(artist_name); + + -- partially deal with url encoding + artist_name = regexp_replace(artist_name, '%(20|21|22|23|24|26|27|28|29|2A|2C|2E|2F|3B|3C|3E|3F|5B|5C|5D|5E|60|7B|7C|7D|7E)|artist', '', 'g'); + artist_name = regexp_replace(artist_name, '%25', '%', 'g'); + artist_name = regexp_replace(artist_name, '%2B', '+', 'g'); + artist_name = regexp_replace(artist_name, '%2D', '-', 'g'); + artist_name = regexp_replace(artist_name, '%2E', '.', 'g'); + artist_name = regexp_replace(artist_name, '%2F', '/', 'g'); + artist_name = regexp_replace(artist_name, '%3A', ':', 'g'); + artist_name = regexp_replace(artist_name, '%3D', '=', 'g'); + artist_name = regexp_replace(artist_name, '%40', '@', 'g'); + artist_name = regexp_replace(artist_name, '%5F', '_', 'g'); + + -- check if any other url encoding still exists + IF regexp_count(artist_name, '%[0-9A-Fa-f]{2}') <> 0 THEN + RAISE EXCEPTION 'Artist % contains unconvertable percent-encoded Unicode', artist_name; + END IF; + + artist_name = rtrim(artist_name, '-_'); + + -- lower has to be last, because it can and will fuck everything else up + artist_name = lower(artist_name); + + RETURN artist_name; +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/sql/create_tables.sql b/sql/create_tables.sql new file mode 100644 index 0000000..57f8b26 --- /dev/null +++ b/sql/create_tables.sql @@ -0,0 +1,25 @@ +CREATE OR REPLACE PROCEDURE create_tables() AS $$ + DROP TABLE IF EXISTS artist_website; + DROP TABLE IF EXISTS website; + DROP TABLE IF EXISTS artist; + + CREATE TABLE IF NOT EXISTS artist ( + id serial PRIMARY KEY, + name text NOT NULL + ); + + CREATE TABLE IF NOT EXISTS website ( + id serial PRIMARY KEY, + name text NOT NULL + ); + + CREATE TABLE IF NOT EXISTS artist_website ( + id serial PRIMARY KEY, + url text NOT NULL, + folder_path text, + website_id integer, + artist_id integer, + CONSTRAINT aw_artist FOREIGN KEY (artist_id) REFERENCES artist (ID), + CONSTRAINT aw_website FOREIGN KEY (website_id) REFERENCES website (ID) + ); +$$ LANGUAGE SQL; \ No newline at end of file diff --git a/sql/get_artist_name.sql b/sql/get_artist_name.sql new file mode 100644 index 0000000..a3697c9 --- /dev/null +++ b/sql/get_artist_name.sql @@ -0,0 +1,25 @@ +CREATE OR REPLACE FUNCTION get_artist_name(in_url text) RETURNS text AS $$ +DECLARE + artist_name text; +BEGIN + in_url = trim(in_url); + + IF regexp_like(in_url, 'rule34\.xxx') THEN + artist_name = substring(in_url from 52); + ELSIF regexp_like(in_url, 'allthefallen\.moe') THEN + artist_name = substring(in_url from 43); + ELSIF regexp_like(in_url, 'e621\.net') THEN + artist_name = substring(in_url from 29); + ELSIF regexp_like(in_url, 'gelbooru\.com') THEN + artist_name = substring(in_url from 54); + ELSIF regexp_like(in_url, 'hypnohub\.net') THEN + artist_name = substring(in_url from 54); + ELSIF regexp_like(in_url, 'konachan\.com') THEN + artist_name = substring(in_url from 32); + ELSE + RAISE EXCEPTION 'Cannot extract usable artist_name from url: %', in_url; + END IF; + + RETURN artist_name; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; diff --git a/sql/get_urls_and_paths.sql b/sql/get_urls_and_paths.sql new file mode 100644 index 0000000..7fadc8a --- /dev/null +++ b/sql/get_urls_and_paths.sql @@ -0,0 +1,19 @@ +CREATE OR REPLACE FUNCTION get_urls_and_paths(website_name text) +RETURNS TABLE ( + out_url text, + out_website_name text +) AS +$$ +BEGIN + RETURN QUERY SELECT aw.url, aw.folder_path + FROM artist_website aw + INNER JOIN website w ON aw.website_id = w.id + WHERE w.name ILIKE website_name; + + IF NOT FOUND THEN + RAISE EXCEPTION 'No urls and paths found for website: %', website_name; + END IF; + + RETURN; +END; +$$ LANGUAGE plpgsql; diff --git a/sql/get_website_name.sql b/sql/get_website_name.sql new file mode 100644 index 0000000..a33a85b --- /dev/null +++ b/sql/get_website_name.sql @@ -0,0 +1,35 @@ +CREATE OR REPLACE FUNCTION get_website_name(in_url text) RETURNS text AS $$ +DECLARE + website_name text; +BEGIN + in_url = trim(in_url); + + IF regexp_like(in_url, 'rule34\.xxx') THEN + website_name = 'Rule34'; + ELSIF regexp_like(in_url, 'kemono\.su') THEN + website_name = 'Kemono'; + ELSIF regexp_like(in_url, 'coomer\.su') THEN + website_name = 'Coomer'; + ELSIF regexp_like(in_url, 'gofile\.io') THEN + website_name = 'GoFile'; + ELSIF regexp_like(in_url, 'allthefallen\.moe') THEN + website_name = 'ATF'; + ELSIF regexp_like(in_url, 'e621\.net') THEN + website_name = 'e621'; + ELSIF regexp_like(in_url, 'gelbooru\.com') THEN + website_name = 'Gelbooru'; + ELSIF regexp_like(in_url, 'hypnohub\.net') THEN + website_name = 'HypnoHub'; + ELSIF regexp_like(in_url, 'konachan\.com') THEN + website_name = 'Konachan'; + ELSIF regexp_like(in_url, 'pixiv\.net') THEN + website_name = 'Pixiv'; + ELSIF regexp_like(in_url, 'fantia\.jp') THEN + website_name = 'Fantia'; + ELSE + RAISE EXCEPTION 'Domain % is currently not supported', in_url; + END IF; + + RETURN website_name; +END; +$$ LANGUAGE plpgsql IMMUTABLE STRICT; diff --git a/sql/set_artist_name.sql b/sql/set_artist_name.sql new file mode 100644 index 0000000..77d81d5 --- /dev/null +++ b/sql/set_artist_name.sql @@ -0,0 +1,23 @@ +CREATE OR REPLACE PROCEDURE set_artist_name(in_url text) AS $$ +DECLARE + temp_artist_id integer; + temp_artist_name text; + temp_artist artist%ROWTYPE; +BEGIN + temp_artist_name = get_artist_name(in_url); + temp_artist_name = convert_artist_name(temp_artist_name); + + SELECT * INTO temp_artist FROM artist + WHERE artist.name = temp_artist_name; + + IF NOT FOUND THEN + INSERT INTO artist (name) VALUES (temp_artist_name); + END IF; + + SELECT artist.id INTO temp_artist_id FROM artist + WHERE artist.name = temp_artist_name; + + UPDATE artist_website SET artist_id = temp_artist_id + WHERE url = in_url; +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/sql/set_folder_path.sql b/sql/set_folder_path.sql new file mode 100644 index 0000000..80a02cd --- /dev/null +++ b/sql/set_folder_path.sql @@ -0,0 +1,26 @@ +CREATE OR REPLACE PROCEDURE set_folder_path(in_url text) AS $$ +DECLARE + temp_folder_path text; + aw_artist_id integer; + aw_website_id integer; + artist_name text; + website_name text; +BEGIN + SELECT aw.artist_id INTO aw_artist_id FROM artist_website aw + WHERE aw.url = in_url; + + SELECT aw.website_id INTO aw_website_id FROM artist_website aw + WHERE aw.url = in_url; + + SELECT artist.name INTO artist_name FROM artist + WHERE artist.id = aw_artist_id; + + SELECT website.name INTO website_name FROM website + WHERE website.id = aw_website_id; + + temp_folder_path = concat('Artist/', artist_name, '/', website_name); + + UPDATE artist_website SET folder_path = temp_folder_path + WHERE url = in_url; +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/sql/set_stuff_trigger.sql b/sql/set_stuff_trigger.sql new file mode 100644 index 0000000..45f1cab --- /dev/null +++ b/sql/set_stuff_trigger.sql @@ -0,0 +1,25 @@ +CREATE OR REPLACE FUNCTION set_stuff() RETURNS trigger AS $$ +DECLARE + temp_url text; +BEGIN + SELECT aw.url INTO temp_url FROM artist_website aw + WHERE aw.id = NEW.id; + + IF NOT FOUND THEN + RAISE EXCEPTION 'No url has been found'; + END IF; + + CALL set_artist_name(NEW.url); + CALL set_website_name(NEW.url); + CALL set_folder_path(NEW.url); + + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- DROP TRIGGER IF EXISTS set_stuff_trigger ON artist_website; + +CREATE OR REPLACE TRIGGER set_stuff_trigger AFTER INSERT ON artist_website +FOR EACH ROW EXECUTE PROCEDURE set_stuff(); + +ALTER TABLE artist_website ENABLE TRIGGER set_stuff_trigger; \ No newline at end of file diff --git a/sql/set_website_name.sql b/sql/set_website_name.sql new file mode 100644 index 0000000..dbc2a64 --- /dev/null +++ b/sql/set_website_name.sql @@ -0,0 +1,22 @@ +CREATE OR REPLACE PROCEDURE set_website_name(in_url text) AS $$ +DECLARE + temp_website_id integer; + temp_website_name text; + temp_website website%ROWTYPE; +BEGIN + temp_website_name = get_website_name(in_url); + + SELECT * INTO temp_website FROM website + WHERE website.name = temp_website_name; + + IF NOT FOUND THEN + INSERT INTO website (name) VALUES (temp_website_name); + END IF; + + SELECT website.id INTO temp_website_id FROM website + WHERE website.name = temp_website_name; + + UPDATE artist_website SET website_id = temp_website_id + WHERE url = in_url; +END; +$$ LANGUAGE plpgsql;