-- convert_artist_name(artist_name text) -> text
--
-- Normalises an artist name that may contain URL percent-encoding:
--   1. trims surrounding whitespace;
--   2. deletes a fixed set of percent-encoded punctuation characters and
--      every (case-sensitive) occurrence of the literal word "artist";
--   3. decodes a small set of percent-encoded characters (% + - . / : = @ _);
--   4. raises an exception if any percent-encoded sequence is still present;
--   5. strips trailing '-' and '_' characters and lowercases the result.
--
-- Hex digits of percent-escapes are matched case-insensitively (%2f == %2F).
-- Returns NULL when artist_name is NULL (every step propagates NULL and the
-- leftover-escape check is not triggered by a NULL value).
CREATE OR REPLACE FUNCTION convert_artist_name(artist_name text) RETURNS text AS $$
BEGIN
    artist_name := trim(artist_name);

    -- Partially deal with URL encoding: drop these escapes entirely
    -- (%20-%24, %26-%29, %2A, %2C, %2E, %2F, %3B, %3C, %3E, %3F, %5B-%5E,
    -- %60, %7B-%7E) together with the literal word "artist". Kept as a single
    -- pass so removals cannot splice together a new match; only the hex
    -- digits accept both cases, "artist" stays case-sensitive.
    artist_name := regexp_replace(
        artist_name,
        '%(2[0-46-9ACEFacef]|3[BCEFbcef]|5[B-Eb-e]|60|7[B-Eb-e])|artist',
        '',
        'g'
    );

    -- Decode the escapes that are kept as real characters.
    -- NOTE: %25 is decoded first, so a double-encoded sequence such as %252B
    -- ends up as '+'. Plain %2E / %2F were already deleted above, so those
    -- two decodes only fire for sequences produced by the %25 step.
    artist_name := regexp_replace(artist_name, '%25', '%', 'g');
    artist_name := regexp_replace(artist_name, '%2B', '+', 'gi');
    artist_name := regexp_replace(artist_name, '%2D', '-', 'gi');
    artist_name := regexp_replace(artist_name, '%2E', '.', 'gi');
    artist_name := regexp_replace(artist_name, '%2F', '/', 'gi');
    artist_name := regexp_replace(artist_name, '%3A', ':', 'gi');
    artist_name := regexp_replace(artist_name, '%3D', '=', 'gi');
    artist_name := regexp_replace(artist_name, '%40', '@', 'g');
    artist_name := regexp_replace(artist_name, '%5F', '_', 'gi');

    -- Any percent-escape still present is one this function cannot convert.
    -- The ~ operator only tests for existence and works on servers that
    -- predate regexp_count().
    IF artist_name ~ '%[0-9A-Fa-f]{2}' THEN
        RAISE EXCEPTION 'Artist % contains unconvertable percent-encoded Unicode', artist_name;
    END IF;

    artist_name := rtrim(artist_name, '-_');

    -- Lowercasing stays last: the "artist" match above is case-sensitive, so
    -- lowercasing any earlier would change what gets removed.
    artist_name := lower(artist_name);

    RETURN artist_name;
END;
$$ LANGUAGE plpgsql;