mirror of
https://gist.github.com/6ba37e4d4084e858f917e271550ce5f6.git
synced 2024-09-20 00:34:20 +03:00
Initial commit
This commit is contained in:
commit
40aac6203b
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
__pycache__
|
||||
input
|
||||
library
|
||||
logs
|
||||
images.db
|
38
database.py
Normal file
38
database.py
Normal file
@ -0,0 +1,38 @@
|
||||
from datetime import datetime
|
||||
import sqlite3
|
||||
import logging
|
||||
|
||||
class Database:
    """SQLite-backed record of the Danbooru posts that were already imported.

    Every operation opens its own short-lived connection and closes it again,
    even when the statement fails, so an instance never holds an open handle
    between calls.
    """

    def __init__(self, db_name='images.db'):
        """Remember the database file and make sure the schema exists.

        Args:
            db_name: Path of the SQLite file. Defaults to ``images.db``,
                resolved against the current working directory.
        """
        self.db_name = db_name
        self.__create_tables()

    def __create_tables(self):
        """Create the ``danbooru`` table when it does not exist yet."""
        conn = sqlite3.connect(self.db_name)
        try:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS danbooru (
                    id INTEGER PRIMARY KEY NOT NULL UNIQUE,
                    tags TEXT NOT NULL,
                    created_at TIMESTAMP
                );
            """)
            conn.commit()
        finally:
            # Release the file handle even if the DDL fails.
            conn.close()

    def is_exists(self, id) -> bool:
        """Tell whether a post id has already been stored.

        Args:
            id: Danbooru post id to look up.

        Returns:
            ``True`` when a row with this id exists, ``False`` otherwise.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            row = conn.execute(
                "SELECT EXISTS(SELECT 1 FROM danbooru WHERE id=?)", (id, )
            ).fetchone()
        finally:
            conn.close()
        return bool(row[0])

    def add(self, id, tags):
        """Store a post id together with its tag string and the current time.

        Args:
            id: Danbooru post id; it is the primary key of the table.
            tags: Tag string to store for the post.

        Raises:
            sqlite3.IntegrityError: If a row with this id already exists.
        """
        # Same text the sqlite3 default datetime adapter would produce
        # (ISO format with a space separator), written explicitly because
        # that implicit adapter is deprecated since Python 3.12.
        created_at = datetime.now().isoformat(" ")
        sql = 'INSERT INTO danbooru(id, tags, created_at) VALUES (?,?,?)'
        conn = sqlite3.connect(self.db_name)
        try:
            conn.execute(sql, (id, tags, created_at))
            conn.commit()
        finally:
            conn.close()
|
||||
|
17
iqdb.py
Normal file
17
iqdb.py
Normal file
@ -0,0 +1,17 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import logging
|
||||
import requests
|
||||
|
||||
class Iqdb:
    """Reverse image search against https://iqdb.org/."""

    def search(self, file):
        """Upload an image to IQDB and return the first Danbooru post link.

        Args:
            file: Path of the local image to look up.

        Returns:
            The URL of the first result anchor that points to a Danbooru
            post (protocol-relative links are turned into ``https:`` URLs),
            or ``None`` when no result links to Danbooru.

        Raises:
            OSError: If the file cannot be opened.
            requests.RequestException: On network failure, timeout or an
                HTTP error status.
        """
        logging.info('Searching %s', file)
        # Keep the upload handle inside a ``with`` so it is closed as soon
        # as the request finishes, whether it succeeds or not.
        with open(file, 'rb') as image:
            resp = requests.post(
                'https://iqdb.org/', files={'file': image}, timeout=10)
        # An error page contains no results; without this check a server
        # failure would be indistinguishable from "image not found".
        resp.raise_for_status()

        doc = BeautifulSoup(resp.text, 'html.parser')
        for tag in doc.select(".image a"):
            url = tag.get("href")
            # Anchors without an href yield None; skip them instead of
            # failing on the substring test.
            if not url or "danbooru.donmai.us/posts" not in url:
                continue
            if url.startswith("//"):
                url = "https:" + url
            return url
        return None
|
54
library.py
Normal file
54
library.py
Normal file
@ -0,0 +1,54 @@
|
||||
from tags import Tags
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
import logging
|
||||
import os
|
||||
|
||||
class Library:
    """Sorted image library rooted at a single directory.

    Images are filed under ``<root>/<copyright>/<character>``. Images whose
    copyright string is exactly ``original`` go to
    ``<root>/_originals/<artist>``, and images that could not be matched are
    parked in ``<root>/orphan``.
    """

    def __init__(self, dir_root: Path):
        """Remember the library root and create its ``orphan`` folder.

        Args:
            dir_root: Root directory of the library; created if missing.
        """
        self.dir_root = dir_root
        self.dir_orphan = Path(dir_root, 'orphan')
        self.dir_orphan.mkdir(exist_ok=True, parents=True)

    def move_to_orphan(self, p: Path):
        """Move a file into the ``orphan`` folder.

        Args:
            p: File to move.

        Raises:
            shutil.Error: If a file with the same name is already there.
        """
        logging.info("%s move to orphan", p)
        shutil.move(os.fspath(p), os.fspath(self.dir_orphan))

    def move(self, p: Path, tags: "Tags"):
        """Move a file into the library folder derived from its tags.

        Args:
            p: File to move.
            tags: Tag set of the image; ``copyrights``, ``characters`` and
                ``artists`` are read as space-separated strings.

        Raises:
            shutil.Error: If a file with the same name is already there.
        """
        new_path = self.__compute_path(tags)
        new_path.mkdir(exist_ok=True, parents=True)
        logging.info("%s move to %s", p.name, new_path)
        shutil.move(os.fspath(p), os.fspath(new_path))

    def __compute_path(self, tags: "Tags") -> Path:
        """Derive the target directory for an image from its tags.

        Only the first copyright and the first artist are used. With exactly
        one character the folder is named after it (minus the copyright
        qualifier); with several characters the image goes to ``_multiple``.
        """
        p = self.dir_root
        if tags.copyrights == 'original':
            # Originals are grouped by artist
            p = p / "_originals"
            if tags.artists != "":
                artist = tags.artists.split(" ")[0]
                p = p / self.__sanitize(artist)
            return p

        # Main section
        copyright = ""
        if tags.copyrights != "":
            copyright = tags.copyrights.split(" ")[0]
            p = p / self.__sanitize(copyright)
        if tags.characters == "":
            return p

        # Characters section
        characters = tags.characters.split(" ")
        if len(characters) != 1:
            return p / "_multiple"
        # Drop the copyright from the character tag (e.g. "name_(series)")
        # together with the parentheses that are left behind.
        character = characters[0] \
            .replace(copyright, "") \
            .replace("(" + copyright + ")", "") \
            .replace("()", "") \
            .strip()
        return p / self.__sanitize(character)

    def __sanitize(self, s: str) -> str:
        """Turn a tag into a safe single path component.

        Keeps alphanumerics and ``._-()``, shows underscores as spaces and
        trims spaces and dots from both ends. Trimming dots keeps a tag from
        resolving to ``.`` or ``..`` (which would leave the intended folder)
        and avoids trailing dots, which Windows does not allow in names.
        """
        kept = "".join(x for x in s if x.isalnum() or x in "._-()")
        return kept.replace("_", " ").strip(" .")
|
98
metadata.py
Normal file
98
metadata.py
Normal file
@ -0,0 +1,98 @@
|
||||
from typing import Optional, Union
|
||||
from tags import Tags
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import fluentpy as _
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
import requests
|
||||
import subprocess
|
||||
|
||||
class Metadata:
    """Fetch a Danbooru post, download its image and embed the tags in it.

    The image is written to ``tmp.jpg`` inside the temporary directory by
    ImageMagick (``magick``), tagged with ``exiftool`` and finally renamed
    to a descriptive file name in the same directory.
    """

    def __init__(self, dir_tmp: Path):
        """Remember the working directory.

        Args:
            dir_tmp: Directory that receives the temporary image and the
                exiftool argument file.
        """
        self.dir_tmp = dir_tmp
        self.tmp_image_file = Path(self.dir_tmp, "tmp.jpg")

    def process(self, url: str) -> Optional[tuple]:
        """Download and tag the image of one post.

        Args:
            url: URL of the post page; ``.json`` is appended to query the
                post metadata.

        Returns:
            A ``(image_path, tags)`` tuple, or ``None`` when the image could
            not be downloaded.
        """
        logging.info("Retrieving metadata for %s", url)
        meta = self.__get_metadata(url)
        if not self.__download_file(meta):
            logging.warning("Download failed")
            return None
        return self.__write_tags(url, meta)

    def __get_metadata(self, url: str) -> dict:
        """Return the decoded JSON document of the post."""
        # A timeout keeps one stalled request from blocking the whole run.
        return requests.get(url + ".json", timeout=10).json()

    def __download_file(self, r: dict) -> bool:
        """Download the image described by the post metadata.

        Returns:
            ``True`` when the image was written to ``self.tmp_image_file``;
            ``False`` for unsupported file types, missing dimensions, a
            missing or non-HTTP file URL, or a failed conversion.
        """
        ext = r.get("file_ext", "")
        # ``or 0`` also covers keys that are present but null in the JSON.
        w = int(r.get("image_width") or 0)
        h = int(r.get("image_height") or 0)
        if ext not in ("jpg", "jpeg", "png", "webp") or w == 0 or h == 0:
            return False

        file_url = r.get("file_url")
        # The value goes straight to ImageMagick as its input argument, so
        # only accept plain web URLs and nothing it could read as a local
        # file or a special input specifier.
        if not file_url or not file_url.startswith(("http://", "https://")):
            return False
        file_size_kb = int(r.get('file_size') or 0) / 1024

        logging.info("Downloading image")
        recompress = self.__need_recompress(ext, w, h, file_size_kb)
        return self.__download(file_url, recompress=recompress)

    def __need_recompress(self, ext, w, h, size_kb):
        """Tell whether a JPEG is heavy enough to be re-encoded.

        True for ``jpg`` files larger than 1400 KB whose width and height
        are both below 2500 pixels.
        """
        return ext == 'jpg' and size_kb > 1400 and w < 2500 and h < 2500

    def __download(self, img_url: str, recompress: bool = False):
        """Let ImageMagick fetch, shrink and save the image as ``tmp.jpg``.

        Args:
            img_url: URL of the image file.
            recompress: Re-encode with quality 80 when ``True``.

        Returns:
            ``True`` when ``magick`` exited with status 0.
        """
        # Command-line arguments must be strings; an int here makes
        # subprocess raise TypeError before the command even starts.
        quality_args = ['-quality', '80'] if recompress else []
        proc = subprocess.run([
            'magick', img_url,
            '-resize', '2500x2500>',
            *quality_args, str(self.tmp_image_file)
        ], stdout=subprocess.DEVNULL)  # discard output; an unread PIPE can block
        return proc.returncode == 0

    def __write_tags(self, url: str, r: dict) -> tuple:
        """Embed the tags and the source URL, then give the file its name.

        Args:
            url: Post URL, stored as the image description.
            r: Post metadata holding the ``tag_string_*`` fields.

        Returns:
            A ``(result_file, tags)`` tuple.
        """
        tags = Tags(
            r.get('tag_string_general', ""),
            r.get('tag_string_copyright', ""),
            r.get('tag_string_character', ""),
            r.get('tag_string_artist', ""),
        )

        # One exiftool argument per line: every tag becomes an IPTC keyword
        # and the post URL is written to the three description fields.
        lines = ["-IPTC:keywords=" + tag for tag in tags.tags]
        lines.append("-Exif:ImageDescription=" + url)
        lines.append("-Iptc:Caption-Abstract=" + url)
        lines.append("-Xmp:Description=" + url)
        tags_file = Path(self.dir_tmp, "tags.txt")
        # Explicit encoding: the locale default may be unable to encode
        # some tag characters.
        with open(tags_file, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))

        logging.info("Writing tags")
        proc = subprocess.run([
            'exiftool', '-q', '-overwrite_original',
            '-@', str(tags_file),
            str(self.tmp_image_file)
        ], stdout=subprocess.DEVNULL)
        if proc.returncode != 0:
            # Tagging stays best-effort: keep the image, but leave a trace.
            logging.warning("exiftool exited with status %d", proc.returncode)

        filename = self.__format_filename(tags)
        result_file = Path(self.tmp_image_file.parent, filename)
        self.tmp_image_file.rename(result_file)
        return result_file, tags

    def __format_filename(self, tags: "Tags"):
        """Build ``<copyright> <characters> by <artist> at <timestamp>.jpg``.

        Uses the first copyright, up to two characters and the first artist.
        Characters that are not valid in file names (such as ``/`` or ``:``,
        which do occur in tags) are replaced with ``_`` so the name stays a
        single path component.
        """
        filename = '{} {} by {} at {}.jpg'.format(
            tags.copyrights.split(" ")[0] or "",
            ", ".join(tags.characters.split(" ")[:2]),
            tags.artists.split(" ")[0] or "",
            datetime.now().strftime('%Y%m%d_%H%M%S')
        )
        filename = re.sub(r'[\\/:*?"<>|]', '_', filename)
        return re.sub(r'\s+', ' ', filename).strip()
|
68
picsorter.py
Normal file
68
picsorter.py
Normal file
@ -0,0 +1,68 @@
|
||||
from iqdb import Iqdb
|
||||
from library import Library
|
||||
from metadata import Metadata
|
||||
from database import Database
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import re
|
||||
import logging
|
||||
import time
|
||||
|
||||
class PicSorter:
    """Sort the images of an input folder into a tagged library.

    Each ``*.jpg`` below the input folder is looked up on IQDB; on a
    Danbooru match the post image is downloaded, tagged and filed in the
    library, otherwise the input file is moved to the library's orphan
    folder.
    """

    # Extracts the numeric post id (three or more digits) from a post URL.
    _POST_ID_RE = re.compile(r".*posts\/(\d{3,})")

    def __init__(self, dir_tmp=Path('R:/'), dir_input=Path('./input'),
                 dir_logs=Path('./logs'), dir_library=Path('./library')):
        """Set up the working folders and the log file.

        Args:
            dir_tmp: Scratch directory for the downloaded image.
            dir_input: Folder scanned recursively for ``*.jpg`` files.
            dir_logs: Folder that receives one log file per day.
            dir_library: Root of the sorted library.
        """
        self.dir_tmp = Path(dir_tmp)
        self.dir_input = Path(dir_input)
        self.dir_logs = Path(dir_logs)
        self.dir_library = Path(dir_library)
        self.setup_folders()
        self.setup_logging()

    def setup_folders(self):
        """Create the working folders that do not exist yet."""
        for folder in (self.dir_tmp, self.dir_input,
                       self.dir_logs, self.dir_library):
            folder.mkdir(exist_ok=True, parents=True)

    def setup_logging(self):
        """Send log records to ``<dir_logs>/<YYYY-MM-DD>.log``."""
        logfile = Path(self.dir_logs, datetime.now().strftime('%Y-%m-%d.log'))
        logging.basicConfig(
            filename=logfile,
            level=logging.INFO,
            format='%(asctime)s %(levelname)s %(module)s: %(message)s',
            datefmt='%H:%M:%S',
        )

    @classmethod
    def _parse_post_id(cls, url):
        """Return the post id contained in ``url``, or ``None`` if absent."""
        m = cls._POST_ID_RE.search(url)
        return int(m.group(1)) if m else None

    def process_folder(self):
        """Process every ``*.jpg`` found below the input folder.

        Raises:
            Exception: Any error raised while handling a file is logged
                with its traceback and then propagated, which stops the run.
        """
        iqdb = Iqdb()
        library = Library(self.dir_library)
        metadata = Metadata(self.dir_tmp)
        db = Database()
        for filename in self.dir_input.rglob('*.jpg'):
            print("Process ", filename)
            try:
                url = iqdb.search(filename)
                if url is None:
                    logging.warning("%s not found", filename)
                    library.move_to_orphan(filename)
                    continue

                post_id = self._parse_post_id(url)
                if post_id is None:
                    logging.warning("No post id in %s", url)
                    continue
                if db.is_exists(post_id):
                    logging.info("Skipping exists post %d", post_id)
                    continue

                meta_result = metadata.process(url)
                if meta_result is None:
                    continue
                image_path, tags = meta_result
                library.move(image_path, tags)
                db.add(post_id, tags.tags_string)
                # Pause between successful downloads.
                time.sleep(5)
            except Exception:
                # Record which file failed before the error ends the run.
                logging.exception("Failed to process %s", filename)
                raise
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: build the sorter and run one pass over the
    # input folder.
    sorter = PicSorter()
    sorter.process_folder()
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
fluentpy==2.0
|
||||
PyYAML==5.4.1
|
||||
requests==2.24.0
|
29
tags.py
Normal file
29
tags.py
Normal file
@ -0,0 +1,29 @@
|
||||
import fluentpy as _
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
@dataclass
class Tags:
    """Tag strings of one post, plus the merged tag list derived from them.

    The four constructor arguments are whitespace-separated tag strings.
    ``tags`` and ``tags_string`` are computed after initialisation.
    """

    # General tags, stored without a prefix.
    general: str
    # Copyright tags; each one gets the ``copyright_`` prefix.
    copyrights: str
    # Character tags; each one gets the ``character_`` prefix.
    characters: str
    # Artist tags; each one gets the ``artist_`` prefix.
    artists: str
    # Merged list: general tags first, then the prefixed groups.
    tags: list = field(init=False)
    # ``tags`` joined with single spaces.
    tags_string: str = field(init=False)

    def __post_init__(self):
        """Compute the merged tag list and its string form."""
        self.tags = self.__union_tags()
        self.tags_string = " ".join(self.tags)

    def __union_tags(self):
        """Return general tags followed by the prefixed tag groups."""
        # ``split()`` without an argument drops empty tokens, so an empty
        # string or doubled spaces never yield an empty tag.
        tags = self.general.split()
        tags += self.__prefix_tags(self.copyrights, 'copyright_')
        tags += self.__prefix_tags(self.characters, 'character_')
        tags += self.__prefix_tags(self.artists, 'artist_')
        return tags

    def __prefix_tags(self, tags, prefix):
        """Split a tag string and put ``prefix`` in front of every tag."""
        return [prefix + tag for tag in tags.split()]
|
Loading…
Reference in New Issue
Block a user