diff --git a/config.py b/config.py new file mode 100644 index 0000000..82ee122 --- /dev/null +++ b/config.py @@ -0,0 +1,27 @@ +from pathlib import Path + +import yaml + + +class Config: + @staticmethod + def load(path = 'config.yml'): + with open(path, 'rt', encoding='utf8') as f: + config = yaml.load(f.read(), Loader=yaml.FullLoader) + return Config(config) + + def __init__(self, config): + dirs = config.get('dirs', {}) + self.dir_tmp = Path(dirs.get('tmp', '/tmp/')) + self.dir_input = Path(dirs.get('input', './input')) + self.dir_processed = Path(dirs.get('processed', './input/processed')) + self.dir_logs = Path(dirs.get('logs', './logs')) + self.dir_library = Path(dirs.get('library', './library')) + self.__setup_folders() + + def __setup_folders(self): + self.dir_tmp.mkdir(exist_ok=True) + self.dir_processed.mkdir(exist_ok=True) + self.dir_logs.mkdir(exist_ok=True) + self.dir_library.mkdir(exist_ok=True) + diff --git a/config.yml b/config.yml new file mode 100644 index 0000000..c0a7458 --- /dev/null +++ b/config.yml @@ -0,0 +1,7 @@ +dirs: + tmp: /tmp/ + input: ./input + processed: ./input/processed + logs: ./logs + library: ./library + diff --git a/database.py b/database.py index f44b4d5..dd51576 100644 --- a/database.py +++ b/database.py @@ -1,6 +1,6 @@ -from datetime import datetime import sqlite3 -import logging +from datetime import datetime + class Database: def __init__(self): @@ -20,19 +20,18 @@ class Database: conn.commit() conn.close() - def is_exists(self, id) -> bool: + def is_exists(self, _id) -> bool: conn = sqlite3.connect(self.db_name) c = conn.cursor() - c.execute("SELECT EXISTS(SELECT 1 FROM danbooru WHERE id=?)", (id, )) + c.execute("SELECT EXISTS(SELECT 1 FROM danbooru WHERE id=?)", (_id, )) result = c.fetchone()[0] conn.close() return bool(result) - def add(self, id, tags): + def add(self, _id, tags): conn = sqlite3.connect(self.db_name) c = conn.cursor() sql = 'INSERT INTO danbooru(id, tags, created_at) VALUES (?,?,?)' - c.execute(sql, (id, tags, datetime.now())) + c.execute(sql, (_id, tags, datetime.now())) conn.commit() conn.close() - \ No newline at end of file diff --git a/iqdb.py b/iqdb.py index 7b553c4..5502b92 100644 --- a/iqdb.py +++ b/iqdb.py @@ -1,9 +1,13 @@ -from bs4 import BeautifulSoup import logging +from typing import Optional + import requests +from bs4 import BeautifulSoup + class Iqdb: - def search(self, file): + @staticmethod + def search(file: str) -> Optional[str]: logging.info('Searching %s', file) files = {'file': open(file, 'rb')} resp = requests.post('https://iqdb.org/', files=files, timeout=10) diff --git a/library.py b/library.py index 8f34d7c..70372f8 100644 --- a/library.py +++ b/library.py @@ -1,8 +1,10 @@ -from tags import Tags -from pathlib import Path -import shutil import logging import os +import shutil +from pathlib import Path + +from tags import Tags + class Library: def __init__(self, dir_root: Path): @@ -10,11 +12,11 @@ class Library: self.dir_orphan = Path(dir_root, '_orphan') self.dir_orphan.mkdir(exist_ok=True, parents=True) - def move_to_orphan(self, p: Path): + def move_to_orphan(self, p: Path) -> None: logging.info("%s move to orphan", p) shutil.move(os.fspath(p), os.fspath(self.dir_orphan)) - def move(self, p: Path, tags: Tags): + def move(self, p: Path, tags: Tags) -> None: new_path = self.__compute_path(tags) new_path.mkdir(exist_ok=True, parents=True) logging.info("%s move to %s", p.name, new_path) @@ -30,10 +32,9 @@ class Library: p = p / self.__sanitize(artist) return p # Main section - copyright = "" if tags.copyrights != "": - copyright = tags.copyrights.split(" ")[0] - p = p / self.__sanitize(copyright) + _copyright = tags.copyrights.split(" ")[0] + p = p / self.__sanitize(_copyright) if tags.characters == "": return p # Characters section @@ -44,6 +45,7 @@ class Library: p = p / "_multiple" return p - def __sanitize(self, s: str) -> str: + @staticmethod + def __sanitize(s: str) -> str: s = "".join(x for x in s if x.isalnum() or x in "._-()") return s.replace("_", " ").strip() diff --git a/metadata.py b/metadata.py index c668f69..a94cb2c 100644 --- a/metadata.py +++ b/metadata.py @@ -1,33 +1,33 @@ -from typing import Optional, Union -from tags import Tags -from pathlib import Path -from datetime import datetime -import fluentpy as _ import logging -import json import re -import requests import subprocess +from datetime import datetime +from pathlib import Path +from typing import Optional + +import fluentpy as _ +import requests + +from tags import Tags + class Metadata: def __init__(self, dir_tmp: Path): self.dir_tmp = dir_tmp self.tmp_image_file = Path(self.dir_tmp, "tmp.jpg") - def process(self, url: str) -> Optional[Union[Path, Tags]]: + def process(self, url: str) -> Optional[tuple[Path, Tags]]: logging.info("Retrieving metadata for %s", url) meta = self.__get_metadata(url) status = self.__download_file(meta) if not status: - logging.warn("Download failed") + logging.warning("Download failed") return None return self.__write_tags(url, meta) - def __get_metadata(self, url: str) -> dict: + @staticmethod + def __get_metadata(url: str) -> dict: return requests.get(url + ".json").json() - # with open('test/test.json', 'rt', encoding='utf8') as f: - # r = json.load(f) - # return r def __download_file(self, r: dict) -> bool: ext = r.get("file_ext", "") @@ -41,11 +41,12 @@ class Metadata: logging.info("Downloading image") recompress = self.__need_recompress(ext, w, h, file_size_kb) return self.__download(file_url, recompress=recompress) - - def __need_recompress(self, ext, w, h, size_kb): + + @staticmethod + def __need_recompress(ext, w, h, size_kb) -> bool: return ext == 'jpg' and size_kb > 1400 and w < 2500 and h < 2500 - - def __download(self, img_url: str, recompress: bool = False): + + def __download(self, img_url: str, recompress: bool = False) -> bool: opt_args = [] if recompress: opt_args = ['-quality', "80"] @@ -56,8 +57,9 @@ class Metadata: ], stdout=subprocess.PIPE) return ret == 0 - - def __write_tags(self, url: str, r: dict) -> tuple: + # noinspection PyCallingNonCallable + # noinspection PyProtectedMember + def __write_tags(self, url: str, r: dict) -> tuple[Path, Tags]: tag_general = r.get('tag_string_general', "") tag_copyrights = r.get('tag_string_copyright', "") tag_characters = r.get('tag_string_character', "") @@ -87,8 +89,8 @@ class Metadata: self.tmp_image_file.rename(result_file) return result_file, tags - - def __format_filename(self, tags: Tags): + @staticmethod + def __format_filename(tags: Tags): filename = '{} {} by {} at {}.jpg'.format( tags.copyrights.split(" ")[0] or "", ", ".join(tags.characters_sanitized()[:2]), diff --git a/picsorter.py b/picsorter.py index 6fc673e..eeb1055 100644 --- a/picsorter.py +++ b/picsorter.py @@ -1,57 +1,50 @@ +import logging +import os +import re +import shutil +import time +from datetime import datetime +from pathlib import Path + +from config import Config +from database import Database from iqdb import Iqdb from library import Library from metadata import Metadata -from database import Database -from datetime import datetime -from pathlib import Path -import re -import logging -import time -import shutil -import os class PicSorter: def __init__(self): - self.dir_tmp = Path('R:/') - self.dir_input = Path('R:/input') - self.dir_processed = Path('R:/processed') - self.dir_logs = Path('./logs') - self.dir_library = Path('./library') - self.setup_folders() + self.config = Config.load('config.yml') self.setup_logging() - def setup_folders(self): - self.dir_tmp.mkdir(exist_ok=True) - self.dir_processed.mkdir(exist_ok=True) - self.dir_logs.mkdir(exist_ok=True) - self.dir_library.mkdir(exist_ok=True) - def setup_logging(self): - logfile = Path(self.dir_logs, datetime.now().strftime('%Y-%m-%d.log')) + filename = datetime.now().strftime('%Y-%m-%d.log') + logfile = Path(self.config.dir_logs, filename) logging.basicConfig( - filename=logfile, + filename=os.fspath(logfile), level=logging.INFO, format='%(asctime)s %(levelname)s %(module)s: %(message)s', datefmt='%H:%M:%S', ) def process_folder(self): - iqdb = Iqdb() - library = Library(self.dir_library) - metadata = Metadata(self.dir_tmp) + config = self.config + library = Library(config.dir_library) + metadata = Metadata(config.dir_tmp) db = Database() - files = {p for p in self.dir_input.iterdir() if p.suffix in [".jpg", ".png"]} + files = {p for p in config.dir_input.iterdir() + if p.suffix in [".jpg", ".png"]} for filename in files: - print("Process ", filename) + print("Process", filename) try: - url = iqdb.search(filename) + url = Iqdb.search(filename) if url is None: - logging.warn("%s not found", filename) + logging.warning("%s not found", filename) library.move_to_orphan(Path(filename)) continue - m = re.search(r".*posts\/(\d{3,})", url) + m = re.search(r".*posts/(\d{3,})", url) if not m: continue post_id = int(m.group(1)) @@ -65,10 +58,11 @@ class PicSorter: image_path, tags = meta_result library.move(image_path, tags) db.add(post_id, tags.tags_string) - shutil.move(os.fspath(filename), os.fspath(self.dir_processed)) + shutil.move(os.fspath(filename), os.fspath(config.dir_processed)) time.sleep(5) except Exception as ex: raise ex - + + if __name__ == '__main__': PicSorter().process_folder() diff --git a/requirements.txt b/requirements.txt index 8a84da7..4117b65 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ -fluentpy==2.0 +beautifulsoup4==4.9.3 +fluentpy>=2.0 PyYAML==5.4.1 -requests==2.24.0 +requests>=2.24 diff --git a/tags.py b/tags.py index 619858c..3e440ae 100644 --- a/tags.py +++ b/tags.py @@ -1,47 +1,55 @@ -import fluentpy as _ from dataclasses import dataclass, field +import fluentpy as _ + + @dataclass class Tags: general: str copyrights: str characters: str artists: str - tags: list = field(init=False) + tags: list[str] = field(init=False) tags_string: str = field(init=False) def __post_init__(self): self.tags = self.__union_tags() self.tags_string = " ".join(self.tags) - + + # noinspection PyCallingNonCallable + # noinspection PyProtectedMember def characters_sanitized(self) -> list: if self.copyrights == "": # No need to sanitize tags return self.characters.split(" ") copyrights = self.copyrights.split(" ") return _(self.characters) \ - .split(" ") \ - .filter(lambda s: s != "") \ - .map(lambda s: self.__rename(s, copyrights)) \ - ._ - - def __rename(self, s: str, substrings: list) -> str: + .split(" ") \ + .filter(lambda s: s != "") \ + .map(lambda s: self.__rename(s, copyrights)) \ + ._ + + @staticmethod + def __rename(s: str, substrings: list[str]) -> str: for substring in substrings: s = s.replace("_("+substring+")", "") \ .replace("("+substring+")", "") \ .strip() return s - def __union_tags(self): + def __union_tags(self) -> list[str]: tags = self.general.split(" ") tags += self.__prefix_tags(self.copyrights, 'copyright_') tags += self.__prefix_tags(self.characters, 'character_') tags += self.__prefix_tags(self.artists, 'artist_') return tags - def __prefix_tags(self, tags, prefix): + # noinspection PyCallingNonCallable + # noinspection PyProtectedMember + @staticmethod + def __prefix_tags(tags: str, prefix: str) -> list[str]: return _(tags) \ - .split(" ") \ - .filter(lambda s: s != "") \ - .map(lambda s: prefix + s.strip()) \ - ._ + .split(" ") \ + .filter(lambda s: s != "") \ + .map(lambda s: prefix + s.strip()) \ + ._