mirror of
https://gist.github.com/6ba37e4d4084e858f917e271550ce5f6.git
synced 2024-09-20 00:34:20 +03:00
Fix code style and deprecations
This commit is contained in:
parent
ee9bab992d
commit
00834d0e23
27
config.py
Normal file
27
config.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
class Config:
    """Directory settings for the application.

    Values come from the ``dirs`` mapping of a YAML document; any key that is
    missing falls back to a built-in default. All configured directories are
    created on construction.
    """

    @staticmethod
    def load(path='config.yml'):
        """Read the YAML file at *path* and build a ``Config`` from it.

        Raises whatever ``open`` raises when the file cannot be read.
        """
        with open(path, 'rt', encoding='utf8') as f:
            # NOTE(review): only plain mappings and strings are read from this
            # document; consider yaml.safe_load if the file may ever come from
            # an untrusted source.
            config = yaml.load(f, Loader=yaml.FullLoader)
        return Config(config)

    def __init__(self, config):
        """Resolve directory paths from *config* and create them.

        *config* is the parsed YAML document. ``None`` (an empty document) or
        a missing/empty ``dirs`` entry means "use the defaults".
        """
        dirs = (config or {}).get('dirs') or {}
        self.dir_tmp = Path(dirs.get('tmp', '/tmp/'))
        self.dir_input = Path(dirs.get('input', './input'))
        self.dir_processed = Path(dirs.get('processed', './input/processed'))
        self.dir_logs = Path(dirs.get('logs', './logs'))
        self.dir_library = Path(dirs.get('library', './library'))
        self.__setup_folders()

    def __setup_folders(self):
        """Create every configured directory, including missing parents."""
        folders = (
            self.dir_tmp,
            self.dir_input,
            self.dir_processed,
            self.dir_logs,
            self.dir_library,
        )
        for folder in folders:
            # parents=True: the default 'processed' path is nested inside
            # 'input', which may not exist yet on a fresh checkout.
            folder.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
7
config.yml
Normal file
7
config.yml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
dirs:
|
||||||
|
tmp: /tmp/
|
||||||
|
input: ./input
|
||||||
|
processed: ./input/processed
|
||||||
|
logs: ./logs
|
||||||
|
library: ./library
|
||||||
|
|
13
database.py
13
database.py
@ -1,6 +1,6 @@
|
|||||||
from datetime import datetime
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import logging
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
class Database:
|
class Database:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -20,19 +20,18 @@ class Database:
|
|||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
def is_exists(self, _id) -> bool:
    """Return True when the ``danbooru`` table has a row with id *_id*.

    Opens a new connection to ``self.db_name`` for the query and closes it
    again, even when the query raises.
    """
    conn = sqlite3.connect(self.db_name)
    try:
        row = conn.execute(
            "SELECT EXISTS(SELECT 1 FROM danbooru WHERE id=?)", (_id, )
        ).fetchone()
    finally:
        # Close on every path so a failing query does not leak the handle.
        conn.close()
    return bool(row[0])
|
||||||
|
|
||||||
def add(self, _id, tags):
    """Insert a row (*_id*, *tags*, current local time) into ``danbooru``.

    Opens a new connection to ``self.db_name``, commits the insert and closes
    the connection, even when the insert raises.
    """
    sql = 'INSERT INTO danbooru(id, tags, created_at) VALUES (?,?,?)'
    # Store the timestamp as text explicitly: binding a datetime object relies
    # on sqlite3's default adapter, which is deprecated since Python 3.12.
    # isoformat(" ") is the same text that adapter produced.
    created_at = datetime.now().isoformat(" ")
    conn = sqlite3.connect(self.db_name)
    try:
        conn.execute(sql, (_id, tags, created_at))
        conn.commit()
    finally:
        # Close on every path so a failing insert does not leak the handle.
        conn.close()
|
||||||
|
|
8
iqdb.py
8
iqdb.py
@ -1,9 +1,13 @@
|
|||||||
from bs4 import BeautifulSoup
|
|
||||||
import logging
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
class Iqdb:
|
class Iqdb:
|
||||||
def search(self, file):
|
@staticmethod
|
||||||
|
def search(file: str) -> Optional[str]:
|
||||||
logging.info('Searching %s', file)
|
logging.info('Searching %s', file)
|
||||||
files = {'file': open(file, 'rb')}
|
files = {'file': open(file, 'rb')}
|
||||||
resp = requests.post('https://iqdb.org/', files=files, timeout=10)
|
resp = requests.post('https://iqdb.org/', files=files, timeout=10)
|
||||||
|
20
library.py
20
library.py
@ -1,8 +1,10 @@
|
|||||||
from tags import Tags
|
|
||||||
from pathlib import Path
|
|
||||||
import shutil
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from tags import Tags
|
||||||
|
|
||||||
|
|
||||||
class Library:
|
class Library:
|
||||||
def __init__(self, dir_root: Path):
|
def __init__(self, dir_root: Path):
|
||||||
@ -10,11 +12,11 @@ class Library:
|
|||||||
self.dir_orphan = Path(dir_root, '_orphan')
|
self.dir_orphan = Path(dir_root, '_orphan')
|
||||||
self.dir_orphan.mkdir(exist_ok=True, parents=True)
|
self.dir_orphan.mkdir(exist_ok=True, parents=True)
|
||||||
|
|
||||||
def move_to_orphan(self, p: Path):
|
def move_to_orphan(self, p: Path) -> None:
|
||||||
logging.info("%s move to orphan", p)
|
logging.info("%s move to orphan", p)
|
||||||
shutil.move(os.fspath(p), os.fspath(self.dir_orphan))
|
shutil.move(os.fspath(p), os.fspath(self.dir_orphan))
|
||||||
|
|
||||||
def move(self, p: Path, tags: Tags):
|
def move(self, p: Path, tags: Tags) -> None:
|
||||||
new_path = self.__compute_path(tags)
|
new_path = self.__compute_path(tags)
|
||||||
new_path.mkdir(exist_ok=True, parents=True)
|
new_path.mkdir(exist_ok=True, parents=True)
|
||||||
logging.info("%s move to %s", p.name, new_path)
|
logging.info("%s move to %s", p.name, new_path)
|
||||||
@ -30,10 +32,9 @@ class Library:
|
|||||||
p = p / self.__sanitize(artist)
|
p = p / self.__sanitize(artist)
|
||||||
return p
|
return p
|
||||||
# Main section
|
# Main section
|
||||||
copyright = ""
|
|
||||||
if tags.copyrights != "":
|
if tags.copyrights != "":
|
||||||
copyright = tags.copyrights.split(" ")[0]
|
_copyright = tags.copyrights.split(" ")[0]
|
||||||
p = p / self.__sanitize(copyright)
|
p = p / self.__sanitize(_copyright)
|
||||||
if tags.characters == "":
|
if tags.characters == "":
|
||||||
return p
|
return p
|
||||||
# Characters section
|
# Characters section
|
||||||
@ -44,6 +45,7 @@ class Library:
|
|||||||
p = p / "_multiple"
|
p = p / "_multiple"
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def __sanitize(self, s: str) -> str:
|
@staticmethod
|
||||||
|
def __sanitize(s: str) -> str:
|
||||||
s = "".join(x for x in s if x.isalnum() or x in "._-()")
|
s = "".join(x for x in s if x.isalnum() or x in "._-()")
|
||||||
return s.replace("_", " ").strip()
|
return s.replace("_", " ").strip()
|
||||||
|
44
metadata.py
44
metadata.py
@ -1,33 +1,33 @@
|
|||||||
from typing import Optional, Union
|
|
||||||
from tags import Tags
|
|
||||||
from pathlib import Path
|
|
||||||
from datetime import datetime
|
|
||||||
import fluentpy as _
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import requests
|
|
||||||
import subprocess
|
import subprocess
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import fluentpy as _
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from tags import Tags
|
||||||
|
|
||||||
|
|
||||||
class Metadata:
|
class Metadata:
|
||||||
def __init__(self, dir_tmp: Path):
|
def __init__(self, dir_tmp: Path):
|
||||||
self.dir_tmp = dir_tmp
|
self.dir_tmp = dir_tmp
|
||||||
self.tmp_image_file = Path(self.dir_tmp, "tmp.jpg")
|
self.tmp_image_file = Path(self.dir_tmp, "tmp.jpg")
|
||||||
|
|
||||||
def process(self, url: str) -> Optional[Union[Path, Tags]]:
|
def process(self, url: str) -> Optional[tuple[Path, Tags]]:
|
||||||
logging.info("Retrieving metadata for %s", url)
|
logging.info("Retrieving metadata for %s", url)
|
||||||
meta = self.__get_metadata(url)
|
meta = self.__get_metadata(url)
|
||||||
status = self.__download_file(meta)
|
status = self.__download_file(meta)
|
||||||
if not status:
|
if not status:
|
||||||
logging.warn("Download failed")
|
logging.warning("Download failed")
|
||||||
return None
|
return None
|
||||||
return self.__write_tags(url, meta)
|
return self.__write_tags(url, meta)
|
||||||
|
|
||||||
def __get_metadata(self, url: str) -> dict:
|
@staticmethod
|
||||||
|
def __get_metadata(url: str) -> dict:
|
||||||
return requests.get(url + ".json").json()
|
return requests.get(url + ".json").json()
|
||||||
# with open('test/test.json', 'rt', encoding='utf8') as f:
|
|
||||||
# r = json.load(f)
|
|
||||||
# return r
|
|
||||||
|
|
||||||
def __download_file(self, r: dict) -> bool:
|
def __download_file(self, r: dict) -> bool:
|
||||||
ext = r.get("file_ext", "")
|
ext = r.get("file_ext", "")
|
||||||
@ -41,11 +41,12 @@ class Metadata:
|
|||||||
logging.info("Downloading image")
|
logging.info("Downloading image")
|
||||||
recompress = self.__need_recompress(ext, w, h, file_size_kb)
|
recompress = self.__need_recompress(ext, w, h, file_size_kb)
|
||||||
return self.__download(file_url, recompress=recompress)
|
return self.__download(file_url, recompress=recompress)
|
||||||
|
|
||||||
def __need_recompress(self, ext, w, h, size_kb):
|
@staticmethod
|
||||||
|
def __need_recompress(ext, w, h, size_kb) -> bool:
|
||||||
return ext == 'jpg' and size_kb > 1400 and w < 2500 and h < 2500
|
return ext == 'jpg' and size_kb > 1400 and w < 2500 and h < 2500
|
||||||
|
|
||||||
def __download(self, img_url: str, recompress: bool = False):
|
def __download(self, img_url: str, recompress: bool = False) -> bool:
|
||||||
opt_args = []
|
opt_args = []
|
||||||
if recompress:
|
if recompress:
|
||||||
opt_args = ['-quality', "80"]
|
opt_args = ['-quality', "80"]
|
||||||
@ -56,8 +57,9 @@ class Metadata:
|
|||||||
], stdout=subprocess.PIPE)
|
], stdout=subprocess.PIPE)
|
||||||
return ret == 0
|
return ret == 0
|
||||||
|
|
||||||
|
# noinspection PyCallingNonCallable
|
||||||
def __write_tags(self, url: str, r: dict) -> tuple:
|
# noinspection PyProtectedMember
|
||||||
|
def __write_tags(self, url: str, r: dict) -> tuple[Path, Tags]:
|
||||||
tag_general = r.get('tag_string_general', "")
|
tag_general = r.get('tag_string_general', "")
|
||||||
tag_copyrights = r.get('tag_string_copyright', "")
|
tag_copyrights = r.get('tag_string_copyright', "")
|
||||||
tag_characters = r.get('tag_string_character', "")
|
tag_characters = r.get('tag_string_character', "")
|
||||||
@ -87,8 +89,8 @@ class Metadata:
|
|||||||
self.tmp_image_file.rename(result_file)
|
self.tmp_image_file.rename(result_file)
|
||||||
return result_file, tags
|
return result_file, tags
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
def __format_filename(self, tags: Tags):
|
def __format_filename(tags: Tags):
|
||||||
filename = '{} {} by {} at {}.jpg'.format(
|
filename = '{} {} by {} at {}.jpg'.format(
|
||||||
tags.copyrights.split(" ")[0] or "",
|
tags.copyrights.split(" ")[0] or "",
|
||||||
", ".join(tags.characters_sanitized()[:2]),
|
", ".join(tags.characters_sanitized()[:2]),
|
||||||
|
58
picsorter.py
58
picsorter.py
@ -1,57 +1,50 @@
|
|||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from config import Config
|
||||||
|
from database import Database
|
||||||
from iqdb import Iqdb
|
from iqdb import Iqdb
|
||||||
from library import Library
|
from library import Library
|
||||||
from metadata import Metadata
|
from metadata import Metadata
|
||||||
from database import Database
|
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
from pathlib import Path
|
|
||||||
import re
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
import shutil
|
|
||||||
import os
|
|
||||||
|
|
||||||
class PicSorter:
|
class PicSorter:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.dir_tmp = Path('R:/')
|
self.config = Config.load('config.yml')
|
||||||
self.dir_input = Path('R:/input')
|
|
||||||
self.dir_processed = Path('R:/processed')
|
|
||||||
self.dir_logs = Path('./logs')
|
|
||||||
self.dir_library = Path('./library')
|
|
||||||
self.setup_folders()
|
|
||||||
self.setup_logging()
|
self.setup_logging()
|
||||||
|
|
||||||
def setup_folders(self):
|
|
||||||
self.dir_tmp.mkdir(exist_ok=True)
|
|
||||||
self.dir_processed.mkdir(exist_ok=True)
|
|
||||||
self.dir_logs.mkdir(exist_ok=True)
|
|
||||||
self.dir_library.mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
def setup_logging(self):
|
def setup_logging(self):
|
||||||
logfile = Path(self.dir_logs, datetime.now().strftime('%Y-%m-%d.log'))
|
filename = datetime.now().strftime('%Y-%m-%d.log')
|
||||||
|
logfile = Path(self.config.dir_logs, filename)
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
filename=logfile,
|
filename=os.fspath(logfile),
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
format='%(asctime)s %(levelname)s %(module)s: %(message)s',
|
format='%(asctime)s %(levelname)s %(module)s: %(message)s',
|
||||||
datefmt='%H:%M:%S',
|
datefmt='%H:%M:%S',
|
||||||
)
|
)
|
||||||
|
|
||||||
def process_folder(self):
|
def process_folder(self):
|
||||||
iqdb = Iqdb()
|
config = self.config
|
||||||
library = Library(self.dir_library)
|
library = Library(config.dir_library)
|
||||||
metadata = Metadata(self.dir_tmp)
|
metadata = Metadata(config.dir_tmp)
|
||||||
db = Database()
|
db = Database()
|
||||||
files = {p for p in self.dir_input.iterdir() if p.suffix in [".jpg", ".png"]}
|
files = {p for p in config.dir_input.iterdir()
|
||||||
|
if p.suffix in [".jpg", ".png"]}
|
||||||
for filename in files:
|
for filename in files:
|
||||||
print("Process ", filename)
|
print("Process", filename)
|
||||||
try:
|
try:
|
||||||
url = iqdb.search(filename)
|
url = Iqdb.search(filename)
|
||||||
if url is None:
|
if url is None:
|
||||||
logging.warn("%s not found", filename)
|
logging.warning("%s not found", filename)
|
||||||
library.move_to_orphan(Path(filename))
|
library.move_to_orphan(Path(filename))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
m = re.search(r".*posts\/(\d{3,})", url)
|
m = re.search(r".*posts/(\d{3,})", url)
|
||||||
if not m:
|
if not m:
|
||||||
continue
|
continue
|
||||||
post_id = int(m.group(1))
|
post_id = int(m.group(1))
|
||||||
@ -65,10 +58,11 @@ class PicSorter:
|
|||||||
image_path, tags = meta_result
|
image_path, tags = meta_result
|
||||||
library.move(image_path, tags)
|
library.move(image_path, tags)
|
||||||
db.add(post_id, tags.tags_string)
|
db.add(post_id, tags.tags_string)
|
||||||
shutil.move(os.fspath(filename), os.fspath(self.dir_processed))
|
shutil.move(os.fspath(filename), os.fspath(config.dir_processed))
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
raise ex
|
raise ex
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
PicSorter().process_folder()
|
PicSorter().process_folder()
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
fluentpy==2.0
|
beautifulsoup4==4.9.3
|
||||||
|
fluentpy>=2.0
|
||||||
PyYAML==5.4.1
|
PyYAML==5.4.1
|
||||||
requests==2.24.0
|
requests>=2.24
|
||||||
|
38
tags.py
38
tags.py
@ -1,47 +1,55 @@
|
|||||||
import fluentpy as _
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
import fluentpy as _
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class Tags:
    """Tags of one post, grouped by category.

    The four constructor fields are space-separated tag strings. ``tags`` and
    ``tags_string`` are derived from them in ``__post_init__``.
    """

    general: str
    copyrights: str
    characters: str
    artists: str
    # General tags followed by the prefixed copyright, character and artist
    # tags, in that order.
    tags: list[str] = field(init=False)
    # ``tags`` joined with single spaces.
    tags_string: str = field(init=False)

    def __post_init__(self):
        self.tags = self.__union_tags()
        self.tags_string = " ".join(self.tags)

    def characters_sanitized(self) -> list:
        """Return the character tags with copyright suffixes removed.

        For every copyright tag ``c`` the fragments ``_(c)`` and ``(c)`` are
        stripped from each character tag. When there are no copyrights the
        character string is split on spaces and returned unchanged.
        """
        if self.copyrights == "":
            # No need to sanitize tags
            return self.characters.split(" ")
        copyrights = self.copyrights.split(" ")
        return [
            self.__rename(name, copyrights)
            for name in self.characters.split(" ")
            if name != ""
        ]

    @staticmethod
    def __rename(s: str, substrings: list[str]) -> str:
        """Remove ``_(sub)`` and ``(sub)`` from *s* for each given substring."""
        for substring in substrings:
            s = s.replace("_("+substring+")", "") \
                .replace("("+substring+")", "") \
                .strip()
        return s

    def __union_tags(self) -> list[str]:
        """Build the combined tag list from all four categories."""
        # Drop empty fragments so an empty ``general`` string does not add an
        # empty tag (and a stray leading space in ``tags_string``).
        tags = [tag for tag in self.general.split(" ") if tag != ""]
        tags += self.__prefix_tags(self.copyrights, 'copyright_')
        tags += self.__prefix_tags(self.characters, 'character_')
        tags += self.__prefix_tags(self.artists, 'artist_')
        return tags

    @staticmethod
    def __prefix_tags(tags: str, prefix: str) -> list[str]:
        """Split *tags* on spaces and put *prefix* before each non-empty tag."""
        return [prefix + tag.strip() for tag in tags.split(" ") if tag != ""]
|
||||||
|
Loading…
Reference in New Issue
Block a user