commit 509c0c9ee9bd1991c8041976c48efd26eddf1bbc Author: Victor Date: Sun Sep 27 13:08:23 2020 +0300 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..11d2870 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.idea +.gradle +build +*.db \ No newline at end of file diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..2bc932f --- /dev/null +++ b/build.gradle @@ -0,0 +1,22 @@ +plugins { + id 'java' + id 'application' +} + +sourceCompatibility = 11 +targetCompatibility = 11 +mainClassName = 'com.annimon.similarimagesbot.Main' +group 'com.annimon' +version '1.0-SNAPSHOT' + +repositories { + jcenter() +} + +dependencies { + implementation 'com.github.pengrad:java-telegram-bot-api:4.9.0' + implementation 'com.github.kilianB:JImageHash:3.0.0' + implementation 'com.h2database:h2:1.4.200' + + testCompile 'junit:junit:4.12' +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..62d4c05 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..622ab64 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.5-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100644 index 0000000..fbd7c51 --- /dev/null +++ b/gradlew @@ -0,0 +1,185 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..a9f778a --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,104 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 0000000..42adeb8 --- /dev/null +++ b/settings.gradle @@ -0,0 +1,2 @@ +rootProject.name = 'SimilarImagesBot' + diff --git a/src/main/java/com/annimon/similarimagesbot/BotHandler.java b/src/main/java/com/annimon/similarimagesbot/BotHandler.java new file mode 100644 index 0000000..aea36ba --- /dev/null +++ b/src/main/java/com/annimon/similarimagesbot/BotHandler.java @@ -0,0 +1,107 @@ +package com.annimon.similarimagesbot; + +import com.annimon.similarimagesbot.data.Post; +import com.annimon.similarimagesbot.data.SimilarImagesInfo; +import com.pengrad.telegrambot.TelegramBot; +import com.pengrad.telegrambot.UpdatesListener; +import com.pengrad.telegrambot.model.Message; +import com.pengrad.telegrambot.model.PhotoSize; +import com.pengrad.telegrambot.model.Update; +import com.pengrad.telegrambot.model.request.ParseMode; +import com.pengrad.telegrambot.request.GetFile; +import com.pengrad.telegrambot.request.SendMessage; +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.net.URL; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import javax.imageio.ImageIO; + +public class BotHandler { + + private final TelegramBot bot; + private final ImageIndexer indexer; + private long adminId; + + public BotHandler(String botToken, ImageIndexer indexer) { + bot = new TelegramBot.Builder(botToken) + .updateListenerSleep(20_000L) + .build(); + this.indexer = indexer; + } + + public void setAdminId(long adminId) { + this.adminId = adminId; + } + + public void run() { + bot.setUpdatesListener(updates -> { + final List channelPosts = updates.stream() + .map(Update::channelPost) + .filter(Objects::nonNull) + .filter(msg -> msg.photo() != null) + .collect(Collectors.toList()); + + final var similarImagesInfos = new ArrayList(); + for (var post : channelPosts) { + final PhotoSize photo = getSmallestPhoto(post.photo()); + try { + final var tgFile = bot.execute(new GetFile(photo.fileId())).file(); + final var url = new URL(bot.getFullFilePath(tgFile)); + final BufferedImage image = ImageIO.read(url); + final var originalPost = new Post(post.chat().id(), post.messageId()); + final SimilarImagesInfo info = indexer.processImage(originalPost, image); + if (info.hasResults()) { + similarImagesInfos.add(info); + } + } catch (IOException | SQLException e) { + System.err.format("Error while processing photo in %s%n", linkToMessage(post)); + } + } + if (!similarImagesInfos.isEmpty()) { + sendReport(similarImagesInfos); + } + return UpdatesListener.CONFIRMED_UPDATES_ALL; + }); + } + + private void sendReport(List infos) { + String report = infos.stream().map(info -> { + String text = "For post " + formatPostLink(info.getOriginalPost()) + " found:\n"; + text += info.getResults().stream() + .map(r -> String.format(" %s, dst: %.2f", formatPostLink(r.getPost()), r.getDistance())) + .collect(Collectors.joining("\n")); + return text; + }).collect(Collectors.joining("\n\n")); + + if (adminId == 0) { + System.out.println(report); + } else { + bot.execute(new SendMessage(adminId, report).parseMode(ParseMode.Markdown)); + } + } + + private String formatPostLink(Post post) { + String link = linkToMessage(post.getChannelId(), post.getMessageId()); + return String.format("[#%d](%s)", post.getMessageId(), link); + } + + private String linkToMessage(Message msg) { + return linkToMessage(msg.chat().id(), msg.messageId()); + } + + private String linkToMessage(Long chatId, Integer messageId) { + return "https://t.me/c/" + chatId.toString().replace("-100", "") + "/" + messageId; + } + + private PhotoSize getSmallestPhoto(PhotoSize[] photoSizes) { + return Arrays.stream(photoSizes) + .min(Comparator.comparingInt(ps -> ps.width() * ps.height())) + .orElse(photoSizes[0]); + } +} diff --git a/src/main/java/com/annimon/similarimagesbot/ImageIndexer.java b/src/main/java/com/annimon/similarimagesbot/ImageIndexer.java new file mode 100644 index 0000000..034e24f --- /dev/null +++ b/src/main/java/com/annimon/similarimagesbot/ImageIndexer.java @@ -0,0 +1,54 @@ +package com.annimon.similarimagesbot; + +import com.annimon.similarimagesbot.data.ImageResult; +import com.annimon.similarimagesbot.data.Post; +import com.annimon.similarimagesbot.data.SimilarImagesInfo; +import com.github.kilianB.hashAlgorithms.DifferenceHash; +import com.github.kilianB.hashAlgorithms.PerceptiveHash; +import com.github.kilianB.matcher.persistent.database.H2DatabaseImageMatcher; +import java.awt.image.BufferedImage; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class ImageIndexer { + + private final Map databases; + + public ImageIndexer() { + databases = new HashMap<>(5); + } + + public SimilarImagesInfo processImage(Post originalPost, BufferedImage image) + throws SQLException { + final Long channelId = originalPost.getChannelId(); + final var db = getDatabaseForChannel(channelId); + final List results = db.getMatchingImages(image) + .stream() + .map(r -> { + final var similarPost = new Post(channelId, Integer.parseInt(r.value)); + return new ImageResult(similarPost, r.distance); + }) + .filter(r -> !r.isSamePost(originalPost)) + .collect(Collectors.toList()); + db.addImage(originalPost.getMessageId().toString(), image); + return new SimilarImagesInfo(originalPost, results); + } + + private H2DatabaseImageMatcher getDatabaseForChannel(Long channelId) throws SQLException { + var db = databases.get(channelId); + if (db != null) { + return db; + } + var jdbcUrl = "jdbc:h2:./imagesdb_" + channelId; + var conn = DriverManager.getConnection(jdbcUrl, "root", ""); + db = new H2DatabaseImageMatcher(conn); + db.addHashingAlgorithm(new DifferenceHash(32, DifferenceHash.Precision.Double), .4); + db.addHashingAlgorithm(new PerceptiveHash(32), .2); + databases.put(channelId, db); + return db; + } +} diff --git a/src/main/java/com/annimon/similarimagesbot/Main.java b/src/main/java/com/annimon/similarimagesbot/Main.java new file mode 100644 index 0000000..298b8cd --- /dev/null +++ b/src/main/java/com/annimon/similarimagesbot/Main.java @@ -0,0 +1,24 @@ +package com.annimon.similarimagesbot; + +import java.util.Optional; + +public class Main { + + public static void main(String[] args) { + final String botToken = stringProp("BOT_TOKEN") + .orElseThrow(() -> new IllegalArgumentException("BOT_TOKEN is required")); + final ImageIndexer indexer = new ImageIndexer(); + final var handler = new BotHandler(botToken, indexer); + handler.setAdminId(longProp("ADMIN_ID").orElse(0L)); + handler.run(); + } + + private static Optional stringProp(String name) { + return Optional.ofNullable(System.getenv(name)) + .or(() -> Optional.ofNullable(System.getProperty(name))); + } + + private static Optional longProp(String name) { + return stringProp(name).map(Long::parseLong); + } +} diff --git a/src/main/java/com/annimon/similarimagesbot/data/ImageResult.java b/src/main/java/com/annimon/similarimagesbot/data/ImageResult.java new file mode 100644 index 0000000..8a3f94c --- /dev/null +++ b/src/main/java/com/annimon/similarimagesbot/data/ImageResult.java @@ -0,0 +1,24 @@ +package com.annimon.similarimagesbot.data; + +public class ImageResult { + + private final Post post; + private final double distance; + + public ImageResult(Post post, double distance) { + this.post = post; + this.distance = distance; + } + + public Post getPost() { + return post; + } + + public double getDistance() { + return distance; + } + + public boolean isSamePost(Post other) { + return post.equals(other); + } +} diff --git a/src/main/java/com/annimon/similarimagesbot/data/Post.java b/src/main/java/com/annimon/similarimagesbot/data/Post.java new file mode 100644 index 0000000..014ba16 --- /dev/null +++ b/src/main/java/com/annimon/similarimagesbot/data/Post.java @@ -0,0 +1,36 @@ +package com.annimon.similarimagesbot.data; + +import java.util.Objects; + +public class Post { + + private final Long channelId; + private final Integer messageId; + + public Post(Long channelId, Integer messageId) { + this.channelId = channelId; + this.messageId = messageId; + } + + public Long getChannelId() { + return channelId; + } + + public Integer getMessageId() { + return messageId; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Post post = (Post) o; + return Objects.equals(channelId, post.channelId) && + Objects.equals(messageId, post.messageId); + } + + @Override + public int hashCode() { + return Objects.hash(channelId, messageId); + } +} diff --git a/src/main/java/com/annimon/similarimagesbot/data/SimilarImagesInfo.java b/src/main/java/com/annimon/similarimagesbot/data/SimilarImagesInfo.java new file mode 100644 index 0000000..29c93b3 --- /dev/null +++ b/src/main/java/com/annimon/similarimagesbot/data/SimilarImagesInfo.java @@ -0,0 +1,26 @@ +package com.annimon.similarimagesbot.data; + +import java.util.List; + +public class SimilarImagesInfo { + + private final Post originalPost; + private final List results; + + public SimilarImagesInfo(Post originalPost, List results) { + this.originalPost = originalPost; + this.results = results; + } + + public boolean hasResults() { + return !results.isEmpty(); + } + + public Post getOriginalPost() { + return originalPost; + } + + public List getResults() { + return results; + } +}