mirror of
				https://github.com/mihonapp/mihon.git
				synced 2025-11-04 16:18:55 +01:00 
			
		
		
		
	Rewrote ChapterRecognition to Kotlin. (#293)
This commit is contained in:
		
				
					committed by
					
						
						inorichi
					
				
			
			
				
	
			
			
			
						parent
						
							c64bd81339
						
					
				
				
					commit
					a6df745daa
				
			@@ -1,205 +0,0 @@
 | 
			
		||||
package eu.kanade.tachiyomi.util;
 | 
			
		||||
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.regex.Matcher;
 | 
			
		||||
import java.util.regex.Pattern;
 | 
			
		||||
 | 
			
		||||
import eu.kanade.tachiyomi.data.database.models.Chapter;
 | 
			
		||||
import eu.kanade.tachiyomi.data.database.models.Manga;
 | 
			
		||||
 | 
			
		||||
public final class ChapterRecognition {
 | 
			
		||||
 | 
			
		||||
    private static final Pattern cleanWithToken = Pattern.compile("ch[^0-9]?\\s*(\\d+[\\.,]?\\d+)($|\\b)");
 | 
			
		||||
    private static final Pattern uncleanWithToken = Pattern.compile("ch[^0-9]?\\s*(\\d+[\\.,]?\\d*)");
 | 
			
		||||
    private static final Pattern withAlphaPostfix = Pattern.compile("(\\d+[\\.,]?\\d*\\s*)([a-z])($|\\b)");
 | 
			
		||||
    private static final Pattern cleanNumber = Pattern.compile("(\\d+[\\.,]?\\d+)($|\\b)");
 | 
			
		||||
    private static final Pattern uncleanNumber = Pattern.compile("(\\d+[\\.,]?\\d*)");
 | 
			
		||||
    private static final Pattern withColon = Pattern.compile("(\\d+[\\.,]?\\d*\\s*:)([^\\d]|$)");
 | 
			
		||||
    private static final Pattern startingNumber = Pattern.compile("^(\\d+[\\.,]?\\d*)");
 | 
			
		||||
 | 
			
		||||
    private static final Pattern pUnwanted =
 | 
			
		||||
            Pattern.compile("(\\b|\\d)(v|ver|vol|version|volume)\\.?\\s*\\d+\\b");
 | 
			
		||||
    private static final Pattern pPart =
 | 
			
		||||
            Pattern.compile("(\\b|\\d)part\\s*\\d+.+");
 | 
			
		||||
 | 
			
		||||
    private ChapterRecognition() throws InstantiationException {
 | 
			
		||||
        throw new InstantiationException("This class is not for instantiation");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static void parseChapterNumber(Chapter chapter, Manga manga) {
 | 
			
		||||
        if (chapter.chapter_number != -1)
 | 
			
		||||
            return;
 | 
			
		||||
 | 
			
		||||
        String name = chapter.name.toLowerCase();
 | 
			
		||||
        Matcher matcher;
 | 
			
		||||
 | 
			
		||||
        // Safest option, the chapter has a token prepended and nothing at the end of the number
 | 
			
		||||
        matcher = cleanWithToken.matcher(name);
 | 
			
		||||
        if (matcher.find()) {
 | 
			
		||||
            chapter.chapter_number = Float.parseFloat(matcher.group(1));
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // a number with a single alpha prefix is parsed as sub-chapter
 | 
			
		||||
        matcher = withAlphaPostfix.matcher(name);
 | 
			
		||||
        if (matcher.find()) {
 | 
			
		||||
            chapter.chapter_number = Float.parseFloat(matcher.group(1)) + parseAlphaPostFix(matcher.group(2));
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // the chapter has a token prepended and something at the end of the number
 | 
			
		||||
        matcher = uncleanWithToken.matcher(name);
 | 
			
		||||
        if (matcher.find()) {
 | 
			
		||||
            chapter.chapter_number = Float.parseFloat(matcher.group(1));
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Remove anything related to the volume or version
 | 
			
		||||
        name = pUnwanted.matcher(name).replaceAll("$1");
 | 
			
		||||
 | 
			
		||||
        List<Float> occurrences;
 | 
			
		||||
 | 
			
		||||
        // If there's only one number, use it
 | 
			
		||||
        matcher = uncleanNumber.matcher(name);
 | 
			
		||||
        occurrences = getAllOccurrences(matcher);
 | 
			
		||||
        if (occurrences.size() == 1) {
 | 
			
		||||
            chapter.chapter_number =  occurrences.get(0);
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // If it has a colon, the chapter number should be that one
 | 
			
		||||
        matcher = withColon.matcher(name);
 | 
			
		||||
        occurrences = getAllOccurrences(matcher);
 | 
			
		||||
        if (occurrences.size() == 1) {
 | 
			
		||||
            chapter.chapter_number =  occurrences.get(0);
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Prefer numbers without anything appended
 | 
			
		||||
        matcher = cleanNumber.matcher(name);
 | 
			
		||||
        occurrences = getAllOccurrences(matcher);
 | 
			
		||||
        if (occurrences.size() == 1) {
 | 
			
		||||
            chapter.chapter_number =  occurrences.get(0);
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // This can lead to issues if two numbers are separated by an space
 | 
			
		||||
        name = name.replaceAll("\\s+", "");
 | 
			
		||||
 | 
			
		||||
        // Try to remove the manga name from the chapter, and try again
 | 
			
		||||
        String mangaName = replaceIrrelevantCharacters(manga.title);
 | 
			
		||||
        String nameWithoutManga = difference(mangaName, name).trim();
 | 
			
		||||
        if (!nameWithoutManga.isEmpty()) {
 | 
			
		||||
            matcher = uncleanNumber.matcher(nameWithoutManga);
 | 
			
		||||
            occurrences = getAllOccurrences(matcher);
 | 
			
		||||
            if (occurrences.size() == 1) {
 | 
			
		||||
                chapter.chapter_number =  occurrences.get(0);
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // TODO more checks (maybe levenshtein?)
 | 
			
		||||
 | 
			
		||||
        // try splitting the name in parts an pick the first valid one
 | 
			
		||||
        String[] nameParts = chapter.name.split("-");
 | 
			
		||||
        Chapter dummyChapter = Chapter.create();
 | 
			
		||||
        if (nameParts.length > 1) {
 | 
			
		||||
            for (String part : nameParts) {
 | 
			
		||||
                dummyChapter.name = part;
 | 
			
		||||
                parseChapterNumber(dummyChapter, manga);
 | 
			
		||||
                if (dummyChapter.chapter_number >= 0) {
 | 
			
		||||
                    chapter.chapter_number = dummyChapter.chapter_number;
 | 
			
		||||
                    return;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Strip anything after "part xxx" and try that
 | 
			
		||||
        matcher = pPart.matcher(name);
 | 
			
		||||
        if (matcher.find()) {
 | 
			
		||||
            name = pPart.matcher(name).replaceAll("$1");
 | 
			
		||||
            dummyChapter.name = name;
 | 
			
		||||
            parseChapterNumber(dummyChapter, manga);
 | 
			
		||||
            if (dummyChapter.chapter_number >= 0) {
 | 
			
		||||
                chapter.chapter_number = dummyChapter.chapter_number;
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        // check for a number either at the start or right after the manga title
 | 
			
		||||
        matcher = startingNumber.matcher(name);
 | 
			
		||||
        if (matcher.find()) {
 | 
			
		||||
            chapter.chapter_number = Float.parseFloat(matcher.group(1));
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        matcher = startingNumber.matcher(nameWithoutManga);
 | 
			
		||||
        if (matcher.find()) {
 | 
			
		||||
            chapter.chapter_number = Float.parseFloat(matcher.group(1));
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * x.a -> x.1, x.b -> x.2, etc
 | 
			
		||||
     */
 | 
			
		||||
    private static float parseAlphaPostFix(String postfix) {
 | 
			
		||||
        char alpha = postfix.charAt(0);
 | 
			
		||||
        return Float.parseFloat("0." + Integer.toString((int)alpha - 96));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static List<Float> getAllOccurrences(Matcher matcher) {
 | 
			
		||||
        List<Float> occurences = new ArrayList<>();
 | 
			
		||||
        while (matcher.find()) {
 | 
			
		||||
            // Match again to get only numbers from the captured text
 | 
			
		||||
            String text = matcher.group();
 | 
			
		||||
            Matcher m = uncleanNumber.matcher(text);
 | 
			
		||||
            if (m.find()) {
 | 
			
		||||
                try {
 | 
			
		||||
                    Float value = Float.parseFloat(m.group(1).replaceAll(",", "."));
 | 
			
		||||
                    if (!occurences.contains(value)) {
 | 
			
		||||
                        occurences.add(value);
 | 
			
		||||
                    }
 | 
			
		||||
                } catch (NumberFormatException e) { /* Do nothing */ }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        return occurences;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static String replaceIrrelevantCharacters(String str) {
 | 
			
		||||
        return str.replaceAll("\\s+", "").toLowerCase();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static String difference(String str1, String str2) {
 | 
			
		||||
        if (str1 == null) {
 | 
			
		||||
            return str2;
 | 
			
		||||
        }
 | 
			
		||||
        if (str2 == null) {
 | 
			
		||||
            return str1;
 | 
			
		||||
        }
 | 
			
		||||
        int at = indexOfDifference(str1, str2);
 | 
			
		||||
        if (at == -1) {
 | 
			
		||||
            return "";
 | 
			
		||||
        }
 | 
			
		||||
        return str2.substring(at);
 | 
			
		||||
    }
 | 
			
		||||
    public static int indexOfDifference(String str1, String str2) {
 | 
			
		||||
        if (str1 == str2) {
 | 
			
		||||
            return -1;
 | 
			
		||||
        }
 | 
			
		||||
        if (str1 == null || str2 == null) {
 | 
			
		||||
            return 0;
 | 
			
		||||
        }
 | 
			
		||||
        int i;
 | 
			
		||||
        for (i = 0; i < str1.length() && i < str2.length(); ++i) {
 | 
			
		||||
            if (str1.charAt(i) != str2.charAt(i)) {
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        if (i < str2.length() || i < str1.length()) {
 | 
			
		||||
            return i;
 | 
			
		||||
        }
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										140
									
								
								app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.kt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.kt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,140 @@
 | 
			
		||||
package eu.kanade.tachiyomi.util
 | 
			
		||||
 | 
			
		||||
import eu.kanade.tachiyomi.data.database.models.Chapter
 | 
			
		||||
import eu.kanade.tachiyomi.data.database.models.Manga
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * -R> = regex conversion.
 | 
			
		||||
 */
 | 
			
		||||
object ChapterRecognition {
 | 
			
		||||
    /**
 | 
			
		||||
     * All cases with Ch.xx
 | 
			
		||||
     * Mokushiroku Alice Vol.1 Ch. 4: Misrepresentation -R> 4
 | 
			
		||||
     */
 | 
			
		||||
    private val basic = Regex("""(?<=ch\.)([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Regex used when only one number occurrence
 | 
			
		||||
     * Example: Bleach 567: Down With Snowwhite -R> 567
 | 
			
		||||
     */
 | 
			
		||||
    private val occurrence = Regex("""([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Regex used when manga title removed
 | 
			
		||||
     * Example: Solanin 028 Vol. 2 -> 028 Vol.2 -> 028Vol.2 -R> 028
 | 
			
		||||
     */
 | 
			
		||||
    private val withoutMange = Regex("""^([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Regex used to remove unwanted tags
 | 
			
		||||
     * Example Prison School 12 v.1 vol004 version1243 volume64 -R> Prison School 12
 | 
			
		||||
     */
 | 
			
		||||
    private val unwanted = Regex("""(?:(v|ver|vol|version|volume).?[0-9]+)""")
 | 
			
		||||
 | 
			
		||||
    private val unwantedWhiteSpace = Regex("""(\s)(extra|special|omake)""")
 | 
			
		||||
 | 
			
		||||
    fun parseChapterNumber(chapter: Chapter, manga: Manga) {
 | 
			
		||||
        //If chapter number is known return.
 | 
			
		||||
        if (chapter.chapter_number != -1f)
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        // Get chapter title with lower case
 | 
			
		||||
        var name = chapter.name.toLowerCase()
 | 
			
		||||
 | 
			
		||||
        // Remove comma's from chapter.
 | 
			
		||||
        name = name.replace(',', '.')
 | 
			
		||||
 | 
			
		||||
        // Remove unwanted white spaces.
 | 
			
		||||
        unwantedWhiteSpace.findAll(name).let {
 | 
			
		||||
            it.forEach { occurrence -> name = name.replace(occurrence.value, occurrence.value.trim()) }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Remove unwanted tags.
 | 
			
		||||
        unwanted.findAll(name).let {
 | 
			
		||||
            it.forEach { occurrence -> name = name.replace(occurrence.value, "") }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Check base case ch.xx
 | 
			
		||||
        if (updateChapter(basic.find(name), chapter))
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        // Check one number occurrence.
 | 
			
		||||
        val occurrences: MutableList<MatchResult> = arrayListOf()
 | 
			
		||||
        occurrence.findAll(name).let {
 | 
			
		||||
            it.forEach { occurrence -> occurrences.add(occurrence) }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (occurrences.size == 1) {
 | 
			
		||||
            if (updateChapter(occurrences[0], chapter))
 | 
			
		||||
                return
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Remove manga title from chapter title.
 | 
			
		||||
        val nameWithoutManga = name.replace(manga.title.toLowerCase(), "").trim()
 | 
			
		||||
 | 
			
		||||
        // Check if first value is number after title remove.
 | 
			
		||||
        if (updateChapter(withoutMange.find(nameWithoutManga), chapter))
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        // Take the first number encountered.
 | 
			
		||||
        if (updateChapter(occurrence.find(nameWithoutManga), chapter))
 | 
			
		||||
            return
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Check if volume is found and update chapter
 | 
			
		||||
     * @param match result of regex
 | 
			
		||||
     * @param chapter chapter object
 | 
			
		||||
     * @return true if volume is found
 | 
			
		||||
     */
 | 
			
		||||
    fun updateChapter(match: MatchResult?, chapter: Chapter): Boolean {
 | 
			
		||||
        match?.let {
 | 
			
		||||
            val initial = it.groups[1]?.value?.toFloat()!!
 | 
			
		||||
            val subChapterDecimal = it.groups[2]?.value
 | 
			
		||||
            val subChapterAlpha = it.groups[3]?.value
 | 
			
		||||
            val addition = checkForDecimal(subChapterDecimal, subChapterAlpha)
 | 
			
		||||
            chapter.chapter_number = initial.plus(addition)
 | 
			
		||||
            return true
 | 
			
		||||
        }
 | 
			
		||||
        return false
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Check for decimal in received strings
 | 
			
		||||
     * @param decimal decimal value of regex
 | 
			
		||||
     * @param alpha alpha value of regex
 | 
			
		||||
     * @return decimal/alpha float value
 | 
			
		||||
     */
 | 
			
		||||
    fun checkForDecimal(decimal: String?, alpha: String?): Float {
 | 
			
		||||
        if (!decimal.isNullOrEmpty())
 | 
			
		||||
            return decimal?.toFloat()!!
 | 
			
		||||
 | 
			
		||||
        if (!alpha.isNullOrEmpty()) {
 | 
			
		||||
            if (alpha!!.contains("extra"))
 | 
			
		||||
                return .99f
 | 
			
		||||
 | 
			
		||||
            if (alpha.contains("omake"))
 | 
			
		||||
                return .98f
 | 
			
		||||
 | 
			
		||||
            if (alpha.contains("special"))
 | 
			
		||||
                return .97f
 | 
			
		||||
 | 
			
		||||
            if (alpha[0].equals('.') ) {
 | 
			
		||||
                // Take value after (.)
 | 
			
		||||
                return parseAlphaPostFix(alpha[1])
 | 
			
		||||
            } else {
 | 
			
		||||
                return parseAlphaPostFix(alpha[0])
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return .0f
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * x.a -> x.1, x.b -> x.2, etc
 | 
			
		||||
     */
 | 
			
		||||
    private fun parseAlphaPostFix(alpha: Char): Float {
 | 
			
		||||
        return ("0." + Integer.toString(alpha.toInt() - 96)).toFloat()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user