mirror of
				https://github.com/mihonapp/mihon.git
				synced 2025-10-30 22:07:57 +01:00 
			
		
		
		
	Rewrote ChapterRecognition to Kotlin. (#293)
This commit is contained in:
		
				
					committed by
					
						 inorichi
						inorichi
					
				
			
			
				
	
			
			
			
						parent
						
							c64bd81339
						
					
				
				
					commit
					a6df745daa
				
			| @@ -1,205 +0,0 @@ | ||||
| package eu.kanade.tachiyomi.util; | ||||
|  | ||||
| import java.util.ArrayList; | ||||
| import java.util.List; | ||||
| import java.util.regex.Matcher; | ||||
| import java.util.regex.Pattern; | ||||
|  | ||||
| import eu.kanade.tachiyomi.data.database.models.Chapter; | ||||
| import eu.kanade.tachiyomi.data.database.models.Manga; | ||||
|  | ||||
| public final class ChapterRecognition { | ||||
|  | ||||
|     private static final Pattern cleanWithToken = Pattern.compile("ch[^0-9]?\\s*(\\d+[\\.,]?\\d+)($|\\b)"); | ||||
|     private static final Pattern uncleanWithToken = Pattern.compile("ch[^0-9]?\\s*(\\d+[\\.,]?\\d*)"); | ||||
|     private static final Pattern withAlphaPostfix = Pattern.compile("(\\d+[\\.,]?\\d*\\s*)([a-z])($|\\b)"); | ||||
|     private static final Pattern cleanNumber = Pattern.compile("(\\d+[\\.,]?\\d+)($|\\b)"); | ||||
|     private static final Pattern uncleanNumber = Pattern.compile("(\\d+[\\.,]?\\d*)"); | ||||
|     private static final Pattern withColon = Pattern.compile("(\\d+[\\.,]?\\d*\\s*:)([^\\d]|$)"); | ||||
|     private static final Pattern startingNumber = Pattern.compile("^(\\d+[\\.,]?\\d*)"); | ||||
|  | ||||
|     private static final Pattern pUnwanted = | ||||
|             Pattern.compile("(\\b|\\d)(v|ver|vol|version|volume)\\.?\\s*\\d+\\b"); | ||||
|     private static final Pattern pPart = | ||||
|             Pattern.compile("(\\b|\\d)part\\s*\\d+.+"); | ||||
|  | ||||
|     private ChapterRecognition() throws InstantiationException { | ||||
|         throw new InstantiationException("This class is not for instantiation"); | ||||
|     } | ||||
|  | ||||
|     public static void parseChapterNumber(Chapter chapter, Manga manga) { | ||||
|         if (chapter.chapter_number != -1) | ||||
|             return; | ||||
|  | ||||
|         String name = chapter.name.toLowerCase(); | ||||
|         Matcher matcher; | ||||
|  | ||||
|         // Safest option, the chapter has a token prepended and nothing at the end of the number | ||||
|         matcher = cleanWithToken.matcher(name); | ||||
|         if (matcher.find()) { | ||||
|             chapter.chapter_number = Float.parseFloat(matcher.group(1)); | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         // a number with a single alpha prefix is parsed as sub-chapter | ||||
|         matcher = withAlphaPostfix.matcher(name); | ||||
|         if (matcher.find()) { | ||||
|             chapter.chapter_number = Float.parseFloat(matcher.group(1)) + parseAlphaPostFix(matcher.group(2)); | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         // the chapter has a token prepended and something at the end of the number | ||||
|         matcher = uncleanWithToken.matcher(name); | ||||
|         if (matcher.find()) { | ||||
|             chapter.chapter_number = Float.parseFloat(matcher.group(1)); | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         // Remove anything related to the volume or version | ||||
|         name = pUnwanted.matcher(name).replaceAll("$1"); | ||||
|  | ||||
|         List<Float> occurrences; | ||||
|  | ||||
|         // If there's only one number, use it | ||||
|         matcher = uncleanNumber.matcher(name); | ||||
|         occurrences = getAllOccurrences(matcher); | ||||
|         if (occurrences.size() == 1) { | ||||
|             chapter.chapter_number =  occurrences.get(0); | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         // If it has a colon, the chapter number should be that one | ||||
|         matcher = withColon.matcher(name); | ||||
|         occurrences = getAllOccurrences(matcher); | ||||
|         if (occurrences.size() == 1) { | ||||
|             chapter.chapter_number =  occurrences.get(0); | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         // Prefer numbers without anything appended | ||||
|         matcher = cleanNumber.matcher(name); | ||||
|         occurrences = getAllOccurrences(matcher); | ||||
|         if (occurrences.size() == 1) { | ||||
|             chapter.chapter_number =  occurrences.get(0); | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         // This can lead to issues if two numbers are separated by an space | ||||
|         name = name.replaceAll("\\s+", ""); | ||||
|  | ||||
|         // Try to remove the manga name from the chapter, and try again | ||||
|         String mangaName = replaceIrrelevantCharacters(manga.title); | ||||
|         String nameWithoutManga = difference(mangaName, name).trim(); | ||||
|         if (!nameWithoutManga.isEmpty()) { | ||||
|             matcher = uncleanNumber.matcher(nameWithoutManga); | ||||
|             occurrences = getAllOccurrences(matcher); | ||||
|             if (occurrences.size() == 1) { | ||||
|                 chapter.chapter_number =  occurrences.get(0); | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // TODO more checks (maybe levenshtein?) | ||||
|  | ||||
|         // try splitting the name in parts an pick the first valid one | ||||
|         String[] nameParts = chapter.name.split("-"); | ||||
|         Chapter dummyChapter = Chapter.create(); | ||||
|         if (nameParts.length > 1) { | ||||
|             for (String part : nameParts) { | ||||
|                 dummyChapter.name = part; | ||||
|                 parseChapterNumber(dummyChapter, manga); | ||||
|                 if (dummyChapter.chapter_number >= 0) { | ||||
|                     chapter.chapter_number = dummyChapter.chapter_number; | ||||
|                     return; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Strip anything after "part xxx" and try that | ||||
|         matcher = pPart.matcher(name); | ||||
|         if (matcher.find()) { | ||||
|             name = pPart.matcher(name).replaceAll("$1"); | ||||
|             dummyChapter.name = name; | ||||
|             parseChapterNumber(dummyChapter, manga); | ||||
|             if (dummyChapter.chapter_number >= 0) { | ||||
|                 chapter.chapter_number = dummyChapter.chapter_number; | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|  | ||||
|         // check for a number either at the start or right after the manga title | ||||
|         matcher = startingNumber.matcher(name); | ||||
|         if (matcher.find()) { | ||||
|             chapter.chapter_number = Float.parseFloat(matcher.group(1)); | ||||
|             return; | ||||
|         } | ||||
|         matcher = startingNumber.matcher(nameWithoutManga); | ||||
|         if (matcher.find()) { | ||||
|             chapter.chapter_number = Float.parseFloat(matcher.group(1)); | ||||
|             return; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * x.a -> x.1, x.b -> x.2, etc | ||||
|      */ | ||||
|     private static float parseAlphaPostFix(String postfix) { | ||||
|         char alpha = postfix.charAt(0); | ||||
|         return Float.parseFloat("0." + Integer.toString((int)alpha - 96)); | ||||
|     } | ||||
|  | ||||
|     public static List<Float> getAllOccurrences(Matcher matcher) { | ||||
|         List<Float> occurences = new ArrayList<>(); | ||||
|         while (matcher.find()) { | ||||
|             // Match again to get only numbers from the captured text | ||||
|             String text = matcher.group(); | ||||
|             Matcher m = uncleanNumber.matcher(text); | ||||
|             if (m.find()) { | ||||
|                 try { | ||||
|                     Float value = Float.parseFloat(m.group(1).replaceAll(",", ".")); | ||||
|                     if (!occurences.contains(value)) { | ||||
|                         occurences.add(value); | ||||
|                     } | ||||
|                 } catch (NumberFormatException e) { /* Do nothing */ } | ||||
|             } | ||||
|         } | ||||
|         return occurences; | ||||
|     } | ||||
|  | ||||
|     public static String replaceIrrelevantCharacters(String str) { | ||||
|         return str.replaceAll("\\s+", "").toLowerCase(); | ||||
|     } | ||||
|  | ||||
|     public static String difference(String str1, String str2) { | ||||
|         if (str1 == null) { | ||||
|             return str2; | ||||
|         } | ||||
|         if (str2 == null) { | ||||
|             return str1; | ||||
|         } | ||||
|         int at = indexOfDifference(str1, str2); | ||||
|         if (at == -1) { | ||||
|             return ""; | ||||
|         } | ||||
|         return str2.substring(at); | ||||
|     } | ||||
|     public static int indexOfDifference(String str1, String str2) { | ||||
|         if (str1 == str2) { | ||||
|             return -1; | ||||
|         } | ||||
|         if (str1 == null || str2 == null) { | ||||
|             return 0; | ||||
|         } | ||||
|         int i; | ||||
|         for (i = 0; i < str1.length() && i < str2.length(); ++i) { | ||||
|             if (str1.charAt(i) != str2.charAt(i)) { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|         if (i < str2.length() || i < str1.length()) { | ||||
|             return i; | ||||
|         } | ||||
|         return -1; | ||||
|     } | ||||
| } | ||||
							
								
								
									
										140
									
								
								app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.kt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.kt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,140 @@ | ||||
| package eu.kanade.tachiyomi.util | ||||
|  | ||||
| import eu.kanade.tachiyomi.data.database.models.Chapter | ||||
| import eu.kanade.tachiyomi.data.database.models.Manga | ||||
|  | ||||
/**
 * Guesses the number of a chapter from its display name.
 *
 * In the examples below, -R> = regex conversion.
 */
object ChapterRecognition {
    /**
     * All cases with Ch.xx (optionally with spaces between the token and the number)
     * Mokushiroku Alice Vol.1 Ch. 4: Misrepresentation -R> 4
     */
    private val basic = Regex("""(?<=ch\.) *([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")

    /**
     * Regex used when only one number occurrence
     * Example: Bleach 567: Down With Snowwhite -R> 567
     */
    private val occurrence = Regex("""([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")

    /**
     * Regex used when manga title removed
     * Example: Solanin 028 Vol. 2 -> 028 Vol.2 -> 028Vol.2 -R> 028
     */
    private val withoutManga = Regex("""^([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")

    /**
     * Regex used to remove unwanted tags
     * Example Prison School 12 v.1 vol004 version1243 volume64 -R> Prison School 12
     *
     * The negative lookbehind keeps the tag from matching in the middle of a word
     * (e.g. the "v 12" inside "rev 12") while still allowing a digit right before
     * it (e.g. "12v2").
     */
    private val unwanted = Regex("""(?<![a-z])(?:(v|ver|vol|version|volume).?[0-9]+)""")

    /**
     * Whitespace in front of a sub-chapter keyword, so that "12 extra" becomes "12extra"
     * and the keyword is captured as the alpha suffix of the number.
     */
    private val unwantedWhiteSpace = Regex("""(\s)(extra|special|omake)""")

    /**
     * Sets [Chapter.chapter_number] from the chapter name when it is still -1.
     * The chapter is left untouched when no number can be found.
     * @param chapter chapter object to update
     * @param manga manga the chapter belongs to; its title is removed from the name
     */
    fun parseChapterNumber(chapter: Chapter, manga: Manga) {
        // If chapter number is known return.
        if (chapter.chapter_number != -1f)
            return

        // Get chapter title with lower case
        var name = chapter.name.toLowerCase()

        // Treat commas as decimal points.
        name = name.replace(',', '.')

        // Remove the white space in front of extra/special/omake.
        name = unwantedWhiteSpace.replace(name, "\$2")

        // Remove unwanted tags. Replacing through the regex only touches the spans
        // it actually matched, not every literal repetition of the matched text.
        name = unwanted.replace(name, "")

        // Check base case ch.xx
        if (updateChapter(basic.find(name), chapter))
            return

        // Check one number occurrence.
        val occurrences = occurrence.findAll(name).toList()
        if (occurrences.size == 1) {
            if (updateChapter(occurrences[0], chapter))
                return
        }

        // Remove manga title from chapter title.
        val nameWithoutManga = name.replace(manga.title.toLowerCase(), "").trim()

        // Check if first value is number after title remove.
        if (updateChapter(withoutManga.find(nameWithoutManga), chapter))
            return

        // Take the first number encountered.
        updateChapter(occurrence.find(nameWithoutManga), chapter)
    }

    /**
     * Check if a chapter number is found and update chapter
     * @param match result of regex; group 1 is the integer part, group 2 the
     * decimal part and group 3 the alpha suffix
     * @param chapter chapter object
     * @return true if a chapter number is found
     */
    fun updateChapter(match: MatchResult?, chapter: Chapter): Boolean {
        if (match == null)
            return false

        val initial = match.groups[1]!!.value.toFloat()
        val subChapterDecimal = match.groups[2]?.value
        val subChapterAlpha = match.groups[3]?.value
        chapter.chapter_number = initial + checkForDecimal(subChapterDecimal, subChapterAlpha)
        return true
    }

    /**
     * Check for decimal in received strings
     * @param decimal decimal value of regex (including the leading dot)
     * @param alpha alpha value of regex (optionally with a leading dot)
     * @return decimal/alpha float value, 0 when neither is present
     */
    fun checkForDecimal(decimal: String?, alpha: String?): Float {
        if (decimal != null && decimal.isNotEmpty())
            return decimal.toFloat()

        if (alpha != null && alpha.isNotEmpty()) {
            if (alpha.contains("extra"))
                return .99f

            if (alpha.contains("omake"))
                return .98f

            if (alpha.contains("special"))
                return .97f

            // Take value after (.) if there is one; a lone "." carries no sub-chapter.
            val letter = alpha.removePrefix(".").firstOrNull() ?: return .0f
            return parseAlphaPostFix(letter)
        }

        return .0f
    }

    /**
     * x.a -> x.1, x.b -> x.2, etc
     *
     * The letter's position in the alphabet is appended after "0.", so letters
     * past 'i' give two-digit fractions ('j' becomes 0.10, the same value as 'a').
     */
    private fun parseAlphaPostFix(alpha: Char): Float {
        return ("0." + (alpha - 'a' + 1)).toFloat()
    }

}
		Reference in New Issue
	
	Block a user