From 5977e9f47f77819707b9ad91aac590233982511c Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 9 Feb 2016 20:26:51 +0100 Subject: [PATCH 1/5] handle chapter versions which are attached to the chapter number --- .../java/eu/kanade/tachiyomi/util/ChapterRecognition.java | 4 ++-- .../java/eu/kanade/tachiyomi/ChapterRecognitionTest.java | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java index 287c288edc..26d505f699 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java +++ b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java @@ -15,7 +15,7 @@ public class ChapterRecognition { private static final Pattern p3 = Pattern.compile("(\\d+[\\.,]?\\d*\\s*:)"); private static final Pattern pUnwanted = - Pattern.compile("\\b(v|ver|vol|version|volume)\\.?\\s*\\d+\\b"); + Pattern.compile("(\\b|\\d)(v|ver|vol|version|volume)\\.?\\s*\\d+\\b"); public static void parseChapterNumber(Chapter chapter, Manga manga) { if (chapter.chapter_number != -1) @@ -32,7 +32,7 @@ public class ChapterRecognition { } // Remove anything related to the volume or version - name = pUnwanted.matcher(name).replaceAll(""); + name = pUnwanted.matcher(name).replaceAll("$1"); List occurrences; diff --git a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java index 8ff1b06d7c..8626d8bb60 100644 --- a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java +++ b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java @@ -135,4 +135,10 @@ public class ChapterRecognitionTest { assertThat(c.chapter_number).isEqualTo(28f); } + @Test + public void testWithVolumeAttachedToChapter() { + Chapter c = createChapter("Ansatsu Kyoushitsu 011v002: Assembly Time"); + ChapterRecognition.parseChapterNumber(c, randomManga); + 
assertThat(c.chapter_number).isEqualTo(11f); + } } From fa4a8204a4987fc38c056d023e6071c52c8ee133 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 9 Feb 2016 20:43:10 +0100 Subject: [PATCH 2/5] prefer numbers without anything appended when parsing chapter numbers --- .../tachiyomi/util/ChapterRecognition.java | 35 ++++++++++++++----- .../tachiyomi/ChapterRecognitionTest.java | 7 ++++ 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java index 26d505f699..69c3b98ac5 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java +++ b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java @@ -10,9 +10,11 @@ import eu.kanade.tachiyomi.data.database.models.Manga; public class ChapterRecognition { - private static final Pattern p1 = Pattern.compile("ch[^0-9]?\\s*(\\d+[\\.,]?\\d*)"); - private static final Pattern p2 = Pattern.compile("(\\d+[\\.,]?\\d*)"); - private static final Pattern p3 = Pattern.compile("(\\d+[\\.,]?\\d*\\s*:)"); + private static final Pattern cleanWithToken = Pattern.compile("ch[^0-9]?\\s*(\\d+[\\.,]?\\d+)($|\\b)"); + private static final Pattern uncleanWithToken = Pattern.compile("ch[^0-9]?\\s*(\\d+[\\.,]?\\d*)"); + private static final Pattern cleanNumber = Pattern.compile("(\\d+[\\.,]?\\d+)($|\\b)"); + private static final Pattern uncleanNumber = Pattern.compile("(\\d+[\\.,]?\\d*)"); + private static final Pattern withColon = Pattern.compile("(\\d+[\\.,]?\\d*\\s*:)"); private static final Pattern pUnwanted = Pattern.compile("(\\b|\\d)(v|ver|vol|version|volume)\\.?\\s*\\d+\\b"); @@ -24,8 +26,15 @@ public class ChapterRecognition { String name = chapter.name.toLowerCase(); Matcher matcher; - // Safest option, the chapter has a token prepended - matcher = p1.matcher(name); + // Safest option, the chapter has a token prepended and nothing at the end of the number + matcher 
= cleanWithToken.matcher(name); + if (matcher.find()) { + chapter.chapter_number = Float.parseFloat(matcher.group(1)); + return; + } + + // the chapter has a token prepended and something at the end of the number + matcher = uncleanWithToken.matcher(name); if (matcher.find()) { chapter.chapter_number = Float.parseFloat(matcher.group(1)); return; @@ -37,7 +46,7 @@ public class ChapterRecognition { List occurrences; // If there's only one number, use it - matcher = p2.matcher(name); + matcher = uncleanNumber.matcher(name); occurrences = getAllOccurrences(matcher); if (occurrences.size() == 1) { chapter.chapter_number = occurrences.get(0); @@ -45,7 +54,15 @@ public class ChapterRecognition { } // If it has a colon, the chapter number should be that one - matcher = p3.matcher(name); + matcher = withColon.matcher(name); + occurrences = getAllOccurrences(matcher); + if (occurrences.size() == 1) { + chapter.chapter_number = occurrences.get(0); + return; + } + + // Prefer numbers without anything appended + matcher = cleanNumber.matcher(name); occurrences = getAllOccurrences(matcher); if (occurrences.size() == 1) { chapter.chapter_number = occurrences.get(0); @@ -59,7 +76,7 @@ public class ChapterRecognition { String mangaName = replaceIrrelevantCharacters(manga.title); String nameWithoutManga = difference(mangaName, name); if (!nameWithoutManga.isEmpty()) { - matcher = p2.matcher(nameWithoutManga); + matcher = uncleanNumber.matcher(nameWithoutManga); occurrences = getAllOccurrences(matcher); if (occurrences.size() == 1) { chapter.chapter_number = occurrences.get(0); @@ -76,7 +93,7 @@ public class ChapterRecognition { while (matcher.find()) { // Match again to get only numbers from the captured text String text = matcher.group(); - Matcher m = p2.matcher(text); + Matcher m = uncleanNumber.matcher(text); if (m.find()) { try { Float value = Float.parseFloat(m.group(1)); diff --git a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java 
b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java index 8626d8bb60..e8e8a7eac1 100644 --- a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java +++ b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java @@ -141,4 +141,11 @@ public class ChapterRecognitionTest { ChapterRecognition.parseChapterNumber(c, randomManga); assertThat(c.chapter_number).isEqualTo(11f); } + + @Test + public void testWithNumberInChapterTitle() { + Chapter c = createChapter("Ansatsu Kyoushitsu 099 Present Time - 2nd Hour"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number).isEqualTo(99f); + } } From 1611a274b9803efd368aba8f0601364e483a4be3 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 9 Feb 2016 20:57:26 +0100 Subject: [PATCH 3/5] differentiate subchapters denoted by an alpha prefix --- .../tachiyomi/util/ChapterRecognition.java | 16 ++++++++++++++++ .../kanade/tachiyomi/ChapterRecognitionTest.java | 10 ++++++++++ 2 files changed, 26 insertions(+) diff --git a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java index 69c3b98ac5..af4bfd7aaf 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java +++ b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java @@ -12,6 +12,7 @@ public class ChapterRecognition { private static final Pattern cleanWithToken = Pattern.compile("ch[^0-9]?\\s*(\\d+[\\.,]?\\d+)($|\\b)"); private static final Pattern uncleanWithToken = Pattern.compile("ch[^0-9]?\\s*(\\d+[\\.,]?\\d*)"); + private static final Pattern withAlphaPostfix = Pattern.compile("(\\d+[\\.,]?\\d*\\s*)([a-z])($|\\b)"); private static final Pattern cleanNumber = Pattern.compile("(\\d+[\\.,]?\\d+)($|\\b)"); private static final Pattern uncleanNumber = Pattern.compile("(\\d+[\\.,]?\\d*)"); private static final Pattern withColon = Pattern.compile("(\\d+[\\.,]?\\d*\\s*:)"); @@ -33,6 +34,13 @@ public 
class ChapterRecognition { return; } + // a number with a single alpha postfix is parsed as sub-chapter + matcher = withAlphaPostfix.matcher(name); + if (matcher.find()) { + chapter.chapter_number = Float.parseFloat(matcher.group(1)) + parseAlphaPostFix(matcher.group(2)); + return; + } + // the chapter has a token prepended and something at the end of the number matcher = uncleanWithToken.matcher(name); if (matcher.find()) { @@ -88,6 +96,14 @@ public class ChapterRecognition { } + /** + * x.a -> x.1, x.b -> x.2, etc + */ + private static float parseAlphaPostFix(String postfix) { + char alpha = postfix.charAt(0); + return Float.parseFloat("0." + Integer.toString((int)alpha - 96)); + } + public static List getAllOccurrences(Matcher matcher) { List occurences = new ArrayList<>(); while (matcher.find()) { diff --git a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java index e8e8a7eac1..0e33b5f179 100644 --- a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java +++ b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java @@ -148,4 +148,14 @@ public class ChapterRecognitionTest { ChapterRecognition.parseChapterNumber(c, randomManga); assertThat(c.chapter_number).isEqualTo(99f); } + + @Test + public void testAlphaSubChapters() { + Chapter c = createChapter("Asu No Yoichi 19a"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number).isEqualTo(19.1f); + c = createChapter("Asu No Yoichi 19b"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number).isEqualTo(19.2f); + } } From bc1ddd43798cc3ce7a9c08a38b2526a38b1a454c Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 9 Feb 2016 21:19:34 +0100 Subject: [PATCH 4/5] fallback to parsing parts to handle arc numbers --- .../kanade/tachiyomi/util/ChapterRecognition.java | 13 +++++++++++++ .../eu/kanade/tachiyomi/ChapterRecognitionTest.java | 7 +++++++ 2 
files changed, 20 insertions(+) diff --git a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java index af4bfd7aaf..7178417775 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java +++ b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java @@ -94,6 +94,19 @@ public class ChapterRecognition { // TODO more checks (maybe levenshtein?) + // try splitting the name in parts and pick the first valid one + String[] nameParts = chapter.name.split("-"); + if (nameParts.length > 1) { + Chapter dummyChapter = Chapter.create(); + for (String part : nameParts) { + dummyChapter.name = part; + parseChapterNumber(dummyChapter, manga); + if (dummyChapter.chapter_number >= 0) { + chapter.chapter_number = dummyChapter.chapter_number; + return; + } + } + } } /** diff --git a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java index 0e33b5f179..40ac33f049 100644 --- a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java +++ b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java @@ -158,4 +158,11 @@ public class ChapterRecognitionTest { ChapterRecognition.parseChapterNumber(c, randomManga); assertThat(c.chapter_number).isEqualTo(19.2f); } + + @Test + public void testChapterWithArcNumber() { + Chapter c = createChapter("Manga title 123 - Vol 016 Arc title 002"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number).isEqualTo(123f); + } } From e6faee97792c1608716a2c6ea699a42fead2dfb5 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 9 Feb 2016 21:23:57 +0100 Subject: [PATCH 5/5] handle chapters with part numbers --- .../kanade/tachiyomi/util/ChapterRecognition.java | 13 ++++++++++++- .../eu/kanade/tachiyomi/ChapterRecognitionTest.java | 7 +++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git 
a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java index 7178417775..045b4609e1 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java +++ b/app/src/main/java/eu/kanade/tachiyomi/util/ChapterRecognition.java @@ -19,6 +19,8 @@ public class ChapterRecognition { private static final Pattern pUnwanted = Pattern.compile("(\\b|\\d)(v|ver|vol|version|volume)\\.?\\s*\\d+\\b"); + private static final Pattern pPart = + Pattern.compile("(\\b|\\d)part\\s*\\d+.+"); public static void parseChapterNumber(Chapter chapter, Manga manga) { if (chapter.chapter_number != -1) @@ -96,8 +98,8 @@ public class ChapterRecognition { // try splitting the name in parts and pick the first valid one String[] nameParts = chapter.name.split("-"); + Chapter dummyChapter = Chapter.create(); if (nameParts.length > 1) { - Chapter dummyChapter = Chapter.create(); for (String part : nameParts) { dummyChapter.name = part; parseChapterNumber(dummyChapter, manga); @@ -107,6 +109,15 @@ public class ChapterRecognition { } } } + + // Strip anything after "part xxx" and try that + name = pPart.matcher(name).replaceAll("$1"); + dummyChapter.name = name; + parseChapterNumber(dummyChapter, manga); + if (dummyChapter.chapter_number >= 0) { + chapter.chapter_number = dummyChapter.chapter_number; + return; + } } /** diff --git a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java index 40ac33f049..28ad19ed89 100644 --- a/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java +++ b/app/src/test/java/eu/kanade/tachiyomi/ChapterRecognitionTest.java @@ -165,4 +165,11 @@ public class ChapterRecognitionTest { ChapterRecognition.parseChapterNumber(c, randomManga); assertThat(c.chapter_number).isEqualTo(123f); } + + @Test + public void testChapterWithChapterPrefixAfterPart() { + Chapter c = createChapter("Tokyo ESP 027: Part 
002: Chapter 001"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number).isEqualTo(027f); + } }