mirror of
				https://github.com/mihonapp/mihon.git
				synced 2025-11-04 08:08:55 +01:00 
			
		
		
		
	Initial chapter number recognition (needs improvement). Remove an old class.
This commit is contained in:
		@@ -32,6 +32,9 @@ public class Chapter {
 | 
			
		||||
    @StorIOSQLiteColumn(name = ChaptersTable.COLUMN_DATE_UPLOAD)
 | 
			
		||||
    public long date_upload;
 | 
			
		||||
 | 
			
		||||
    @StorIOSQLiteColumn(name = ChaptersTable.COLUMN_CHAPTER_NUMBER)
 | 
			
		||||
    public float chapter_number;
 | 
			
		||||
 | 
			
		||||
    public int downloaded;
 | 
			
		||||
 | 
			
		||||
    public static final int UNKNOWN = 0;
 | 
			
		||||
@@ -58,7 +61,7 @@ public class Chapter {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static Chapter newChapter() {
 | 
			
		||||
        Chapter c = new Chapter();
 | 
			
		||||
        return c;
 | 
			
		||||
        return new Chapter();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -31,6 +31,9 @@ public class ChaptersTable {
 | 
			
		||||
	@NonNull
 | 
			
		||||
	public static final String COLUMN_LAST_PAGE_READ = "last_page_read";
 | 
			
		||||
 | 
			
		||||
	@NonNull
 | 
			
		||||
	public static final String COLUMN_CHAPTER_NUMBER = "chapter_number";
 | 
			
		||||
 | 
			
		||||
	@NonNull
 | 
			
		||||
	public static String getCreateTableQuery() {
 | 
			
		||||
		return "CREATE TABLE " + TABLE + "("
 | 
			
		||||
@@ -40,6 +43,7 @@ public class ChaptersTable {
 | 
			
		||||
				+ COLUMN_NAME + " TEXT NOT NULL, "
 | 
			
		||||
				+ COLUMN_READ + " BOOLEAN NOT NULL, "
 | 
			
		||||
				+ COLUMN_LAST_PAGE_READ + " INT NOT NULL, "
 | 
			
		||||
				+ COLUMN_CHAPTER_NUMBER + " FLOAT NOT NULL, "
 | 
			
		||||
				+ COLUMN_DATE_FETCH + " LONG NOT NULL, "
 | 
			
		||||
				+ COLUMN_DATE_UPLOAD + " LONG NOT NULL, "
 | 
			
		||||
				+ "FOREIGN KEY(" + COLUMN_MANGA_ID + ") REFERENCES " + MangasTable.TABLE + "(" + MangasTable.COLUMN_ID + ") "
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,102 @@
 | 
			
		||||
package eu.kanade.mangafeed.util;
 | 
			
		||||
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.regex.Matcher;
 | 
			
		||||
import java.util.regex.Pattern;
 | 
			
		||||
 | 
			
		||||
import eu.kanade.mangafeed.data.models.Chapter;
 | 
			
		||||
import eu.kanade.mangafeed.data.models.Manga;
 | 
			
		||||
 | 
			
		||||
public class ChapterRecognition {
 | 
			
		||||
 | 
			
		||||
    private static Pattern p1 = Pattern.compile("ch.?(\\d+[\\.,]?\\d*)");
 | 
			
		||||
    private static Pattern p2 = Pattern.compile("(\\d+[\\.,]?\\d*)");
 | 
			
		||||
 | 
			
		||||
    public static void parseChapterNumber(Chapter chapter, Manga manga) {
 | 
			
		||||
        if (chapter.chapter_number != 0)
 | 
			
		||||
            return;
 | 
			
		||||
 | 
			
		||||
        // Remove spaces and convert to lower case
 | 
			
		||||
        String name = replaceIrrelevantCharacters(chapter.name);
 | 
			
		||||
        Matcher matcher;
 | 
			
		||||
 | 
			
		||||
        // Safest option, the chapter has a token prepended
 | 
			
		||||
        matcher = p1.matcher(name);
 | 
			
		||||
        if (matcher.find()) {
 | 
			
		||||
            chapter.chapter_number = Float.parseFloat(matcher.group(1));
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // If there's only one number, use it
 | 
			
		||||
        matcher = p2.matcher(name);
 | 
			
		||||
        List<Float> occurences = getAllOccurrences(matcher);
 | 
			
		||||
        if (occurences.size() == 1) {
 | 
			
		||||
            chapter.chapter_number =  occurences.get(0);
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Try to remove the manga name from the chapter, and try again
 | 
			
		||||
        String mangaName = replaceIrrelevantCharacters(manga.title);
 | 
			
		||||
        String nameWithoutManga = difference(mangaName, name);
 | 
			
		||||
        if (!nameWithoutManga.isEmpty()) {
 | 
			
		||||
            matcher = p2.matcher(nameWithoutManga);
 | 
			
		||||
            occurences = getAllOccurrences(matcher);
 | 
			
		||||
            if (occurences.size() == 1) {
 | 
			
		||||
                chapter.chapter_number =  occurences.get(0);
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // TODO more checks (maybe levenshtein?)
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static List<Float> getAllOccurrences(Matcher matcher) {
 | 
			
		||||
        List<Float> occurences = new ArrayList<>();
 | 
			
		||||
        while (matcher.find()) {
 | 
			
		||||
            try {
 | 
			
		||||
                float value = Float.parseFloat(matcher.group());
 | 
			
		||||
                if (!occurences.contains(value))
 | 
			
		||||
                    occurences.add(value);
 | 
			
		||||
            } catch (NumberFormatException e) { /* Do nothing */ }
 | 
			
		||||
        }
 | 
			
		||||
        return occurences;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static String replaceIrrelevantCharacters(String str) {
 | 
			
		||||
        return str.replaceAll("\\s+", "").toLowerCase();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static String difference(String str1, String str2) {
 | 
			
		||||
        if (str1 == null) {
 | 
			
		||||
            return str2;
 | 
			
		||||
        }
 | 
			
		||||
        if (str2 == null) {
 | 
			
		||||
            return str1;
 | 
			
		||||
        }
 | 
			
		||||
        int at = indexOfDifference(str1, str2);
 | 
			
		||||
        if (at == -1) {
 | 
			
		||||
            return "";
 | 
			
		||||
        }
 | 
			
		||||
        return str2.substring(at);
 | 
			
		||||
    }
 | 
			
		||||
    public static int indexOfDifference(String str1, String str2) {
 | 
			
		||||
        if (str1 == str2) {
 | 
			
		||||
            return -1;
 | 
			
		||||
        }
 | 
			
		||||
        if (str1 == null || str2 == null) {
 | 
			
		||||
            return 0;
 | 
			
		||||
        }
 | 
			
		||||
        int i;
 | 
			
		||||
        for (i = 0; i < str1.length() && i < str2.length(); ++i) {
 | 
			
		||||
            if (str1.charAt(i) != str2.charAt(i)) {
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        if (i < str2.length() || i < str1.length()) {
 | 
			
		||||
            return i;
 | 
			
		||||
        }
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -1,52 +0,0 @@
 | 
			
		||||
package eu.kanade.mangafeed.util;
 | 
			
		||||
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
import eu.kanade.mangafeed.data.models.Chapter;
 | 
			
		||||
import eu.kanade.mangafeed.data.models.Manga;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Created by len on 8/10/15.
 | 
			
		||||
 */
 | 
			
		||||
public class DummyDataUtil {
 | 
			
		||||
 | 
			
		||||
    public static List<Manga> createDummyManga() {
 | 
			
		||||
        ArrayList<Manga> mangas = new ArrayList<>();
 | 
			
		||||
        mangas.add(createDummyManga("One Piece"));
 | 
			
		||||
        mangas.add(createDummyManga("Berserk"));
 | 
			
		||||
        mangas.add(createDummyManga("Horimiya"));
 | 
			
		||||
        mangas.add(createDummyManga("Übel Blatt"));
 | 
			
		||||
 | 
			
		||||
        return mangas;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static Manga createDummyManga(String title) {
 | 
			
		||||
        Manga m = new Manga();
 | 
			
		||||
        m.title = title;
 | 
			
		||||
        m.url="http://example.com";
 | 
			
		||||
        m.artist="Eiichiro Oda";
 | 
			
		||||
        m.author="Eiichiro Oda";
 | 
			
		||||
        m.description="...";
 | 
			
		||||
        m.genre="Action, Drama";
 | 
			
		||||
        m.status="Ongoing";
 | 
			
		||||
        m.thumbnail_url="http://example.com/pic.png";
 | 
			
		||||
        return m;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static List<Chapter> createDummyChapters() {
 | 
			
		||||
        List<Chapter> chapters = new ArrayList<>();
 | 
			
		||||
        Chapter c;
 | 
			
		||||
 | 
			
		||||
        for (int i = 1; i < 50; i++) {
 | 
			
		||||
            c = new Chapter();
 | 
			
		||||
            c.manga_id = 1L;
 | 
			
		||||
            c.name = "Chapter " + i;
 | 
			
		||||
            c.url = "http://example.com/1";
 | 
			
		||||
            chapters.add(c);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return chapters;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,87 @@
 | 
			
		||||
package eu.kanade.mangafeed;
 | 
			
		||||
 | 
			
		||||
import org.junit.Before;
 | 
			
		||||
import org.junit.Test;
 | 
			
		||||
 | 
			
		||||
import eu.kanade.mangafeed.data.models.Chapter;
 | 
			
		||||
import eu.kanade.mangafeed.data.models.Manga;
 | 
			
		||||
import eu.kanade.mangafeed.util.ChapterRecognition;
 | 
			
		||||
 | 
			
		||||
import static org.hamcrest.Matchers.is;
 | 
			
		||||
import static org.junit.Assert.assertThat;
 | 
			
		||||
 | 
			
		||||
public class ChapterRecognitionTest {
 | 
			
		||||
 | 
			
		||||
    Manga randomManga;
 | 
			
		||||
 | 
			
		||||
    private Chapter createChapter(String title) {
 | 
			
		||||
        Chapter chapter = new Chapter();
 | 
			
		||||
        chapter.name = title;
 | 
			
		||||
        return chapter;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Before
 | 
			
		||||
    public void setUp() {
 | 
			
		||||
        randomManga = new Manga();
 | 
			
		||||
        randomManga.title = "Something";
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Test
 | 
			
		||||
    public void testWithOneDigit() {
 | 
			
		||||
        Chapter c = createChapter("Ch.3: Self-proclaimed Genius");
 | 
			
		||||
        ChapterRecognition.parseChapterNumber(c, randomManga);
 | 
			
		||||
        assertThat(c.chapter_number, is(3f));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Test
 | 
			
		||||
    public void testWithVolumeBefore() {
 | 
			
		||||
        Chapter c = createChapter("Vol.1 Ch.4: Misrepresentation");
 | 
			
		||||
        ChapterRecognition.parseChapterNumber(c, randomManga);
 | 
			
		||||
        assertThat(c.chapter_number, is(4f));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Test
 | 
			
		||||
    public void testWithVolumeAndVersionNumber() {
 | 
			
		||||
        Chapter c = createChapter("Vol.1 Ch.3 (v2) Read Online");
 | 
			
		||||
        ChapterRecognition.parseChapterNumber(c, randomManga);
 | 
			
		||||
        assertThat(c.chapter_number, is(3f));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Test
 | 
			
		||||
    public void testWithVolumeAndNumberInTitle() {
 | 
			
		||||
        Chapter c = createChapter("Vol.15 Ch.90: Here Blooms the Daylily, Part 4");
 | 
			
		||||
        ChapterRecognition.parseChapterNumber(c, randomManga);
 | 
			
		||||
        assertThat(c.chapter_number, is(90f));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Test
 | 
			
		||||
    public void testWithVolumeAndSpecialChapter() {
 | 
			
		||||
        Chapter c = createChapter("Vol.10 Ch.42.5: Homecoming (Beginning)");
 | 
			
		||||
        ChapterRecognition.parseChapterNumber(c, randomManga);
 | 
			
		||||
        assertThat(c.chapter_number, is(42.5f));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Test
 | 
			
		||||
    public void testWithJustANumber() {
 | 
			
		||||
        Chapter c = createChapter("Homecoming (Beginning) 42");
 | 
			
		||||
        ChapterRecognition.parseChapterNumber(c, randomManga);
 | 
			
		||||
        assertThat(c.chapter_number, is(42f));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Test
 | 
			
		||||
    public void testWithJustASpecialChapter() {
 | 
			
		||||
        Chapter c = createChapter("Homecoming (Beginning) 42.5");
 | 
			
		||||
        ChapterRecognition.parseChapterNumber(c, randomManga);
 | 
			
		||||
        assertThat(c.chapter_number, is(42.5f));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Test
 | 
			
		||||
    public void testWithNumberinMangaTitle() {
 | 
			
		||||
        Chapter c = createChapter("3x3 Eyes 96");
 | 
			
		||||
        Manga m = new Manga();
 | 
			
		||||
        m.title = "3x3 Eyes";
 | 
			
		||||
        ChapterRecognition.parseChapterNumber(c, m);
 | 
			
		||||
        assertThat(c.chapter_number, is(96f));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user