Initial chapter number recognition (needs improvement). Remove an old class.

This commit is contained in:
inorichi 2015-11-10 19:31:18 +01:00
parent bc83bd7033
commit 692d3c1b2c
5 changed files with 198 additions and 54 deletions

View File

@ -32,6 +32,9 @@ public class Chapter {
@StorIOSQLiteColumn(name = ChaptersTable.COLUMN_DATE_UPLOAD)
public long date_upload;
@StorIOSQLiteColumn(name = ChaptersTable.COLUMN_CHAPTER_NUMBER)
public float chapter_number;
public int downloaded;
public static final int UNKNOWN = 0;
@ -58,7 +61,7 @@ public class Chapter {
}
/**
 * Creates a fresh {@link Chapter} using the no-argument constructor.
 *
 * @return a newly constructed chapter
 */
public static Chapter newChapter() {
    // A single return: the previous body kept a second, unreachable return statement.
    return new Chapter();
}
}

View File

@ -31,6 +31,9 @@ public class ChaptersTable {
@NonNull
public static final String COLUMN_LAST_PAGE_READ = "last_page_read";
@NonNull
public static final String COLUMN_CHAPTER_NUMBER = "chapter_number";
@NonNull
public static String getCreateTableQuery() {
return "CREATE TABLE " + TABLE + "("
@ -40,6 +43,7 @@ public class ChaptersTable {
+ COLUMN_NAME + " TEXT NOT NULL, "
+ COLUMN_READ + " BOOLEAN NOT NULL, "
+ COLUMN_LAST_PAGE_READ + " INT NOT NULL, "
+ COLUMN_CHAPTER_NUMBER + " FLOAT NOT NULL, "
+ COLUMN_DATE_FETCH + " LONG NOT NULL, "
+ COLUMN_DATE_UPLOAD + " LONG NOT NULL, "
+ "FOREIGN KEY(" + COLUMN_MANGA_ID + ") REFERENCES " + MangasTable.TABLE + "(" + MangasTable.COLUMN_ID + ") "

View File

@ -0,0 +1,102 @@
package eu.kanade.mangafeed.util;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import eu.kanade.mangafeed.data.models.Chapter;
import eu.kanade.mangafeed.data.models.Manga;
public class ChapterRecognition {
/** Matches a number that follows a "ch" token (for example "ch.42.5"); group 1 holds the number. */
private static final Pattern CHAPTER_TOKEN_PATTERN = Pattern.compile("ch.?(\\d+[\\.,]?\\d*)");

/** Matches any number, optionally written with a '.' or ',' separator. */
private static final Pattern NUMBER_PATTERN = Pattern.compile("(\\d+[\\.,]?\\d*)");

/**
 * Tries to recognise the chapter number from the chapter's name and stores it in
 * {@code chapter.chapter_number}.
 *
 * <p>Nothing happens when the chapter already has a non-zero number or has no name.
 * Otherwise the following strategies are tried in order, stopping at the first success:
 * <ol>
 *   <li>a number that follows a "ch" token;</li>
 *   <li>the name contains exactly one distinct number;</li>
 *   <li>after dropping the leading part the name shares with the manga title, exactly one
 *       distinct number remains.</li>
 * </ol>
 * If none of them succeeds the chapter number is left untouched.
 *
 * @param chapter the chapter whose number should be filled in
 * @param manga the manga the chapter belongs to; its title is used by the last strategy
 */
public static void parseChapterNumber(Chapter chapter, Manga manga) {
    if (chapter.chapter_number != 0 || chapter.name == null) {
        return;
    }

    // Remove spaces and convert to lower case
    String name = replaceIrrelevantCharacters(chapter.name);

    // Safest option, the chapter has a token prepended
    Matcher matcher = CHAPTER_TOKEN_PATTERN.matcher(name);
    if (matcher.find()) {
        // The pattern accepts ',' as a decimal separator but Float.parseFloat only accepts '.',
        // so normalise it first instead of letting a NumberFormatException escape.
        chapter.chapter_number = Float.parseFloat(matcher.group(1).replace(',', '.'));
        return;
    }

    // If there's only one number, use it
    if (assignIfSingleNumber(chapter, name)) {
        return;
    }

    // Try to remove the manga name from the chapter, and try again
    if (manga != null && manga.title != null) {
        String mangaName = replaceIrrelevantCharacters(manga.title);
        String nameWithoutManga = difference(mangaName, name);
        if (!nameWithoutManga.isEmpty()) {
            assignIfSingleNumber(chapter, nameWithoutManga);
        }
    }

    // TODO more checks (maybe levenshtein?)
}

/** Stores the number found in {@code text} on the chapter when exactly one distinct number occurs. */
private static boolean assignIfSingleNumber(Chapter chapter, String text) {
    List<Float> occurrences = getAllOccurrences(NUMBER_PATTERN.matcher(text));
    if (occurrences.size() != 1) {
        return false;
    }
    chapter.chapter_number = occurrences.get(0);
    return true;
}
/**
 * Collects every distinct number the given matcher finds, in order of first appearance.
 *
 * <p>Each match is parsed as a float. A ',' inside a match is treated as a decimal separator
 * and converted to '.' before parsing, so "42,5" yields 42.5 instead of being discarded.
 *
 * @param matcher a matcher positioned on the text to scan; it is advanced with {@code find()}
 * @return the distinct values found, possibly empty
 */
public static List<Float> getAllOccurrences(Matcher matcher) {
    List<Float> occurrences = new ArrayList<>();
    while (matcher.find()) {
        try {
            float value = Float.parseFloat(matcher.group().replace(',', '.'));
            if (!occurrences.contains(value)) {
                occurrences.add(value);
            }
        } catch (NumberFormatException ignored) {
            // The matcher is supplied by the caller, so a match may still not be a number: skip it.
        }
    }
    return occurrences;
}
/**
 * Normalises a title for matching by removing all whitespace and lower-casing it.
 *
 * <p>Lower-casing uses {@code Locale.ROOT} so the result does not depend on the device
 * locale (a Turkish default locale would otherwise turn 'I' into a dotless 'ı').
 *
 * @param str the text to normalise, may be {@code null}
 * @return the normalised text, or an empty string when {@code str} is {@code null}
 */
public static String replaceIrrelevantCharacters(String str) {
    if (str == null) {
        return "";
    }
    return str.replaceAll("\\s+", "").toLowerCase(java.util.Locale.ROOT);
}
/**
 * Returns the tail of {@code str2} starting at the first position where it stops matching
 * {@code str1}.
 *
 * @param str1 the reference text, may be {@code null}
 * @param str2 the text to cut, may be {@code null}
 * @return the other argument when one of them is {@code null}; an empty string when
 *         {@link #indexOfDifference(String, String)} reports no difference; otherwise the
 *         remainder of {@code str2} from the first differing index
 */
public static String difference(String str1, String str2) {
    // With one side missing there is nothing to compare: hand back the other side as is.
    if (str1 == null || str2 == null) {
        return str1 == null ? str2 : str1;
    }
    final int firstMismatch = indexOfDifference(str1, str2);
    return firstMismatch == -1 ? "" : str2.substring(firstMismatch);
}
/**
 * Finds the first index at which the two strings differ.
 *
 * @param str1 the first string, may be {@code null}
 * @param str2 the second string, may be {@code null}
 * @return {@code -1} when both are the same reference (including both {@code null}) or have
 *         identical content; {@code 0} when exactly one is {@code null}; otherwise the index
 *         of the first differing character, which equals the shorter length when one string
 *         is a prefix of the other
 */
public static int indexOfDifference(String str1, String str2) {
    // Reference comparison on purpose: covers "same instance" and "both null" in one check.
    if (str1 == str2) {
        return -1;
    }
    if (str1 == null || str2 == null) {
        return 0;
    }

    final int sharedLength = Math.min(str1.length(), str2.length());
    int pos = 0;
    while (pos < sharedLength && str1.charAt(pos) == str2.charAt(pos)) {
        pos++;
    }

    // Reaching the end of both strings means every character matched.
    final boolean bothExhausted = pos >= str1.length() && pos >= str2.length();
    return bothExhausted ? -1 : pos;
}
}

View File

@ -1,52 +0,0 @@
package eu.kanade.mangafeed.util;
import java.util.ArrayList;
import java.util.List;
import eu.kanade.mangafeed.data.models.Chapter;
import eu.kanade.mangafeed.data.models.Manga;
/**
 * Factory for placeholder manga and chapter objects.
 *
 * Created by len on 8/10/15.
 */
public class DummyDataUtil {

    /**
     * Builds a fixed list of four placeholder manga.
     *
     * @return a new mutable list with one manga per hard-coded title
     */
    public static List<Manga> createDummyManga() {
        String[] titles = {"One Piece", "Berserk", "Horimiya", "Übel Blatt"};
        ArrayList<Manga> result = new ArrayList<>();
        for (String title : titles) {
            result.add(createDummyManga(title));
        }
        return result;
    }

    /**
     * Builds a placeholder manga with the given title; every other field set here is a
     * fixed sample value.
     */
    private static Manga createDummyManga(String title) {
        Manga manga = new Manga();
        manga.title = title;
        manga.url = "http://example.com";
        manga.artist = "Eiichiro Oda";
        manga.author = "Eiichiro Oda";
        manga.description = "...";
        manga.genre = "Action, Drama";
        manga.status = "Ongoing";
        manga.thumbnail_url = "http://example.com/pic.png";
        return manga;
    }

    /**
     * Builds 49 placeholder chapters named "Chapter 1" to "Chapter 49", all attached to
     * manga id 1 and sharing the same URL.
     *
     * @return a new mutable list of chapters
     */
    public static List<Chapter> createDummyChapters() {
        List<Chapter> result = new ArrayList<>();
        for (int number = 1; number <= 49; number++) {
            Chapter chapter = new Chapter();
            chapter.manga_id = 1L;
            chapter.name = "Chapter " + number;
            chapter.url = "http://example.com/1";
            result.add(chapter);
        }
        return result;
    }
}

View File

@ -0,0 +1,87 @@
package eu.kanade.mangafeed;
import org.junit.Before;
import org.junit.Test;
import eu.kanade.mangafeed.data.models.Chapter;
import eu.kanade.mangafeed.data.models.Manga;
import eu.kanade.mangafeed.util.ChapterRecognition;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;
/**
 * Unit tests for {@link ChapterRecognition#parseChapterNumber(Chapter, Manga)} covering
 * chapter names with and without a "Ch." token, with volume prefixes, with decimal chapter
 * numbers and with digits in the manga title.
 */
public class ChapterRecognitionTest {

    /** Manga whose title contains no digits; recreated before every test. */
    Manga randomManga;

    /** Creates a chapter that only has its name set. */
    private Chapter createChapter(String title) {
        Chapter chapter = new Chapter();
        chapter.name = title;
        return chapter;
    }

    /** Runs the recognition for a chapter name and returns the number stored on the chapter. */
    private float recognize(String chapterName, Manga manga) {
        Chapter chapter = createChapter(chapterName);
        ChapterRecognition.parseChapterNumber(chapter, manga);
        return chapter.chapter_number;
    }

    @Before
    public void setUp() {
        randomManga = new Manga();
        randomManga.title = "Something";
    }

    @Test
    public void testWithOneDigit() {
        assertThat(recognize("Ch.3: Self-proclaimed Genius", randomManga), is(3f));
    }

    @Test
    public void testWithVolumeBefore() {
        assertThat(recognize("Vol.1 Ch.4: Misrepresentation", randomManga), is(4f));
    }

    @Test
    public void testWithVolumeAndVersionNumber() {
        assertThat(recognize("Vol.1 Ch.3 (v2) Read Online", randomManga), is(3f));
    }

    @Test
    public void testWithVolumeAndNumberInTitle() {
        assertThat(recognize("Vol.15 Ch.90: Here Blooms the Daylily, Part 4", randomManga), is(90f));
    }

    @Test
    public void testWithVolumeAndSpecialChapter() {
        assertThat(recognize("Vol.10 Ch.42.5: Homecoming (Beginning)", randomManga), is(42.5f));
    }

    @Test
    public void testWithJustANumber() {
        assertThat(recognize("Homecoming (Beginning) 42", randomManga), is(42f));
    }

    @Test
    public void testWithJustASpecialChapter() {
        assertThat(recognize("Homecoming (Beginning) 42.5", randomManga), is(42.5f));
    }

    @Test
    public void testWithNumberinMangaTitle() {
        // The title itself contains digits, so this exercises the "remove the manga name" step.
        Manga mangaWithDigits = new Manga();
        mangaWithDigits.title = "3x3 Eyes";
        assertThat(recognize("3x3 Eyes 96", mangaWithDigits), is(96f));
    }
}