Skip to content
This repository has been archived by the owner on Aug 26, 2019. It is now read-only.

Commit

Permalink
Fix parse error
Browse files (browse the repository at this point in the history)
  • Loading branch information
seven332 committed Mar 21, 2015
1 parent f29090e commit 245bdc1
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 37 deletions.
48 changes: 26 additions & 22 deletions app/src/main/java/com/hippo/ehviewer/ehclient/DetailParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,20 @@

package com.hippo.ehviewer.ehclient;

import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hippo.ehviewer.data.Comment;
import com.hippo.ehviewer.data.LargePreviewList;
import com.hippo.ehviewer.data.NormalPreviewList;
import com.hippo.ehviewer.data.PreviewList;
import com.hippo.ehviewer.util.EhUtils;
import com.hippo.ehviewer.util.Utils;

import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DetailParser {

private static final String OFFENSIVE_STRING =
Expand Down Expand Up @@ -125,24 +125,27 @@ public int parser(String body, int mode) {
// Get detail
if ((mode & DETAIL) != 0) {
p = Pattern
.compile("<div id=\"gd1\"><img src=\"([^\"]+)\"[^<>]+/></div>" // thumb
.compile("<div id=\"gd1\"><img src=\"([^\"]+)\"[^<>]+></div>" // thumb
+ "</div>"
+ "<div id=\"gd2\">"
+ "<h1 id=\"gn\">([^<>]+)</h1>" // title
+ "<h1 id=\"gj\">([^<>]*)</h1>" // title_jpn might be empty string
+ "</div>"
+ ".+"
+ "<div id=\"gdc\"><a[^<>]+><[^<>]*alt=\"([\\w|\\-]+)\"[^<>]*/></a></div>" // category
+ "<div id=\"gdc\"><a[^<>]+><[^<>]*alt=\"([\\w|\\-]+)\"[^<>]*></a></div>" // category
+ "<div id=\"gdn\"><a[^<>]+>([^<>]+)</a>" // uploader
+ ".+"
+ "<tr><td[^<>]*>Posted:</td><td[^<>]*>([\\w|\\-|\\s|:]+)</td></tr>" // posted
+ "<tr><td[^<>]*>Images:</td><td[^<>]*>([\\d]+) @ ([\\w|\\.|\\s]+)</td></tr>" // pages and size
+ "<tr><td[^<>]*>Resized:</td><td[^<>]*>([^<>]+)</td></tr>" // resized
//+ "<tr><td[^<>]*>Images:</td><td[^<>]*>([\\d]+) @ ([\\w|\\.|\\s]+)</td></tr>" // pages and size
//+ "<tr><td[^<>]*>Resized:</td><td[^<>]*>([^<>]+)</td></tr>" // resized
+ "<tr><td[^<>]*>Parent:</td><td[^<>]*>(?:<a[^<>]*>)?([^<>]+)(?:</a>)?</td></tr>" // parent
+ "<tr><td[^<>]*>Visible:</td><td[^<>]*>([^<>]+)</td></tr>" // visible
+ "<tr><td[^<>]*>Language:</td><td[^<>]*>([^<>]+)</td></tr>" // language
+ "<tr><td[^<>]*>File Size:</td><td[^<>]*>([^<>]+)<span[^<>]*>([^<>]+)</span></td></tr>" // File size and resize
+ "<tr><td[^<>]*>Length:</td><td[^<>]*>([\\d|,]+) pages</td></tr>" // pages
+ "<tr><td[^<>]*>Favorited:</td><[^<>]*>([^<>]+)</td></tr>" // Favorite times ([\d|,]+) times or Never
+ ".+"
+ "<td id=\"grt3\">\\(<span id=\"rating_count\">([\\d|,]+)</span>\\)</td>" // people
+ "<td id=\"grt3\"><span id=\"rating_count\">([\\d|,]+)</span></td>" // people
+ "</tr>"
+ "<tr><td[^<>]*>([^<>]+)</td>" // rating
+ ".+"
Expand All @@ -157,27 +160,28 @@ public int parser(String body, int mode) {
category = EhUtils.getCategory(m.group(4));
uploader = m.group(5);
posted = m.group(6);
pages = Integer.parseInt(m.group(7));
size = m.group(8);
resized = m.group(9);
parent = m.group(10);
visible = m.group(11);
language = m.group(12);
people = Integer.parseInt(m.group(13).replace(",", ""));
parent = m.group(7);
visible = m.group(8);
language = Utils.unescapeXml(m.group(9)).trim();
size = Utils.unescapeXml(m.group(10)).trim();
resized = m.group(11);
pages = Integer.parseInt(m.group(12).replace(",", ""));
// favoriteTimes = m.group(13)
people = Integer.parseInt(m.group(14).replace(",", ""));

Pattern pattern = Pattern.compile("([\\d|\\.]+)");
Matcher matcher = pattern.matcher(m.group(14));
Matcher matcher = pattern.matcher(m.group(15));
if (matcher.find())
rating = Float.parseFloat(matcher.group(1));
else
rating = Float.NaN;

firstPage = m.group(15);
firstPage = m.group(16);
}
}
// Get tag
if ((mode & TAG) != 0) {
tags = new LinkedHashMap<String, LinkedList<String>>();
tags = new LinkedHashMap<>();
p = Pattern
.compile("<tr><td[^<>]+>([\\w\\s]+):</td><td>(?:<div[^<>]+><a[^<>]+>[\\w\\s]+</a></div>)+</td></tr>");
m = p.matcher(body);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

package com.hippo.ehviewer.ehclient;

import com.hippo.ehviewer.util.Log;

import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hippo.ehviewer.util.Log;

public class EdDetailParser {

int previewPageNum;
Expand Down Expand Up @@ -80,7 +80,7 @@ public boolean parser(String body, int mode, boolean needPreviewInfo) {
return false;
}

p = Pattern.compile("<td class=\"gdt1\">Images:</td><td class=\"gdt2\">([\\d,]+) ");
p = Pattern.compile("<tr><td[^<>]*>Length:</td><td[^<>]*>([\\d|,]+) pages</td></tr>");
m = p.matcher(body);
if (m.find())
imageNum = Integer.valueOf(m.group(1).replace(",", ""));
Expand Down
26 changes: 14 additions & 12 deletions app/src/main/java/com/hippo/ehviewer/util/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -281,21 +281,23 @@ public static <T> void execute(final boolean forceSerial, final AsyncTask<T, ?,
}

private static final String[] ESCAPE_CHARATER_LIST = {
"&amp;",
"&lt;",
"&gt;",
"&quot;",
"&#039;",
"&times;"
"&amp;",
"&lt;",
"&gt;",
"&quot;",
"&#039;",
"&times;",
"&nbsp;"
};

private static final String[] UNESCAPE_CHARATER_LIST = {
"&",
"<",
">",
"\"",
"'",
"×"
"&",
"<",
">",
"\"",
"'",
"×",
" "
};

/**
Expand Down

0 comments on commit 245bdc1

Please sign in to comment.