Skip to content

Commit

Permalink
Fixed bug where compressed files returned empty content
Browse files Browse the repository at this point in the history
  • Loading branch information
abola committed Mar 22, 2016
1 parent 038526c commit 49400de
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions src/main/java/com/github/abola/crawler/CrawlerPack.java
Original file line number Diff line number Diff line change
Expand Up @@ -233,17 +233,18 @@ public String getFromRemote(String uri){
HttpFileSystemConfigBuilder.getInstance().setCookies(fsOptions, getCookies(uri) );

String remoteContent = "";
String remoteEncoding = "utf-8";

try {
log.debug("Loading remote URI:" + uri);

FileContent fileContent = fileSystem.resolveFile(uri, fsOptions).getContent();
fileContent.getSize(); // pass a bug {@link https://issues.apache.org/jira/browse/VFS-427}

String remoteEncoding = fileContent.getContentInfo().getContentEncoding();

// 2016-03-22: auto-detect the encoding only for plain http/https URIs
if ( "http".equalsIgnoreCase( uri.substring(0,4) ) ) {
fileContent.getSize(); // pass a bug {@link https://issues.apache.org/jira/browse/VFS-427}
remoteEncoding = fileContent.getContentInfo().getContentEncoding();
}
// 2016-03-21: fix getContentEncoding returning null for zip files
if ( null == remoteEncoding) remoteEncoding = "utf8";
if ( null == remoteEncoding) remoteEncoding = "utf-8";

if (! "utf".equalsIgnoreCase(remoteEncoding.substring(0,3)) ){
log.debug("remote content encoding: " + remoteEncoding);
Expand All @@ -265,6 +266,9 @@ public String getFromRemote(String uri){
}catch(IOException ioe){
// return empty
log.warn(ioe.getMessage());
}catch(StringIndexOutOfBoundsException stre){
log.warn("uri: " + uri );
log.warn(stre.getMessage());
}

clearCookies();
Expand Down

0 comments on commit 49400de

Please sign in to comment.