From e6f0617a0216fb947948d448ae550575a874e206 Mon Sep 17 00:00:00 2001 From: hankcs Date: Sun, 21 May 2017 14:59:26 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8DHDFS=E4=B8=8A=E7=9A=84readByt?= =?UTF-8?q?esFromOtherInputStream=EF=BC=9Ahttps://github.com/hankcs/HanLP/?= =?UTF-8?q?issues/536#issuecomment-302918045?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/hankcs/hanlp/corpus/io/IOUtil.java | 19 ++++++++--- .../hankcs/hanlp/corpus/io/IOUtilTest.java | 34 +++++++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) create mode 100644 src/test/java/com/hankcs/hanlp/corpus/io/IOUtilTest.java diff --git a/src/main/java/com/hankcs/hanlp/corpus/io/IOUtil.java b/src/main/java/com/hankcs/hanlp/corpus/io/IOUtil.java index 5d0fd93f4..94c8637af 100644 --- a/src/main/java/com/hankcs/hanlp/corpus/io/IOUtil.java +++ b/src/main/java/com/hankcs/hanlp/corpus/io/IOUtil.java @@ -255,7 +255,7 @@ private static byte[] readBytesFromFileInputStream(FileInputStream fis) throws I } /** - * 将InputStream中的数据读入到字节数组中 + * 将非FileInputStream的某InputStream中的全部数据读入到字节数组中 * * @param is * @return @@ -263,10 +263,19 @@ private static byte[] readBytesFromFileInputStream(FileInputStream fis) throws I */ public static byte[] readBytesFromOtherInputStream(InputStream is) throws IOException { - byte[] targetArray = new byte[is.available()]; - readBytesFromOtherInputStream(is, targetArray); - is.close(); - return targetArray; + ByteArrayOutputStream data = new ByteArrayOutputStream(); + + int readBytes; + byte[] buffer = new byte[Math.max(is.available(), 4096)]; // 最低4KB的缓冲区 + + while ((readBytes = is.read(buffer, 0, buffer.length)) != -1) + { + data.write(buffer, 0, readBytes); + } + + data.flush(); + + return data.toByteArray(); } /** diff --git a/src/test/java/com/hankcs/hanlp/corpus/io/IOUtilTest.java b/src/test/java/com/hankcs/hanlp/corpus/io/IOUtilTest.java new file mode 100644 index 000000000..a0e2b96d9 --- /dev/null +++ b/src/test/java/com/hankcs/hanlp/corpus/io/IOUtilTest.java @@ -0,0 +1,34 @@ +package com.hankcs.hanlp.corpus.io; + +import junit.framework.TestCase; + +import java.io.ByteArrayInputStream; +import java.util.Random; + +public class IOUtilTest extends TestCase +{ + public void testReadBytesFromOtherInputStream() throws Exception + { + Random random = new Random(System.currentTimeMillis()); + byte[] originalData = new byte[1024 * 1024]; // 1MB + random.nextBytes(originalData); + ByteArrayInputStream is = new ByteArrayInputStream(originalData){ + @Override + public synchronized int available() + { + int realAvailable = super.available(); + if (realAvailable > 0) + { + return 2048; // 模拟某些网络InputStream + } + return realAvailable; + } + }; + byte[] readData = IOUtil.readBytesFromOtherInputStream(is); + assertEquals(originalData.length, readData.length); + for (int i = 0; i < originalData.length; i++) + { + assertEquals(originalData[i], readData[i]); + } + } +} \ No newline at end of file