Skip to content

Commit

Permalink
Timezone, Float / double / long formatting
Browse files: browse the repository at this point in the history
  • Loading branch information
imathews committed May 16, 2019
1 parent dbdb2fb commit 03e3f2b
Showing 1 changed file with 33 additions and 33 deletions.
66 changes: 33 additions & 33 deletions src/main/java/thehyve/sas/Convert.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,7 +17,10 @@
import java.io.OutputStreamWriter;

import java.util.Date;
import java.util.Locale;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.TimeZone;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
Expand Down Expand Up @@ -64,6 +67,8 @@ public class Convert {
private static final Logger log = LoggerFactory.getLogger(Convert.class);

public void convert(InputStream in , OutputStream out, OutputStream metadataOut, String progressFileName) throws IOException {
TimeZone.setDefault(TimeZone.getTimeZone("UTC"));

Date start = new Date();
SasFileReader reader = new SasFileReaderImpl( in );
CSVWriter writer = new CSVWriter(new OutputStreamWriter(out));
Expand All @@ -79,6 +84,8 @@ public void convert(InputStream in , OutputStream out, OutputStream metadataOut,
String[] outData = new String[columns.size()];
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
DateFormat dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd' 'HH:mm:ss");
DecimalFormat decimalFormat = new DecimalFormat("0", DecimalFormatSymbols.getInstance(Locale.ENGLISH));
decimalFormat.setMaximumFractionDigits(340); // 340 = DecimalFormat.DOUBLE_FRACTION_DIGITS

dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
dateTimeFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
Expand All @@ -97,14 +104,6 @@ public void convert(InputStream in , OutputStream out, OutputStream metadataOut,
metadataWriter.flush();
metadataWriter.close();

// if (!onlyColumnNames) {
// // Writing column format
// for(int i=0; i < columns.size(); i++) {
// outData[i] = columns.get(i).getFormat();
// }
// writer.writeNext(outData);
// }

for (int i = 0; i < columns.size(); i++) {
outData[i] = columns.get(i).getName();
}
Expand All @@ -115,7 +114,7 @@ public void convert(InputStream in , OutputStream out, OutputStream metadataOut,
long rowCount = 0;
long progress = -1;
while ((data = reader.readNext()) != null) {
assert(columns.size() == data.length);

if (progressFileName != null && rowCount * 100 / totalRowCount != progress) {
progress = rowCount * 100 / totalRowCount;
// log.info("Progress: " + progress);
Expand All @@ -124,31 +123,35 @@ public void convert(InputStream in , OutputStream out, OutputStream metadataOut,
progressFileWriter.close();
}

// if (rowCount > 10) {
// break;
// }

rowCount++;

assert(columns.size() == data.length);

for (int i = 0; i < data.length; i++) {
if (data[i] instanceof Date) {
if (data[i] == null) {
outData[i] = "";
if (data[i] == null) {
outData[i] = "";
}
else if (data[i] instanceof Date) {
Date value = (Date) data[i];
if (value.getHours() != 0 || value.getMinutes() != 0 || value.getSeconds() != 0) {
outData[i] = dateTimeFormat.format(value);
} else {
Date value = (Date) data[i];
if (value.getHours() != 0 || value.getMinutes() != 0 || value.getSeconds() != 0) {
outData[i] = dateTimeFormat.format(value);
} else {
outData[i] = dateFormat.format(value);
}
outData[i] = dateFormat.format(value);
}

// Date date = data[i];

} else {
outData[i] = data[i] == null ? "" : data[i].toString();
}
else if (data[i] instanceof Double || data[i] instanceof Long){
outData[i] = decimalFormat.format(data[i]);
}
else{
outData[i] = data[i].toString();
}

}
writer.writeNext(outData);
rowCount++;
// if (rowCount > 2){
// break;
// }
}
log.info("Done writing data.");
log.info(rowCount + " rows written.");
Expand Down Expand Up @@ -218,7 +221,6 @@ public static void main(String[] args) {
log.info("Reading from GCS: " + bucketName + "/" + objectName);

ReadChannel reader = storage.reader(bucketName, objectName);
// reader.setChunkSize(33554432);
fin = new BufferedInputStream(Channels.newInputStream(reader), 33554432);
} else {
log.info("Reading from file: " + in_filename);
Expand All @@ -235,8 +237,6 @@ public static void main(String[] args) {
BlobId blobId = BlobId.of(outBucketName, outObjectName);
BlobInfo blobInfo = BlobInfo.newBuilder(blobId).setContentType("text/csv").build();
WriteChannel writer = storage.writer(blobInfo);
// writer.setChunkSize(33554432);

fout = new BufferedOutputStream(Channels.newOutputStream(writer), 33554432 * 2);
} else {
log.info("Writing to file: " + out_filename);
Expand All @@ -252,9 +252,9 @@ public static void main(String[] args) {
} catch (IOException e) {
e.printStackTrace();
}
catch (InterruptedException e){
e.printStackTrace();
}
catch (InterruptedException e){
e.printStackTrace();
}
} catch (ParseException e) {
System.err.printf(USAGE + "\n");
e.printStackTrace();
Expand Down

0 comments on commit 03e3f2b

Please sign in to comment.