Skip to content

Commit

Permalink
Merge pull request #64 from ahorek/cp50220
Browse files Browse the repository at this point in the history
Fix ArrayIndexOutOfBoundsException (AIOOB) in CP50220 transcoding
  • Loading branch information
headius authored Jan 16, 2025
2 parents edc4b7d + 253ec47 commit 40844c0
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 84 deletions.
136 changes: 52 additions & 84 deletions src/org/jcodings/transcode/TranscodeFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -539,16 +539,13 @@ public static int funSoCp50220Encoder(byte[] statep, byte[] s, int sStart, int l
int output0 = oStart;
byte[] sp = statep;

if (sp[0] == G0_JISX0201_KATAKANA) {
if (sp[0] == G0_JISX0201_KATAKANA && sp[2] != 0) {
int c = sp[2] & 0x7F;
int p = (c - 0x21) * 2;
sp[2] = 0;
byte[] pBytes = tbl0208;
if (sp[1] != G0_JISX0208_1983) {
o[oStart++] = 0x1B;
o[oStart++] = (byte)'$';
o[oStart++] = (byte)'B';
}
sp[0] = G0_JISX0208_1983;
oStart = iso2022jp_put_state(sp, o, (int)sp[1], G0_JISX0208_1983, oStart);

o[oStart++] = pBytes[p++];
s0 = toUnsignedInt(s[sStart]);
s1 = toUnsignedInt(s[sStart+1]);
Expand All @@ -568,22 +565,25 @@ public static int funSoCp50220Encoder(byte[] statep, byte[] s, int sStart, int l
if (l == 2 && s0 == 0x8E) {
s1 = toUnsignedInt(s[sStart+1]);
int p = (s1 - 0xA1) * 2;
byte[] pBytes = tbl0208;
if ((0xA1 <= s1 && s1 <= 0xB5) ||
(0xC5 <= s1 && s1 <= 0xC9) ||
(0xCF <= s1 && s1 <= 0xDF)) {
if (sp[0] != G0_JISX0208_1983) {
o[oStart++] = 0x1b;
o[oStart++] = '$';
o[oStart++] = 'B';
sp[0] = G0_JISX0208_1983;
}
byte[] pBytes = tbl0208;
oStart = iso2022jp_put_state(sp, o, (int)sp[0], G0_JISX0208_1983, oStart);

o[oStart++] = pBytes[p++];
o[oStart++] = pBytes[p];
return oStart - output0;
}

sp[2] = (byte)s1;
if (s1 > 0xDF) { /* undef */
oStart = iso2022jp_put_state(sp, o, (int)sp[0], G0_JISX0201_KATAKANA, oStart);
o[oStart++] = (byte) (s1 & 0x7f);
sp[2] = 0;
return oStart - output0;
}

sp[2] = (byte) s1;
sp[1] = sp[0];
sp[0] = G0_JISX0201_KATAKANA;
return oStart - output0;
Expand All @@ -609,24 +609,7 @@ public static int funSoCp5022xEncoder(byte[] statep, byte[] s, int sStart, int l
newstate = G0_JISX0208_1983;
}

if (sp[0] != newstate) {
if (newstate == G0_ASCII) {
o[oStart++] = 0x1b;
o[oStart++] = '(';
o[oStart++] = 'B';
}
else if (newstate == G0_JISX0201_KATAKANA) {
o[oStart++] = 0x1b;
o[oStart++] = '(';
o[oStart++] = 'I';
}
else {
o[oStart++] = 0x1b;
o[oStart++] = '$';
o[oStart++] = 'B';
}
sp[0] = (byte)newstate;
}
oStart = iso2022jp_put_state(sp, o, (int)sp[0], newstate, oStart);

s0 = toUnsignedInt(s[sStart]);
if (l == 1) {
Expand All @@ -647,16 +630,13 @@ public static int finishCp50220Encoder(byte[] statep, byte[] o, int oStart, int

if (sp[0] == G0_ASCII) return 0;

if (sp[0] == G0_JISX0201_KATAKANA) {
if (sp[0] == G0_JISX0201_KATAKANA && sp[2] != 0) {
int c = sp[2] & 0x7F;
int p = (c - 0x21) * 2;
byte[] pBytes = tbl0208;
if (sp[1] != G0_JISX0208_1983) {
o[oStart++] = 0x1b;
o[oStart++] = '$';
o[oStart++] = 'B';
}
sp[0] = G0_JISX0208_1983;

oStart = iso2022jp_put_state(sp, o, (int)sp[1], G0_JISX0208_1983, oStart);

o[oStart++] = pBytes[p++];
o[oStart++] = pBytes[p];
}
Expand Down Expand Up @@ -739,6 +719,32 @@ public static int funSoEucjpToStatelessIso2022jp(byte[] statep, byte[] s, int sS
return 3;
}

/**
 * Writes a three-byte escape sequence to {@code o} when the requested state differs from
 * the old one, and records the new state in {@code sp[0]}.
 *
 * <p>The sequence is always {@code 0x1b} followed by two bytes selected by {@code newstate}:
 * <ul>
 *   <li>{@code G0_ASCII}: {@code '(' 'B'}</li>
 *   <li>{@code G0_JISX0201_KATAKANA}: {@code '(' 'I'}</li>
 *   <li>{@code G0_JISX0208_1978}: {@code '$' '@'}</li>
 *   <li>any other value: {@code '$' 'B'}</li>
 * </ul>
 *
 * @param sp       state array; {@code sp[0]} is overwritten with {@code newstate} when a sequence is written
 * @param o        output buffer
 * @param oldstate state to compare against (callers pass either {@code sp[0]} or {@code sp[1]})
 * @param newstate state to switch to
 * @param oStart   index in {@code o} at which to start writing
 * @return the output index after any bytes written; equal to {@code oStart} when the states match
 */
private static int iso2022jp_put_state(byte[] sp, byte[] o, int oldstate, int newstate, int oStart) {
    // Nothing to emit and no state to record when we are already in the requested state.
    if (oldstate == newstate) {
        return oStart;
    }

    final byte intermediate;
    final byte terminator;
    if (newstate == G0_ASCII) {
        intermediate = '(';
        terminator = 'B';
    } else if (newstate == G0_JISX0201_KATAKANA) {
        intermediate = '(';
        terminator = 'I';
    } else if (newstate == G0_JISX0208_1978) {
        intermediate = '$';
        terminator = '@';
    } else {
        intermediate = '$';
        terminator = 'B';
    }

    int pos = oStart;
    o[pos++] = 0x1b;
    o[pos++] = intermediate;
    o[pos++] = terminator;

    // The state is recorded only after the whole sequence has been written.
    sp[0] = (byte) newstate;
    return pos;
}

public static int funSoIso2022jpEncoder(byte[] statep, byte[] s, int sStart, int l, byte[] o, int oStart, int oSize) {
byte[] sp = statep;
int output0 = oStart;
Expand All @@ -751,24 +757,7 @@ else if (toUnsignedInt(s[sStart]) == EMACS_MULE_LEADING_CODE_JISX0208_1978)
else
newstate = G0_JISX0208_1983;

if (sp[0] != newstate) {
if (newstate == G0_ASCII) {
o[oStart++] = 0x1b;
o[oStart++] = '(';
o[oStart++] = 'B';
}
else if (newstate == G0_JISX0208_1978) {
o[oStart++] = 0x1b;
o[oStart++] = '$';
o[oStart++] = '@';
}
else {
o[oStart++] = 0x1b;
o[oStart++] = '$';
o[oStart++] = 'B';
}
sp[0] = (byte)newstate;
}
oStart = iso2022jp_put_state(sp, o, (int)sp[0], newstate, oStart);

if (l == 1) {
o[oStart++] = (byte)(s[sStart] & 0x7f);
Expand All @@ -787,10 +776,8 @@ public static int finishIso2022jpEncoder(byte[] statep, byte[] o, int oStart, in

if (sp[0] == G0_ASCII) return 0;

o[oStart++] = 0x1b;
o[oStart++] = '(';
o[oStart++] = 'B';
sp[0] = G0_ASCII;

oStart = iso2022jp_put_state(sp, o, (int)sp[0], G0_ASCII, oStart);

return oStart - output0;
}
Expand Down Expand Up @@ -960,24 +947,7 @@ else if (s0 == EMACS_MULE_LEADING_CODE_JISX0208_1978)
else
newstate = G0_JISX0208_1983;

if (sp[0] != newstate) {
o[oStart++] = 0x1b;
switch (newstate) {
case G0_ASCII:
o[oStart++] = '(';
o[oStart++] = 'B';
break;
case G0_JISX0208_1978:
o[oStart++] = '$';
o[oStart++] = '@';
break;
default:
o[oStart++] = '$';
o[oStart++] = 'B';
break;
}
sp[0] = (byte)newstate;
}
oStart = iso2022jp_put_state(sp, o, (int)sp[0], G0_ASCII, oStart);

if (l == 1) {
o[oStart++] = (byte)(s0 & 0x7f);
Expand All @@ -999,10 +969,8 @@ public static int finishIso2022jpKddiEncoder(byte[] statep, byte[] o, int oStart

if (sp[0] == G0_ASCII) return 0;

o[oStart++] = 0x1b;
o[oStart++] = '(';
o[oStart++] = 'B';
sp[0] = G0_ASCII;

oStart = iso2022jp_put_state(sp, o, (int)sp[0], G0_ASCII, oStart);

return oStart - output0;
}
Expand Down
24 changes: 24 additions & 0 deletions test/org/jcodings/transcode/TestCP51932ToCP50220.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package org.jcodings.transcode;

import org.jcodings.Ptr;
import org.junit.Test;
import org.junit.Assert;
import java.util.Arrays;

/**
 * Regression test for transcoding CP51932 input to CP50220.
 *
 * <p>The input contains two {@code 0x8E}-prefixed two-byte sequences: {@code 8E A1}, followed by
 * {@code 8E FE}, whose second byte is above {@code 0xDF}.
 */
public class TestCP51932ToCP50220 {
    /**
     * Converts {@code 8E A1 8E FE} and checks the exact output bytes.
     *
     * @throws Exception if the transcoder cannot be opened or the conversion fails unexpectedly
     */
    @Test
    public void testCP51932ToCP50220() throws Exception {
        byte[] src = {(byte) 0x8E, (byte) 0xA1, (byte) 0x8E, (byte) 0xFE};
        byte[] dst = new byte[100];
        Ptr srcPtr = new Ptr(0);
        Ptr dstPtr = new Ptr(0);

        EConv econv = TranscoderDB.open("CP51932", "CP50220", 0);
        econv.convert(src, srcPtr, src.length, dst, dstPtr, dst.length, 0);

        // ESC $ B, 21 23, ESC ( I, 7E, ESC ( B
        byte[] expected = {
            0x1B, '$', 'B', 0x21, 0x23,
            0x1B, '(', 'I', 0x7E,
            0x1B, '(', 'B'
        };
        byte[] actual = Arrays.copyOf(dst, dstPtr.p);

        // Compare raw bytes: decoding through the platform default charset could hide differences.
        Assert.assertArrayEquals(expected, actual);
    }
}

0 comments on commit 40844c0

Please sign in to comment.