THRIFT-765. java: Revert the changes applied by THRIFT-765, as they appear to be unstable

git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@940013 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/java/src/org/apache/thrift/Utf8Helper.java b/lib/java/src/org/apache/thrift/Utf8Helper.java
deleted file mode 100644
index 2d3fd26..0000000
--- a/lib/java/src/org/apache/thrift/Utf8Helper.java
+++ /dev/null
@@ -1,124 +0,0 @@
-package org.apache.thrift;
-
-public final class Utf8Helper {
-  private Utf8Helper() {}
-
-  public static final int getByteLength(final String s) {
-    int byteLength = 0;
-    int codePoint;
-    for (int i = 0; i < s.length(); i++) {
-      codePoint = s.charAt(i);
-      if (codePoint >= 0x07FF) {
-        codePoint = s.codePointAt(i);
-        if (Character.isSupplementaryCodePoint(codePoint)) {
-          i++;
-        }
-      }
-      if (codePoint >= 0 && codePoint <= 0x007F) {
-        byteLength++;
-      } else if (codePoint >= 0x80 && codePoint <= 0x07FF) {
-        byteLength += 2;
-      } else if ((codePoint >= 0x0800 && codePoint < 0xD800) || (codePoint > 0xDFFF && codePoint <= 0xFFFD)) {
-        byteLength+=3;
-      } else if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
-        byteLength+=4;
-      } else {
-        throw new RuntimeException("Unknown unicode codepoint in string! "
-            + Integer.toHexString(codePoint));
-      }
-    }
-    return byteLength;
-  }
-
-  public static byte[] encode(String s) {
-    byte[] buf = new byte[getByteLength(s)];
-    encode(s, buf, 0);
-    return buf;
-  }
-
-  public static void encode(final String s, final byte[] buf, final int offset) {
-    int nextByte = 0;
-    int codePoint;
-    final int strLen = s.length();
-    for (int i = 0; i < strLen; i++) {
-      codePoint = s.charAt(i);
-      if (codePoint >= 0x07FF) {
-        codePoint = s.codePointAt(i);
-        if (Character.isSupplementaryCodePoint(codePoint)) {
-          i++;
-        }
-      }
-      if (codePoint <= 0x007F) {
-        buf[offset + nextByte] = (byte)codePoint;
-        nextByte++;
-      } else if (codePoint <= 0x7FF) {
-        buf[offset + nextByte    ] = (byte)(0xC0 | ((codePoint >> 6) & 0x1F));
-        buf[offset + nextByte + 1] = (byte)(0x80 | ((codePoint >> 0) & 0x3F));
-        nextByte+=2;
-      } else if ((codePoint < 0xD800) || (codePoint > 0xDFFF && codePoint <= 0xFFFD)) {
-        buf[offset + nextByte    ] = (byte)(0xE0 | ((codePoint >> 12) & 0x0F));
-        buf[offset + nextByte + 1] = (byte)(0x80 | ((codePoint >>  6) & 0x3F));
-        buf[offset + nextByte + 2] = (byte)(0x80 | ((codePoint >>  0) & 0x3F));
-        nextByte+=3;
-      } else if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
-        buf[offset + nextByte    ] = (byte)(0xF0 | ((codePoint >> 18) & 0x07));
-        buf[offset + nextByte + 1] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
-        buf[offset + nextByte + 2] = (byte)(0x80 | ((codePoint >>  6) & 0x3F));
-        buf[offset + nextByte + 3] = (byte)(0x80 | ((codePoint >>  0) & 0x3F));
-        nextByte+=4;
-      } else {
-        throw new RuntimeException("Unknown unicode codepoint in string! "
-            + Integer.toHexString(codePoint));
-      }
-    }
-  }
-
-  public static String decode(byte[] buf) {
-    char[] charBuf = new char[buf.length];
-    int charsDecoded = decode(buf, 0, buf.length, charBuf);
-    return new String(charBuf, 0, charsDecoded);
-  }
-
-  public static final int UNI_SUR_HIGH_START = 0xD800;
-  public static final int UNI_SUR_HIGH_END = 0xDBFF;
-  public static final int UNI_SUR_LOW_START = 0xDC00;
-  public static final int UNI_SUR_LOW_END = 0xDFFF;
-  public static final int UNI_REPLACEMENT_CHAR = 0xFFFD;
-
-  private static final int HALF_BASE = 0x0010000;
-  private static final long HALF_SHIFT = 10;
-  private static final long HALF_MASK = 0x3FFL;
-
-  public static int decode(final byte[] buf, final int offset, final int byteLength, final char[] charBuf) {
-    int curByteIdx = offset;
-    int endByteIdx = offset + byteLength;
-
-    int curCharIdx = 0;
-
-    while (curByteIdx < endByteIdx) {
-      final int b = buf[curByteIdx++]&0xff;
-      final int ch;
-
-      if (b < 0xC0) {
-        ch = b;
-      } else if (b < 0xE0) {
-        ch = ((b & 0x1F) << 6) + (buf[curByteIdx++] & 0x3F);
-      } else if (b < 0xf0) {
-        ch = ((b & 0xF) << 12) + ((buf[curByteIdx++] & 0x3F) << 6) + (buf[curByteIdx++] & 0x3F);
-      } else {
-        ch = ((b & 0x7) << 18) + ((buf[curByteIdx++]& 0x3F) << 12) + ((buf[curByteIdx++] & 0x3F) << 6) + (buf[curByteIdx++] & 0x3F);
-      }
-
-      if (ch <= 0xFFFF) {
-        // target is a character <= 0xFFFF
-        charBuf[curCharIdx++] = (char) ch;
-      } else {
-        // target is a character in range 0xFFFF - 0x10FFFF
-        final int chHalf = ch - HALF_BASE;
-        charBuf[curCharIdx++] = (char) ((chHalf >> HALF_SHIFT) + UNI_SUR_HIGH_START);
-        charBuf[curCharIdx++] = (char) ((chHalf & HALF_MASK) + UNI_SUR_LOW_START);
-      }
-    }
-    return curCharIdx;
-  }
-}
diff --git a/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java b/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java
index 9e76348..1cfa69d 100644
--- a/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java
+++ b/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java
@@ -19,8 +19,9 @@
 
 package org.apache.thrift.protocol;
 
+import java.io.UnsupportedEncodingException;
+
 import org.apache.thrift.TException;
-import org.apache.thrift.Utf8Helper;
 import org.apache.thrift.transport.TTransport;
 
 /**
@@ -179,9 +180,13 @@
   }
 
   public void writeString(String str) throws TException {
-    byte[] dat = Utf8Helper.encode(str);
-    writeI32(dat.length);
-    trans_.write(dat, 0, dat.length);
+    try {
+      byte[] dat = str.getBytes("UTF-8");
+      writeI32(dat.length);
+      trans_.write(dat, 0, dat.length);
+    } catch (UnsupportedEncodingException uex) { // unreachable in practice: UTF-8 is a mandatory JVM charset
+      throw new TException("JVM DOES NOT SUPPORT UTF-8", uex); // chain the cause instead of discarding it
+    }
   }
 
   public void writeBinary(byte[] bin) throws TException {
@@ -328,20 +333,27 @@
     int size = readI32();
 
     if (trans_.getBytesRemainingInBuffer() >= size) {
-      char[] charBuf = new char[size];
-      int charsDecoded = Utf8Helper.decode(trans_.getBuffer(), trans_.getBufferPosition(), size, charBuf);
-      trans_.consumeBuffer(size);
-      return new String(charBuf, 0, charsDecoded);
+      try {
+        String s = new String(trans_.getBuffer(), trans_.getBufferPosition(), size, "UTF-8");
+        trans_.consumeBuffer(size);
+        return s;
+      } catch (UnsupportedEncodingException e) {
+        throw new TException("JVM DOES NOT SUPPORT UTF-8");
+      }
     }
 
     return readStringBody(size);
   }
 
   public String readStringBody(int size) throws TException {
-    checkReadLength(size);
-    byte[] buf = new byte[size];
-    trans_.readAll(buf, 0, size);
-    return Utf8Helper.decode(buf);
+    try {
+      checkReadLength(size);
+      byte[] buf = new byte[size];
+      trans_.readAll(buf, 0, size);
+      return new String(buf, "UTF-8");
+    } catch (UnsupportedEncodingException uex) { // unreachable in practice: UTF-8 is a mandatory JVM charset
+      throw new TException("JVM DOES NOT SUPPORT UTF-8", uex); // chain the cause instead of discarding it
+    }
   }
 
   public byte[] readBinary() throws TException {
diff --git a/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java b/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java
index e81ed82..f497942 100755
--- a/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java
+++ b/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java
@@ -20,9 +20,10 @@
 
 package org.apache.thrift.protocol;
 
+import java.io.UnsupportedEncodingException;
+
 import org.apache.thrift.ShortStack;
 import org.apache.thrift.TException;
-import org.apache.thrift.Utf8Helper;
 import org.apache.thrift.transport.TTransport;
 
 /**
@@ -292,7 +293,11 @@
    * Write a string to the wire with a varint size preceeding.
    */
   public void writeString(String str) throws TException {
-    writeBinary(Utf8Helper.encode(str));
+    try {
+      writeBinary(str.getBytes("UTF-8"));
+    } catch (UnsupportedEncodingException e) { // unreachable in practice: UTF-8 is a mandatory JVM charset
+      throw new TException("UTF-8 not supported!", e); // chain the cause instead of discarding it
+    }
   }
 
   /**
@@ -605,13 +610,16 @@
       return "";
     }
 
-    if (trans_.getBytesRemainingInBuffer() >= length) {
-      char[] charBuf = new char[length];
-      int charsDecoded = Utf8Helper.decode(trans_.getBuffer(), trans_.getBufferPosition(), length, charBuf);
-      trans_.consumeBuffer(length);
-      return new String(charBuf, 0, charsDecoded);
-    } else {
-      return Utf8Helper.decode(readBinary(length));
+    try {
+      if (trans_.getBytesRemainingInBuffer() >= length) {
+        String str = new String(trans_.getBuffer(), trans_.getBufferPosition(), length, "UTF-8");
+        trans_.consumeBuffer(length);
+        return str;
+      } else {
+        return new String(readBinary(length), "UTF-8");
+      }
+    } catch (UnsupportedEncodingException e) {
+      throw new TException("UTF-8 not supported!");
     }
   }
 
diff --git a/lib/java/test/org/apache/thrift/BenchStringEncoding.java b/lib/java/test/org/apache/thrift/BenchStringEncoding.java
deleted file mode 100644
index 3ae22c7..0000000
--- a/lib/java/test/org/apache/thrift/BenchStringEncoding.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package org.apache.thrift;
-
-import java.io.UnsupportedEncodingException;
-
-public class BenchStringEncoding {
-  private static final String STRING = "a moderately long (but not overly long) string";
-  private static final int HOW_MANY = 100000;
-  private static final byte[] BYTES;
-  static {
-    try {
-      BYTES = STRING.getBytes("UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  public static void main(String[] args) throws UnsupportedEncodingException {
-    for (int trial = 0; trial < 5; trial++) {
-      benchGetBytes();
-      benchFromBytes();
-      benchEncode();
-      benchDecode();
-    }
-  }
-
-  private static void benchDecode() {
-    char[] charBuf = new char[256];
-    long start = System.currentTimeMillis();
-    for (int i = 0; i < HOW_MANY; i++) {
-      Utf8Helper.decode(BYTES, 0, BYTES.length, charBuf);
-    }
-    long end = System.currentTimeMillis();
-    System.out.println("decode: decode: " + (end-start) + "ms");
-  }
-
-  private static void benchFromBytes() {
-    long start = System.currentTimeMillis();
-    for (int i = 0; i < HOW_MANY; i++) {
-      try {
-        new String(BYTES, "UTF-8");
-      } catch (UnsupportedEncodingException e) {
-        throw new RuntimeException(e);
-      }
-    }
-    long end = System.currentTimeMillis();
-    System.out.println("decode: fromBytes: " + (end-start) + "ms");
-  }
-
-  private static void benchEncode() {
-    long start = System.currentTimeMillis();
-    byte[] outbuf = new byte[256];
-    for (int i = 0; i < HOW_MANY; i++) {
-      Utf8Helper.encode(STRING, outbuf, 0);
-    }
-    long end = System.currentTimeMillis();
-    System.out.println("encode: directEncode: " + (end-start) + "ms");
-  }
-
-  private static void benchGetBytes() throws UnsupportedEncodingException {
-    long start = System.currentTimeMillis();
-    for (int i = 0; i < HOW_MANY; i++) {
-      STRING.getBytes("UTF-8");
-    }
-    long end = System.currentTimeMillis();
-    System.out.println("encode: getBytes(UTF-8): " + (end-start) + "ms");
-  }
-}
diff --git a/lib/java/test/org/apache/thrift/TestUtf8Helper.java b/lib/java/test/org/apache/thrift/TestUtf8Helper.java
deleted file mode 100644
index bdfd35a..0000000
--- a/lib/java/test/org/apache/thrift/TestUtf8Helper.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package org.apache.thrift;
-
-import java.io.UnsupportedEncodingException;
-import java.util.Arrays;
-
-import junit.framework.TestCase;
-
-public class TestUtf8Helper extends TestCase {
-  private static final String NON_UNICODE_STRING = "here's some text";
-
-  private static final byte[] kUnicodeBytes = {
-    (byte)0xd3, (byte)0x80, (byte)0xe2, (byte)0x85, (byte)0xae, (byte)0xce,
-    (byte)0x9d, (byte)0x20, (byte)0xd0, (byte)0x9d, (byte)0xce, (byte)0xbf,
-    (byte)0xe2, (byte)0x85, (byte)0xbf, (byte)0xd0, (byte)0xbe, (byte)0xc9,
-    (byte)0xa1, (byte)0xd0, (byte)0xb3, (byte)0xd0, (byte)0xb0, (byte)0xcf,
-    (byte)0x81, (byte)0xe2, (byte)0x84, (byte)0x8e, (byte)0x20, (byte)0xce,
-    (byte)0x91, (byte)0x74, (byte)0x74, (byte)0xce, (byte)0xb1, (byte)0xe2,
-    (byte)0x85, (byte)0xbd, (byte)0xce, (byte)0xba, (byte)0x83, (byte)0xe2,
-    (byte)0x80, (byte)0xbc
-  };
-
-  private static final String UNICODE_STRING = "abc\u5639\u563b";
-  private static final byte[] UNICODE_STRING_BYTES;
-
-  private static final String UNICODE_STRING_2;
-  private static final byte[] UNICODE_STRING_BYTES_2;
-
-  private static final String REALLY_WHACKY_ONE = "\uD841\uDC91";
-  private static final byte[] REALLY_WHACKY_ONE_BYTES;
-
-  private static final String TWO_CHAR_CHAR = "\uD801\uDC00";
-  private static final byte[] TWO_CHAR_CHAR_BYTES;
-
-  static {
-    try {
-      UNICODE_STRING_BYTES = UNICODE_STRING.getBytes("UTF-8");
-      UNICODE_STRING_2 = new String(kUnicodeBytes, "UTF-8");
-      UNICODE_STRING_BYTES_2 = UNICODE_STRING_2.getBytes("UTF-8");
-      REALLY_WHACKY_ONE_BYTES = REALLY_WHACKY_ONE.getBytes("UTF-8");
-      TWO_CHAR_CHAR_BYTES = TWO_CHAR_CHAR.getBytes("UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-
-  public void testEncode() throws Exception {
-    byte[] bytes = NON_UNICODE_STRING.getBytes("UTF-8");
-    byte[] otherBytes = Utf8Helper.encode(NON_UNICODE_STRING);
-    assertTrue(Arrays.equals(bytes, otherBytes));
-
-    otherBytes = Utf8Helper.encode(UNICODE_STRING);
-    assertTrue(Arrays.equals(UNICODE_STRING_BYTES, otherBytes));
-
-    otherBytes = Utf8Helper.encode(UNICODE_STRING_2);
-    assertTrue(Arrays.equals(UNICODE_STRING_BYTES_2, otherBytes));
-
-    otherBytes = Utf8Helper.encode(REALLY_WHACKY_ONE);
-    assertTrue(Arrays.equals(REALLY_WHACKY_ONE_BYTES, otherBytes));
-
-    otherBytes = Utf8Helper.encode(TWO_CHAR_CHAR);
-    assertTrue(Arrays.equals(TWO_CHAR_CHAR_BYTES, otherBytes));
-  }
-
-  public void testDecode() throws Exception {
-    byte[] bytes = NON_UNICODE_STRING.getBytes("UTF-8");
-    assertEquals(NON_UNICODE_STRING, Utf8Helper.decode(bytes));
-
-    assertEquals(UNICODE_STRING, Utf8Helper.decode(UNICODE_STRING_BYTES));
-    assertEquals(UNICODE_STRING_2, Utf8Helper.decode(UNICODE_STRING_BYTES_2));
-    assertEquals(REALLY_WHACKY_ONE, Utf8Helper.decode(REALLY_WHACKY_ONE_BYTES));
-    assertEquals(TWO_CHAR_CHAR, Utf8Helper.decode(TWO_CHAR_CHAR_BYTES));
-  }
-}