THRIFT-2641 Improvements to Haskell Compiler/Libraries

- test/test.sh integration
- add json and compact protocol

This closes #175

Signed-off-by: Roger Meier <roger@apache.org>
diff --git a/lib/hs/src/Thrift/Protocol/Binary.hs b/lib/hs/src/Thrift/Protocol/Binary.hs
index 1bc9add..ac78483 100644
--- a/lib/hs/src/Thrift/Protocol/Binary.hs
+++ b/lib/hs/src/Thrift/Protocol/Binary.hs
@@ -1,6 +1,3 @@
-{-# LANGUAGE ExistentialQuantification #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE OverloadedStrings #-}
 --
 -- Licensed to the Apache Software Foundation (ASF) under one
 -- or more contributor license agreements. See the NOTICE file
@@ -20,145 +17,169 @@
 -- under the License.
 --
 
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ExistentialQuantification #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
 module Thrift.Protocol.Binary
     ( module Thrift.Protocol
     , BinaryProtocol(..)
     ) where
 
 import Control.Exception ( throw )
-import Control.Monad ( liftM )
-
-import qualified Data.Binary
+import Control.Monad
 import Data.Bits
+import Data.ByteString.Lazy.Builder
+import Data.Functor
 import Data.Int
+import Data.Monoid
 import Data.Text.Lazy.Encoding ( decodeUtf8, encodeUtf8 )
 
-import GHC.Exts
-import GHC.Word
-
 import Thrift.Protocol
 import Thrift.Transport
+import Thrift.Types
 
+import qualified Data.Attoparsec.ByteString as P
+import qualified Data.Attoparsec.ByteString.Lazy as LP
+import qualified Data.Binary as Binary
 import qualified Data.ByteString.Lazy as LBS
+import qualified Data.HashMap.Strict as Map
+import qualified Data.Text.Lazy as LT
 
-version_mask :: Int32
-version_mask = 0xffff0000
+data BinaryProtocol a = BinaryProtocol a
 
-version_1 :: Int32
-version_1    = 0x80010000
-
-data BinaryProtocol a = Transport a => BinaryProtocol a
-
-
+-- NOTE: Reading and Writing functions rely on Builders and Data.Binary to
+-- encode and decode data.  Data.Binary assumes that the binary values it is
+-- encoding to and decoding from are in BIG ENDIAN format, and converts the
+-- endianness as necessary to match the local machine.
 instance Protocol BinaryProtocol where
     getTransport (BinaryProtocol t) = t
 
-    writeMessageBegin p (n, t, s) = do
-        writeI32 p (version_1 .|. (fromIntegral $ fromEnum t))
-        writeString p n
-        writeI32 p s
-    writeMessageEnd _ = return ()
+    writeMessageBegin p (n, t, s) = tWrite (getTransport p) $ toLazyByteString $
+        buildBinaryValue (TI32 (version1 .|. fromIntegral (fromEnum t))) <>
+        buildBinaryValue (TString $ encodeUtf8 n) <>
+        buildBinaryValue (TI32 s)
 
-    writeStructBegin _ _ = return ()
-    writeStructEnd _ = return ()
-    writeFieldBegin p (_, t, i) = writeType p t >> writeI16 p i
-    writeFieldEnd _ = return ()
-    writeFieldStop p = writeType p T_STOP
-    writeMapBegin p (k, v, n) = writeType p k >> writeType p v >> writeI32 p n
-    writeMapEnd _ = return ()
-    writeListBegin p (t, n) = writeType p t >> writeI32 p n
-    writeListEnd _ = return ()
-    writeSetBegin p (t, n) = writeType p t >> writeI32 p n
-    writeSetEnd _ = return ()
+    readMessageBegin p = runParser p $ do
+      TI32 ver <- parseBinaryValue T_I32
+      if ver .&. versionMask /= version1
+        then throw $ ProtocolExn PE_BAD_VERSION "Missing version identifier"
+        else do
+          TString s <- parseBinaryValue T_STRING
+          TI32 sz <- parseBinaryValue T_I32
+          return (decodeUtf8 s, toEnum $ fromIntegral $ ver .&. 0xFF, sz)
 
-    writeBool p b = tWrite (getTransport p) $ LBS.singleton $ toEnum $ if b then 1 else 0
-    writeByte p b = tWrite (getTransport p) $ Data.Binary.encode b
-    writeI16 p b = tWrite (getTransport p) $ Data.Binary.encode b
-    writeI32 p b = tWrite (getTransport p) $ Data.Binary.encode b
-    writeI64 p b = tWrite (getTransport p) $ Data.Binary.encode b
-    writeDouble p d = writeI64 p (fromIntegral $ floatBits d)
-    writeString p s = writeI32 p (fromIntegral $ LBS.length s') >> tWrite (getTransport p) s'
-      where
-        s' = encodeUtf8 s
-    writeBinary p s = writeI32 p (fromIntegral $ LBS.length s) >> tWrite (getTransport p) s
+    serializeVal _ = toLazyByteString . buildBinaryValue
+    deserializeVal _ ty bs =
+      case LP.eitherResult $ LP.parse (parseBinaryValue ty) bs of
+        Left s -> error s
+        Right val -> val
 
-    readMessageBegin p = do
-        ver <- readI32 p
-        if (ver .&. version_mask /= version_1)
-            then throw $ ProtocolExn PE_BAD_VERSION "Missing version identifier"
-            else do
-              s <- readString p
-              sz <- readI32 p
-              return (s, toEnum $ fromIntegral $ ver .&. 0xFF, sz)
-    readMessageEnd _ = return ()
-    readStructBegin _ = return ""
-    readStructEnd _ = return ()
-    readFieldBegin p = do
-        t <- readType p
-        n <- if t /= T_STOP then readI16 p else return 0
-        return ("", t, n)
-    readFieldEnd _ = return ()
-    readMapBegin p = do
-        kt <- readType p
-        vt <- readType p
-        n <- readI32 p
-        return (kt, vt, n)
-    readMapEnd _ = return ()
-    readListBegin p = do
-        t <- readType p
-        n <- readI32 p
-        return (t, n)
-    readListEnd _ = return ()
-    readSetBegin p = do
-        t <- readType p
-        n <- readI32 p
-        return (t, n)
-    readSetEnd _ = return ()
+    readVal p = runParser p . parseBinaryValue
 
-    readBool p = (== 1) `fmap` readByte p
+-- | Writing Functions
+buildBinaryValue :: ThriftVal -> Builder
+buildBinaryValue (TStruct fields) = buildBinaryStruct fields <> buildType T_STOP
+buildBinaryValue (TMap ky vt entries) =
+  buildType ky <>
+  buildType vt <>
+  int32BE (fromIntegral (length entries)) <>
+  buildBinaryMap entries
+buildBinaryValue (TList ty entries) =
+  buildType ty <>
+  int32BE (fromIntegral (length entries)) <>
+  buildBinaryList entries
+buildBinaryValue (TSet ty entries) =
+  buildType ty <>
+  int32BE (fromIntegral (length entries)) <>
+  buildBinaryList entries
+buildBinaryValue (TBool b) =
+  word8 $ toEnum $ if b then 1 else 0
+buildBinaryValue (TByte b) = int8 b
+buildBinaryValue (TI16 i) = int16BE i
+buildBinaryValue (TI32 i) = int32BE i
+buildBinaryValue (TI64 i) = int64BE i
+buildBinaryValue (TDouble d) = doubleBE d
+buildBinaryValue (TString s) = int32BE len <> lazyByteString s
+  where
+    len :: Int32 = fromIntegral (LBS.length s)
 
-    readByte p = do
-        bs <- tReadAll (getTransport p) 1
-        return $ Data.Binary.decode bs
+buildBinaryStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder
+buildBinaryStruct = Map.foldrWithKey combine mempty
+  where
+    combine fid (_,val) s =
+      buildTypeOf val <> int16BE fid <> buildBinaryValue val <> s
 
-    readI16 p = do
-        bs <- tReadAll (getTransport p) 2
-        return $ Data.Binary.decode bs
+buildBinaryMap :: [(ThriftVal, ThriftVal)] -> Builder
+buildBinaryMap = foldl combine mempty
+  where
+    combine s (key, val) = s <> buildBinaryValue key <> buildBinaryValue val
 
-    readI32 p = do
-        bs <- tReadAll (getTransport p) 4
-        return $ Data.Binary.decode bs
+buildBinaryList :: [ThriftVal] -> Builder
+buildBinaryList = foldr (mappend . buildBinaryValue) mempty
 
-    readI64 p = do
-        bs <- tReadAll (getTransport p) 8
-        return $ Data.Binary.decode bs
+-- | Reading Functions
+parseBinaryValue :: ThriftType -> P.Parser ThriftVal
+parseBinaryValue (T_STRUCT _) = TStruct <$> parseBinaryStruct
+parseBinaryValue (T_MAP _ _) = do
+  kt <- parseType
+  vt <- parseType
+  n <- Binary.decode . LBS.fromStrict <$> P.take 4
+  TMap kt vt <$> parseBinaryMap kt vt n
+parseBinaryValue (T_LIST _) = do
+  t <- parseType
+  n <- Binary.decode . LBS.fromStrict <$> P.take 4
+  TList t <$> parseBinaryList t n
+parseBinaryValue (T_SET _) = do
+  t <- parseType
+  n <- Binary.decode . LBS.fromStrict <$> P.take 4
+  TSet t <$> parseBinaryList t n
+parseBinaryValue T_BOOL = TBool . (/=0) <$> P.anyWord8
+parseBinaryValue T_BYTE = TByte . Binary.decode . LBS.fromStrict <$> P.take 1
+parseBinaryValue T_I16 = TI16 . Binary.decode . LBS.fromStrict <$> P.take 2
+parseBinaryValue T_I32 = TI32 . Binary.decode . LBS.fromStrict <$> P.take 4
+parseBinaryValue T_I64 = TI64 . Binary.decode . LBS.fromStrict <$> P.take 8
+parseBinaryValue T_DOUBLE = TDouble . bsToDouble <$> P.take 8
+parseBinaryValue T_STRING = do
+  i :: Int32  <- Binary.decode . LBS.fromStrict <$> P.take 4
+  TString . LBS.fromStrict <$> P.take (fromIntegral i)
+parseBinaryValue ty = error $ "Cannot read value of type " ++ show ty
 
-    readDouble p = do
-        bs <- readI64 p
-        return $ floatOfBits $ fromIntegral bs
+parseBinaryStruct :: P.Parser (Map.HashMap Int16 (LT.Text, ThriftVal))
+parseBinaryStruct = Map.fromList <$> P.manyTill parseField (matchType T_STOP)
+  where
+    parseField = do
+      t <- parseType
+      n <- Binary.decode . LBS.fromStrict <$> P.take 2
+      v <- parseBinaryValue t
+      return (n, ("", v))
 
-    readString p = do
-        i <- readI32 p
-        decodeUtf8 `liftM` tReadAll (getTransport p) (fromIntegral i)
+parseBinaryMap :: ThriftType -> ThriftType -> Int32 -> P.Parser [(ThriftVal, ThriftVal)]
+parseBinaryMap kt vt n | n <= 0 = return []
+                       | otherwise = do
+  k <- parseBinaryValue kt
+  v <- parseBinaryValue vt
+  ((k,v) :) <$> parseBinaryMap kt vt (n-1)
 
-    readBinary p = do
-        i <- readI32 p
-        tReadAll (getTransport p) (fromIntegral i)
+parseBinaryList :: ThriftType -> Int32 -> P.Parser [ThriftVal]
+parseBinaryList ty n | n <= 0 = return []
+                     | otherwise = liftM2 (:) (parseBinaryValue ty)
+                                   (parseBinaryList ty (n-1))
+
 
 
 -- | Write a type as a byte
-writeType :: (Protocol p, Transport t) => p t -> ThriftType -> IO ()
-writeType p t = writeByte p (fromIntegral $ fromEnum t)
+buildType :: ThriftType -> Builder
+buildType t = word8 $ fromIntegral $ fromEnum t
+
+-- | Write type of a ThriftVal as a byte
+buildTypeOf :: ThriftVal -> Builder
+buildTypeOf = buildType . getTypeOf
 
 -- | Read a byte as though it were a ThriftType
-readType :: (Protocol p, Transport t) => p t -> IO ThriftType
-readType p = do
-    b <- readByte p
-    return $ toEnum $ fromIntegral b
+parseType :: P.Parser ThriftType
+parseType = toEnum . fromIntegral <$> P.anyWord8
 
-floatBits :: Double -> Word64
-floatBits (D# d#) = W64# (unsafeCoerce# d#)
-
-floatOfBits :: Word64 -> Double
-floatOfBits (W64# b#) = D# (unsafeCoerce# b#)
-
+matchType :: ThriftType -> P.Parser ThriftType
+matchType t = t <$ P.word8 (fromIntegral $ fromEnum t)