THRIFT-2641 Improvements to Haskell Compiler/Libraries
- test/test.sh integration
- add json and compact protocol
This closes #175
Signed-off-by: Roger Meier <roger@apache.org>
diff --git a/lib/hs/src/Thrift/Protocol/Binary.hs b/lib/hs/src/Thrift/Protocol/Binary.hs
index 1bc9add..ac78483 100644
--- a/lib/hs/src/Thrift/Protocol/Binary.hs
+++ b/lib/hs/src/Thrift/Protocol/Binary.hs
@@ -1,6 +1,3 @@
-{-# LANGUAGE ExistentialQuantification #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE OverloadedStrings #-}
--
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
@@ -20,145 +17,169 @@
-- under the License.
--
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ExistentialQuantification #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
module Thrift.Protocol.Binary
( module Thrift.Protocol
, BinaryProtocol(..)
) where
import Control.Exception ( throw )
-import Control.Monad ( liftM )
-
-import qualified Data.Binary
+import Control.Monad
import Data.Bits
+import Data.ByteString.Lazy.Builder
+import Data.Functor
import Data.Int
+import Data.Monoid
import Data.Text.Lazy.Encoding ( decodeUtf8, encodeUtf8 )
-import GHC.Exts
-import GHC.Word
-
import Thrift.Protocol
import Thrift.Transport
+import Thrift.Types
+import qualified Data.Attoparsec.ByteString as P
+import qualified Data.Attoparsec.ByteString.Lazy as LP
+import qualified Data.Binary as Binary
import qualified Data.ByteString.Lazy as LBS
+import qualified Data.HashMap.Strict as Map
+import qualified Data.Text.Lazy as LT
-version_mask :: Int32
-version_mask = 0xffff0000
+data BinaryProtocol a = BinaryProtocol a
-version_1 :: Int32
-version_1 = 0x80010000
-
-data BinaryProtocol a = Transport a => BinaryProtocol a
-
-
+-- NOTE: Reading and Writing functions rely on Builders and Data.Binary to
+-- encode and decode data. Data.Binary assumes that the binary values it is
+-- encoding to and decoding from are in BIG ENDIAN format, and converts the
+-- endianness as necessary to match the local machine.
instance Protocol BinaryProtocol where
getTransport (BinaryProtocol t) = t
- writeMessageBegin p (n, t, s) = do
- writeI32 p (version_1 .|. (fromIntegral $ fromEnum t))
- writeString p n
- writeI32 p s
- writeMessageEnd _ = return ()
+ writeMessageBegin p (n, t, s) = tWrite (getTransport p) $ toLazyByteString $
+ buildBinaryValue (TI32 (version1 .|. fromIntegral (fromEnum t))) <>
+ buildBinaryValue (TString $ encodeUtf8 n) <>
+ buildBinaryValue (TI32 s)
- writeStructBegin _ _ = return ()
- writeStructEnd _ = return ()
- writeFieldBegin p (_, t, i) = writeType p t >> writeI16 p i
- writeFieldEnd _ = return ()
- writeFieldStop p = writeType p T_STOP
- writeMapBegin p (k, v, n) = writeType p k >> writeType p v >> writeI32 p n
- writeMapEnd _ = return ()
- writeListBegin p (t, n) = writeType p t >> writeI32 p n
- writeListEnd _ = return ()
- writeSetBegin p (t, n) = writeType p t >> writeI32 p n
- writeSetEnd _ = return ()
+ readMessageBegin p = runParser p $ do
+ TI32 ver <- parseBinaryValue T_I32
+ if ver .&. versionMask /= version1
+ then throw $ ProtocolExn PE_BAD_VERSION "Missing version identifier"
+ else do
+ TString s <- parseBinaryValue T_STRING
+ TI32 sz <- parseBinaryValue T_I32
+ return (decodeUtf8 s, toEnum $ fromIntegral $ ver .&. 0xFF, sz)
- writeBool p b = tWrite (getTransport p) $ LBS.singleton $ toEnum $ if b then 1 else 0
- writeByte p b = tWrite (getTransport p) $ Data.Binary.encode b
- writeI16 p b = tWrite (getTransport p) $ Data.Binary.encode b
- writeI32 p b = tWrite (getTransport p) $ Data.Binary.encode b
- writeI64 p b = tWrite (getTransport p) $ Data.Binary.encode b
- writeDouble p d = writeI64 p (fromIntegral $ floatBits d)
- writeString p s = writeI32 p (fromIntegral $ LBS.length s') >> tWrite (getTransport p) s'
- where
- s' = encodeUtf8 s
- writeBinary p s = writeI32 p (fromIntegral $ LBS.length s) >> tWrite (getTransport p) s
+ serializeVal _ = toLazyByteString . buildBinaryValue
+ deserializeVal _ ty bs =
+ case LP.eitherResult $ LP.parse (parseBinaryValue ty) bs of
+ Left s -> error s
+ Right val -> val
- readMessageBegin p = do
- ver <- readI32 p
- if (ver .&. version_mask /= version_1)
- then throw $ ProtocolExn PE_BAD_VERSION "Missing version identifier"
- else do
- s <- readString p
- sz <- readI32 p
- return (s, toEnum $ fromIntegral $ ver .&. 0xFF, sz)
- readMessageEnd _ = return ()
- readStructBegin _ = return ""
- readStructEnd _ = return ()
- readFieldBegin p = do
- t <- readType p
- n <- if t /= T_STOP then readI16 p else return 0
- return ("", t, n)
- readFieldEnd _ = return ()
- readMapBegin p = do
- kt <- readType p
- vt <- readType p
- n <- readI32 p
- return (kt, vt, n)
- readMapEnd _ = return ()
- readListBegin p = do
- t <- readType p
- n <- readI32 p
- return (t, n)
- readListEnd _ = return ()
- readSetBegin p = do
- t <- readType p
- n <- readI32 p
- return (t, n)
- readSetEnd _ = return ()
+ readVal p = runParser p . parseBinaryValue
- readBool p = (== 1) `fmap` readByte p
+-- | Writing Functions
+buildBinaryValue :: ThriftVal -> Builder
+buildBinaryValue (TStruct fields) = buildBinaryStruct fields <> buildType T_STOP
+buildBinaryValue (TMap ky vt entries) =
+ buildType ky <>
+ buildType vt <>
+ int32BE (fromIntegral (length entries)) <>
+ buildBinaryMap entries
+buildBinaryValue (TList ty entries) =
+ buildType ty <>
+ int32BE (fromIntegral (length entries)) <>
+ buildBinaryList entries
+buildBinaryValue (TSet ty entries) =
+ buildType ty <>
+ int32BE (fromIntegral (length entries)) <>
+ buildBinaryList entries
+buildBinaryValue (TBool b) =
+ word8 $ toEnum $ if b then 1 else 0
+buildBinaryValue (TByte b) = int8 b
+buildBinaryValue (TI16 i) = int16BE i
+buildBinaryValue (TI32 i) = int32BE i
+buildBinaryValue (TI64 i) = int64BE i
+buildBinaryValue (TDouble d) = doubleBE d
+buildBinaryValue (TString s) = int32BE len <> lazyByteString s
+ where
+ len :: Int32 = fromIntegral (LBS.length s)
- readByte p = do
- bs <- tReadAll (getTransport p) 1
- return $ Data.Binary.decode bs
+buildBinaryStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder
+buildBinaryStruct = Map.foldrWithKey combine mempty
+ where
+ combine fid (_,val) s =
+ buildTypeOf val <> int16BE fid <> buildBinaryValue val <> s
- readI16 p = do
- bs <- tReadAll (getTransport p) 2
- return $ Data.Binary.decode bs
+buildBinaryMap :: [(ThriftVal, ThriftVal)] -> Builder
+buildBinaryMap = foldl combine mempty
+ where
+ combine s (key, val) = s <> buildBinaryValue key <> buildBinaryValue val
- readI32 p = do
- bs <- tReadAll (getTransport p) 4
- return $ Data.Binary.decode bs
+buildBinaryList :: [ThriftVal] -> Builder
+buildBinaryList = foldr (mappend . buildBinaryValue) mempty
- readI64 p = do
- bs <- tReadAll (getTransport p) 8
- return $ Data.Binary.decode bs
+-- | Reading Functions
+parseBinaryValue :: ThriftType -> P.Parser ThriftVal
+parseBinaryValue (T_STRUCT _) = TStruct <$> parseBinaryStruct
+parseBinaryValue (T_MAP _ _) = do
+ kt <- parseType
+ vt <- parseType
+ n <- Binary.decode . LBS.fromStrict <$> P.take 4
+ TMap kt vt <$> parseBinaryMap kt vt n
+parseBinaryValue (T_LIST _) = do
+ t <- parseType
+ n <- Binary.decode . LBS.fromStrict <$> P.take 4
+ TList t <$> parseBinaryList t n
+parseBinaryValue (T_SET _) = do
+ t <- parseType
+ n <- Binary.decode . LBS.fromStrict <$> P.take 4
+ TSet t <$> parseBinaryList t n
+parseBinaryValue T_BOOL = TBool . (/=0) <$> P.anyWord8
+parseBinaryValue T_BYTE = TByte . Binary.decode . LBS.fromStrict <$> P.take 1
+parseBinaryValue T_I16 = TI16 . Binary.decode . LBS.fromStrict <$> P.take 2
+parseBinaryValue T_I32 = TI32 . Binary.decode . LBS.fromStrict <$> P.take 4
+parseBinaryValue T_I64 = TI64 . Binary.decode . LBS.fromStrict <$> P.take 8
+parseBinaryValue T_DOUBLE = TDouble . bsToDouble <$> P.take 8
+parseBinaryValue T_STRING = do
+ i :: Int32 <- Binary.decode . LBS.fromStrict <$> P.take 4
+ TString . LBS.fromStrict <$> P.take (fromIntegral i)
+parseBinaryValue ty = error $ "Cannot read value of type " ++ show ty
- readDouble p = do
- bs <- readI64 p
- return $ floatOfBits $ fromIntegral bs
+parseBinaryStruct :: P.Parser (Map.HashMap Int16 (LT.Text, ThriftVal))
+parseBinaryStruct = Map.fromList <$> P.manyTill parseField (matchType T_STOP)
+ where
+ parseField = do
+ t <- parseType
+ n <- Binary.decode . LBS.fromStrict <$> P.take 2
+ v <- parseBinaryValue t
+ return (n, ("", v))
- readString p = do
- i <- readI32 p
- decodeUtf8 `liftM` tReadAll (getTransport p) (fromIntegral i)
+parseBinaryMap :: ThriftType -> ThriftType -> Int32 -> P.Parser [(ThriftVal, ThriftVal)]
+parseBinaryMap kt vt n | n <= 0 = return []
+ | otherwise = do
+ k <- parseBinaryValue kt
+ v <- parseBinaryValue vt
+ ((k,v) :) <$> parseBinaryMap kt vt (n-1)
- readBinary p = do
- i <- readI32 p
- tReadAll (getTransport p) (fromIntegral i)
+parseBinaryList :: ThriftType -> Int32 -> P.Parser [ThriftVal]
+parseBinaryList ty n | n <= 0 = return []
+ | otherwise = liftM2 (:) (parseBinaryValue ty)
+ (parseBinaryList ty (n-1))
+
-- | Write a type as a byte
-writeType :: (Protocol p, Transport t) => p t -> ThriftType -> IO ()
-writeType p t = writeByte p (fromIntegral $ fromEnum t)
+buildType :: ThriftType -> Builder
+buildType t = word8 $ fromIntegral $ fromEnum t
+
+-- | Write type of a ThriftVal as a byte
+buildTypeOf :: ThriftVal -> Builder
+buildTypeOf = buildType . getTypeOf
-- | Read a byte as though it were a ThriftType
-readType :: (Protocol p, Transport t) => p t -> IO ThriftType
-readType p = do
- b <- readByte p
- return $ toEnum $ fromIntegral b
+parseType :: P.Parser ThriftType
+parseType = toEnum . fromIntegral <$> P.anyWord8
-floatBits :: Double -> Word64
-floatBits (D# d#) = W64# (unsafeCoerce# d#)
-
-floatOfBits :: Word64 -> Double
-floatOfBits (W64# b#) = D# (unsafeCoerce# b#)
-
+matchType :: ThriftType -> P.Parser ThriftType
+matchType t = t <$ P.word8 (fromIntegral $ fromEnum t)