THRIFT-2641 Improvements to Haskell Compiler/Libraries
- test/test.sh integration
- add json and compact protocol
This closes #175
Signed-off-by: Roger Meier <roger@apache.org>
diff --git a/lib/hs/src/Thrift/Protocol/Binary.hs b/lib/hs/src/Thrift/Protocol/Binary.hs
index 1bc9add..ac78483 100644
--- a/lib/hs/src/Thrift/Protocol/Binary.hs
+++ b/lib/hs/src/Thrift/Protocol/Binary.hs
@@ -1,6 +1,3 @@
-{-# LANGUAGE ExistentialQuantification #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE OverloadedStrings #-}
--
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
@@ -20,145 +17,169 @@
-- under the License.
--
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ExistentialQuantification #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
module Thrift.Protocol.Binary
( module Thrift.Protocol
, BinaryProtocol(..)
) where
import Control.Exception ( throw )
-import Control.Monad ( liftM )
-
-import qualified Data.Binary
+import Control.Monad
import Data.Bits
+import Data.ByteString.Lazy.Builder
+import Data.Functor
import Data.Int
+import Data.Monoid
import Data.Text.Lazy.Encoding ( decodeUtf8, encodeUtf8 )
-import GHC.Exts
-import GHC.Word
-
import Thrift.Protocol
import Thrift.Transport
+import Thrift.Types
+import qualified Data.Attoparsec.ByteString as P
+import qualified Data.Attoparsec.ByteString.Lazy as LP
+import qualified Data.Binary as Binary
import qualified Data.ByteString.Lazy as LBS
+import qualified Data.HashMap.Strict as Map
+import qualified Data.Text.Lazy as LT
-version_mask :: Int32
-version_mask = 0xffff0000
+data BinaryProtocol a = BinaryProtocol a
-version_1 :: Int32
-version_1 = 0x80010000
-
-data BinaryProtocol a = Transport a => BinaryProtocol a
-
-
+-- NOTE: Reading and Writing functions rely on Builders and Data.Binary to
+-- encode and decode data. Data.Binary assumes that the binary values it is
+-- encoding to and decoding from are in BIG ENDIAN format, and converts the
+-- endianness as necessary to match the local machine.
instance Protocol BinaryProtocol where
getTransport (BinaryProtocol t) = t
- writeMessageBegin p (n, t, s) = do
- writeI32 p (version_1 .|. (fromIntegral $ fromEnum t))
- writeString p n
- writeI32 p s
- writeMessageEnd _ = return ()
+ writeMessageBegin p (n, t, s) = tWrite (getTransport p) $ toLazyByteString $
+ buildBinaryValue (TI32 (version1 .|. fromIntegral (fromEnum t))) <>
+ buildBinaryValue (TString $ encodeUtf8 n) <>
+ buildBinaryValue (TI32 s)
- writeStructBegin _ _ = return ()
- writeStructEnd _ = return ()
- writeFieldBegin p (_, t, i) = writeType p t >> writeI16 p i
- writeFieldEnd _ = return ()
- writeFieldStop p = writeType p T_STOP
- writeMapBegin p (k, v, n) = writeType p k >> writeType p v >> writeI32 p n
- writeMapEnd _ = return ()
- writeListBegin p (t, n) = writeType p t >> writeI32 p n
- writeListEnd _ = return ()
- writeSetBegin p (t, n) = writeType p t >> writeI32 p n
- writeSetEnd _ = return ()
+ readMessageBegin p = runParser p $ do
+ TI32 ver <- parseBinaryValue T_I32
+ if ver .&. versionMask /= version1
+ then throw $ ProtocolExn PE_BAD_VERSION "Missing version identifier"
+ else do
+ TString s <- parseBinaryValue T_STRING
+ TI32 sz <- parseBinaryValue T_I32
+ return (decodeUtf8 s, toEnum $ fromIntegral $ ver .&. 0xFF, sz)
- writeBool p b = tWrite (getTransport p) $ LBS.singleton $ toEnum $ if b then 1 else 0
- writeByte p b = tWrite (getTransport p) $ Data.Binary.encode b
- writeI16 p b = tWrite (getTransport p) $ Data.Binary.encode b
- writeI32 p b = tWrite (getTransport p) $ Data.Binary.encode b
- writeI64 p b = tWrite (getTransport p) $ Data.Binary.encode b
- writeDouble p d = writeI64 p (fromIntegral $ floatBits d)
- writeString p s = writeI32 p (fromIntegral $ LBS.length s') >> tWrite (getTransport p) s'
- where
- s' = encodeUtf8 s
- writeBinary p s = writeI32 p (fromIntegral $ LBS.length s) >> tWrite (getTransport p) s
+ serializeVal _ = toLazyByteString . buildBinaryValue
+ deserializeVal _ ty bs =
+ case LP.eitherResult $ LP.parse (parseBinaryValue ty) bs of
+ Left s -> error s
+ Right val -> val
- readMessageBegin p = do
- ver <- readI32 p
- if (ver .&. version_mask /= version_1)
- then throw $ ProtocolExn PE_BAD_VERSION "Missing version identifier"
- else do
- s <- readString p
- sz <- readI32 p
- return (s, toEnum $ fromIntegral $ ver .&. 0xFF, sz)
- readMessageEnd _ = return ()
- readStructBegin _ = return ""
- readStructEnd _ = return ()
- readFieldBegin p = do
- t <- readType p
- n <- if t /= T_STOP then readI16 p else return 0
- return ("", t, n)
- readFieldEnd _ = return ()
- readMapBegin p = do
- kt <- readType p
- vt <- readType p
- n <- readI32 p
- return (kt, vt, n)
- readMapEnd _ = return ()
- readListBegin p = do
- t <- readType p
- n <- readI32 p
- return (t, n)
- readListEnd _ = return ()
- readSetBegin p = do
- t <- readType p
- n <- readI32 p
- return (t, n)
- readSetEnd _ = return ()
+ readVal p = runParser p . parseBinaryValue
- readBool p = (== 1) `fmap` readByte p
+-- | Writing Functions
+buildBinaryValue :: ThriftVal -> Builder
+buildBinaryValue (TStruct fields) = buildBinaryStruct fields <> buildType T_STOP
+buildBinaryValue (TMap ky vt entries) =
+ buildType ky <>
+ buildType vt <>
+ int32BE (fromIntegral (length entries)) <>
+ buildBinaryMap entries
+buildBinaryValue (TList ty entries) =
+ buildType ty <>
+ int32BE (fromIntegral (length entries)) <>
+ buildBinaryList entries
+buildBinaryValue (TSet ty entries) =
+ buildType ty <>
+ int32BE (fromIntegral (length entries)) <>
+ buildBinaryList entries
+buildBinaryValue (TBool b) =
+ word8 $ toEnum $ if b then 1 else 0
+buildBinaryValue (TByte b) = int8 b
+buildBinaryValue (TI16 i) = int16BE i
+buildBinaryValue (TI32 i) = int32BE i
+buildBinaryValue (TI64 i) = int64BE i
+buildBinaryValue (TDouble d) = doubleBE d
+buildBinaryValue (TString s) = int32BE len <> lazyByteString s
+ where
+ len :: Int32 = fromIntegral (LBS.length s)
- readByte p = do
- bs <- tReadAll (getTransport p) 1
- return $ Data.Binary.decode bs
+buildBinaryStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder
+buildBinaryStruct = Map.foldrWithKey combine mempty
+ where
+ combine fid (_,val) s =
+ buildTypeOf val <> int16BE fid <> buildBinaryValue val <> s
- readI16 p = do
- bs <- tReadAll (getTransport p) 2
- return $ Data.Binary.decode bs
+buildBinaryMap :: [(ThriftVal, ThriftVal)] -> Builder
+buildBinaryMap = foldl combine mempty
+ where
+ combine s (key, val) = s <> buildBinaryValue key <> buildBinaryValue val
- readI32 p = do
- bs <- tReadAll (getTransport p) 4
- return $ Data.Binary.decode bs
+buildBinaryList :: [ThriftVal] -> Builder
+buildBinaryList = foldr (mappend . buildBinaryValue) mempty
- readI64 p = do
- bs <- tReadAll (getTransport p) 8
- return $ Data.Binary.decode bs
+-- | Reading Functions
+parseBinaryValue :: ThriftType -> P.Parser ThriftVal
+parseBinaryValue (T_STRUCT _) = TStruct <$> parseBinaryStruct
+parseBinaryValue (T_MAP _ _) = do
+ kt <- parseType
+ vt <- parseType
+ n <- Binary.decode . LBS.fromStrict <$> P.take 4
+ TMap kt vt <$> parseBinaryMap kt vt n
+parseBinaryValue (T_LIST _) = do
+ t <- parseType
+ n <- Binary.decode . LBS.fromStrict <$> P.take 4
+ TList t <$> parseBinaryList t n
+parseBinaryValue (T_SET _) = do
+ t <- parseType
+ n <- Binary.decode . LBS.fromStrict <$> P.take 4
+ TSet t <$> parseBinaryList t n
+parseBinaryValue T_BOOL = TBool . (/=0) <$> P.anyWord8
+parseBinaryValue T_BYTE = TByte . Binary.decode . LBS.fromStrict <$> P.take 1
+parseBinaryValue T_I16 = TI16 . Binary.decode . LBS.fromStrict <$> P.take 2
+parseBinaryValue T_I32 = TI32 . Binary.decode . LBS.fromStrict <$> P.take 4
+parseBinaryValue T_I64 = TI64 . Binary.decode . LBS.fromStrict <$> P.take 8
+parseBinaryValue T_DOUBLE = TDouble . bsToDouble <$> P.take 8
+parseBinaryValue T_STRING = do
+ i :: Int32 <- Binary.decode . LBS.fromStrict <$> P.take 4
+ TString . LBS.fromStrict <$> P.take (fromIntegral i)
+parseBinaryValue ty = error $ "Cannot read value of type " ++ show ty
- readDouble p = do
- bs <- readI64 p
- return $ floatOfBits $ fromIntegral bs
+parseBinaryStruct :: P.Parser (Map.HashMap Int16 (LT.Text, ThriftVal))
+parseBinaryStruct = Map.fromList <$> P.manyTill parseField (matchType T_STOP)
+ where
+ parseField = do
+ t <- parseType
+ n <- Binary.decode . LBS.fromStrict <$> P.take 2
+ v <- parseBinaryValue t
+ return (n, ("", v))
- readString p = do
- i <- readI32 p
- decodeUtf8 `liftM` tReadAll (getTransport p) (fromIntegral i)
+parseBinaryMap :: ThriftType -> ThriftType -> Int32 -> P.Parser [(ThriftVal, ThriftVal)]
+parseBinaryMap kt vt n | n <= 0 = return []
+ | otherwise = do
+ k <- parseBinaryValue kt
+ v <- parseBinaryValue vt
+ ((k,v) :) <$> parseBinaryMap kt vt (n-1)
- readBinary p = do
- i <- readI32 p
- tReadAll (getTransport p) (fromIntegral i)
+parseBinaryList :: ThriftType -> Int32 -> P.Parser [ThriftVal]
+parseBinaryList ty n | n <= 0 = return []
+ | otherwise = liftM2 (:) (parseBinaryValue ty)
+ (parseBinaryList ty (n-1))
+
-- | Write a type as a byte
-writeType :: (Protocol p, Transport t) => p t -> ThriftType -> IO ()
-writeType p t = writeByte p (fromIntegral $ fromEnum t)
+buildType :: ThriftType -> Builder
+buildType t = word8 $ fromIntegral $ fromEnum t
+
+-- | Write type of a ThriftVal as a byte
+buildTypeOf :: ThriftVal -> Builder
+buildTypeOf = buildType . getTypeOf
-- | Read a byte as though it were a ThriftType
-readType :: (Protocol p, Transport t) => p t -> IO ThriftType
-readType p = do
- b <- readByte p
- return $ toEnum $ fromIntegral b
+parseType :: P.Parser ThriftType
+parseType = toEnum . fromIntegral <$> P.anyWord8
-floatBits :: Double -> Word64
-floatBits (D# d#) = W64# (unsafeCoerce# d#)
-
-floatOfBits :: Word64 -> Double
-floatOfBits (W64# b#) = D# (unsafeCoerce# b#)
-
+matchType :: ThriftType -> P.Parser ThriftType
+matchType t = t <$ P.word8 (fromIntegral $ fromEnum t)
diff --git a/lib/hs/src/Thrift/Protocol/Compact.hs b/lib/hs/src/Thrift/Protocol/Compact.hs
new file mode 100644
index 0000000..c3bd22d
--- /dev/null
+++ b/lib/hs/src/Thrift/Protocol/Compact.hs
@@ -0,0 +1,292 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+--
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ExistentialQuantification #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module Thrift.Protocol.Compact
+ ( module Thrift.Protocol
+ , CompactProtocol(..)
+ ) where
+
+import Control.Applicative
+import Control.Exception ( throw )
+import Control.Monad
+import Data.Attoparsec.ByteString as P
+import Data.Attoparsec.ByteString.Lazy as LP
+import Data.Bits
+import Data.ByteString.Lazy.Builder as B
+import Data.Int
+import Data.List as List
+import Data.Monoid
+import Data.Word
+import Data.Text.Lazy.Encoding ( decodeUtf8, encodeUtf8 )
+
+import Thrift.Protocol hiding (versionMask)
+import Thrift.Transport
+import Thrift.Types
+
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as LBS
+import qualified Data.HashMap.Strict as Map
+import qualified Data.Text.Lazy as LT
+
+-- | the Compact Protocol implements the standard Thrift 'TCompactProcotol'
+-- which is similar to the 'TBinaryProtocol', but takes less space on the wire.
+-- Integral types are encoded using as varints.
+data CompactProtocol a = CompactProtocol a
+ -- ^ Constuct a 'CompactProtocol' with a 'Transport'
+
+protocolID, version, typeMask :: Int8
+protocolID = 0x82 -- 1000 0010
+version = 0x01
+versionMask = 0x1f -- 0001 1111
+typeMask = 0xe0 -- 1110 0000
+typeShiftAmount :: Int
+typeShiftAmount = 5
+
+
+instance Protocol CompactProtocol where
+ getTransport (CompactProtocol t) = t
+
+ writeMessageBegin p (n, t, s) = tWrite (getTransport p) $ toLazyByteString $
+ B.int8 protocolID <>
+ B.int8 ((version .&. versionMask) .|.
+ (((fromIntegral $ fromEnum t) `shiftL`
+ typeShiftAmount) .&. typeMask)) <>
+ buildVarint (i32ToZigZag s) <>
+ buildCompactValue (TString $ encodeUtf8 n)
+
+ readMessageBegin p = runParser p $ do
+ pid <- fromIntegral <$> P.anyWord8
+ when (pid /= protocolID) $ error "Bad Protocol ID"
+ w <- fromIntegral <$> P.anyWord8
+ let ver = w .&. versionMask
+ when (ver /= version) $ error "Bad Protocol version"
+ let typ = (w `shiftR` typeShiftAmount) .&. 0x03
+ seqId <- parseVarint zigZagToI32
+ TString name <- parseCompactValue T_STRING
+ return (decodeUtf8 name, toEnum $ fromIntegral $ typ, seqId)
+
+ serializeVal _ = toLazyByteString . buildCompactValue
+ deserializeVal _ ty bs =
+ case LP.eitherResult $ LP.parse (parseCompactValue ty) bs of
+ Left s -> error s
+ Right val -> val
+
+ readVal p ty = runParser p $ parseCompactValue ty
+
+
+-- | Writing Functions
+buildCompactValue :: ThriftVal -> Builder
+buildCompactValue (TStruct fields) = buildCompactStruct fields
+buildCompactValue (TMap kt vt entries) =
+ let len = fromIntegral $ length entries :: Word32 in
+ if len == 0
+ then B.word8 0x00
+ else buildVarint len <>
+ B.word8 (fromTType kt `shiftL` 4 .|. fromTType vt) <>
+ buildCompactMap entries
+buildCompactValue (TList ty entries) =
+ let len = length entries in
+ (if len < 15
+ then B.word8 $ (fromIntegral len `shiftL` 4) .|. fromTType ty
+ else B.word8 (0xF0 .|. fromTType ty) <>
+ buildVarint (fromIntegral len :: Word32)) <>
+ buildCompactList entries
+buildCompactValue (TSet ty entries) = buildCompactValue (TList ty entries)
+buildCompactValue (TBool b) =
+ B.word8 $ toEnum $ if b then 1 else 0
+buildCompactValue (TByte b) = int8 b
+buildCompactValue (TI16 i) = buildVarint $ i16ToZigZag i
+buildCompactValue (TI32 i) = buildVarint $ i32ToZigZag i
+buildCompactValue (TI64 i) = buildVarint $ i64ToZigZag i
+buildCompactValue (TDouble d) = doubleBE d
+buildCompactValue (TString s) = buildVarint len <> lazyByteString s
+ where
+ len = fromIntegral (LBS.length s) :: Word32
+
+buildCompactStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder
+buildCompactStruct = flip (loop 0) mempty . Map.toList
+ where
+ loop _ [] acc = acc <> B.word8 (fromTType T_STOP)
+ loop lastId ((fid, (_,val)) : fields) acc = loop fid fields $ acc <>
+ (if fid > lastId && fid - lastId <= 15
+ then B.word8 $ fromIntegral ((fid - lastId) `shiftL` 4) .|. typeOf val
+ else B.word8 (typeOf val) <> buildVarint (i16ToZigZag fid)) <>
+ (if typeOf val > 0x02 -- Not a T_BOOL
+ then buildCompactValue val
+ else mempty) -- T_BOOLs are encoded in the type
+buildCompactMap :: [(ThriftVal, ThriftVal)] -> Builder
+buildCompactMap = foldl combine mempty
+ where
+ combine s (key, val) = buildCompactValue key <> buildCompactValue val <> s
+
+buildCompactList :: [ThriftVal] -> Builder
+buildCompactList = foldr (mappend . buildCompactValue) mempty
+
+-- | Reading Functions
+parseCompactValue :: ThriftType -> Parser ThriftVal
+parseCompactValue (T_STRUCT _) = TStruct <$> parseCompactStruct
+parseCompactValue (T_MAP kt' vt') = do
+ n <- parseVarint id
+ if n == 0
+ then return $ TMap kt' vt' []
+ else do
+ w <- P.anyWord8
+ let kt = typeFrom $ w `shiftR` 4
+ vt = typeFrom $ w .&. 0x0F
+ TMap kt vt <$> parseCompactMap kt vt n
+parseCompactValue (T_LIST ty) = TList ty <$> parseCompactList
+parseCompactValue (T_SET ty) = TSet ty <$> parseCompactList
+parseCompactValue T_BOOL = TBool . (/=0) <$> P.anyWord8
+parseCompactValue T_BYTE = TByte . fromIntegral <$> P.anyWord8
+parseCompactValue T_I16 = TI16 <$> parseVarint zigZagToI16
+parseCompactValue T_I32 = TI32 <$> parseVarint zigZagToI32
+parseCompactValue T_I64 = TI64 <$> parseVarint zigZagToI64
+parseCompactValue T_DOUBLE = TDouble . bsToDouble <$> P.take 8
+parseCompactValue T_STRING = do
+ len :: Word32 <- parseVarint id
+ TString . LBS.fromStrict <$> P.take (fromIntegral len)
+parseCompactValue ty = error $ "Cannot read value of type " ++ show ty
+
+parseCompactStruct :: Parser (Map.HashMap Int16 (LT.Text, ThriftVal))
+parseCompactStruct = Map.fromList <$> parseFields 0
+ where
+ parseFields :: Int16 -> Parser [(Int16, (LT.Text, ThriftVal))]
+ parseFields lastId = do
+ w <- P.anyWord8
+ if w == 0x00
+ then return []
+ else do
+ let ty = typeFrom (w .&. 0x0F)
+ modifier = (w .&. 0xF0) `shiftR` 4
+ fid <- if modifier /= 0
+ then return (lastId + fromIntegral modifier)
+ else parseVarint zigZagToI16
+ val <- if ty == T_BOOL
+ then return (TBool $ (w .&. 0x0F) == 0x01)
+ else parseCompactValue ty
+ ((fid, (LT.empty, val)) : ) <$> parseFields fid
+
+parseCompactMap :: ThriftType -> ThriftType -> Int32 ->
+ Parser [(ThriftVal, ThriftVal)]
+parseCompactMap kt vt n | n <= 0 = return []
+ | otherwise = do
+ k <- parseCompactValue kt
+ v <- parseCompactValue vt
+ ((k,v) :) <$> parseCompactMap kt vt (n-1)
+
+parseCompactList :: Parser [ThriftVal]
+parseCompactList = do
+ w <- P.anyWord8
+ let ty = typeFrom $ w .&. 0x0F
+ lsize = w `shiftR` 4
+ size <- if lsize == 0xF
+ then parseVarint id
+ else return $ fromIntegral lsize
+ loop ty size
+ where
+ loop :: ThriftType -> Int32 -> Parser [ThriftVal]
+ loop ty n | n <= 0 = return []
+ | otherwise = liftM2 (:) (parseCompactValue ty)
+ (loop ty (n-1))
+
+-- Signed numbers must be converted to "Zig Zag" format before they can be
+-- serialized in the Varint format
+i16ToZigZag :: Int16 -> Word16
+i16ToZigZag n = fromIntegral $ (n `shiftL` 1) `xor` (n `shiftR` 15)
+
+zigZagToI16 :: Word16 -> Int16
+zigZagToI16 n = fromIntegral $ (n `shiftR` 1) `xor` negate (n .&. 0x1)
+
+i32ToZigZag :: Int32 -> Word32
+i32ToZigZag n = fromIntegral $ (n `shiftL` 1) `xor` (n `shiftR` 31)
+
+zigZagToI32 :: Word32 -> Int32
+zigZagToI32 n = fromIntegral $ (n `shiftR` 1) `xor` negate (n .&. 0x1)
+
+i64ToZigZag :: Int64 -> Word64
+i64ToZigZag n = fromIntegral $ (n `shiftL` 1) `xor` (n `shiftR` 63)
+
+zigZagToI64 :: Word64 -> Int64
+zigZagToI64 n = fromIntegral $ (n `shiftR` 1) `xor` negate (n .&. 0x1)
+
+buildVarint :: (Bits a, Integral a) => a -> Builder
+buildVarint n | n .&. complement 0x7F == 0 = B.word8 $ fromIntegral n
+ | otherwise = B.word8 (0x80 .|. (fromIntegral n .&. 0x7F)) <>
+ buildVarint (n `shiftR` 7)
+
+parseVarint :: (Bits a, Integral a, Ord a) => (a -> b) -> Parser b
+parseVarint fromZigZag = do
+ bytestemp <- BS.unpack <$> P.takeTill (not . flip testBit 7)
+ lsb <- P.anyWord8
+ let bytes = lsb : List.reverse bytestemp
+ return $ fromZigZag $ List.foldl' combine 0x00 bytes
+ where combine a b = (a `shiftL` 7) .|. (fromIntegral b .&. 0x7f)
+
+-- | Compute the Compact Type
+fromTType :: ThriftType -> Word8
+fromTType ty = case ty of
+ T_STOP -> 0x00
+ T_BOOL -> 0x01
+ T_BYTE -> 0x03
+ T_I16 -> 0x04
+ T_I32 -> 0x05
+ T_I64 -> 0x06
+ T_DOUBLE -> 0x07
+ T_STRING -> 0x08
+ T_LIST{} -> 0x09
+ T_SET{} -> 0x0A
+ T_MAP{} -> 0x0B
+ T_STRUCT{} -> 0x0C
+ T_VOID -> error "No Compact type for T_VOID"
+
+typeOf :: ThriftVal -> Word8
+typeOf v = case v of
+ TBool True -> 0x01
+ TBool False -> 0x02
+ TByte _ -> 0x03
+ TI16 _ -> 0x04
+ TI32 _ -> 0x05
+ TI64 _ -> 0x06
+ TDouble _ -> 0x07
+ TString _ -> 0x08
+ TList{} -> 0x09
+ TSet{} -> 0x0A
+ TMap{} -> 0x0B
+ TStruct{} -> 0x0C
+
+typeFrom :: Word8 -> ThriftType
+typeFrom w = case w of
+ 0x01 -> T_BOOL
+ 0x02 -> T_BOOL
+ 0x03 -> T_BYTE
+ 0x04 -> T_I16
+ 0x05 -> T_I32
+ 0x06 -> T_I64
+ 0x07 -> T_DOUBLE
+ 0x08 -> T_STRING
+ 0x09 -> T_LIST T_VOID
+ 0x0A -> T_SET T_VOID
+ 0x0B -> T_MAP T_VOID T_VOID
+ 0x0C -> T_STRUCT Map.empty
+ n -> error $ "typeFrom: " ++ show n ++ " is not a compact type"
diff --git a/lib/hs/src/Thrift/Protocol/JSON.hs b/lib/hs/src/Thrift/Protocol/JSON.hs
new file mode 100644
index 0000000..f378ea2
--- /dev/null
+++ b/lib/hs/src/Thrift/Protocol/JSON.hs
@@ -0,0 +1,325 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+--
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ExistentialQuantification #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
+
+module Thrift.Protocol.JSON
+ ( module Thrift.Protocol
+ , JSONProtocol(..)
+ ) where
+
+import Control.Applicative
+import Control.Monad
+import Data.Attoparsec.ByteString as P
+import Data.Attoparsec.ByteString.Char8 as PC
+import Data.Attoparsec.ByteString.Lazy as LP
+import Data.ByteString.Lazy.Builder as B
+import Data.ByteString.Internal (c2w, w2c)
+import Data.Functor
+import Data.Int
+import Data.List
+import Data.Maybe (catMaybes)
+import Data.Monoid
+import Data.Text.Lazy.Encoding
+import Data.Word
+import qualified Data.HashMap.Strict as Map
+
+import Thrift.Protocol
+import Thrift.Transport
+import Thrift.Types
+
+import qualified Data.ByteString.Lazy as LBS
+import qualified Data.Text.Lazy as LT
+
+-- | The JSON Protocol data uses the standard 'TSimpleJSONProtocol'. Data is
+-- encoded as a JSON 'ByteString'
+data JSONProtocol t = JSONProtocol t
+ -- ^ Construct a 'JSONProtocol' with a 'Transport'
+
+instance Protocol JSONProtocol where
+ getTransport (JSONProtocol t) = t
+
+ writeMessageBegin (JSONProtocol t) (s, ty, sq) = tWrite t $ toLazyByteString $
+ B.char8 '[' <> buildShowable (1 :: Int32) <>
+ B.string8 ",\"" <> escape (encodeUtf8 s) <> B.char8 '\"' <>
+ B.char8 ',' <> buildShowable (fromEnum ty) <>
+ B.char8 ',' <> buildShowable sq <>
+ B.char8 ','
+ writeMessageEnd (JSONProtocol t) = tWrite t "]"
+ readMessageBegin p = runParser p $ skipSpace *> do
+ _ver :: Int32 <- lexeme (PC.char8 '[') *> lexeme (signed decimal)
+ bs <- lexeme (PC.char8 ',') *> lexeme escapedString
+ case decodeUtf8' bs of
+ Left _ -> fail "readMessage: invalid text encoding"
+ Right str -> do
+ ty <- toEnum <$> (lexeme (PC.char8 ',') *> lexeme (signed decimal))
+ seqNum <- lexeme (PC.char8 ',') *> lexeme (signed decimal)
+ _ <- PC.char8 ','
+ return (str, ty, seqNum)
+ readMessageEnd p = void $ runParser p (PC.char8 ']')
+
+ serializeVal _ = toLazyByteString . buildJSONValue
+ deserializeVal _ ty bs =
+ case LP.eitherResult $ LP.parse (parseJSONValue ty) bs of
+ Left s -> error s
+ Right val -> val
+
+ readVal p ty = runParser p $ skipSpace *> parseJSONValue ty
+
+
+-- Writing Functions
+
+buildJSONValue :: ThriftVal -> Builder
+buildJSONValue (TStruct fields) = B.char8 '{' <> buildJSONStruct fields <> B.char8 '}'
+buildJSONValue (TMap kt vt entries) =
+ B.char8 '[' <> B.char8 '"' <> getTypeName kt <> B.char8 '"' <>
+ B.char8 ',' <> B.char8 '"' <> getTypeName vt <> B.char8 '"' <>
+ B.char8 ',' <> buildShowable (length entries) <>
+ B.char8 ',' <> B.char8 '{' <> buildJSONMap entries <> B.char8 '}' <>
+ B.char8 ']'
+buildJSONValue (TList ty entries) =
+ B.char8 '[' <> B.char8 '"' <> getTypeName ty <> B.char8 '"' <>
+ B.char8 ',' <> buildShowable (length entries) <>
+ (if length entries > 0
+ then B.char8 ',' <> buildJSONList entries
+ else mempty) <>
+ B.char8 ']'
+buildJSONValue (TSet ty entries) = buildJSONValue (TList ty entries)
+buildJSONValue (TBool b) = if b then B.string8 "true" else B.string8 "false"
+buildJSONValue (TByte b) = buildShowable b
+buildJSONValue (TI16 i) = buildShowable i
+buildJSONValue (TI32 i) = buildShowable i
+buildJSONValue (TI64 i) = buildShowable i
+buildJSONValue (TDouble d) = buildShowable d
+buildJSONValue (TString s) = B.char8 '\"' <> escape s <> B.char8 '\"'
+
+buildJSONStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder
+buildJSONStruct = mconcat . intersperse (B.char8 ',') . Map.foldrWithKey buildField []
+ where
+ buildField fid (_,val) = (:) $
+ B.char8 '"' <> buildShowable fid <> B.string8 "\":" <>
+ B.char8 '{' <>
+ B.char8 '"' <> getTypeName (getTypeOf val) <> B.string8 "\":" <>
+ buildJSONValue val <>
+ B.char8 '}'
+
+buildJSONMap :: [(ThriftVal, ThriftVal)] -> Builder
+buildJSONMap = mconcat . intersperse (B.char8 ',') . map buildKV
+ where
+ buildKV (key@(TString _), val) =
+ buildJSONValue key <> B.char8 ':' <> buildJSONValue val
+ buildKV (key, val) =
+ B.char8 '\"' <> buildJSONValue key <> B.string8 "\":" <> buildJSONValue val
+buildJSONList :: [ThriftVal] -> Builder
+buildJSONList = mconcat . intersperse (B.char8 ',') . map buildJSONValue
+
+buildShowable :: Show a => a -> Builder
+buildShowable = B.string8 . show
+
+-- Reading Functions
+
+parseJSONValue :: ThriftType -> Parser ThriftVal
+parseJSONValue (T_STRUCT tmap) =
+ TStruct <$> (lexeme (PC.char8 '{') *> parseJSONStruct tmap <* PC.char8 '}')
+parseJSONValue (T_MAP kt vt) = fmap (TMap kt vt) $
+ between '[' ']' $
+ lexeme escapedString *> lexeme (PC.char8 ',') *>
+ lexeme escapedString *> lexeme (PC.char8 ',') *>
+ lexeme decimal *> lexeme (PC.char8 ',') *>
+ between '{' '}' (parseJSONMap kt vt)
+parseJSONValue (T_LIST ty) = fmap (TList ty) $
+ between '[' ']' $ do
+ len <- lexeme escapedString *> lexeme (PC.char8 ',') *>
+ lexeme decimal <* lexeme (PC.char8 ',')
+ if len > 0 then parseJSONList ty else return []
+parseJSONValue (T_SET ty) = fmap (TSet ty) $
+ between '[' ']' $ do
+ len <- lexeme escapedString *> lexeme (PC.char8 ',') *>
+ lexeme decimal <* lexeme (PC.char8 ',')
+ if len > 0 then parseJSONList ty else return []
+parseJSONValue T_BOOL =
+ (TBool True <$ string "true") <|> (TBool False <$ string "false")
+parseJSONValue T_BYTE = TByte <$> signed decimal
+parseJSONValue T_I16 = TI16 <$> signed decimal
+parseJSONValue T_I32 = TI32 <$> signed decimal
+parseJSONValue T_I64 = TI64 <$> signed decimal
+parseJSONValue T_DOUBLE = TDouble <$> double
+parseJSONValue T_STRING = TString <$> escapedString
+parseJSONValue T_STOP = fail "parseJSONValue: cannot parse type T_STOP"
+parseJSONValue T_VOID = fail "parseJSONValue: cannot parse type T_VOID"
+
+parseAnyValue :: Parser ()
+parseAnyValue = choice $
+ skipBetween '{' '}' :
+ skipBetween '[' ']' :
+ map (void . parseJSONValue)
+ [ T_BOOL
+ , T_I16
+ , T_I32
+ , T_I64
+ , T_DOUBLE
+ , T_STRING
+ ]
+ where
+ skipBetween :: Char -> Char -> Parser ()
+ skipBetween a b = between a b $ void (PC.satisfy (\c -> c /= a && c /= b))
+ <|> skipBetween a b
+
+parseJSONStruct :: TypeMap -> Parser (Map.HashMap Int16 (LT.Text, ThriftVal))
+parseJSONStruct tmap = Map.fromList . catMaybes <$> parseField
+ `sepBy` lexeme (PC.char8 ',')
+ where
+ parseField = do
+ fid <- lexeme (between '"' '"' decimal) <* lexeme (PC.char8 ':')
+ case Map.lookup fid tmap of
+ Just (str, ftype) -> between '{' '}' $ do
+ _ <- lexeme (escapedString) *> lexeme (PC.char8 ':')
+ val <- lexeme (parseJSONValue ftype)
+ return $ Just (fid, (str, val))
+ Nothing -> lexeme parseAnyValue *> return Nothing
+
+parseJSONMap :: ThriftType -> ThriftType -> Parser [(ThriftVal, ThriftVal)]
+parseJSONMap kt vt =
+ ((,) <$> lexeme (PC.char8 '"' *> parseJSONValue kt <* PC.char8 '"') <*>
+ (lexeme (PC.char8 ':') *> lexeme (parseJSONValue vt))) `sepBy`
+ lexeme (PC.char8 ',')
+
+parseJSONList :: ThriftType -> Parser [ThriftVal]
+parseJSONList ty = lexeme (parseJSONValue ty) `sepBy` lexeme (PC.char8 ',')
+
+escapedString :: Parser LBS.ByteString
+escapedString = PC.char8 '"' *>
+ (LBS.pack <$> P.many' (escapedChar <|> notChar8 '"')) <*
+ PC.char8 '"'
+
+escapedChar :: Parser Word8
+escapedChar = PC.char8 '\\' *> (c2w <$> choice
+ [ '\SOH' <$ P.string "u0001"
+ , '\STX' <$ P.string "u0002"
+ , '\ETX' <$ P.string "u0003"
+ , '\EOT' <$ P.string "u0004"
+ , '\ENQ' <$ P.string "u0005"
+ , '\ACK' <$ P.string "u0006"
+ , '\BEL' <$ P.string "u0007"
+ , '\BS' <$ P.string "u0008"
+ , '\VT' <$ P.string "u000b"
+ , '\FF' <$ P.string "u000c"
+ , '\CR' <$ P.string "u000d"
+ , '\SO' <$ P.string "u000e"
+ , '\SI' <$ P.string "u000f"
+ , '\DLE' <$ P.string "u0010"
+ , '\DC1' <$ P.string "u0011"
+ , '\DC2' <$ P.string "u0012"
+ , '\DC3' <$ P.string "u0013"
+ , '\DC4' <$ P.string "u0014"
+ , '\NAK' <$ P.string "u0015"
+ , '\SYN' <$ P.string "u0016"
+ , '\ETB' <$ P.string "u0017"
+ , '\CAN' <$ P.string "u0018"
+ , '\EM' <$ P.string "u0019"
+ , '\SUB' <$ P.string "u001a"
+ , '\ESC' <$ P.string "u001b"
+ , '\FS' <$ P.string "u001c"
+ , '\GS' <$ P.string "u001d"
+ , '\RS' <$ P.string "u001e"
+ , '\US' <$ P.string "u001f"
+ , '\DEL' <$ P.string "u007f"
+ , '\0' <$ PC.char '0'
+ , '\a' <$ PC.char 'a'
+ , '\b' <$ PC.char 'b'
+ , '\f' <$ PC.char 'f'
+ , '\n' <$ PC.char 'n'
+ , '\r' <$ PC.char 'r'
+ , '\t' <$ PC.char 't'
+ , '\v' <$ PC.char 'v'
+ , '\"' <$ PC.char '"'
+ , '\'' <$ PC.char '\''
+ , '\\' <$ PC.char '\\'
+ , '/' <$ PC.char '/'
+ ])
+
+escape :: LBS.ByteString -> Builder
+escape = LBS.foldl' escapeChar mempty
+ where
+ escapeChar b w = b <> (B.lazyByteString $ case w2c w of
+ '\0' -> "\\0"
+ '\b' -> "\\b"
+ '\f' -> "\\f"
+ '\n' -> "\\n"
+ '\r' -> "\\r"
+ '\t' -> "\\t"
+ '\"' -> "\\\""
+ '\\' -> "\\\\"
+ '\SOH' -> "\\u0001"
+ '\STX' -> "\\u0002"
+ '\ETX' -> "\\u0003"
+ '\EOT' -> "\\u0004"
+ '\ENQ' -> "\\u0005"
+ '\ACK' -> "\\u0006"
+ '\BEL' -> "\\u0007"
+ '\VT' -> "\\u000b"
+ '\SO' -> "\\u000e"
+ '\SI' -> "\\u000f"
+ '\DLE' -> "\\u0010"
+ '\DC1' -> "\\u0011"
+ '\DC2' -> "\\u0012"
+ '\DC3' -> "\\u0013"
+ '\DC4' -> "\\u0014"
+ '\NAK' -> "\\u0015"
+ '\SYN' -> "\\u0016"
+ '\ETB' -> "\\u0017"
+ '\CAN' -> "\\u0018"
+ '\EM' -> "\\u0019"
+ '\SUB' -> "\\u001a"
+ '\ESC' -> "\\u001b"
+ '\FS' -> "\\u001c"
+ '\GS' -> "\\u001d"
+ '\RS' -> "\\u001e"
+ '\US' -> "\\u001f"
+ '\DEL' -> "\\u007f"
+ _ -> LBS.singleton w)
+
+lexeme :: Parser a -> Parser a
+lexeme = (<* skipSpace)
+
+notChar8 :: Char -> Parser Word8
+notChar8 c = P.satisfy (/= c2w c)
+
+between :: Char -> Char -> Parser a -> Parser a
+between a b p = lexeme (PC.char8 a) *> lexeme p <* lexeme (PC.char8 b)
+
+getTypeName :: ThriftType -> Builder
+getTypeName ty = B.string8 $ case ty of
+ T_STRUCT _ -> "rec"
+ T_MAP _ _ -> "map"
+ T_LIST _ -> "lst"
+ T_SET _ -> "set"
+ T_BOOL -> "tf"
+ T_BYTE -> "i8"
+ T_I16 -> "i16"
+ T_I32 -> "i32"
+ T_I64 -> "i64"
+ T_DOUBLE -> "dbl"
+ T_STRING -> "str"
+ _ -> error "Unrecognized Type"
+