Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 1 | -- |
| 2 | -- Licensed to the Apache Software Foundation (ASF) under one |
| 3 | -- or more contributor license agreements. See the NOTICE file |
| 4 | -- distributed with this work for additional information |
| 5 | -- regarding copyright ownership. The ASF licenses this file |
| 6 | -- to you under the Apache License, Version 2.0 (the |
| 7 | -- "License"); you may not use this file except in compliance |
| 8 | -- with the License. You may obtain a copy of the License at |
| 9 | -- |
| 10 | -- http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | -- |
| 12 | -- Unless required by applicable law or agreed to in writing, |
| 13 | -- software distributed under the License is distributed on an |
| 14 | -- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | -- KIND, either express or implied. See the License for the |
| 16 | -- specific language governing permissions and limitations |
| 17 | -- under the License. |
| 18 | -- |
| 19 | |
| 20 | {-# LANGUAGE CPP #-} |
| 21 | {-# LANGUAGE ExistentialQuantification #-} |
| 22 | {-# LANGUAGE OverloadedStrings #-} |
| 23 | {-# LANGUAGE ScopedTypeVariables #-} |
| 24 | |
| 25 | module Thrift.Protocol.Compact |
| 26 | ( module Thrift.Protocol |
| 27 | , CompactProtocol(..) |
| 28 | ) where |
| 29 | |
| 30 | import Control.Applicative |
| 31 | import Control.Exception ( throw ) |
| 32 | import Control.Monad |
| 33 | import Data.Attoparsec.ByteString as P |
| 34 | import Data.Attoparsec.ByteString.Lazy as LP |
| 35 | import Data.Bits |
| 36 | import Data.ByteString.Lazy.Builder as B |
| 37 | import Data.Int |
| 38 | import Data.List as List |
| 39 | import Data.Monoid |
| 40 | import Data.Word |
| 41 | import Data.Text.Lazy.Encoding ( decodeUtf8, encodeUtf8 ) |
| 42 | |
| 43 | import Thrift.Protocol hiding (versionMask) |
| 44 | import Thrift.Transport |
| 45 | import Thrift.Types |
| 46 | |
| 47 | import qualified Data.ByteString as BS |
| 48 | import qualified Data.ByteString.Lazy as LBS |
| 49 | import qualified Data.HashMap.Strict as Map |
| 50 | import qualified Data.Text.Lazy as LT |
| 51 | |
-- | The Compact Protocol implements the standard Thrift 'TCompactProtocol',
-- which is similar to the 'TBinaryProtocol' but takes less space on the wire.
-- Integral types are encoded as varints.
data CompactProtocol a
  = CompactProtocol a
    -- ^ Construct a 'CompactProtocol' with a 'Transport'
| 57 | |
-- | First byte of every Compact Protocol message (bit pattern @1000 0010@).
-- The byte is held in a signed 'Int8', so the conversion from the unsigned
-- bit pattern is spelled out instead of relying on an out-of-range literal.
protocolID :: Int8
protocolID = fromIntegral (0x82 :: Word8)

-- | Protocol version written into, and required from, the message header.
version :: Int8
version = 0x01

-- | Mask selecting the version bits of the second header byte (@0001 1111@).
versionMask :: Int8
versionMask = 0x1f

-- | Mask selecting the message-type bits of the second header byte
-- (@1110 0000@); converted from the unsigned bit pattern like 'protocolID'.
typeMask :: Int8
typeMask = fromIntegral (0xe0 :: Word8)

-- | Mask applied to the message type once it has been shifted down
-- (@0000 0111@); it discards the sign bits smeared in by the arithmetic shift.
typeBits :: Int8
typeBits = 0x07

-- | Number of bits by which the message type is shifted inside the second
-- header byte.
typeShiftAmount :: Int
typeShiftAmount = 5
| 66 | |
| 67 | |
-- | 'Protocol' instance that reads and writes the Compact Protocol encoding
-- on the wrapped transport.
instance Protocol CompactProtocol where
    getTransport (CompactProtocol transport) = transport

    -- Message header: protocol id byte, combined version/type byte,
    -- zig-zag varint sequence id, then the length-prefixed method name.
    writeMessageBegin p (name, msgType, seqId) =
        tWrite (getTransport p) (toLazyByteString header)
      where
        typeField   = fromIntegral (fromEnum msgType) `shiftL` typeShiftAmount
        versionType = (version .&. versionMask) .|. (typeField .&. typeMask)
        header      = mconcat
          [ B.int8 protocolID
          , B.int8 versionType
          , buildVarint (i32ToZigZag seqId)
          , buildCompactValue (TString (encodeUtf8 name))
          ]

    -- Mirrors 'writeMessageBegin'; a wrong protocol id or version raises
    -- an 'error'.
    readMessageBegin p = runParser p $ do
        pid <- fromIntegral <$> P.anyWord8
        when (pid /= protocolID) $ error "Bad Protocol ID"
        versionType <- fromIntegral <$> P.anyWord8
        when (versionType .&. versionMask /= version) $
            error "Bad Protocol version"
        seqId <- parseVarint zigZagToI32
        TString name <- parseCompactValue T_STRING
        -- The shift on the signed byte is arithmetic; 'typeBits' strips the
        -- sign extension again.
        let msgType = (versionType `shiftR` typeShiftAmount) .&. typeBits
        return (decodeUtf8 name, toEnum (fromIntegral msgType), seqId)

    serializeVal _ val = toLazyByteString (buildCompactValue val)

    -- A parse failure is turned into an 'error' carrying the parser message.
    deserializeVal _ ty bs =
        either error id (LP.eitherResult (LP.parse (parseCompactValue ty) bs))

    readVal p ty = runParser p (parseCompactValue ty)
| 97 | |
| 98 | |
-- | Writing functions.
--
-- Serialize a single 'ThriftVal' in the Compact Protocol encoding:
--
-- * structs are delegated to 'buildCompactStruct';
-- * a map is a varint entry count followed by one byte holding the key type
--   (high nibble) and value type (low nibble) and then the entries; an empty
--   map is the single byte @0x00@;
-- * a list keeps its size in the high nibble of the header byte when it is
--   below 15, otherwise the nibble is @0xF@ and a varint size follows;
-- * a set is written exactly like a list;
-- * a bool is one byte (@1@ or @0@), a byte is written raw, integers are
--   zig-zag varints, doubles are big-endian, and strings are a varint length
--   followed by the raw bytes.
buildCompactValue :: ThriftVal -> Builder
buildCompactValue value = case value of
    TStruct fields     -> buildCompactStruct fields
    TMap kt vt entries -> mapValue kt vt entries
    TList ty entries   -> listHeader ty (length entries) <>
                          buildCompactList entries
    TSet ty entries    -> buildCompactValue (TList ty entries)
    TBool b            -> B.word8 (if b then 1 else 0)
    TByte b            -> int8 b
    TI16 i             -> buildVarint (i16ToZigZag i)
    TI32 i             -> buildVarint (i32ToZigZag i)
    TI64 i             -> buildVarint (i64ToZigZag i)
    TDouble d          -> doubleBE d
    TString s          ->
        buildVarint (fromIntegral (LBS.length s) :: Word32) <> lazyByteString s
  where
    mapValue :: ThriftType -> ThriftType -> [(ThriftVal, ThriftVal)] -> Builder
    mapValue kt vt entries
      | entryCount == 0 = B.word8 0x00
      | otherwise       =
          buildVarint entryCount <>
          B.word8 ((fromTType kt `shiftL` 4) .|. fromTType vt) <>
          buildCompactMap entries
      where
        entryCount = fromIntegral (length entries) :: Word32

    listHeader :: ThriftType -> Int -> Builder
    listHeader ty len
      | len < 15  = B.word8 ((fromIntegral len `shiftL` 4) .|. fromTType ty)
      | otherwise = B.word8 (0xF0 .|. fromTType ty) <>
                    buildVarint (fromIntegral len :: Word32)
| 127 | |
-- | Serialize the fields of a struct, followed by the 'T_STOP' marker.
--
-- Fields are written in the order produced by 'Map.toList'. Each field starts
-- with a header byte whose low nibble is the compact type of the value
-- ('typeOf'). When the field id exceeds the previously written id by 1 to 15,
-- that difference is stored in the high nibble (short form); otherwise the
-- high nibble is zero and the field id follows as a zig-zag varint.
--
-- Boolean fields carry their value in the type nibble and have no payload.
buildCompactStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder
buildCompactStruct fields =
    go 0 (Map.toList fields) <> B.word8 (fromTType T_STOP)
  where
    go :: Int16 -> [(Int16, (LT.Text, ThriftVal))] -> Builder
    go _ [] = mempty
    go lastId ((fid, (_, val)) : rest) =
        fieldHeader lastId fid (typeOf val) <> fieldPayload val <> go fid rest

    fieldHeader :: Int16 -> Int16 -> Word8 -> Builder
    fieldHeader lastId fid ty
      | delta > 0 && delta <= 15 =
          B.word8 ((fromIntegral delta `shiftL` 4) .|. ty)
      | otherwise = B.word8 ty <> buildVarint (i16ToZigZag fid)
      where
        -- Computed in 'Int': an 'Int16' subtraction can wrap around (for
        -- example 32767 - (-32768)) and would wrongly pick the short form.
        delta = fromIntegral fid - fromIntegral lastId :: Int

    fieldPayload :: ThriftVal -> Builder
    fieldPayload val
      | typeOf val > 0x02 = buildCompactValue val -- Not a T_BOOL
      | otherwise         = mempty -- T_BOOLs are encoded in the type
-- | Serialize map entries as alternating key and value encodings.
--
-- Entries are emitted in list order, so parsing the output with
-- 'parseCompactMap' yields them in the same order. The right fold mirrors
-- 'buildCompactList' and avoids the thunk chain a lazy left fold builds.
buildCompactMap :: [(ThriftVal, ThriftVal)] -> Builder
buildCompactMap = foldr (mappend . buildEntry) mempty
  where
    buildEntry (key, val) = buildCompactValue key <> buildCompactValue val
| 143 | |
-- | Serialize the elements of a list or set one after another, in list
-- order. The size/type header is written by the caller.
buildCompactList :: [ThriftVal] -> Builder
buildCompactList entries = mconcat (map buildCompactValue entries)
| 146 | |
-- | Reading functions.
--
-- Parse one value of the requested type from Compact Protocol input.
--
-- For maps the key and value types are taken from the type byte on the wire;
-- the types supplied by the caller are only used for an empty map, which has
-- no type byte. Lists and sets keep the element type supplied by the caller.
-- Any type without a case below raises an 'error'.
parseCompactValue :: ThriftType -> Parser ThriftVal
parseCompactValue ty = case ty of
    T_STRUCT _ -> TStruct <$> parseCompactStruct
    T_MAP declaredKt declaredVt -> do
        numEntries <- parseVarint id
        if numEntries == 0
          then return (TMap declaredKt declaredVt [])
          else do
            typeByte <- P.anyWord8
            let keyTy = typeFrom (typeByte `shiftR` 4)
                valTy = typeFrom (typeByte .&. 0x0F)
            TMap keyTy valTy <$> parseCompactMap keyTy valTy numEntries
    T_LIST elemTy -> TList elemTy <$> parseCompactList
    T_SET elemTy  -> TSet elemTy <$> parseCompactList
    T_BOOL        -> TBool . (/= 0) <$> P.anyWord8
    T_BYTE        -> TByte . fromIntegral <$> P.anyWord8
    T_I16         -> TI16 <$> parseVarint zigZagToI16
    T_I32         -> TI32 <$> parseVarint zigZagToI32
    T_I64         -> TI64 <$> parseVarint zigZagToI64
    T_DOUBLE      -> TDouble . bsToDouble <$> P.take 8
    T_STRING      -> do
        len <- parseVarint id :: Parser Word32
        TString . LBS.fromStrict <$> P.take (fromIntegral len)
    _             -> error $ "Cannot read value of type " ++ show ty
| 171 | |
-- | Parse struct fields up to the stop byte (@0x00@) and collect them into a
-- map keyed by field id. Field names are not on the wire, so every entry
-- gets an empty name.
parseCompactStruct :: Parser (Map.HashMap Int16 (LT.Text, ThriftVal))
parseCompactStruct = Map.fromList <$> fieldsAfter 0
  where
    -- Parse the remaining fields; the argument is the id of the field read
    -- last (0 before the first field).
    fieldsAfter :: Int16 -> Parser [(Int16, (LT.Text, ThriftVal))]
    fieldsAfter prevId = P.anyWord8 >>= fieldFrom prevId

    fieldFrom :: Int16 -> Word8 -> Parser [(Int16, (LT.Text, ThriftVal))]
    fieldFrom _ 0x00 = return []
    fieldFrom prevId header = do
        let typeNibble = header .&. 0x0F
            delta      = header `shiftR` 4
            ty         = typeFrom typeNibble
        -- A zero high nibble means the field id follows as a zig-zag varint;
        -- otherwise the nibble is the offset from the previous field id.
        fid <- if delta == 0
                 then parseVarint zigZagToI16
                 else return (prevId + fromIntegral delta)
        -- Boolean fields carry their value in the type nibble itself.
        val <- if ty == T_BOOL
                 then return (TBool (typeNibble == 0x01))
                 else parseCompactValue ty
        rest <- fieldsAfter fid
        return ((fid, (LT.empty, val)) : rest)
| 190 | |
-- | Parse @n@ key/value pairs with the given key and value types, in wire
-- order. A count of zero or less yields an empty list.
parseCompactMap :: ThriftType -> ThriftType -> Int32 ->
                   Parser [(ThriftVal, ThriftVal)]
parseCompactMap kt vt n = replicateM (fromIntegral n) entry
  where
    entry = (,) <$> parseCompactValue kt <*> parseCompactValue vt
| 198 | |
-- | Parse a list or set: a header byte with the element type in the low
-- nibble and the size in the high nibble, followed by the elements. A size
-- nibble of @0xF@ means the real size follows as a varint.
parseCompactList :: Parser [ThriftVal]
parseCompactList = do
    header <- P.anyWord8
    let elemTy    = typeFrom (header .&. 0x0F)
        shortSize = header `shiftR` 4
    size <- case shortSize of
        0xF -> parseVarint id
        _   -> return (fromIntegral shortSize)
    elements elemTy size
  where
    elements :: ThriftType -> Int32 -> Parser [ThriftVal]
    elements elemTy remaining
      | remaining <= 0 = return []
      | otherwise      =
          (:) <$> parseCompactValue elemTy <*> elements elemTy (remaining - 1)
| 213 | |
-- Signed numbers must be converted to "Zig Zag" format before they can be
-- serialized in the Varint format.

-- | Zig-zag encode a 16-bit signed integer: 0, -1, 1, -2, ... map to
-- 0, 1, 2, 3, ...
i16ToZigZag :: Int16 -> Word16
i16ToZigZag n = fromIntegral (doubled `xor` signFill)
  where
    doubled  = n `shiftL` 1
    signFill = n `shiftR` 15 -- arithmetic shift: all ones for negative n
| 218 | |
-- | Decode a zig-zag encoded 16-bit value back to a signed integer.
zigZagToI16 :: Word16 -> Int16
zigZagToI16 n = fromIntegral (magnitude `xor` signFill)
  where
    magnitude = n `shiftR` 1
    signFill  = negate (n .&. 0x1) -- all ones when the low (sign) bit is set
| 221 | |
-- | Zig-zag encode a 32-bit signed integer: 0, -1, 1, -2, ... map to
-- 0, 1, 2, 3, ...
i32ToZigZag :: Int32 -> Word32
i32ToZigZag n = fromIntegral (doubled `xor` signFill)
  where
    doubled  = n `shiftL` 1
    signFill = n `shiftR` 31 -- arithmetic shift: all ones for negative n
| 224 | |
-- | Decode a zig-zag encoded 32-bit value back to a signed integer.
zigZagToI32 :: Word32 -> Int32
zigZagToI32 n = fromIntegral (magnitude `xor` signFill)
  where
    magnitude = n `shiftR` 1
    signFill  = negate (n .&. 0x1) -- all ones when the low (sign) bit is set
| 227 | |
-- | Zig-zag encode a 64-bit signed integer: 0, -1, 1, -2, ... map to
-- 0, 1, 2, 3, ...
i64ToZigZag :: Int64 -> Word64
i64ToZigZag n = fromIntegral (doubled `xor` signFill)
  where
    doubled  = n `shiftL` 1
    signFill = n `shiftR` 63 -- arithmetic shift: all ones for negative n
| 230 | |
-- | Decode a zig-zag encoded 64-bit value back to a signed integer.
zigZagToI64 :: Word64 -> Int64
zigZagToI64 n = fromIntegral (magnitude `xor` signFill)
  where
    magnitude = n `shiftR` 1
    signFill  = negate (n .&. 0x1) -- all ones when the low (sign) bit is set
| 233 | |
-- | Write a number as a varint: seven bits per byte, least significant group
-- first, with the high bit of a byte set when more bytes follow.
buildVarint :: (Bits a, Integral a) => a -> Builder
buildVarint n
  | fitsInOneByte = B.word8 lowByte
  | otherwise     = B.word8 (0x80 .|. (lowByte .&. 0x7F)) <>
                    buildVarint (n `shiftR` 7)
  where
    fitsInOneByte = n .&. complement 0x7F == 0
    lowByte       = fromIntegral n :: Word8
| 238 | |
-- | Parse a varint and pass the assembled number to the given conversion
-- (a zig-zag decoder for signed values, 'id' for unsigned ones).
--
-- Bytes with the high bit set are continuation bytes holding the lower
-- seven-bit groups, least significant first. The first byte without the high
-- bit ends the number and holds its most significant group.
parseVarint :: (Bits a, Integral a, Ord a) => (a -> b) -> Parser b
parseVarint fromZigZag = do
    continued <- P.takeTill (not . (`testBit` 7))
    final     <- P.anyWord8
    -- Start from the most significant group and fold the continuation bytes
    -- in from last to first, shifting the accumulator up by seven bits each
    -- time.
    return (fromZigZag (BS.foldr prepend (group final) continued))
  where
    group byte       = fromIntegral byte .&. 0x7f
    prepend byte acc = (acc `shiftL` 7) .|. group byte
| 246 | |
-- | Compute the compact type code for a 'ThriftType'. 'T_VOID' has no
-- compact code and raises an 'error'.
fromTType :: ThriftType -> Word8
fromTType T_STOP     = 0x00
fromTType T_BOOL     = 0x01
fromTType T_BYTE     = 0x03
fromTType T_I16      = 0x04
fromTType T_I32      = 0x05
fromTType T_I64      = 0x06
fromTType T_DOUBLE   = 0x07
fromTType T_STRING   = 0x08
fromTType T_LIST{}   = 0x09
fromTType T_SET{}    = 0x0A
fromTType T_MAP{}    = 0x0B
fromTType T_STRUCT{} = 0x0C
fromTType T_VOID     = error "No Compact type for T_VOID"
| 263 | |
-- | Compact type code of a value. Unlike 'fromTType', booleans get a
-- separate code for each value (@0x01@ for 'True', @0x02@ for 'False').
typeOf :: ThriftVal -> Word8
typeOf (TBool True)  = 0x01
typeOf (TBool False) = 0x02
typeOf (TByte _)     = 0x03
typeOf (TI16 _)      = 0x04
typeOf (TI32 _)      = 0x05
typeOf (TI64 _)      = 0x06
typeOf (TDouble _)   = 0x07
typeOf (TString _)   = 0x08
typeOf TList{}       = 0x09
typeOf TSet{}        = 0x0A
typeOf TMap{}        = 0x0B
typeOf TStruct{}     = 0x0C
| 278 | |
-- | Translate a compact type code into a 'ThriftType'. Both boolean codes map
-- to 'T_BOOL'; container and struct types are returned with placeholder
-- contents ('T_VOID' element types, an empty struct description). Any other
-- code raises an 'error'.
typeFrom :: Word8 -> ThriftType
typeFrom 0x01 = T_BOOL
typeFrom 0x02 = T_BOOL
typeFrom 0x03 = T_BYTE
typeFrom 0x04 = T_I16
typeFrom 0x05 = T_I32
typeFrom 0x06 = T_I64
typeFrom 0x07 = T_DOUBLE
typeFrom 0x08 = T_STRING
typeFrom 0x09 = T_LIST T_VOID
typeFrom 0x0A = T_SET T_VOID
typeFrom 0x0B = T_MAP T_VOID T_VOID
typeFrom 0x0C = T_STRUCT Map.empty
typeFrom n    = error $ "typeFrom: " ++ show n ++ " is not a compact type"