| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 1 | -- | 
|  | 2 | -- Licensed to the Apache Software Foundation (ASF) under one | 
|  | 3 | -- or more contributor license agreements. See the NOTICE file | 
|  | 4 | -- distributed with this work for additional information | 
|  | 5 | -- regarding copyright ownership. The ASF licenses this file | 
|  | 6 | -- to you under the Apache License, Version 2.0 (the | 
|  | 7 | -- "License"); you may not use this file except in compliance | 
|  | 8 | -- with the License. You may obtain a copy of the License at | 
|  | 9 | -- | 
|  | 10 | --   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 11 | -- | 
|  | 12 | -- Unless required by applicable law or agreed to in writing, | 
|  | 13 | -- software distributed under the License is distributed on an | 
|  | 14 | -- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | 15 | -- KIND, either express or implied. See the License for the | 
|  | 16 | -- specific language governing permissions and limitations | 
|  | 17 | -- under the License. | 
|  | 18 | -- | 
|  | 19 |  | 
|  | 20 | {-# LANGUAGE CPP #-} | 
|  | 21 | {-# LANGUAGE ExistentialQuantification #-} | 
|  | 22 | {-# LANGUAGE OverloadedStrings #-} | 
|  | 23 | {-# LANGUAGE ScopedTypeVariables #-} | 
|  | 24 |  | 
|  | 25 | module Thrift.Protocol.Compact | 
|  | 26 | ( module Thrift.Protocol | 
|  | 27 | , CompactProtocol(..) | 
|  | 28 | ) where | 
|  | 29 |  | 
|  | 30 | import Control.Applicative | 
|  | 31 | import Control.Exception ( throw ) | 
|  | 32 | import Control.Monad | 
|  | 33 | import Data.Attoparsec.ByteString as P | 
|  | 34 | import Data.Attoparsec.ByteString.Lazy as LP | 
|  | 35 | import Data.Bits | 
|  | 36 | import Data.ByteString.Lazy.Builder as B | 
|  | 37 | import Data.Int | 
|  | 38 | import Data.List as List | 
|  | 39 | import Data.Monoid | 
|  | 40 | import Data.Word | 
|  | 41 | import Data.Text.Lazy.Encoding ( decodeUtf8, encodeUtf8 ) | 
|  | 42 |  | 
|  | 43 | import Thrift.Protocol hiding (versionMask) | 
|  | 44 | import Thrift.Transport | 
|  | 45 | import Thrift.Types | 
|  | 46 |  | 
|  | 47 | import qualified Data.ByteString as BS | 
|  | 48 | import qualified Data.ByteString.Lazy as LBS | 
|  | 49 | import qualified Data.HashMap.Strict as Map | 
|  | 50 | import qualified Data.Text.Lazy as LT | 
|  | 51 |  | 
|  | 52 | -- | the Compact Protocol implements the standard Thrift 'TCompactProcotol' | 
|  | 53 | -- which is similar to the 'TBinaryProtocol', but takes less space on the wire. | 
|  | 54 | -- Integral types are encoded using as varints. | 
|  | 55 | data CompactProtocol a = CompactProtocol a | 
|  | 56 | -- ^ Constuct a 'CompactProtocol' with a 'Transport' | 
|  | 57 |  | 
| Nobuaki Sukegawa | 7c7d679 | 2015-12-09 03:22:35 +0900 | [diff] [blame] | 58 | protocolID, version, versionMask, typeMask, typeBits :: Word8 | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 59 | protocolID  = 0x82 -- 1000 0010 | 
|  | 60 | version     = 0x01 | 
|  | 61 | versionMask = 0x1f -- 0001 1111 | 
|  | 62 | typeMask    = 0xe0 -- 1110 0000 | 
| Jens Geyer | a86886e | 2014-09-17 22:25:48 +0200 | [diff] [blame] | 63 | typeBits    = 0x07 -- 0000 0111 | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 64 | typeShiftAmount :: Int | 
|  | 65 | typeShiftAmount = 5 | 
|  | 66 |  | 
|  | 67 |  | 
|  | 68 | instance Protocol CompactProtocol where | 
|  | 69 | getTransport (CompactProtocol t) = t | 
|  | 70 |  | 
|  | 71 | writeMessageBegin p (n, t, s) = tWrite (getTransport p) $ toLazyByteString $ | 
| Nobuaki Sukegawa | 7c7d679 | 2015-12-09 03:22:35 +0900 | [diff] [blame] | 72 | B.word8 protocolID <> | 
|  | 73 | B.word8 ((version .&. versionMask) .|. | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 74 | (((fromIntegral $ fromEnum t) `shiftL` | 
|  | 75 | typeShiftAmount) .&. typeMask)) <> | 
|  | 76 | buildVarint (i32ToZigZag s) <> | 
|  | 77 | buildCompactValue (TString $ encodeUtf8 n) | 
|  | 78 |  | 
|  | 79 | readMessageBegin p = runParser p $ do | 
|  | 80 | pid <- fromIntegral <$> P.anyWord8 | 
|  | 81 | when (pid /= protocolID) $ error "Bad Protocol ID" | 
|  | 82 | w <- fromIntegral <$> P.anyWord8 | 
|  | 83 | let ver = w .&. versionMask | 
|  | 84 | when (ver /= version) $ error "Bad Protocol version" | 
| Jens Geyer | a86886e | 2014-09-17 22:25:48 +0200 | [diff] [blame] | 85 | let typ = (w `shiftR` typeShiftAmount) .&. typeBits | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 86 | seqId <- parseVarint zigZagToI32 | 
|  | 87 | TString name <- parseCompactValue T_STRING | 
|  | 88 | return (decodeUtf8 name, toEnum $ fromIntegral $ typ, seqId) | 
|  | 89 |  | 
|  | 90 | serializeVal _ = toLazyByteString . buildCompactValue | 
|  | 91 | deserializeVal _ ty bs = | 
|  | 92 | case LP.eitherResult $ LP.parse (parseCompactValue ty) bs of | 
|  | 93 | Left s -> error s | 
|  | 94 | Right val -> val | 
|  | 95 |  | 
|  | 96 | readVal p ty = runParser p $ parseCompactValue ty | 
|  | 97 |  | 
|  | 98 |  | 
|  | 99 | -- | Writing Functions | 
|  | 100 | buildCompactValue :: ThriftVal -> Builder | 
|  | 101 | buildCompactValue (TStruct fields) = buildCompactStruct fields | 
|  | 102 | buildCompactValue (TMap kt vt entries) = | 
|  | 103 | let len = fromIntegral $ length entries :: Word32 in | 
|  | 104 | if len == 0 | 
|  | 105 | then B.word8 0x00 | 
|  | 106 | else buildVarint len <> | 
|  | 107 | B.word8 (fromTType kt `shiftL` 4 .|. fromTType vt) <> | 
|  | 108 | buildCompactMap entries | 
|  | 109 | buildCompactValue (TList ty entries) = | 
|  | 110 | let len = length entries in | 
|  | 111 | (if len < 15 | 
|  | 112 | then B.word8 $ (fromIntegral len `shiftL` 4) .|. fromTType ty | 
|  | 113 | else B.word8 (0xF0 .|. fromTType ty) <> | 
|  | 114 | buildVarint (fromIntegral len :: Word32)) <> | 
|  | 115 | buildCompactList entries | 
|  | 116 | buildCompactValue (TSet ty entries) = buildCompactValue (TList ty entries) | 
|  | 117 | buildCompactValue (TBool b) = | 
|  | 118 | B.word8 $ toEnum $ if b then 1 else 0 | 
|  | 119 | buildCompactValue (TByte b) = int8 b | 
|  | 120 | buildCompactValue (TI16 i) = buildVarint $ i16ToZigZag i | 
|  | 121 | buildCompactValue (TI32 i) = buildVarint $ i32ToZigZag i | 
|  | 122 | buildCompactValue (TI64 i) = buildVarint $ i64ToZigZag i | 
| Nobuaki Sukegawa | 7c7d679 | 2015-12-09 03:22:35 +0900 | [diff] [blame] | 123 | buildCompactValue (TDouble d) = doubleLE d | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 124 | buildCompactValue (TString s) = buildVarint len <> lazyByteString s | 
|  | 125 | where | 
|  | 126 | len = fromIntegral (LBS.length s) :: Word32 | 
| Nobuaki Sukegawa | e68ccc2 | 2015-12-13 21:45:39 +0900 | [diff] [blame] | 127 | buildCompactValue (TBinary s) = buildCompactValue (TString s) | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 128 |  | 
|  | 129 | buildCompactStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder | 
|  | 130 | buildCompactStruct = flip (loop 0) mempty . Map.toList | 
|  | 131 | where | 
|  | 132 | loop _ [] acc = acc <> B.word8 (fromTType T_STOP) | 
|  | 133 | loop lastId ((fid, (_,val)) : fields) acc = loop fid fields $ acc <> | 
|  | 134 | (if fid > lastId && fid - lastId <= 15 | 
|  | 135 | then B.word8 $ fromIntegral ((fid - lastId) `shiftL` 4) .|. typeOf val | 
|  | 136 | else B.word8 (typeOf val) <> buildVarint (i16ToZigZag fid)) <> | 
|  | 137 | (if typeOf val > 0x02 -- Not a T_BOOL | 
|  | 138 | then buildCompactValue val | 
|  | 139 | else mempty) -- T_BOOLs are encoded in the type | 
|  | 140 | buildCompactMap :: [(ThriftVal, ThriftVal)] -> Builder | 
|  | 141 | buildCompactMap = foldl combine mempty | 
|  | 142 | where | 
|  | 143 | combine s (key, val) = buildCompactValue key <> buildCompactValue val <> s | 
|  | 144 |  | 
|  | 145 | buildCompactList :: [ThriftVal] -> Builder | 
|  | 146 | buildCompactList = foldr (mappend . buildCompactValue) mempty | 
|  | 147 |  | 
|  | 148 | -- | Reading Functions | 
|  | 149 | parseCompactValue :: ThriftType -> Parser ThriftVal | 
| Nobuaki Sukegawa | e68ccc2 | 2015-12-13 21:45:39 +0900 | [diff] [blame] | 150 | parseCompactValue (T_STRUCT tmap) = TStruct <$> parseCompactStruct tmap | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 151 | parseCompactValue (T_MAP kt' vt') = do | 
|  | 152 | n <- parseVarint id | 
|  | 153 | if n == 0 | 
|  | 154 | then return $ TMap kt' vt' [] | 
|  | 155 | else do | 
|  | 156 | w <- P.anyWord8 | 
|  | 157 | let kt = typeFrom $ w `shiftR` 4 | 
|  | 158 | vt = typeFrom $ w .&. 0x0F | 
|  | 159 | TMap kt vt <$> parseCompactMap kt vt n | 
|  | 160 | parseCompactValue (T_LIST ty) = TList ty <$> parseCompactList | 
|  | 161 | parseCompactValue (T_SET ty) = TSet ty <$> parseCompactList | 
|  | 162 | parseCompactValue T_BOOL = TBool . (/=0) <$> P.anyWord8 | 
|  | 163 | parseCompactValue T_BYTE = TByte . fromIntegral <$> P.anyWord8 | 
|  | 164 | parseCompactValue T_I16 = TI16 <$> parseVarint zigZagToI16 | 
|  | 165 | parseCompactValue T_I32 = TI32 <$> parseVarint zigZagToI32 | 
|  | 166 | parseCompactValue T_I64 = TI64 <$> parseVarint zigZagToI64 | 
| Nobuaki Sukegawa | 7c7d679 | 2015-12-09 03:22:35 +0900 | [diff] [blame] | 167 | parseCompactValue T_DOUBLE = TDouble . bsToDoubleLE <$> P.take 8 | 
| Nobuaki Sukegawa | e68ccc2 | 2015-12-13 21:45:39 +0900 | [diff] [blame] | 168 | parseCompactValue T_STRING = parseCompactString TString | 
|  | 169 | parseCompactValue T_BINARY = parseCompactString TBinary | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 170 | parseCompactValue ty = error $ "Cannot read value of type " ++ show ty | 
|  | 171 |  | 
| Nobuaki Sukegawa | e68ccc2 | 2015-12-13 21:45:39 +0900 | [diff] [blame] | 172 | parseCompactString ty = do | 
|  | 173 | len :: Word32 <- parseVarint id | 
|  | 174 | ty . LBS.fromStrict <$> P.take (fromIntegral len) | 
|  | 175 |  | 
|  | 176 | parseCompactStruct :: TypeMap -> Parser (Map.HashMap Int16 (LT.Text, ThriftVal)) | 
|  | 177 | parseCompactStruct tmap = Map.fromList <$> parseFields 0 | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 178 | where | 
|  | 179 | parseFields :: Int16 -> Parser [(Int16, (LT.Text, ThriftVal))] | 
|  | 180 | parseFields lastId = do | 
|  | 181 | w <- P.anyWord8 | 
|  | 182 | if w == 0x00 | 
|  | 183 | then return [] | 
|  | 184 | else do | 
|  | 185 | let ty = typeFrom (w .&. 0x0F) | 
|  | 186 | modifier = (w .&. 0xF0) `shiftR` 4 | 
|  | 187 | fid <- if modifier /= 0 | 
|  | 188 | then return (lastId + fromIntegral modifier) | 
|  | 189 | else parseVarint zigZagToI16 | 
|  | 190 | val <- if ty == T_BOOL | 
|  | 191 | then return (TBool $ (w .&. 0x0F) == 0x01) | 
| Nobuaki Sukegawa | e68ccc2 | 2015-12-13 21:45:39 +0900 | [diff] [blame] | 192 | else case (ty, Map.lookup fid tmap) of | 
|  | 193 | (T_STRING, Just (_, T_BINARY)) -> parseCompactValue T_BINARY | 
|  | 194 | _ -> parseCompactValue ty | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 195 | ((fid, (LT.empty, val)) : ) <$> parseFields fid | 
|  | 196 |  | 
|  | 197 | parseCompactMap :: ThriftType -> ThriftType -> Int32 -> | 
|  | 198 | Parser [(ThriftVal, ThriftVal)] | 
|  | 199 | parseCompactMap kt vt n | n <= 0 = return [] | 
|  | 200 | | otherwise = do | 
|  | 201 | k <- parseCompactValue kt | 
|  | 202 | v <- parseCompactValue vt | 
|  | 203 | ((k,v) :) <$> parseCompactMap kt vt (n-1) | 
|  | 204 |  | 
|  | 205 | parseCompactList :: Parser [ThriftVal] | 
|  | 206 | parseCompactList = do | 
|  | 207 | w <- P.anyWord8 | 
|  | 208 | let ty = typeFrom $ w .&. 0x0F | 
|  | 209 | lsize = w `shiftR` 4 | 
|  | 210 | size <- if lsize == 0xF | 
|  | 211 | then parseVarint id | 
|  | 212 | else return $ fromIntegral lsize | 
|  | 213 | loop ty size | 
|  | 214 | where | 
|  | 215 | loop :: ThriftType -> Int32 -> Parser [ThriftVal] | 
|  | 216 | loop ty n | n <= 0 = return [] | 
|  | 217 | | otherwise = liftM2 (:) (parseCompactValue ty) | 
|  | 218 | (loop ty (n-1)) | 
|  | 219 |  | 
|  | 220 | -- Signed numbers must be converted to "Zig Zag" format before they can be | 
|  | 221 | -- serialized in the Varint format | 
|  | 222 | i16ToZigZag :: Int16 -> Word16 | 
|  | 223 | i16ToZigZag n = fromIntegral $ (n `shiftL` 1) `xor` (n `shiftR` 15) | 
|  | 224 |  | 
|  | 225 | zigZagToI16 :: Word16 -> Int16 | 
|  | 226 | zigZagToI16 n = fromIntegral $ (n `shiftR` 1) `xor` negate (n .&. 0x1) | 
|  | 227 |  | 
|  | 228 | i32ToZigZag :: Int32 -> Word32 | 
|  | 229 | i32ToZigZag n = fromIntegral $ (n `shiftL` 1) `xor` (n `shiftR` 31) | 
|  | 230 |  | 
|  | 231 | zigZagToI32 :: Word32 -> Int32 | 
|  | 232 | zigZagToI32 n = fromIntegral $ (n `shiftR` 1) `xor` negate (n .&. 0x1) | 
|  | 233 |  | 
|  | 234 | i64ToZigZag :: Int64 -> Word64 | 
|  | 235 | i64ToZigZag n = fromIntegral $ (n `shiftL` 1) `xor` (n `shiftR` 63) | 
|  | 236 |  | 
|  | 237 | zigZagToI64 :: Word64 -> Int64 | 
|  | 238 | zigZagToI64 n = fromIntegral $ (n `shiftR` 1) `xor` negate (n .&. 0x1) | 
|  | 239 |  | 
|  | 240 | buildVarint :: (Bits a, Integral a)  => a -> Builder | 
|  | 241 | buildVarint n | n .&. complement 0x7F == 0 = B.word8 $ fromIntegral n | 
|  | 242 | | otherwise = B.word8 (0x80 .|. (fromIntegral n .&. 0x7F)) <> | 
|  | 243 | buildVarint (n `shiftR` 7) | 
|  | 244 |  | 
|  | 245 | parseVarint :: (Bits a, Integral a, Ord a) => (a -> b) -> Parser b | 
|  | 246 | parseVarint fromZigZag = do | 
|  | 247 | bytestemp <- BS.unpack <$> P.takeTill (not . flip testBit 7) | 
|  | 248 | lsb <- P.anyWord8 | 
|  | 249 | let bytes = lsb : List.reverse bytestemp | 
|  | 250 | return $ fromZigZag $ List.foldl' combine 0x00 bytes | 
|  | 251 | where combine a b = (a `shiftL` 7) .|. (fromIntegral b .&. 0x7f) | 
|  | 252 |  | 
|  | 253 | -- | Compute the Compact Type | 
|  | 254 | fromTType :: ThriftType -> Word8 | 
|  | 255 | fromTType ty = case ty of | 
|  | 256 | T_STOP -> 0x00 | 
|  | 257 | T_BOOL -> 0x01 | 
|  | 258 | T_BYTE -> 0x03 | 
|  | 259 | T_I16 -> 0x04 | 
|  | 260 | T_I32 -> 0x05 | 
|  | 261 | T_I64 -> 0x06 | 
|  | 262 | T_DOUBLE -> 0x07 | 
|  | 263 | T_STRING -> 0x08 | 
| Nobuaki Sukegawa | e68ccc2 | 2015-12-13 21:45:39 +0900 | [diff] [blame] | 264 | T_BINARY -> 0x08 | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 265 | T_LIST{} -> 0x09 | 
|  | 266 | T_SET{} -> 0x0A | 
|  | 267 | T_MAP{} -> 0x0B | 
|  | 268 | T_STRUCT{} -> 0x0C | 
|  | 269 | T_VOID -> error "No Compact type for T_VOID" | 
|  | 270 |  | 
|  | 271 | typeOf :: ThriftVal -> Word8 | 
|  | 272 | typeOf v = case v of | 
|  | 273 | TBool True -> 0x01 | 
|  | 274 | TBool False -> 0x02 | 
|  | 275 | TByte _ -> 0x03 | 
|  | 276 | TI16 _ -> 0x04 | 
|  | 277 | TI32 _ -> 0x05 | 
|  | 278 | TI64 _ -> 0x06 | 
|  | 279 | TDouble _ -> 0x07 | 
|  | 280 | TString _ -> 0x08 | 
| Nobuaki Sukegawa | e68ccc2 | 2015-12-13 21:45:39 +0900 | [diff] [blame] | 281 | TBinary _ -> 0x08 | 
| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 282 | TList{} -> 0x09 | 
|  | 283 | TSet{} -> 0x0A | 
|  | 284 | TMap{} -> 0x0B | 
|  | 285 | TStruct{} -> 0x0C | 
|  | 286 |  | 
|  | 287 | typeFrom :: Word8 -> ThriftType | 
|  | 288 | typeFrom w = case w of | 
|  | 289 | 0x01 -> T_BOOL | 
|  | 290 | 0x02 -> T_BOOL | 
|  | 291 | 0x03 -> T_BYTE | 
|  | 292 | 0x04 -> T_I16 | 
|  | 293 | 0x05 -> T_I32 | 
|  | 294 | 0x06 -> T_I64 | 
|  | 295 | 0x07 -> T_DOUBLE | 
|  | 296 | 0x08 -> T_STRING | 
|  | 297 | 0x09 -> T_LIST T_VOID | 
|  | 298 | 0x0A -> T_SET T_VOID | 
|  | 299 | 0x0B -> T_MAP T_VOID T_VOID | 
|  | 300 | 0x0C -> T_STRUCT Map.empty | 
|  | 301 | n -> error $ "typeFrom: " ++ show n ++ " is not a compact type" |