| Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 1 | -- | 
|  | 2 | -- Licensed to the Apache Software Foundation (ASF) under one | 
|  | 3 | -- or more contributor license agreements. See the NOTICE file | 
|  | 4 | -- distributed with this work for additional information | 
|  | 5 | -- regarding copyright ownership. The ASF licenses this file | 
|  | 6 | -- to you under the Apache License, Version 2.0 (the | 
|  | 7 | -- "License"); you may not use this file except in compliance | 
|  | 8 | -- with the License. You may obtain a copy of the License at | 
|  | 9 | -- | 
|  | 10 | --   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 11 | -- | 
|  | 12 | -- Unless required by applicable law or agreed to in writing, | 
|  | 13 | -- software distributed under the License is distributed on an | 
|  | 14 | -- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | 15 | -- KIND, either express or implied. See the License for the | 
|  | 16 | -- specific language governing permissions and limitations | 
|  | 17 | -- under the License. | 
|  | 18 | -- | 
|  | 19 |  | 
|  | 20 | {-# LANGUAGE CPP #-} | 
|  | 21 | {-# LANGUAGE ExistentialQuantification #-} | 
|  | 22 | {-# LANGUAGE OverloadedStrings #-} | 
|  | 23 | {-# LANGUAGE ScopedTypeVariables #-} | 
|  | 24 | {-# LANGUAGE TupleSections #-} | 
|  | 25 |  | 
|  | 26 | module Thrift.Protocol.JSON | 
|  | 27 | ( module Thrift.Protocol | 
|  | 28 | , JSONProtocol(..) | 
|  | 29 | ) where | 
|  | 30 |  | 
|  | 31 | import Control.Applicative | 
|  | 32 | import Control.Monad | 
|  | 33 | import Data.Attoparsec.ByteString as P | 
|  | 34 | import Data.Attoparsec.ByteString.Char8 as PC | 
|  | 35 | import Data.Attoparsec.ByteString.Lazy as LP | 
|  | 36 | import Data.ByteString.Lazy.Builder as B | 
|  | 37 | import Data.ByteString.Internal (c2w, w2c) | 
|  | 38 | import Data.Functor | 
|  | 39 | import Data.Int | 
|  | 40 | import Data.List | 
|  | 41 | import Data.Maybe (catMaybes) | 
|  | 42 | import Data.Monoid | 
|  | 43 | import Data.Text.Lazy.Encoding | 
|  | 44 | import Data.Word | 
|  | 45 | import qualified Data.HashMap.Strict as Map | 
|  | 46 |  | 
|  | 47 | import Thrift.Protocol | 
|  | 48 | import Thrift.Transport | 
|  | 49 | import Thrift.Types | 
|  | 50 |  | 
|  | 51 | import qualified Data.ByteString.Lazy as LBS | 
|  | 52 | import qualified Data.Text.Lazy as LT | 
|  | 53 |  | 
|  | 54 | -- | The JSON Protocol data uses the standard 'TSimpleJSONProtocol'.  Data is | 
|  | 55 | -- encoded as a JSON 'ByteString' | 
|  | 56 | data JSONProtocol t = JSONProtocol t | 
|  | 57 | -- ^ Construct a 'JSONProtocol' with a 'Transport' | 
|  | 58 |  | 
|  | 59 | instance Protocol JSONProtocol where | 
|  | 60 | getTransport (JSONProtocol t) = t | 
|  | 61 |  | 
|  | 62 | writeMessageBegin (JSONProtocol t) (s, ty, sq) = tWrite t $ toLazyByteString $ | 
|  | 63 | B.char8 '[' <> buildShowable (1 :: Int32) <> | 
|  | 64 | B.string8 ",\"" <> escape (encodeUtf8 s) <> B.char8 '\"' <> | 
|  | 65 | B.char8 ',' <> buildShowable (fromEnum ty) <> | 
|  | 66 | B.char8 ',' <> buildShowable sq <> | 
|  | 67 | B.char8 ',' | 
|  | 68 | writeMessageEnd (JSONProtocol t) = tWrite t "]" | 
|  | 69 | readMessageBegin p = runParser p $ skipSpace *> do | 
|  | 70 | _ver :: Int32 <- lexeme (PC.char8 '[') *> lexeme (signed decimal) | 
|  | 71 | bs <- lexeme (PC.char8 ',') *> lexeme escapedString | 
|  | 72 | case decodeUtf8' bs of | 
|  | 73 | Left _ -> fail "readMessage: invalid text encoding" | 
|  | 74 | Right str -> do | 
|  | 75 | ty <- toEnum <$> (lexeme (PC.char8 ',') *> lexeme (signed decimal)) | 
|  | 76 | seqNum <- lexeme (PC.char8 ',') *> lexeme (signed decimal) | 
|  | 77 | _ <- PC.char8 ',' | 
|  | 78 | return (str, ty, seqNum) | 
|  | 79 | readMessageEnd p = void $ runParser p (PC.char8 ']') | 
|  | 80 |  | 
|  | 81 | serializeVal _ = toLazyByteString . buildJSONValue | 
|  | 82 | deserializeVal _ ty bs = | 
|  | 83 | case LP.eitherResult $ LP.parse (parseJSONValue ty) bs of | 
|  | 84 | Left s -> error s | 
|  | 85 | Right val -> val | 
|  | 86 |  | 
|  | 87 | readVal p ty = runParser p $ skipSpace *> parseJSONValue ty | 
|  | 88 |  | 
|  | 89 |  | 
|  | 90 | -- Writing Functions | 
|  | 91 |  | 
|  | 92 | buildJSONValue :: ThriftVal -> Builder | 
|  | 93 | buildJSONValue (TStruct fields) = B.char8 '{' <> buildJSONStruct fields <> B.char8 '}' | 
|  | 94 | buildJSONValue (TMap kt vt entries) = | 
|  | 95 | B.char8 '[' <> B.char8 '"' <> getTypeName kt <> B.char8 '"' <> | 
|  | 96 | B.char8 ',' <> B.char8 '"' <> getTypeName vt <> B.char8 '"' <> | 
|  | 97 | B.char8 ',' <> buildShowable (length entries) <> | 
|  | 98 | B.char8 ',' <> B.char8 '{' <> buildJSONMap entries <> B.char8 '}' <> | 
|  | 99 | B.char8 ']' | 
|  | 100 | buildJSONValue (TList ty entries) = | 
|  | 101 | B.char8 '[' <> B.char8 '"' <> getTypeName ty <> B.char8 '"' <> | 
|  | 102 | B.char8 ',' <> buildShowable (length entries) <> | 
|  | 103 | (if length entries > 0 | 
|  | 104 | then B.char8 ',' <> buildJSONList entries | 
|  | 105 | else mempty) <> | 
|  | 106 | B.char8 ']' | 
|  | 107 | buildJSONValue (TSet ty entries) = buildJSONValue (TList ty entries) | 
|  | 108 | buildJSONValue (TBool b) = if b then B.string8 "true" else B.string8 "false" | 
|  | 109 | buildJSONValue (TByte b) = buildShowable b | 
|  | 110 | buildJSONValue (TI16 i) = buildShowable i | 
|  | 111 | buildJSONValue (TI32 i) = buildShowable i | 
|  | 112 | buildJSONValue (TI64 i) = buildShowable i | 
|  | 113 | buildJSONValue (TDouble d) = buildShowable d | 
|  | 114 | buildJSONValue (TString s) = B.char8 '\"' <> escape s <> B.char8 '\"' | 
|  | 115 |  | 
|  | 116 | buildJSONStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder | 
|  | 117 | buildJSONStruct = mconcat . intersperse (B.char8 ',') . Map.foldrWithKey buildField [] | 
|  | 118 | where | 
|  | 119 | buildField fid (_,val) = (:) $ | 
|  | 120 | B.char8 '"' <> buildShowable fid <> B.string8 "\":" <> | 
|  | 121 | B.char8 '{' <> | 
|  | 122 | B.char8 '"' <> getTypeName (getTypeOf val) <> B.string8 "\":" <> | 
|  | 123 | buildJSONValue val <> | 
|  | 124 | B.char8 '}' | 
|  | 125 |  | 
|  | 126 | buildJSONMap :: [(ThriftVal, ThriftVal)] -> Builder | 
|  | 127 | buildJSONMap = mconcat . intersperse (B.char8 ',') . map buildKV | 
|  | 128 | where | 
|  | 129 | buildKV (key@(TString _), val) = | 
|  | 130 | buildJSONValue key <> B.char8 ':' <> buildJSONValue val | 
|  | 131 | buildKV (key, val) = | 
|  | 132 | B.char8 '\"' <> buildJSONValue key <> B.string8 "\":" <> buildJSONValue val | 
|  | 133 | buildJSONList :: [ThriftVal] -> Builder | 
|  | 134 | buildJSONList = mconcat . intersperse (B.char8 ',') . map buildJSONValue | 
|  | 135 |  | 
|  | 136 | buildShowable :: Show a => a ->  Builder | 
|  | 137 | buildShowable = B.string8 . show | 
|  | 138 |  | 
|  | 139 | -- Reading Functions | 
|  | 140 |  | 
|  | 141 | parseJSONValue :: ThriftType -> Parser ThriftVal | 
|  | 142 | parseJSONValue (T_STRUCT tmap) = | 
|  | 143 | TStruct <$> (lexeme (PC.char8 '{') *> parseJSONStruct tmap <* PC.char8 '}') | 
|  | 144 | parseJSONValue (T_MAP kt vt) = fmap (TMap kt vt) $ | 
|  | 145 | between '[' ']' $ | 
|  | 146 | lexeme escapedString *> lexeme (PC.char8 ',') *> | 
|  | 147 | lexeme escapedString *> lexeme (PC.char8 ',') *> | 
|  | 148 | lexeme decimal *> lexeme (PC.char8 ',') *> | 
|  | 149 | between '{' '}' (parseJSONMap kt vt) | 
|  | 150 | parseJSONValue (T_LIST ty) = fmap (TList ty) $ | 
|  | 151 | between '[' ']' $ do | 
|  | 152 | len <- lexeme escapedString *> lexeme (PC.char8 ',') *> | 
|  | 153 | lexeme decimal <* lexeme (PC.char8 ',') | 
|  | 154 | if len > 0 then parseJSONList ty else return [] | 
|  | 155 | parseJSONValue (T_SET ty) = fmap (TSet ty) $ | 
|  | 156 | between '[' ']' $ do | 
|  | 157 | len <- lexeme escapedString *> lexeme (PC.char8 ',') *> | 
|  | 158 | lexeme decimal <* lexeme (PC.char8 ',') | 
|  | 159 | if len > 0 then parseJSONList ty else return [] | 
|  | 160 | parseJSONValue T_BOOL = | 
|  | 161 | (TBool True <$ string "true") <|> (TBool False <$ string "false") | 
|  | 162 | parseJSONValue T_BYTE = TByte <$> signed decimal | 
|  | 163 | parseJSONValue T_I16 = TI16 <$> signed decimal | 
|  | 164 | parseJSONValue T_I32 = TI32 <$> signed decimal | 
|  | 165 | parseJSONValue T_I64 = TI64 <$> signed decimal | 
|  | 166 | parseJSONValue T_DOUBLE = TDouble <$> double | 
|  | 167 | parseJSONValue T_STRING = TString <$> escapedString | 
|  | 168 | parseJSONValue T_STOP = fail "parseJSONValue: cannot parse type T_STOP" | 
|  | 169 | parseJSONValue T_VOID = fail "parseJSONValue: cannot parse type T_VOID" | 
|  | 170 |  | 
|  | 171 | parseAnyValue :: Parser () | 
|  | 172 | parseAnyValue = choice $ | 
|  | 173 | skipBetween '{' '}' : | 
|  | 174 | skipBetween '[' ']' : | 
|  | 175 | map (void . parseJSONValue) | 
|  | 176 | [ T_BOOL | 
|  | 177 | , T_I16 | 
|  | 178 | , T_I32 | 
|  | 179 | , T_I64 | 
|  | 180 | , T_DOUBLE | 
|  | 181 | , T_STRING | 
|  | 182 | ] | 
|  | 183 | where | 
|  | 184 | skipBetween :: Char -> Char -> Parser () | 
|  | 185 | skipBetween a b = between a b $ void (PC.satisfy (\c -> c /= a && c /= b)) | 
|  | 186 | <|> skipBetween a b | 
|  | 187 |  | 
|  | 188 | parseJSONStruct :: TypeMap -> Parser (Map.HashMap Int16 (LT.Text, ThriftVal)) | 
|  | 189 | parseJSONStruct tmap = Map.fromList . catMaybes <$> parseField | 
|  | 190 | `sepBy` lexeme (PC.char8 ',') | 
|  | 191 | where | 
|  | 192 | parseField = do | 
|  | 193 | fid <- lexeme (between '"' '"' decimal) <* lexeme (PC.char8 ':') | 
|  | 194 | case Map.lookup fid tmap of | 
|  | 195 | Just (str, ftype) -> between '{' '}' $ do | 
|  | 196 | _ <- lexeme (escapedString) *> lexeme (PC.char8 ':') | 
|  | 197 | val <- lexeme (parseJSONValue ftype) | 
|  | 198 | return $ Just (fid, (str, val)) | 
|  | 199 | Nothing -> lexeme parseAnyValue *> return Nothing | 
|  | 200 |  | 
|  | 201 | parseJSONMap :: ThriftType -> ThriftType -> Parser [(ThriftVal, ThriftVal)] | 
|  | 202 | parseJSONMap kt vt = | 
|  | 203 | ((,) <$> lexeme (PC.char8 '"' *> parseJSONValue kt <* PC.char8 '"') <*> | 
|  | 204 | (lexeme (PC.char8 ':') *> lexeme (parseJSONValue vt))) `sepBy` | 
|  | 205 | lexeme (PC.char8 ',') | 
|  | 206 |  | 
|  | 207 | parseJSONList :: ThriftType -> Parser [ThriftVal] | 
|  | 208 | parseJSONList ty = lexeme (parseJSONValue ty) `sepBy` lexeme (PC.char8 ',') | 
|  | 209 |  | 
|  | 210 | escapedString :: Parser LBS.ByteString | 
|  | 211 | escapedString = PC.char8 '"' *> | 
|  | 212 | (LBS.pack <$> P.many' (escapedChar <|> notChar8 '"')) <* | 
|  | 213 | PC.char8 '"' | 
|  | 214 |  | 
|  | 215 | escapedChar :: Parser Word8 | 
|  | 216 | escapedChar = PC.char8 '\\' *> (c2w <$> choice | 
|  | 217 | [ '\SOH' <$ P.string "u0001" | 
|  | 218 | , '\STX' <$ P.string "u0002" | 
|  | 219 | , '\ETX' <$ P.string "u0003" | 
|  | 220 | , '\EOT' <$ P.string "u0004" | 
|  | 221 | , '\ENQ' <$ P.string "u0005" | 
|  | 222 | , '\ACK' <$ P.string "u0006" | 
|  | 223 | , '\BEL' <$ P.string "u0007" | 
|  | 224 | , '\BS'  <$ P.string "u0008" | 
|  | 225 | , '\VT'  <$ P.string "u000b" | 
|  | 226 | , '\FF'  <$ P.string "u000c" | 
|  | 227 | , '\CR'  <$ P.string "u000d" | 
|  | 228 | , '\SO'  <$ P.string "u000e" | 
|  | 229 | , '\SI'  <$ P.string "u000f" | 
|  | 230 | , '\DLE' <$ P.string "u0010" | 
|  | 231 | , '\DC1' <$ P.string "u0011" | 
|  | 232 | , '\DC2' <$ P.string "u0012" | 
|  | 233 | , '\DC3' <$ P.string "u0013" | 
|  | 234 | , '\DC4' <$ P.string "u0014" | 
|  | 235 | , '\NAK' <$ P.string "u0015" | 
|  | 236 | , '\SYN' <$ P.string "u0016" | 
|  | 237 | , '\ETB' <$ P.string "u0017" | 
|  | 238 | , '\CAN' <$ P.string "u0018" | 
|  | 239 | , '\EM'  <$ P.string "u0019" | 
|  | 240 | , '\SUB' <$ P.string "u001a" | 
|  | 241 | , '\ESC' <$ P.string "u001b" | 
|  | 242 | , '\FS'  <$ P.string "u001c" | 
|  | 243 | , '\GS'  <$ P.string "u001d" | 
|  | 244 | , '\RS'  <$ P.string "u001e" | 
|  | 245 | , '\US'  <$ P.string "u001f" | 
|  | 246 | , '\DEL' <$ P.string "u007f" | 
|  | 247 | , '\0' <$ PC.char '0' | 
|  | 248 | , '\a' <$ PC.char 'a' | 
|  | 249 | , '\b' <$ PC.char 'b' | 
|  | 250 | , '\f' <$ PC.char 'f' | 
|  | 251 | , '\n' <$ PC.char 'n' | 
|  | 252 | , '\r' <$ PC.char 'r' | 
|  | 253 | , '\t' <$ PC.char 't' | 
|  | 254 | , '\v' <$ PC.char 'v' | 
|  | 255 | , '\"' <$ PC.char '"' | 
|  | 256 | , '\'' <$ PC.char '\'' | 
|  | 257 | , '\\' <$ PC.char '\\' | 
|  | 258 | , '/'  <$ PC.char '/' | 
|  | 259 | ]) | 
|  | 260 |  | 
|  | 261 | escape :: LBS.ByteString -> Builder | 
|  | 262 | escape = LBS.foldl' escapeChar mempty | 
|  | 263 | where | 
|  | 264 | escapeChar b w = b <> (B.lazyByteString $ case w2c w of | 
|  | 265 | '\0' -> "\\0" | 
|  | 266 | '\b' -> "\\b" | 
|  | 267 | '\f' -> "\\f" | 
|  | 268 | '\n' -> "\\n" | 
|  | 269 | '\r' -> "\\r" | 
|  | 270 | '\t' -> "\\t" | 
|  | 271 | '\"' -> "\\\"" | 
|  | 272 | '\\' -> "\\\\" | 
|  | 273 | '\SOH' -> "\\u0001" | 
|  | 274 | '\STX' -> "\\u0002" | 
|  | 275 | '\ETX' -> "\\u0003" | 
|  | 276 | '\EOT' -> "\\u0004" | 
|  | 277 | '\ENQ' -> "\\u0005" | 
|  | 278 | '\ACK' -> "\\u0006" | 
|  | 279 | '\BEL' -> "\\u0007" | 
|  | 280 | '\VT'  -> "\\u000b" | 
|  | 281 | '\SO'  -> "\\u000e" | 
|  | 282 | '\SI'  -> "\\u000f" | 
|  | 283 | '\DLE' -> "\\u0010" | 
|  | 284 | '\DC1' -> "\\u0011" | 
|  | 285 | '\DC2' -> "\\u0012" | 
|  | 286 | '\DC3' -> "\\u0013" | 
|  | 287 | '\DC4' -> "\\u0014" | 
|  | 288 | '\NAK' -> "\\u0015" | 
|  | 289 | '\SYN' -> "\\u0016" | 
|  | 290 | '\ETB' -> "\\u0017" | 
|  | 291 | '\CAN' -> "\\u0018" | 
|  | 292 | '\EM'  -> "\\u0019" | 
|  | 293 | '\SUB' -> "\\u001a" | 
|  | 294 | '\ESC' -> "\\u001b" | 
|  | 295 | '\FS'  -> "\\u001c" | 
|  | 296 | '\GS'  -> "\\u001d" | 
|  | 297 | '\RS'  -> "\\u001e" | 
|  | 298 | '\US'  -> "\\u001f" | 
|  | 299 | '\DEL' -> "\\u007f" | 
|  | 300 | _ -> LBS.singleton w) | 
|  | 301 |  | 
|  | 302 | lexeme :: Parser a -> Parser a | 
|  | 303 | lexeme = (<* skipSpace) | 
|  | 304 |  | 
|  | 305 | notChar8 :: Char -> Parser Word8 | 
|  | 306 | notChar8 c = P.satisfy (/= c2w c) | 
|  | 307 |  | 
|  | 308 | between :: Char -> Char -> Parser a -> Parser a | 
|  | 309 | between a b p = lexeme (PC.char8 a) *> lexeme p <* lexeme (PC.char8 b) | 
|  | 310 |  | 
|  | 311 | getTypeName :: ThriftType -> Builder | 
|  | 312 | getTypeName ty = B.string8 $ case ty of | 
|  | 313 | T_STRUCT _ -> "rec" | 
|  | 314 | T_MAP _ _  -> "map" | 
|  | 315 | T_LIST _   -> "lst" | 
|  | 316 | T_SET _    -> "set" | 
|  | 317 | T_BOOL     -> "tf" | 
|  | 318 | T_BYTE     -> "i8" | 
|  | 319 | T_I16      -> "i16" | 
|  | 320 | T_I32      -> "i32" | 
|  | 321 | T_I64      -> "i64" | 
|  | 322 | T_DOUBLE   -> "dbl" | 
|  | 323 | T_STRING   -> "str" | 
|  | 324 | _ -> error "Unrecognized Type" | 
|  | 325 |  |