Noam Zilberstein | af5d64a | 2014-07-31 15:44:13 -0700 | [diff] [blame] | 1 | -- |
| 2 | -- Licensed to the Apache Software Foundation (ASF) under one |
| 3 | -- or more contributor license agreements. See the NOTICE file |
| 4 | -- distributed with this work for additional information |
| 5 | -- regarding copyright ownership. The ASF licenses this file |
| 6 | -- to you under the Apache License, Version 2.0 (the |
| 7 | -- "License"); you may not use this file except in compliance |
| 8 | -- with the License. You may obtain a copy of the License at |
| 9 | -- |
| 10 | -- http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | -- |
| 12 | -- Unless required by applicable law or agreed to in writing, |
| 13 | -- software distributed under the License is distributed on an |
| 14 | -- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | -- KIND, either express or implied. See the License for the |
| 16 | -- specific language governing permissions and limitations |
| 17 | -- under the License. |
| 18 | -- |
| 19 | |
| 20 | {-# LANGUAGE CPP #-} |
| 21 | {-# LANGUAGE ExistentialQuantification #-} |
| 22 | {-# LANGUAGE OverloadedStrings #-} |
| 23 | {-# LANGUAGE ScopedTypeVariables #-} |
| 24 | {-# LANGUAGE TupleSections #-} |
| 25 | |
| 26 | module Thrift.Protocol.JSON |
| 27 | ( module Thrift.Protocol |
| 28 | , JSONProtocol(..) |
| 29 | ) where |
| 30 | |
| 31 | import Control.Applicative |
| 32 | import Control.Monad |
| 33 | import Data.Attoparsec.ByteString as P |
| 34 | import Data.Attoparsec.ByteString.Char8 as PC |
| 35 | import Data.Attoparsec.ByteString.Lazy as LP |
| 36 | import Data.ByteString.Lazy.Builder as B |
| 37 | import Data.ByteString.Internal (c2w, w2c) |
| 38 | import Data.Functor |
| 39 | import Data.Int |
| 40 | import Data.List |
| 41 | import Data.Maybe (catMaybes) |
| 42 | import Data.Monoid |
| 43 | import Data.Text.Lazy.Encoding |
| 44 | import Data.Word |
| 45 | import qualified Data.HashMap.Strict as Map |
| 46 | |
| 47 | import Thrift.Protocol |
| 48 | import Thrift.Transport |
| 49 | import Thrift.Types |
| 50 | |
| 51 | import qualified Data.ByteString.Lazy as LBS |
| 52 | import qualified Data.Text.Lazy as LT |
| 53 | |
| 54 | -- | The JSON Protocol data uses the standard 'TSimpleJSONProtocol'. Data is |
| 55 | -- encoded as a JSON 'ByteString' |
| 56 | data JSONProtocol t = JSONProtocol t |
| 57 | -- ^ Construct a 'JSONProtocol' with a 'Transport' |
| 58 | |
| 59 | instance Protocol JSONProtocol where |
| 60 | getTransport (JSONProtocol t) = t |
| 61 | |
| 62 | writeMessageBegin (JSONProtocol t) (s, ty, sq) = tWrite t $ toLazyByteString $ |
| 63 | B.char8 '[' <> buildShowable (1 :: Int32) <> |
| 64 | B.string8 ",\"" <> escape (encodeUtf8 s) <> B.char8 '\"' <> |
| 65 | B.char8 ',' <> buildShowable (fromEnum ty) <> |
| 66 | B.char8 ',' <> buildShowable sq <> |
| 67 | B.char8 ',' |
| 68 | writeMessageEnd (JSONProtocol t) = tWrite t "]" |
| 69 | readMessageBegin p = runParser p $ skipSpace *> do |
| 70 | _ver :: Int32 <- lexeme (PC.char8 '[') *> lexeme (signed decimal) |
| 71 | bs <- lexeme (PC.char8 ',') *> lexeme escapedString |
| 72 | case decodeUtf8' bs of |
| 73 | Left _ -> fail "readMessage: invalid text encoding" |
| 74 | Right str -> do |
| 75 | ty <- toEnum <$> (lexeme (PC.char8 ',') *> lexeme (signed decimal)) |
| 76 | seqNum <- lexeme (PC.char8 ',') *> lexeme (signed decimal) |
| 77 | _ <- PC.char8 ',' |
| 78 | return (str, ty, seqNum) |
| 79 | readMessageEnd p = void $ runParser p (PC.char8 ']') |
| 80 | |
| 81 | serializeVal _ = toLazyByteString . buildJSONValue |
| 82 | deserializeVal _ ty bs = |
| 83 | case LP.eitherResult $ LP.parse (parseJSONValue ty) bs of |
| 84 | Left s -> error s |
| 85 | Right val -> val |
| 86 | |
| 87 | readVal p ty = runParser p $ skipSpace *> parseJSONValue ty |
| 88 | |
| 89 | |
| 90 | -- Writing Functions |
| 91 | |
| 92 | buildJSONValue :: ThriftVal -> Builder |
| 93 | buildJSONValue (TStruct fields) = B.char8 '{' <> buildJSONStruct fields <> B.char8 '}' |
| 94 | buildJSONValue (TMap kt vt entries) = |
| 95 | B.char8 '[' <> B.char8 '"' <> getTypeName kt <> B.char8 '"' <> |
| 96 | B.char8 ',' <> B.char8 '"' <> getTypeName vt <> B.char8 '"' <> |
| 97 | B.char8 ',' <> buildShowable (length entries) <> |
| 98 | B.char8 ',' <> B.char8 '{' <> buildJSONMap entries <> B.char8 '}' <> |
| 99 | B.char8 ']' |
| 100 | buildJSONValue (TList ty entries) = |
| 101 | B.char8 '[' <> B.char8 '"' <> getTypeName ty <> B.char8 '"' <> |
| 102 | B.char8 ',' <> buildShowable (length entries) <> |
| 103 | (if length entries > 0 |
| 104 | then B.char8 ',' <> buildJSONList entries |
| 105 | else mempty) <> |
| 106 | B.char8 ']' |
| 107 | buildJSONValue (TSet ty entries) = buildJSONValue (TList ty entries) |
| 108 | buildJSONValue (TBool b) = if b then B.string8 "true" else B.string8 "false" |
| 109 | buildJSONValue (TByte b) = buildShowable b |
| 110 | buildJSONValue (TI16 i) = buildShowable i |
| 111 | buildJSONValue (TI32 i) = buildShowable i |
| 112 | buildJSONValue (TI64 i) = buildShowable i |
| 113 | buildJSONValue (TDouble d) = buildShowable d |
| 114 | buildJSONValue (TString s) = B.char8 '\"' <> escape s <> B.char8 '\"' |
| 115 | |
| 116 | buildJSONStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder |
| 117 | buildJSONStruct = mconcat . intersperse (B.char8 ',') . Map.foldrWithKey buildField [] |
| 118 | where |
| 119 | buildField fid (_,val) = (:) $ |
| 120 | B.char8 '"' <> buildShowable fid <> B.string8 "\":" <> |
| 121 | B.char8 '{' <> |
| 122 | B.char8 '"' <> getTypeName (getTypeOf val) <> B.string8 "\":" <> |
| 123 | buildJSONValue val <> |
| 124 | B.char8 '}' |
| 125 | |
| 126 | buildJSONMap :: [(ThriftVal, ThriftVal)] -> Builder |
| 127 | buildJSONMap = mconcat . intersperse (B.char8 ',') . map buildKV |
| 128 | where |
| 129 | buildKV (key@(TString _), val) = |
| 130 | buildJSONValue key <> B.char8 ':' <> buildJSONValue val |
| 131 | buildKV (key, val) = |
| 132 | B.char8 '\"' <> buildJSONValue key <> B.string8 "\":" <> buildJSONValue val |
| 133 | buildJSONList :: [ThriftVal] -> Builder |
| 134 | buildJSONList = mconcat . intersperse (B.char8 ',') . map buildJSONValue |
| 135 | |
| 136 | buildShowable :: Show a => a -> Builder |
| 137 | buildShowable = B.string8 . show |
| 138 | |
| 139 | -- Reading Functions |
| 140 | |
| 141 | parseJSONValue :: ThriftType -> Parser ThriftVal |
| 142 | parseJSONValue (T_STRUCT tmap) = |
| 143 | TStruct <$> (lexeme (PC.char8 '{') *> parseJSONStruct tmap <* PC.char8 '}') |
| 144 | parseJSONValue (T_MAP kt vt) = fmap (TMap kt vt) $ |
| 145 | between '[' ']' $ |
| 146 | lexeme escapedString *> lexeme (PC.char8 ',') *> |
| 147 | lexeme escapedString *> lexeme (PC.char8 ',') *> |
| 148 | lexeme decimal *> lexeme (PC.char8 ',') *> |
| 149 | between '{' '}' (parseJSONMap kt vt) |
| 150 | parseJSONValue (T_LIST ty) = fmap (TList ty) $ |
| 151 | between '[' ']' $ do |
| 152 | len <- lexeme escapedString *> lexeme (PC.char8 ',') *> |
| 153 | lexeme decimal <* lexeme (PC.char8 ',') |
| 154 | if len > 0 then parseJSONList ty else return [] |
| 155 | parseJSONValue (T_SET ty) = fmap (TSet ty) $ |
| 156 | between '[' ']' $ do |
| 157 | len <- lexeme escapedString *> lexeme (PC.char8 ',') *> |
| 158 | lexeme decimal <* lexeme (PC.char8 ',') |
| 159 | if len > 0 then parseJSONList ty else return [] |
| 160 | parseJSONValue T_BOOL = |
| 161 | (TBool True <$ string "true") <|> (TBool False <$ string "false") |
| 162 | parseJSONValue T_BYTE = TByte <$> signed decimal |
| 163 | parseJSONValue T_I16 = TI16 <$> signed decimal |
| 164 | parseJSONValue T_I32 = TI32 <$> signed decimal |
| 165 | parseJSONValue T_I64 = TI64 <$> signed decimal |
| 166 | parseJSONValue T_DOUBLE = TDouble <$> double |
| 167 | parseJSONValue T_STRING = TString <$> escapedString |
| 168 | parseJSONValue T_STOP = fail "parseJSONValue: cannot parse type T_STOP" |
| 169 | parseJSONValue T_VOID = fail "parseJSONValue: cannot parse type T_VOID" |
| 170 | |
| 171 | parseAnyValue :: Parser () |
| 172 | parseAnyValue = choice $ |
| 173 | skipBetween '{' '}' : |
| 174 | skipBetween '[' ']' : |
| 175 | map (void . parseJSONValue) |
| 176 | [ T_BOOL |
| 177 | , T_I16 |
| 178 | , T_I32 |
| 179 | , T_I64 |
| 180 | , T_DOUBLE |
| 181 | , T_STRING |
| 182 | ] |
| 183 | where |
| 184 | skipBetween :: Char -> Char -> Parser () |
| 185 | skipBetween a b = between a b $ void (PC.satisfy (\c -> c /= a && c /= b)) |
| 186 | <|> skipBetween a b |
| 187 | |
| 188 | parseJSONStruct :: TypeMap -> Parser (Map.HashMap Int16 (LT.Text, ThriftVal)) |
| 189 | parseJSONStruct tmap = Map.fromList . catMaybes <$> parseField |
| 190 | `sepBy` lexeme (PC.char8 ',') |
| 191 | where |
| 192 | parseField = do |
| 193 | fid <- lexeme (between '"' '"' decimal) <* lexeme (PC.char8 ':') |
| 194 | case Map.lookup fid tmap of |
| 195 | Just (str, ftype) -> between '{' '}' $ do |
| 196 | _ <- lexeme (escapedString) *> lexeme (PC.char8 ':') |
| 197 | val <- lexeme (parseJSONValue ftype) |
| 198 | return $ Just (fid, (str, val)) |
| 199 | Nothing -> lexeme parseAnyValue *> return Nothing |
| 200 | |
| 201 | parseJSONMap :: ThriftType -> ThriftType -> Parser [(ThriftVal, ThriftVal)] |
| 202 | parseJSONMap kt vt = |
| 203 | ((,) <$> lexeme (PC.char8 '"' *> parseJSONValue kt <* PC.char8 '"') <*> |
| 204 | (lexeme (PC.char8 ':') *> lexeme (parseJSONValue vt))) `sepBy` |
| 205 | lexeme (PC.char8 ',') |
| 206 | |
| 207 | parseJSONList :: ThriftType -> Parser [ThriftVal] |
| 208 | parseJSONList ty = lexeme (parseJSONValue ty) `sepBy` lexeme (PC.char8 ',') |
| 209 | |
| 210 | escapedString :: Parser LBS.ByteString |
| 211 | escapedString = PC.char8 '"' *> |
| 212 | (LBS.pack <$> P.many' (escapedChar <|> notChar8 '"')) <* |
| 213 | PC.char8 '"' |
| 214 | |
| 215 | escapedChar :: Parser Word8 |
| 216 | escapedChar = PC.char8 '\\' *> (c2w <$> choice |
| 217 | [ '\SOH' <$ P.string "u0001" |
| 218 | , '\STX' <$ P.string "u0002" |
| 219 | , '\ETX' <$ P.string "u0003" |
| 220 | , '\EOT' <$ P.string "u0004" |
| 221 | , '\ENQ' <$ P.string "u0005" |
| 222 | , '\ACK' <$ P.string "u0006" |
| 223 | , '\BEL' <$ P.string "u0007" |
| 224 | , '\BS' <$ P.string "u0008" |
| 225 | , '\VT' <$ P.string "u000b" |
| 226 | , '\FF' <$ P.string "u000c" |
| 227 | , '\CR' <$ P.string "u000d" |
| 228 | , '\SO' <$ P.string "u000e" |
| 229 | , '\SI' <$ P.string "u000f" |
| 230 | , '\DLE' <$ P.string "u0010" |
| 231 | , '\DC1' <$ P.string "u0011" |
| 232 | , '\DC2' <$ P.string "u0012" |
| 233 | , '\DC3' <$ P.string "u0013" |
| 234 | , '\DC4' <$ P.string "u0014" |
| 235 | , '\NAK' <$ P.string "u0015" |
| 236 | , '\SYN' <$ P.string "u0016" |
| 237 | , '\ETB' <$ P.string "u0017" |
| 238 | , '\CAN' <$ P.string "u0018" |
| 239 | , '\EM' <$ P.string "u0019" |
| 240 | , '\SUB' <$ P.string "u001a" |
| 241 | , '\ESC' <$ P.string "u001b" |
| 242 | , '\FS' <$ P.string "u001c" |
| 243 | , '\GS' <$ P.string "u001d" |
| 244 | , '\RS' <$ P.string "u001e" |
| 245 | , '\US' <$ P.string "u001f" |
| 246 | , '\DEL' <$ P.string "u007f" |
| 247 | , '\0' <$ PC.char '0' |
| 248 | , '\a' <$ PC.char 'a' |
| 249 | , '\b' <$ PC.char 'b' |
| 250 | , '\f' <$ PC.char 'f' |
| 251 | , '\n' <$ PC.char 'n' |
| 252 | , '\r' <$ PC.char 'r' |
| 253 | , '\t' <$ PC.char 't' |
| 254 | , '\v' <$ PC.char 'v' |
| 255 | , '\"' <$ PC.char '"' |
| 256 | , '\'' <$ PC.char '\'' |
| 257 | , '\\' <$ PC.char '\\' |
| 258 | , '/' <$ PC.char '/' |
| 259 | ]) |
| 260 | |
| 261 | escape :: LBS.ByteString -> Builder |
| 262 | escape = LBS.foldl' escapeChar mempty |
| 263 | where |
| 264 | escapeChar b w = b <> (B.lazyByteString $ case w2c w of |
| 265 | '\0' -> "\\0" |
| 266 | '\b' -> "\\b" |
| 267 | '\f' -> "\\f" |
| 268 | '\n' -> "\\n" |
| 269 | '\r' -> "\\r" |
| 270 | '\t' -> "\\t" |
| 271 | '\"' -> "\\\"" |
| 272 | '\\' -> "\\\\" |
| 273 | '\SOH' -> "\\u0001" |
| 274 | '\STX' -> "\\u0002" |
| 275 | '\ETX' -> "\\u0003" |
| 276 | '\EOT' -> "\\u0004" |
| 277 | '\ENQ' -> "\\u0005" |
| 278 | '\ACK' -> "\\u0006" |
| 279 | '\BEL' -> "\\u0007" |
| 280 | '\VT' -> "\\u000b" |
| 281 | '\SO' -> "\\u000e" |
| 282 | '\SI' -> "\\u000f" |
| 283 | '\DLE' -> "\\u0010" |
| 284 | '\DC1' -> "\\u0011" |
| 285 | '\DC2' -> "\\u0012" |
| 286 | '\DC3' -> "\\u0013" |
| 287 | '\DC4' -> "\\u0014" |
| 288 | '\NAK' -> "\\u0015" |
| 289 | '\SYN' -> "\\u0016" |
| 290 | '\ETB' -> "\\u0017" |
| 291 | '\CAN' -> "\\u0018" |
| 292 | '\EM' -> "\\u0019" |
| 293 | '\SUB' -> "\\u001a" |
| 294 | '\ESC' -> "\\u001b" |
| 295 | '\FS' -> "\\u001c" |
| 296 | '\GS' -> "\\u001d" |
| 297 | '\RS' -> "\\u001e" |
| 298 | '\US' -> "\\u001f" |
| 299 | '\DEL' -> "\\u007f" |
| 300 | _ -> LBS.singleton w) |
| 301 | |
| 302 | lexeme :: Parser a -> Parser a |
| 303 | lexeme = (<* skipSpace) |
| 304 | |
| 305 | notChar8 :: Char -> Parser Word8 |
| 306 | notChar8 c = P.satisfy (/= c2w c) |
| 307 | |
| 308 | between :: Char -> Char -> Parser a -> Parser a |
| 309 | between a b p = lexeme (PC.char8 a) *> lexeme p <* lexeme (PC.char8 b) |
| 310 | |
| 311 | getTypeName :: ThriftType -> Builder |
| 312 | getTypeName ty = B.string8 $ case ty of |
| 313 | T_STRUCT _ -> "rec" |
| 314 | T_MAP _ _ -> "map" |
| 315 | T_LIST _ -> "lst" |
| 316 | T_SET _ -> "set" |
| 317 | T_BOOL -> "tf" |
| 318 | T_BYTE -> "i8" |
| 319 | T_I16 -> "i16" |
| 320 | T_I32 -> "i32" |
| 321 | T_I64 -> "i64" |
| 322 | T_DOUBLE -> "dbl" |
| 323 | T_STRING -> "str" |
| 324 | _ -> error "Unrecognized Type" |
| 325 | |