THRIFT-2641 Improvements to Haskell Compiler/Libraries

- test/test.sh integration
- add json and compact protocol

This closes #175

Signed-off-by: Roger Meier <roger@apache.org>
diff --git a/lib/hs/src/Thrift/Protocol/JSON.hs b/lib/hs/src/Thrift/Protocol/JSON.hs
new file mode 100644
index 0000000..f378ea2
--- /dev/null
+++ b/lib/hs/src/Thrift/Protocol/JSON.hs
@@ -0,0 +1,325 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+--
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ExistentialQuantification #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
+
+module Thrift.Protocol.JSON
+    ( module Thrift.Protocol
+    , JSONProtocol(..)
+    ) where
+
+import Control.Applicative
+import Control.Monad
+import Data.Attoparsec.ByteString as P
+import Data.Attoparsec.ByteString.Char8 as PC
+import Data.Attoparsec.ByteString.Lazy as LP
+import Data.ByteString.Lazy.Builder as B
+import Data.ByteString.Internal (c2w, w2c)
+import Data.Functor
+import Data.Int
+import Data.List
+import Data.Maybe (catMaybes)
+import Data.Monoid
+import Data.Text.Lazy.Encoding
+import Data.Word
+import qualified Data.HashMap.Strict as Map
+
+import Thrift.Protocol
+import Thrift.Transport
+import Thrift.Types
+
+import qualified Data.ByteString.Lazy as LBS
+import qualified Data.Text.Lazy as LT
+
+-- | The JSON Protocol data uses the standard 'TSimpleJSONProtocol'.  Data is
+-- encoded as a JSON 'ByteString'
+data JSONProtocol t = JSONProtocol t
+                      -- ^ Construct a 'JSONProtocol' with a 'Transport'
+
+instance Protocol JSONProtocol where
+    getTransport (JSONProtocol t) = t
+
+    writeMessageBegin (JSONProtocol t) (s, ty, sq) = tWrite t $ toLazyByteString $
+      B.char8 '[' <> buildShowable (1 :: Int32) <>
+      B.string8 ",\"" <> escape (encodeUtf8 s) <> B.char8 '\"' <>
+      B.char8 ',' <> buildShowable (fromEnum ty) <>
+      B.char8 ',' <> buildShowable sq <>
+      B.char8 ','
+    writeMessageEnd (JSONProtocol t) = tWrite t "]"
+    readMessageBegin p = runParser p $ skipSpace *> do
+      _ver :: Int32 <- lexeme (PC.char8 '[') *> lexeme (signed decimal)
+      bs <- lexeme (PC.char8 ',') *> lexeme escapedString
+      case decodeUtf8' bs of
+        Left _ -> fail "readMessage: invalid text encoding"
+        Right str -> do
+          ty <- toEnum <$> (lexeme (PC.char8 ',') *> lexeme (signed decimal))
+          seqNum <- lexeme (PC.char8 ',') *> lexeme (signed decimal)
+          _ <- PC.char8 ','
+          return (str, ty, seqNum)
+    readMessageEnd p = void $ runParser p (PC.char8 ']')
+
+    serializeVal _ = toLazyByteString . buildJSONValue
+    deserializeVal _ ty bs =
+      case LP.eitherResult $ LP.parse (parseJSONValue ty) bs of
+        Left s -> error s
+        Right val -> val
+
+    readVal p ty = runParser p $ skipSpace *> parseJSONValue ty
+
+
+-- Writing Functions
+
+buildJSONValue :: ThriftVal -> Builder
+buildJSONValue (TStruct fields) = B.char8 '{' <> buildJSONStruct fields <> B.char8 '}'
+buildJSONValue (TMap kt vt entries) =
+  B.char8 '[' <> B.char8 '"' <> getTypeName kt <> B.char8 '"' <>
+  B.char8 ',' <> B.char8 '"' <> getTypeName vt <> B.char8 '"' <>
+  B.char8 ',' <> buildShowable (length entries) <>
+  B.char8 ',' <> B.char8 '{' <> buildJSONMap entries <> B.char8 '}' <>
+  B.char8 ']'
+buildJSONValue (TList ty entries) =
+  B.char8 '[' <> B.char8 '"' <> getTypeName ty <> B.char8 '"' <>
+  B.char8 ',' <> buildShowable (length entries) <>
+  (if length entries > 0
+   then B.char8 ',' <> buildJSONList entries
+   else mempty) <>
+  B.char8 ']'
+buildJSONValue (TSet ty entries) = buildJSONValue (TList ty entries)
+buildJSONValue (TBool b) = if b then B.string8 "true" else B.string8 "false"
+buildJSONValue (TByte b) = buildShowable b
+buildJSONValue (TI16 i) = buildShowable i
+buildJSONValue (TI32 i) = buildShowable i
+buildJSONValue (TI64 i) = buildShowable i
+buildJSONValue (TDouble d) = buildShowable d
+buildJSONValue (TString s) = B.char8 '\"' <> escape s <> B.char8 '\"'
+
+buildJSONStruct :: Map.HashMap Int16 (LT.Text, ThriftVal) -> Builder
+buildJSONStruct = mconcat . intersperse (B.char8 ',') . Map.foldrWithKey buildField []
+  where 
+    buildField fid (_,val) = (:) $
+      B.char8 '"' <> buildShowable fid <> B.string8 "\":" <> 
+      B.char8 '{' <>
+      B.char8 '"' <> getTypeName (getTypeOf val) <> B.string8 "\":" <>
+      buildJSONValue val <>
+      B.char8 '}'
+
+buildJSONMap :: [(ThriftVal, ThriftVal)] -> Builder
+buildJSONMap = mconcat . intersperse (B.char8 ',') . map buildKV
+  where
+    buildKV (key@(TString _), val) =
+      buildJSONValue key <> B.char8 ':' <> buildJSONValue val
+    buildKV (key, val) =
+      B.char8 '\"' <> buildJSONValue key <> B.string8 "\":" <> buildJSONValue val
+buildJSONList :: [ThriftVal] -> Builder
+buildJSONList = mconcat . intersperse (B.char8 ',') . map buildJSONValue
+
+buildShowable :: Show a => a ->  Builder
+buildShowable = B.string8 . show
+
+-- Reading Functions
+
+parseJSONValue :: ThriftType -> Parser ThriftVal
+parseJSONValue (T_STRUCT tmap) =
+  TStruct <$> (lexeme (PC.char8 '{') *> parseJSONStruct tmap <* PC.char8 '}')
+parseJSONValue (T_MAP kt vt) = fmap (TMap kt vt) $
+  between '[' ']' $
+    lexeme escapedString *> lexeme (PC.char8 ',') *>
+    lexeme escapedString *> lexeme (PC.char8 ',') *>
+    lexeme decimal *> lexeme (PC.char8 ',') *>
+    between '{' '}' (parseJSONMap kt vt)
+parseJSONValue (T_LIST ty) = fmap (TList ty) $
+  between '[' ']' $ do
+    len <- lexeme escapedString *> lexeme (PC.char8 ',') *>
+           lexeme decimal <* lexeme (PC.char8 ',')
+    if len > 0 then parseJSONList ty else return []
+parseJSONValue (T_SET ty) = fmap (TSet ty) $
+  between '[' ']' $ do
+    len <- lexeme escapedString *> lexeme (PC.char8 ',') *>
+           lexeme decimal <* lexeme (PC.char8 ',')
+    if len > 0 then parseJSONList ty else return []
+parseJSONValue T_BOOL =
+  (TBool True <$ string "true") <|> (TBool False <$ string "false")
+parseJSONValue T_BYTE = TByte <$> signed decimal
+parseJSONValue T_I16 = TI16 <$> signed decimal
+parseJSONValue T_I32 = TI32 <$> signed decimal
+parseJSONValue T_I64 = TI64 <$> signed decimal
+parseJSONValue T_DOUBLE = TDouble <$> double
+parseJSONValue T_STRING = TString <$> escapedString
+parseJSONValue T_STOP = fail "parseJSONValue: cannot parse type T_STOP"
+parseJSONValue T_VOID = fail "parseJSONValue: cannot parse type T_VOID"
+
+parseAnyValue :: Parser ()
+parseAnyValue = choice $
+                skipBetween '{' '}' :
+                skipBetween '[' ']' :
+                map (void . parseJSONValue)
+                  [ T_BOOL
+                  , T_I16
+                  , T_I32
+                  , T_I64
+                  , T_DOUBLE
+                  , T_STRING
+                  ]
+  where
+    skipBetween :: Char -> Char -> Parser ()
+    skipBetween a b = between a b $ void (PC.satisfy (\c -> c /= a && c /= b))
+                                          <|> skipBetween a b
+
+parseJSONStruct :: TypeMap -> Parser (Map.HashMap Int16 (LT.Text, ThriftVal))
+parseJSONStruct tmap = Map.fromList . catMaybes <$> parseField
+                       `sepBy` lexeme (PC.char8 ',')
+  where
+    parseField = do
+      fid <- lexeme (between '"' '"' decimal) <* lexeme (PC.char8 ':')
+      case Map.lookup fid tmap of
+        Just (str, ftype) -> between '{' '}' $ do
+          _ <- lexeme (escapedString) *> lexeme (PC.char8 ':')
+          val <- lexeme (parseJSONValue ftype)
+          return $ Just (fid, (str, val))
+        Nothing -> lexeme parseAnyValue *> return Nothing
+
+parseJSONMap :: ThriftType -> ThriftType -> Parser [(ThriftVal, ThriftVal)]
+parseJSONMap kt vt =
+  ((,) <$> lexeme (PC.char8 '"' *> parseJSONValue kt <* PC.char8 '"') <*>
+   (lexeme (PC.char8 ':') *> lexeme (parseJSONValue vt))) `sepBy`
+  lexeme (PC.char8 ',')
+
+parseJSONList :: ThriftType -> Parser [ThriftVal]
+parseJSONList ty = lexeme (parseJSONValue ty) `sepBy` lexeme (PC.char8 ',')
+
+escapedString :: Parser LBS.ByteString
+escapedString = PC.char8 '"' *>
+                (LBS.pack <$> P.many' (escapedChar <|> notChar8 '"')) <*
+                PC.char8 '"'
+
+escapedChar :: Parser Word8
+escapedChar = PC.char8 '\\' *> (c2w <$> choice
+                                [ '\SOH' <$ P.string "u0001"
+                                , '\STX' <$ P.string "u0002"
+                                , '\ETX' <$ P.string "u0003"
+                                , '\EOT' <$ P.string "u0004"
+                                , '\ENQ' <$ P.string "u0005"
+                                , '\ACK' <$ P.string "u0006"
+                                , '\BEL' <$ P.string "u0007"
+                                , '\BS'  <$ P.string "u0008"
+                                , '\VT'  <$ P.string "u000b"
+                                , '\FF'  <$ P.string "u000c"
+                                , '\CR'  <$ P.string "u000d"
+                                , '\SO'  <$ P.string "u000e"
+                                , '\SI'  <$ P.string "u000f"
+                                , '\DLE' <$ P.string "u0010"
+                                , '\DC1' <$ P.string "u0011"
+                                , '\DC2' <$ P.string "u0012"
+                                , '\DC3' <$ P.string "u0013"
+                                , '\DC4' <$ P.string "u0014"
+                                , '\NAK' <$ P.string "u0015"
+                                , '\SYN' <$ P.string "u0016"
+                                , '\ETB' <$ P.string "u0017"
+                                , '\CAN' <$ P.string "u0018"
+                                , '\EM'  <$ P.string "u0019"
+                                , '\SUB' <$ P.string "u001a"
+                                , '\ESC' <$ P.string "u001b"
+                                , '\FS'  <$ P.string "u001c"
+                                , '\GS'  <$ P.string "u001d"
+                                , '\RS'  <$ P.string "u001e"
+                                , '\US'  <$ P.string "u001f"
+                                , '\DEL' <$ P.string "u007f"
+                                , '\0' <$ PC.char '0'
+                                , '\a' <$ PC.char 'a'
+                                , '\b' <$ PC.char 'b'
+                                , '\f' <$ PC.char 'f'
+                                , '\n' <$ PC.char 'n'
+                                , '\r' <$ PC.char 'r'
+                                , '\t' <$ PC.char 't'
+                                , '\v' <$ PC.char 'v'
+                                , '\"' <$ PC.char '"'
+                                , '\'' <$ PC.char '\''
+                                , '\\' <$ PC.char '\\'
+                                , '/'  <$ PC.char '/'
+                                ])
+
+escape :: LBS.ByteString -> Builder
+escape = LBS.foldl' escapeChar mempty
+  where
+    escapeChar b w = b <> (B.lazyByteString $ case w2c w of
+      '\0' -> "\\0"
+      '\b' -> "\\b"
+      '\f' -> "\\f"
+      '\n' -> "\\n"
+      '\r' -> "\\r"
+      '\t' -> "\\t"
+      '\"' -> "\\\""
+      '\\' -> "\\\\"
+      '\SOH' -> "\\u0001"
+      '\STX' -> "\\u0002"
+      '\ETX' -> "\\u0003"
+      '\EOT' -> "\\u0004"
+      '\ENQ' -> "\\u0005"
+      '\ACK' -> "\\u0006"
+      '\BEL' -> "\\u0007"
+      '\VT'  -> "\\u000b"
+      '\SO'  -> "\\u000e"
+      '\SI'  -> "\\u000f"
+      '\DLE' -> "\\u0010"
+      '\DC1' -> "\\u0011"
+      '\DC2' -> "\\u0012"
+      '\DC3' -> "\\u0013"
+      '\DC4' -> "\\u0014"
+      '\NAK' -> "\\u0015"
+      '\SYN' -> "\\u0016"
+      '\ETB' -> "\\u0017"
+      '\CAN' -> "\\u0018"
+      '\EM'  -> "\\u0019"
+      '\SUB' -> "\\u001a"
+      '\ESC' -> "\\u001b"
+      '\FS'  -> "\\u001c"
+      '\GS'  -> "\\u001d"
+      '\RS'  -> "\\u001e"
+      '\US'  -> "\\u001f"
+      '\DEL' -> "\\u007f"
+      _ -> LBS.singleton w)
+
+lexeme :: Parser a -> Parser a
+lexeme = (<* skipSpace)
+
+notChar8 :: Char -> Parser Word8
+notChar8 c = P.satisfy (/= c2w c)
+
+between :: Char -> Char -> Parser a -> Parser a
+between a b p = lexeme (PC.char8 a) *> lexeme p <* lexeme (PC.char8 b)
+
+getTypeName :: ThriftType -> Builder
+getTypeName ty = B.string8 $ case ty of
+  T_STRUCT _ -> "rec"
+  T_MAP _ _  -> "map"
+  T_LIST _   -> "lst"
+  T_SET _    -> "set"
+  T_BOOL     -> "tf"
+  T_BYTE     -> "i8"
+  T_I16      -> "i16"
+  T_I32      -> "i32"
+  T_I64      -> "i64"
+  T_DOUBLE   -> "dbl"
+  T_STRING   -> "str"
+  _ -> error "Unrecognized Type"
+