THRIFT-5855: Add py fuzzers

Add fuzzers for py support, to improve the reliability/robustness of the implementation
diff --git a/FUZZING.md b/FUZZING.md
index 9ed7fb8..a81dfe4 100644
--- a/FUZZING.md
+++ b/FUZZING.md
@@ -20,12 +20,12 @@
 - C++
 - Java/JVM (and other JVM languages)
 - JavaScript
+- Python
 
 We are working on adding fuzzers for the following languages:
 
 - Rust  
 - Swift
-- Python
 - netstd
 
 ## Fuzzer Types
diff --git a/lib/py/Makefile.am b/lib/py/Makefile.am
index 4777ccf..2be72de 100644
--- a/lib/py/Makefile.am
+++ b/lib/py/Makefile.am
@@ -39,6 +39,7 @@
 all-local: py3-build
 	$(PYTHON) setup.py build
 	${THRIFT} --gen py test/test_thrift_file/TestServer.thrift
+	${THRIFT} --gen py ../../test/v0.16/FuzzTestNoUuid.thrift
 
 # We're ignoring prefix here because site-packages seems to be
 # the equivalent of /usr/local/lib in Python land.
diff --git a/lib/py/test/fuzz/README.md b/lib/py/test/fuzz/README.md
new file mode 100644
index 0000000..10e76e6
--- /dev/null
+++ b/lib/py/test/fuzz/README.md
@@ -0,0 +1,22 @@
+# Python Fuzzing README
+
+The Python Thrift implementation uses Atheris for fuzzing. Atheris is a coverage-guided, in-process fuzzer for Python that integrates with libFuzzer.
+
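+Unlike the C++ fuzzers, the Python fuzzers are not standalone binaries that can be run straight from a fresh checkout. Each fuzz target is a Python script that requires the Atheris package, the built Thrift Python library, and the generated `gen-py` code, so it has to be run after the `lib/py` build has completed.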
+
+We currently have several fuzz targets that test different aspects of the Thrift implementation:
+
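+* FuzzParseBinary (`fuzz_parse_TBinaryProtocol.py`) -- fuzzes the deserialization of the Binary protocol
+* FuzzParseBinaryAccelerated (`fuzz_parse_TBinaryProtocolAccelerated.py`) -- fuzzes the deserialization of the accelerated Binary protocol
+* FuzzParseCompact (`fuzz_parse_TCompactProtocol.py`) -- fuzzes the deserialization of the Compact protocol
+* FuzzParseCompactAccelerated (`fuzz_parse_TCompactProtocolAccelerated.py`) -- fuzzes the deserialization of the accelerated Compact protocol
+* FuzzRoundtripBinary (`fuzz_roundtrip_TBinaryProtocol.py`) -- fuzzes the roundtrip of the Binary protocol (i.e. deserializes the input, serializes the result, deserializes it again, and compares the two objects)
+* FuzzRoundtripBinaryAccelerated (`fuzz_roundtrip_TBinaryProtocolAccelerated.py`) -- fuzzes the roundtrip of the accelerated Binary protocol
+* FuzzRoundtripCompact (`fuzz_roundtrip_TCompactProtocol.py`) -- fuzzes the roundtrip of the Compact protocol
+* FuzzRoundtripCompactAccelerated (`fuzz_roundtrip_TCompactProtocolAccelerated.py`) -- fuzzes the roundtrip of the accelerated Compact protocol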
+
+The fuzzers use Atheris's mutation engine to generate test cases. Each fuzzer implements the standard Atheris interface and uses common testing code from the fuzz test utilities in `fuzz_common.py`.
+
+For more information about Atheris and its options, see the [Atheris documentation](https://github.com/google/atheris).
+
+You can also use the corpus generator from the Rust implementation to generate initial corpus files that can be used with these Python fuzzers, since the wire formats are identical between implementations.
diff --git a/lib/py/test/fuzz/fuzz_common.py b/lib/py/test/fuzz/fuzz_common.py
new file mode 100644
index 0000000..f472047
--- /dev/null
+++ b/lib/py/test/fuzz/fuzz_common.py
@@ -0,0 +1,134 @@
+import glob
+import sys
+import os
+import atheris
+
+def setup_thrift_imports():
+    """Make the Thrift runtime and the generated fuzz types importable.
+
+    A PyInstaller bundle (the oss-fuzz packaging) gets the relative "thrift_lib"
+    and "gen-py" entries; otherwise the in-tree build and gen-py paths are added.
+    """
+    if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
+        print('running in a PyInstaller bundle')
+        sys.path.insert(0, "thrift_lib")
+        sys.path.insert(0, "gen-py")
+    else:
+        print('running in a normal Python process')
+        here = os.path.realpath(os.path.dirname(__file__))
+        root = here
+        for _ in range(4):  # lib/py/test/fuzz -> repository root
+            root = os.path.dirname(root)
+        major, minor = sys.version_info[0], sys.version_info[1]
+        suffixes = ('-%d.%d' % (major, minor), '-%d%d' % (major, minor))
+        for candidate in glob.glob(os.path.join(root, 'lib', 'py', 'build', 'lib.*')):
+            # Only build directories matching the running interpreter version.
+            if candidate.endswith(suffixes):
+                sys.path.insert(0, candidate)
+        test_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        sys.path.append(os.path.join(test_dir, "..", "gen-py"))
+    print(sys.path)
+
+setup_thrift_imports()  # must run first so the thrift / fuzz imports below resolve
+
+from thrift.transport import TTransport
+from thrift.TSerialization import serialize, deserialize
+from fuzz.ttypes import FuzzTest
+
+def create_parser_fuzzer(protocol_factory_class):
+    """
+    Create a parser fuzzer function for a specific protocol.
+
+    Args:
+        protocol_factory_class: The Thrift protocol factory class to use; it is
+            called with string_length_limit and container_length_limit keywords.
+
+    Returns:
+        A function that can be used with atheris.Setup()
+    """
+    # Build the factory once, outside the per-input callback, so a factory that
+    # cannot be constructed fails loudly instead of being swallowed on each input.
+    factory = protocol_factory_class(string_length_limit=1000, container_length_limit=1000)
+
+    def TestOneInput(data):
+        if len(data) < 2:
+            return
+
+        try:
+            # deserialize() is handed the raw bytes directly, so no
+            # TMemoryBuffer/TBufferedTransport needs to be built here.
+            deserialize(FuzzTest(), data, factory)
+        except Exception:
+            # We expect various exceptions during fuzzing
+            pass
+
+    return TestOneInput
+
+def create_roundtrip_fuzzer(protocol_factory_class):
+    """
+    Create a roundtrip fuzzer function for a specific protocol.
+
+    Args:
+        protocol_factory_class: The Thrift protocol factory class to use
+
+    Returns:
+        A function that can be used with atheris.Setup(); it raises
+        AssertionError when a re-serialized object does not compare equal.
+    """
+    # Build the factory once: it does not depend on the fuzz input, and a factory
+    # that cannot be constructed should fail loudly rather than be swallowed.
+    factory = protocol_factory_class(string_length_limit=1000, container_length_limit=1000)
+
+    def TestOneInput(data):
+        if len(data) < 2:
+            return
+
+        try:
+            # deserialize() takes the raw bytes, so no transport is built here.
+            test_instance = deserialize(FuzzTest(), data, factory)
+            # If deserialization succeeds, serialize it back and parse it again.
+            serialized = serialize(test_instance, factory)
+            deserialized = deserialize(FuzzTest(), serialized, factory)
+            # Explicit raise instead of `assert` so the check survives `python -O`.
+            # NOTE(review): a NaN double field would compare unequal here -- confirm.
+            if not (test_instance == deserialized):
+                raise AssertionError("roundtrip mismatch")
+        except AssertionError:
+            raise  # a failed roundtrip must reach the fuzzer
+        except Exception:
+            # We expect various exceptions during fuzzing
+            pass
+
+    return TestOneInput
+
+def _run_fuzzer(fuzzer_function):
+    """
+    Instrument the already-imported code and run Atheris with the given callback.
+
+    Args:
+        fuzzer_function: The fuzzer function to use
+    """
+    # sys.path was already prepared by the module-level setup_thrift_imports() call.
+    atheris.instrument_all()
+    atheris.Setup(sys.argv, fuzzer_function, enable_python_coverage=True)
+    atheris.Fuzz()
+
+
+def run_roundtrip_fuzzer(protocol_factory_class):
+    """Build the roundtrip callback for a protocol and run it under Atheris.
+
+    Args:
+        protocol_factory_class: The Thrift protocol factory class to use
+    """
+    roundtrip_callback = create_roundtrip_fuzzer(protocol_factory_class)
+    _run_fuzzer(roundtrip_callback)
+
+
+def run_parser_fuzzer(protocol_factory_class):
+    """
+    Build the parse-only callback for a protocol and run it under Atheris.
+
+    Args:
+        protocol_factory_class: The Thrift protocol factory class to use
+    """
+    _run_fuzzer(create_parser_fuzzer(protocol_factory_class))
diff --git a/lib/py/test/fuzz/fuzz_parse_TBinaryProtocol.py b/lib/py/test/fuzz/fuzz_parse_TBinaryProtocol.py
new file mode 100644
index 0000000..a07de28
--- /dev/null
+++ b/lib/py/test/fuzz/fuzz_parse_TBinaryProtocol.py
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from fuzz_common import run_parser_fuzzer
+from thrift.protocol.TBinaryProtocol import TBinaryProtocolFactory
+
+def main():  # entry point: fuzz deserialization using TBinaryProtocolFactory
+    run_parser_fuzzer(TBinaryProtocolFactory)
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/py/test/fuzz/fuzz_parse_TBinaryProtocolAccelerated.py b/lib/py/test/fuzz/fuzz_parse_TBinaryProtocolAccelerated.py
new file mode 100644
index 0000000..7c4f046
--- /dev/null
+++ b/lib/py/test/fuzz/fuzz_parse_TBinaryProtocolAccelerated.py
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from fuzz_common import run_parser_fuzzer
+from thrift.protocol.TBinaryProtocol import TBinaryProtocolAcceleratedFactory
+
+def main():  # entry point: fuzz deserialization using TBinaryProtocolAcceleratedFactory
+    run_parser_fuzzer(TBinaryProtocolAcceleratedFactory)
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/py/test/fuzz/fuzz_parse_TCompactProtocol.py b/lib/py/test/fuzz/fuzz_parse_TCompactProtocol.py
new file mode 100644
index 0000000..ff05ec1
--- /dev/null
+++ b/lib/py/test/fuzz/fuzz_parse_TCompactProtocol.py
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from fuzz_common import run_parser_fuzzer
+from thrift.protocol.TCompactProtocol import TCompactProtocolFactory
+
+def main():  # entry point: fuzz deserialization using TCompactProtocolFactory
+    run_parser_fuzzer(TCompactProtocolFactory)
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/py/test/fuzz/fuzz_parse_TCompactProtocolAccelerated.py b/lib/py/test/fuzz/fuzz_parse_TCompactProtocolAccelerated.py
new file mode 100644
index 0000000..04ab96f
--- /dev/null
+++ b/lib/py/test/fuzz/fuzz_parse_TCompactProtocolAccelerated.py
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from fuzz_common import run_parser_fuzzer
+from thrift.protocol.TCompactProtocol import TCompactProtocolAcceleratedFactory
+
+def main():  # entry point: fuzz deserialization using TCompactProtocolAcceleratedFactory
+    run_parser_fuzzer(TCompactProtocolAcceleratedFactory)
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/py/test/fuzz/fuzz_roundtrip_TBinaryProtocol.py b/lib/py/test/fuzz/fuzz_roundtrip_TBinaryProtocol.py
new file mode 100644
index 0000000..7413707
--- /dev/null
+++ b/lib/py/test/fuzz/fuzz_roundtrip_TBinaryProtocol.py
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from fuzz_common import run_roundtrip_fuzzer
+from thrift.protocol.TBinaryProtocol import TBinaryProtocolFactory
+
+def main():  # entry point: fuzz the roundtrip using TBinaryProtocolFactory
+    run_roundtrip_fuzzer(TBinaryProtocolFactory)
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/py/test/fuzz/fuzz_roundtrip_TBinaryProtocolAccelerated.py b/lib/py/test/fuzz/fuzz_roundtrip_TBinaryProtocolAccelerated.py
new file mode 100644
index 0000000..76f3114
--- /dev/null
+++ b/lib/py/test/fuzz/fuzz_roundtrip_TBinaryProtocolAccelerated.py
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from fuzz_common import run_roundtrip_fuzzer
+from thrift.protocol.TBinaryProtocol import TBinaryProtocolAcceleratedFactory
+
+def main():  # entry point: fuzz the roundtrip using TBinaryProtocolAcceleratedFactory
+    run_roundtrip_fuzzer(TBinaryProtocolAcceleratedFactory)
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/py/test/fuzz/fuzz_roundtrip_TCompactProtocol.py b/lib/py/test/fuzz/fuzz_roundtrip_TCompactProtocol.py
new file mode 100644
index 0000000..0fb9cce
--- /dev/null
+++ b/lib/py/test/fuzz/fuzz_roundtrip_TCompactProtocol.py
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from fuzz_common import run_roundtrip_fuzzer
+from thrift.protocol.TCompactProtocol import TCompactProtocolFactory
+
+def main():  # entry point: fuzz the roundtrip using TCompactProtocolFactory
+    run_roundtrip_fuzzer(TCompactProtocolFactory)
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/py/test/fuzz/fuzz_roundtrip_TCompactProtocolAccelerated.py b/lib/py/test/fuzz/fuzz_roundtrip_TCompactProtocolAccelerated.py
new file mode 100644
index 0000000..0465b2b
--- /dev/null
+++ b/lib/py/test/fuzz/fuzz_roundtrip_TCompactProtocolAccelerated.py
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from fuzz_common import run_roundtrip_fuzzer
+from thrift.protocol.TCompactProtocol import TCompactProtocolAcceleratedFactory
+
+def main():  # entry point: fuzz the roundtrip using TCompactProtocolAcceleratedFactory
+    run_roundtrip_fuzzer(TCompactProtocolAcceleratedFactory)
+
+if __name__ == "__main__":
+    main()