THRIFT-3503 Enable py:utf8string by default

This closes #779
diff --git a/compiler/cpp/src/generate/t_py_generator.cc b/compiler/cpp/src/generate/t_py_generator.cc
index ebaa9c3..3c70248 100644
--- a/compiler/cpp/src/generate/t_py_generator.cc
+++ b/compiler/cpp/src/generate/t_py_generator.cc
@@ -57,6 +57,10 @@
     if (iter != parsed_options.end()) {
       pwarning(0, "new_style is enabled by default, so the option will be removed in the near future.\n");
     }
+    iter = parsed_options.find("utf8strings");
+    if (iter != parsed_options.end()) {
+      pwarning(0, "utf8strings is enabled by default, so the option will be removed in the near future.\n");
+    }
 
     gen_newstyle_ = true;
     iter = parsed_options.find("old_style");
@@ -117,8 +121,8 @@
       throw "at most one of 'twisted' and 'tornado' are allowed";
     }
 
-    iter = parsed_options.find("utf8strings");
-    gen_utf8strings_ = (iter != parsed_options.end());
+    iter = parsed_options.find("no_utf8strings");
+    gen_utf8strings_ = (iter == parsed_options.end());
 
     iter = parsed_options.find("coding");
     if (iter != parsed_options.end()) {
@@ -2596,11 +2600,9 @@
 THRIFT_REGISTER_GENERATOR(
     py,
     "Python",
-    "    new_style:       No effect. Kept for backward compatibility.\n"
-    "    old_style:       Deprecated. Generate old-style classes.\n"
     "    twisted:         Generate Twisted-friendly RPC services.\n"
     "    tornado:         Generate code for use with Tornado.\n"
-    "    utf8strings:     Encode/decode strings using utf8 in the generated code.\n"
+    "    no_utf8strings:  Do not Encode/decode strings using utf8 in the generated code. Basically no effect for Python 3.\n"
     "    coding=CODING:   Add file encoding declare in generated file.\n"
     "    slots:           Generate code using slots for instance members.\n"
     "    dynamic:         Generate dynamic code, less code generated but slower.\n"
@@ -2610,4 +2612,5 @@
     "    dynimport='from foo.bar import CLS'\n"
     "                     Add an import line to generated code to find the dynbase class.\n"
     "    package_prefix='top.package.'\n"
-    "                     Package prefix for generated files.\n")
+    "                     Package prefix for generated files.\n"
+    "    old_style:       Deprecated. Generate old-style classes.\n")
diff --git a/lib/py/src/protocol/fastbinary.c b/lib/py/src/protocol/fastbinary.c
index eaecb8c..337201b 100644
--- a/lib/py/src/protocol/fastbinary.c
+++ b/lib/py/src/protocol/fastbinary.c
@@ -228,7 +228,7 @@
 
 static bool
 is_utf8(PyObject* typeargs) {
-  return PyString_Check(typeargs) && !strcmp(PyString_AS_STRING(typeargs), "UTF8");
+  return PyString_Check(typeargs) && !strncmp(PyString_AS_STRING(typeargs), "UTF8", 4);
 }
 
 /* --- FUNCTIONS TO PARSE STRUCT SPECIFICATOINS --- */
diff --git a/test/py/CMakeLists.txt b/test/py/CMakeLists.txt
index 3756a46..fbc2217 100755
--- a/test/py/CMakeLists.txt
+++ b/test/py/CMakeLists.txt
@@ -22,11 +22,12 @@
             -DTHRIFTCOMPILER=$<TARGET_FILE:thrift-compiler>
             -DMY_PROJECT_DIR=${PROJECT_SOURCE_DIR}
             -DMY_CURRENT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}
+            -DMY_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}
     -P ${CMAKE_CURRENT_SOURCE_DIR}/generate.cmake
 )
 
 add_test(NAME python_test
-    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/RunClientServer.py
+    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/RunClientServer.py --gen-base=${CMAKE_CURRENT_BINARY_DIR}
     DEPENDS python_test_generate
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
 )
diff --git a/test/py/FastbinaryTest.py b/test/py/FastbinaryTest.py
index 0583373..9d258fd 100755
--- a/test/py/FastbinaryTest.py
+++ b/test/py/FastbinaryTest.py
@@ -26,6 +26,8 @@
 # TODO(dreiss): Test error cases.  Check for memory leaks.
 
 import math
+import os
+import sys
 import timeit
 
 from copy import deepcopy
@@ -54,7 +56,7 @@
 ooe1.integer64 = 6000 * 1000 * 1000
 ooe1.double_precision = math.pi
 ooe1.some_characters = "Debug THIS!"
-ooe1.zomg_unicode = "\xd7\n\a\t"
+ooe1.zomg_unicode = u"\xd7\n\a\t"
 
 ooe2 = OneOfEach()
 ooe2.integer16 = 16
@@ -62,11 +64,15 @@
 ooe2.integer64 = 64
 ooe2.double_precision = (math.sqrt(5) + 1) / 2
 ooe2.some_characters = ":R (me going \"rrrr\")"
-ooe2.zomg_unicode = "\xd3\x80\xe2\x85\xae\xce\x9d\x20"\
-                    "\xd0\x9d\xce\xbf\xe2\x85\xbf\xd0\xbe"\
-                    "\xc9\xa1\xd0\xb3\xd0\xb0\xcf\x81\xe2\x84\x8e"\
-                    "\x20\xce\x91\x74\x74\xce\xb1\xe2\x85\xbd\xce\xba"\
-                    "\xc7\x83\xe2\x80\xbc"
+ooe2.zomg_unicode = u"\xd3\x80\xe2\x85\xae\xce\x9d\x20"\
+                    u"\xd0\x9d\xce\xbf\xe2\x85\xbf\xd0\xbe"\
+                    u"\xc9\xa1\xd0\xb3\xd0\xb0\xcf\x81\xe2\x84\x8e"\
+                    u"\x20\xce\x91\x74\x74\xce\xb1\xe2\x85\xbd\xce\xba"\
+                    u"\xc7\x83\xe2\x80\xbc"
+
+if sys.version_info[0] == 2 and os.environ.get('THRIFT_TEST_PY_NO_UTF8STRINGS'):
+  ooe1.zomg_unicode = ooe1.zomg_unicode.encode('utf8')
+  ooe2.zomg_unicode = ooe2.zomg_unicode.encode('utf8')
 
 hm = HolyMoley(**{"big": [], "contain": set(), "bonks": {}})
 hm.big.append(ooe1)
diff --git a/test/py/Makefile.am b/test/py/Makefile.am
index d1d2278..f105737 100755
--- a/test/py/Makefile.am
+++ b/test/py/Makefile.am
@@ -31,8 +31,8 @@
         gen-py-slots/DebugProtoTest/__init__.py \
         gen-py-oldstyle/ThriftTest/__init__.py \
         gen-py-oldstyle/DebugProtoTest/__init__.py \
-        gen-py-oldstyleslots/ThriftTest/__init__.py \
-        gen-py-oldstyleslots/DebugProtoTest/__init__.py \
+        gen-py-no_utf8strings/ThriftTest/__init__.py \
+        gen-py-no_utf8strings/DebugProtoTest/__init__.py \
         gen-py-dynamic/ThriftTest/__init__.py           \
         gen-py-dynamic/DebugProtoTest/__init__.py \
         gen-py-dynamicslots/ThriftTest/__init__.py           \
@@ -68,9 +68,9 @@
 	test -d gen-py-oldstyle || $(MKDIR_P) gen-py-oldstyle
 	$(THRIFT) --gen py:old_style -out gen-py-oldstyle $<
 
-gen-py-oldstyleslots/%/__init__.py: ../%.thrift $(THRIFT)
-	test -d gen-py-oldstyleslots || $(MKDIR_P) gen-py-oldstyleslots
-	$(THRIFT) --gen py:old_style,slots -out gen-py-oldstyleslots $<
+gen-py-no_utf8strings/%/__init__.py: ../%.thrift $(THRIFT)
+	test -d gen-py-no_utf8strings || $(MKDIR_P) gen-py-no_utf8strings
+	$(THRIFT) --gen py:no_utf8strings -out gen-py-no_utf8strings $<
 
 gen-py-dynamic/%/__init__.py: ../%.thrift $(THRIFT)
 	test -d gen-py-dynamic || $(MKDIR_P) gen-py-dynamic
@@ -81,4 +81,4 @@
 	$(THRIFT) --gen py:dynamic,slots -out gen-py-dynamicslots $<
 
 clean-local:
-	$(RM) -r gen-py gen-py-slots gen-py-default gen-py-oldstyle gen-py-oldstyleslots gen-py-dynamic gen-py-dynamicslots
+	$(RM) -r gen-py gen-py-slots gen-py-default gen-py-oldstyle gen-py-no_utf8strings gen-py-dynamic gen-py-dynamicslots
diff --git a/test/py/RunClientServer.py b/test/py/RunClientServer.py
index 8a7ead5..d5ebd6a 100755
--- a/test/py/RunClientServer.py
+++ b/test/py/RunClientServer.py
@@ -72,17 +72,20 @@
     return os.path.join(SCRIPT_DIR, fname)
 
 
-def setup_pypath(dirs):
-  env = copy.copy(os.environ)
+def setup_pypath(libdir, gendir):
+  dirs = [libdir, gendir]
+  env = copy.deepcopy(os.environ)
   pypath = env.get('PYTHONPATH', None)
   if pypath:
     dirs.append(pypath)
   env['PYTHONPATH'] = ':'.join(dirs)
+  if gendir.endswith('gen-py-no_utf8strings'):
+    env['THRIFT_TEST_PY_NO_UTF8STRINGS'] = '1'
   return env
 
 
-def runScriptTest(libdir, genpydir, script):
-  env = setup_pypath([libdir, genpydir])
+def runScriptTest(libdir, genbase, genpydir, script):
+  env = setup_pypath(libdir, os.path.join(genbase, genpydir))
   script_args = [sys.executable, relfile(script)]
   print('\nTesting script: %s\n----' % (' '.join(script_args)))
   ret = subprocess.call(script_args, env=env)
@@ -94,8 +97,8 @@
     raise Exception("Script subprocess failed, retcode=%d, args: %s" % (ret, ' '.join(script_args)))
 
 
-def runServiceTest(libdir, genpydir, server_class, proto, port, use_zlib, use_ssl, verbose):
-  env = setup_pypath([libdir, genpydir])
+def runServiceTest(libdir, genbase, genpydir, server_class, proto, port, use_zlib, use_ssl, verbose):
+  env = setup_pypath(libdir, os.path.join(genbase, genpydir))
   # Build command line arguments
   server_args = [sys.executable, relfile('TestServer.py')]
   cli_args = [sys.executable, relfile('TestClient.py')]
@@ -169,7 +172,8 @@
 
 
 class TestCases(object):
-  def __init__(self, libdir, port, gendirs, servers, verbose):
+  def __init__(self, genbase, libdir, port, gendirs, servers, verbose):
+    self.genbase = genbase
     self.libdir = libdir
     self.port = port
     self.verbose = verbose
@@ -200,7 +204,7 @@
     if self.verbose > 0:
       print('\nTest run #%d:  (includes %s) Server=%s,  Proto=%s,  zlib=%s,  SSL=%s'
             % (test_count, genpydir, try_server, try_proto, with_zlib, with_ssl))
-    runServiceTest(self.libdir, genpydir, try_server, try_proto, self.port, with_zlib, with_ssl, self.verbose)
+    runServiceTest(self.libdir, self.genbase, genpydir, try_server, try_proto, self.port, with_zlib, with_ssl, self.verbose)
     if self.verbose > 0:
       print('OK: Finished (includes %s)  %s / %s proto / zlib=%s / SSL=%s.   %d combinations tested.'
             % (genpydir, try_server, try_proto, with_zlib, with_ssl, test_count))
@@ -232,7 +236,7 @@
               if self.verbose > 0:
                 print('\nTest run #%d:  (includes %s) Server=%s,  Proto=%s,  zlib=%s,  SSL=%s'
                       % (test_count, genpydir, try_server, try_proto, with_zlib, with_ssl))
-              runServiceTest(self.libdir, genpydir, try_server, try_proto, self.port, with_zlib, with_ssl)
+              runServiceTest(self.libdir, self.genbase, genpydir, try_server, try_proto, self.port, with_zlib, with_ssl)
               if self.verbose > 0:
                 print('OK: Finished (includes %s)  %s / %s proto / zlib=%s / SSL=%s.   %d combinations tested.'
                       % (genpydir, try_server, try_proto, with_zlib, with_ssl, test_count))
@@ -250,7 +254,7 @@
   parser = OptionParser()
   parser.add_option('--all', action="store_true", dest='all')
   parser.add_option('--genpydirs', type='string', dest='genpydirs',
-                    default='default,slots,oldstyle,dynamic,dynamicslots',
+                    default='default,slots,oldstyle,no_utf8strings,dynamic,dynamicslots',
                     help='directory extensions for generated code, used as suffixes for \"gen-py-*\" added sys.path for individual tests')
   parser.add_option("--port", type="int", dest="port", default=9090,
                     help="port number for server to listen on")
@@ -262,6 +266,8 @@
                     help="minimal output")
   parser.add_option('-L', '--libdir', dest="libdir", default=default_libdir(),
                     help="directory path that contains Thrift Python library")
+  parser.add_option('--gen-base', dest="gen_base", default=SCRIPT_DIR,
+                    help="directory path that contains Thrift Python library")
   parser.set_defaults(verbose=1)
   options, args = parser.parse_args()
 
@@ -278,7 +284,7 @@
       print('Unavailable server type "%s", please choose one of: %s' % (args[0], servers))
       sys.exit(0)
 
-  tests = TestCases(options.libdir, options.port, generated_dirs, servers, options.verbose)
+  tests = TestCases(options.gen_base, options.libdir, options.port, generated_dirs, servers, options.verbose)
 
   # run tests without a client/server first
   print('----------------')
@@ -288,7 +294,7 @@
   print('----------------')
   for genpydir in generated_dirs:
     for script in SCRIPTS:
-      runScriptTest(options.libdir, genpydir, script)
+      runScriptTest(options.libdir, options.gen_base, genpydir, script)
 
   print('----------------')
   print(' Executing Client/Server tests with various generated code directories')
diff --git a/test/py/TestClient.py b/test/py/TestClient.py
index 8e3dcf8..347329e 100755
--- a/test/py/TestClient.py
+++ b/test/py/TestClient.py
@@ -92,7 +92,7 @@
         Türkçe, Татарча/Tatarça, Українська, اردو, Tiếng Việt, Volapük,
         Walon, Winaray, 吴语, isiXhosa, ייִדיש, Yorùbá, Zeêuws, 中文,
         Bân-lâm-gú, 粵語"""
-    if sys.version_info[0] == 2:
+    if sys.version_info[0] == 2 and os.environ.get('THRIFT_TEST_PY_NO_UTF8STRINGS'):
       s1 = s1.encode('utf8')
       s2 = s2.encode('utf8')
     self.assertEqual(self.client.testString(s1), s1)
@@ -300,7 +300,6 @@
   parser.add_option('--libpydir', type='string', dest='libpydir',
                     help='include this directory in sys.path for locating library code')
   parser.add_option('--genpydir', type='string', dest='genpydir',
-                    default='gen-py',
                     help='include this directory in sys.path for locating generated code')
   parser.add_option("--port", type="int", dest="port",
                     help="connect to server at port")
@@ -325,7 +324,8 @@
   parser.set_defaults(framed=False, http_path=None, verbose=1, host='localhost', port=9090, proto='binary')
   options, args = parser.parse_args()
 
-  sys.path.insert(0, os.path.join(SCRIPT_DIR, options.genpydir))
+  if options.genpydir:
+    sys.path.insert(0, os.path.join(SCRIPT_DIR, options.genpydir))
   if options.libpydir:
     sys.path.insert(0, glob.glob(options.libpydir)[0])
   else:
diff --git a/test/py/generate.cmake b/test/py/generate.cmake
index a0b94d6..44c5357 100644
--- a/test/py/generate.cmake
+++ b/test/py/generate.cmake
@@ -1,7 +1,6 @@
 macro(GENERATE FILENAME GENERATOR OUTPUTDIR)
-  file(MAKE_DIRECTORY ${MY_CURRENT_SOURCE_DIR}/${OUTPUTDIR})
-  execute_process(COMMAND ${THRIFTCOMPILER} --gen ${GENERATOR} -out ${OUTPUTDIR} ${FILENAME}
-                  WORKING_DIRECTORY ${MY_CURRENT_SOURCE_DIR}
+  file(MAKE_DIRECTORY ${MY_CURRENT_BINARY_DIR}/${OUTPUTDIR})
+  execute_process(COMMAND ${THRIFTCOMPILER} --gen ${GENERATOR} -out ${MY_CURRENT_BINARY_DIR}/${OUTPUTDIR} ${FILENAME}
                   RESULT_VARIABLE CMD_RESULT)
   if(CMD_RESULT)
         message(FATAL_ERROR "Error generating ${FILENAME} with generator ${GENERATOR}")
@@ -10,15 +9,14 @@
 
 generate(${MY_PROJECT_DIR}/test/ThriftTest.thrift py gen-py-default)
 generate(${MY_PROJECT_DIR}/test/ThriftTest.thrift py:slots gen-py-slots)
-generate(${MY_PROJECT_DIR}/test/ThriftTest.thrift py:new_style gen-py-newstyle)
-generate(${MY_PROJECT_DIR}/test/ThriftTest.thrift py:new_style,slots gen-py-newstyleslots)
+generate(${MY_PROJECT_DIR}/test/ThriftTest.thrift py:old_style gen-py-oldstyle)
+generate(${MY_PROJECT_DIR}/test/ThriftTest.thrift py:no_utf8strings gen-py-no_utf8strings)
 generate(${MY_PROJECT_DIR}/test/ThriftTest.thrift py:dynamic gen-py-dynamic)
 generate(${MY_PROJECT_DIR}/test/ThriftTest.thrift py:dynamic,slots gen-py-dynamicslots)
 
 generate(${MY_PROJECT_DIR}/test/DebugProtoTest.thrift py gen-py-default)
 generate(${MY_PROJECT_DIR}/test/DebugProtoTest.thrift py:slots gen-py-slots)
-generate(${MY_PROJECT_DIR}/test/DebugProtoTest.thrift py:new_style gen-py-newstyle)
-generate(${MY_PROJECT_DIR}/test/DebugProtoTest.thrift py:new_style,slots gen-py-newstyleslots)
+generate(${MY_PROJECT_DIR}/test/DebugProtoTest.thrift py:old_style gen-py-oldstyle)
+generate(${MY_PROJECT_DIR}/test/DebugProtoTest.thrift py:no_utf8strings gen-py-no_utf8strings)
 generate(${MY_PROJECT_DIR}/test/DebugProtoTest.thrift py:dynamic gen-py-dynamic)
 generate(${MY_PROJECT_DIR}/test/DebugProtoTest.thrift py:dynamic,slots gen-py-dynamicslots)
-