THRIFT-4138: Remove undefined behavior imported from Boost
Client: C++
There is undefined behavior in boost::lexical_cast that was fixed in
https://github.com/boostorg/lexical_cast/issues/21, but that fix is
only available in recent Boost releases. This patch removes all uses
of lexical_cast instead.
That removes the last undefined behavior, so this patch also makes
ubsan.sh unconditionally fail on undefined behavior.
This closes #1232
diff --git a/build/docker/scripts/ubsan.sh b/build/docker/scripts/ubsan.sh
index 6db10f3..d39cc83 100755
--- a/build/docker/scripts/ubsan.sh
+++ b/build/docker/scripts/ubsan.sh
@@ -15,7 +15,7 @@
# undefined casting, aka "vptr".
#
# TODO: fix undefined vptr behavior and turn this option back on.
-export CFLAGS="-fsanitize=undefined -fno-sanitize-recover=undefined -fno-sanitize=vptr"
+export CFLAGS="-fsanitize=undefined -fno-sanitize-recover=undefined"
# Builds without optimization and with debugging symbols for making crash reports more
# readable.
export CFLAGS="${CFLAGS} -O0 -ggdb3"
diff --git a/lib/cpp/src/thrift/TOutput.cpp b/lib/cpp/src/thrift/TOutput.cpp
index 5739d0f..bb46263 100644
--- a/lib/cpp/src/thrift/TOutput.cpp
+++ b/lib/cpp/src/thrift/TOutput.cpp
@@ -18,9 +18,9 @@
*/
#include <thrift/Thrift.h>
+#include <thrift/TToString.h>
#include <cstring>
#include <cstdlib>
-#include <boost/lexical_cast.hpp>
#include <stdarg.h>
#include <stdio.h>
@@ -100,7 +100,7 @@
std::string TOutput::strerror_s(int errno_copy) {
#ifndef HAVE_STRERROR_R
- return "errno = " + boost::lexical_cast<std::string>(errno_copy);
+ return "errno = " + to_string(errno_copy);
#else // HAVE_STRERROR_R
char b_errbuf[1024] = {'\0'};
@@ -112,7 +112,7 @@
if (rv == -1) {
// strerror_r failed. omgwtfbbq.
return "XSI-compliant strerror_r() failed with errno = "
- + boost::lexical_cast<std::string>(errno_copy);
+ + to_string(errno_copy);
}
#endif
// Can anyone prove that explicit cast is probably not necessary
diff --git a/lib/cpp/src/thrift/TToString.h b/lib/cpp/src/thrift/TToString.h
index 5023869..fdf191e 100644
--- a/lib/cpp/src/thrift/TToString.h
+++ b/lib/cpp/src/thrift/TToString.h
@@ -20,20 +20,45 @@
#ifndef _THRIFT_TOSTRING_H_
#define _THRIFT_TOSTRING_H_ 1
-#include <boost/lexical_cast.hpp>
-
-#include <vector>
+#include <cmath>
+#include <limits>
#include <map>
#include <set>
-#include <string>
#include <sstream>
+#include <string>
+#include <vector>
namespace apache {
namespace thrift {
template <typename T>
std::string to_string(const T& t) {
- return boost::lexical_cast<std::string>(t);
+ std::ostringstream o;
+ o << t;
+ return o.str();
+}
+
+// TODO: replace the computations below with std::numeric_limits::max_digits10 once C++11
+// is enabled.
+inline std::string to_string(const float& t) {
+ std::ostringstream o;
+ o.precision(std::ceil(std::numeric_limits<float>::digits * std::log10(2) + 1));
+ o << t;
+ return o.str();
+}
+
+inline std::string to_string(const double& t) {
+ std::ostringstream o;
+ o.precision(std::ceil(std::numeric_limits<double>::digits * std::log10(2) + 1));
+ o << t;
+ return o.str();
+}
+
+inline std::string to_string(const long double& t) {
+ std::ostringstream o;
+ o.precision(std::ceil(std::numeric_limits<long double>::digits * std::log10(2) + 1));
+ o << t;
+ return o.str();
}
template <typename K, typename V>
diff --git a/lib/cpp/src/thrift/protocol/TDebugProtocol.cpp b/lib/cpp/src/thrift/protocol/TDebugProtocol.cpp
index 09b978c..d3c6beb 100644
--- a/lib/cpp/src/thrift/protocol/TDebugProtocol.cpp
+++ b/lib/cpp/src/thrift/protocol/TDebugProtocol.cpp
@@ -19,12 +19,12 @@
#include <thrift/protocol/TDebugProtocol.h>
+#include <thrift/TToString.h>
#include <cassert>
#include <cctype>
#include <cstdio>
#include <stdexcept>
#include <boost/static_assert.hpp>
-#include <boost/lexical_cast.hpp>
using std::string;
@@ -129,7 +129,7 @@
case MAP_VALUE:
return writePlain(" -> ");
case LIST:
- size = writeIndented("[" + boost::lexical_cast<string>(list_idx_.back()) + "] = ");
+ size = writeIndented("[" + to_string(list_idx_.back()) + "] = ");
list_idx_.back()++;
return size;
default:
@@ -223,7 +223,7 @@
const TType fieldType,
const int16_t fieldId) {
// sprintf(id_str, "%02d", fieldId);
- string id_str = boost::lexical_cast<string>(fieldId);
+ string id_str = to_string(fieldId);
if (id_str.length() == 1)
id_str = '0' + id_str;
@@ -248,7 +248,7 @@
bsize += startItem();
bsize += writePlain(
"map<" + fieldTypeName(keyType) + "," + fieldTypeName(valType) + ">"
- "[" + boost::lexical_cast<string>(size) + "] {\n");
+ "[" + to_string(size) + "] {\n");
indentUp();
write_state_.push_back(MAP_KEY);
return bsize;
@@ -269,7 +269,7 @@
bsize += startItem();
bsize += writePlain(
"list<" + fieldTypeName(elemType) + ">"
- "[" + boost::lexical_cast<string>(size) + "] {\n");
+ "[" + to_string(size) + "] {\n");
indentUp();
write_state_.push_back(LIST);
list_idx_.push_back(0);
@@ -292,7 +292,7 @@
bsize += startItem();
bsize += writePlain(
"set<" + fieldTypeName(elemType) + ">"
- "[" + boost::lexical_cast<string>(size) + "] {\n");
+ "[" + to_string(size) + "] {\n");
indentUp();
write_state_.push_back(SET);
return bsize;
@@ -316,19 +316,19 @@
}
uint32_t TDebugProtocol::writeI16(const int16_t i16) {
- return writeItem(boost::lexical_cast<string>(i16));
+ return writeItem(to_string(i16));
}
uint32_t TDebugProtocol::writeI32(const int32_t i32) {
- return writeItem(boost::lexical_cast<string>(i32));
+ return writeItem(to_string(i32));
}
uint32_t TDebugProtocol::writeI64(const int64_t i64) {
- return writeItem(boost::lexical_cast<string>(i64));
+ return writeItem(to_string(i64));
}
uint32_t TDebugProtocol::writeDouble(const double dub) {
- return writeItem(boost::lexical_cast<string>(dub));
+ return writeItem(to_string(dub));
}
uint32_t TDebugProtocol::writeString(const string& str) {
@@ -337,7 +337,7 @@
string to_show = str;
if (to_show.length() > (string::size_type)string_limit_) {
to_show = str.substr(0, string_prefix_size_);
- to_show += "[...](" + boost::lexical_cast<string>(str.length()) + ")";
+ to_show += "[...](" + to_string(str.length()) + ")";
}
string output = "\"";
diff --git a/lib/cpp/src/thrift/protocol/TJSONProtocol.cpp b/lib/cpp/src/thrift/protocol/TJSONProtocol.cpp
index 431e7c4..943d960 100644
--- a/lib/cpp/src/thrift/protocol/TJSONProtocol.cpp
+++ b/lib/cpp/src/thrift/protocol/TJSONProtocol.cpp
@@ -19,9 +19,9 @@
#include <thrift/protocol/TJSONProtocol.h>
-#include <boost/lexical_cast.hpp>
#include <boost/locale.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/sign.hpp>
#include <cmath>
#include <limits>
@@ -31,6 +31,7 @@
#include <thrift/protocol/TBase64Utils.h>
#include <thrift/transport/TTransportException.h>
+#include <thrift/TToString.h>
using namespace apache::thrift::transport;
@@ -503,7 +504,7 @@
template <typename NumberType>
uint32_t TJSONProtocol::writeJSONInteger(NumberType num) {
uint32_t result = context_->write(*trans_);
- std::string val(boost::lexical_cast<std::string>(num));
+ std::string val(to_string(num));
bool escapeNum = context_->escapeNum();
if (escapeNum) {
trans_->write(&kJSONStringDelimiter, 1);
@@ -684,7 +685,7 @@
}
uint32_t TJSONProtocol::writeByte(const int8_t byte) {
- // writeByte() must be handled specially because boost::lexical cast sees
+ // writeByte() must be handled specially because to_string sees
// int8_t as a text type instead of an integer type
return writeJSONInteger((int16_t)byte);
}
@@ -842,6 +843,19 @@
return result;
}
+namespace {
+template <typename T>
+T fromString(const std::string& s) {
+ T t;
+ std::istringstream str(s);
+ str.imbue(std::locale::classic());
+ str >> t;
+ if (str.bad() || !str.eof())
+ throw std::runtime_error(s);
+ return t;
+}
+}
+
// Reads a sequence of characters and assembles them into a number,
// returning them via num
template <typename NumberType>
@@ -853,10 +867,10 @@
std::string str;
result += readJSONNumericChars(str);
try {
- num = boost::lexical_cast<NumberType>(str);
- } catch (boost::bad_lexical_cast e) {
+ num = fromString<NumberType>(str);
+ } catch (const std::runtime_error&) {
throw TProtocolException(TProtocolException::INVALID_DATA,
- "Expected numeric value; got \"" + str + "\"");
+ "Expected numeric value; got \"" + str + "\"");
}
if (context_->escapeNum()) {
result += readJSONSyntaxChar(kJSONStringDelimiter);
@@ -864,18 +878,6 @@
return result;
}
-namespace {
-double stringToDouble(const std::string& s) {
- double d;
- std::istringstream str(s);
- str.imbue(std::locale::classic());
- str >> d;
- if (str.bad() || !str.eof())
- throw std::runtime_error(s);
- return d;
-}
-}
-
// Reads a JSON number or string and interprets it as a double.
uint32_t TJSONProtocol::readJSONDouble(double& num) {
uint32_t result = context_->read(reader_);
@@ -896,7 +898,7 @@
"Numeric data unexpectedly quoted");
}
try {
- num = stringToDouble(str);
+ num = fromString<double>(str);
} catch (std::runtime_error e) {
throw TProtocolException(TProtocolException::INVALID_DATA,
"Expected numeric value; got \"" + str + "\"");
@@ -909,7 +911,7 @@
}
result += readJSONNumericChars(str);
try {
- num = stringToDouble(str);
+ num = fromString<double>(str);
} catch (std::runtime_error e) {
throw TProtocolException(TProtocolException::INVALID_DATA,
"Expected numeric value; got \"" + str + "\"");
diff --git a/lib/cpp/src/thrift/protocol/TJSONProtocol.h b/lib/cpp/src/thrift/protocol/TJSONProtocol.h
index 5834eff..5a94624 100644
--- a/lib/cpp/src/thrift/protocol/TJSONProtocol.h
+++ b/lib/cpp/src/thrift/protocol/TJSONProtocol.h
@@ -87,10 +87,11 @@
* the current implementation is to match as closely as possible the behavior
* of Java's Double.toString(), which has no precision loss. Implementors in
* other languages should strive to achieve that where possible. I have not
- * yet verified whether boost:lexical_cast, which is doing that work for me in
- * C++, loses any precision, but I am leaving this as a future improvement. I
- * may try to provide a C component for this, so that other languages could
- * bind to the same underlying implementation for maximum consistency.
+ * yet verified whether std::istringstream::operator>>, which is doing that
+ * work for me in C++, loses any precision, but I am leaving this as a future
+ * improvement. I may try to provide a C component for this, so that other
+ * languages could bind to the same underlying implementation for maximum
+ * consistency.
*
*/
class TJSONProtocol : public TVirtualProtocol<TJSONProtocol> {
diff --git a/lib/cpp/src/thrift/transport/TSSLSocket.cpp b/lib/cpp/src/thrift/transport/TSSLSocket.cpp
index edcfb9d..b0f9166 100644
--- a/lib/cpp/src/thrift/transport/TSSLSocket.cpp
+++ b/lib/cpp/src/thrift/transport/TSSLSocket.cpp
@@ -36,7 +36,6 @@
#endif
-#include <boost/lexical_cast.hpp>
#include <boost/shared_array.hpp>
#include <openssl/err.h>
#include <openssl/rand.h>
@@ -45,6 +44,7 @@
#include <thrift/concurrency/Mutex.h>
#include <thrift/transport/TSSLSocket.h>
#include <thrift/transport/PlatformSocket.h>
+#include <thrift/TToString.h>
#define OPENSSL_VERSION_NO_THREAD_ID 0x10000000L
@@ -629,7 +629,7 @@
}
struct THRIFT_POLLFD fds[2];
- std::memset(fds, 0, sizeof(fds));
+ memset(fds, 0, sizeof(fds));
fds[0].fd = fdSocket;
fds[0].events = wantRead ? THRIFT_POLLIN : THRIFT_POLLOUT;
@@ -851,7 +851,7 @@
}
}
if (errors.empty()) {
- errors = "error code: " + boost::lexical_cast<string>(errno_copy);
+ errors = "error code: " + to_string(errno_copy);
}
}
diff --git a/lib/cpp/src/thrift/transport/TTransportException.cpp b/lib/cpp/src/thrift/transport/TTransportException.cpp
index 612e7b7..9e15dbd 100644
--- a/lib/cpp/src/thrift/transport/TTransportException.cpp
+++ b/lib/cpp/src/thrift/transport/TTransportException.cpp
@@ -18,13 +18,11 @@
*/
#include <thrift/transport/TTransportException.h>
-#include <boost/lexical_cast.hpp>
#include <cstring>
#include <thrift/thrift-config.h>
using std::string;
-using boost::lexical_cast;
namespace apache {
namespace thrift {
diff --git a/lib/cpp/src/thrift/transport/TZlibTransport.h b/lib/cpp/src/thrift/transport/TZlibTransport.h
index 1e7b5ec..18ca588 100644
--- a/lib/cpp/src/thrift/transport/TZlibTransport.h
+++ b/lib/cpp/src/thrift/transport/TZlibTransport.h
@@ -20,9 +20,9 @@
#ifndef _THRIFT_TRANSPORT_TZLIBTRANSPORT_H_
#define _THRIFT_TRANSPORT_TZLIBTRANSPORT_H_ 1
-#include <boost/lexical_cast.hpp>
#include <thrift/transport/TTransport.h>
#include <thrift/transport/TVirtualTransport.h>
+#include <thrift/TToString.h>
#include <zlib.h>
struct z_stream_s;
@@ -51,7 +51,7 @@
rv += "(no message)";
}
rv += " (status = ";
- rv += boost::lexical_cast<std::string>(status);
+ rv += to_string(status);
rv += ")";
return rv;
}
@@ -105,7 +105,7 @@
int minimum = MIN_DIRECT_DEFLATE_SIZE;
throw TTransportException(TTransportException::BAD_ARGS,
"TZLibTransport: uncompressed write buffer must be at least"
- + boost::lexical_cast<std::string>(minimum) + ".");
+ + to_string(minimum) + ".");
}
try {
diff --git a/lib/rb/lib/thrift/protocol/json_protocol.rb b/lib/rb/lib/thrift/protocol/json_protocol.rb
index 2b8ac15..514bdbf 100644
--- a/lib/rb/lib/thrift/protocol/json_protocol.rb
+++ b/lib/rb/lib/thrift/protocol/json_protocol.rb
@@ -333,7 +333,7 @@
# "NaN" or "Infinity" or "-Infinity".
def write_json_double(num)
@context.write(trans)
- # Normalize output of boost::lexical_cast for NaNs and Infinities
+ # Normalize output of thrift::to_string for NaNs and Infinities
special = false;
if (num.nan?)
special = true;