blob: 56c5811013d9e11ce1c144cb586825ab0b07daea [file] [log] [blame]
David Reissea2cba82009-03-30 21:35:00 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
Mark Sleee9ce01c2007-05-16 02:29:53 +000019
Mark Slee31985722006-05-24 21:45:31 +000020/**
21 * thrift - a lightweight cross-language rpc/serialization tool
22 *
23 * This file contains the main compiler engine for Thrift, which invokes the
24 * scanner/parser to build the thrift object tree. The interface generation
Mark Sleef5377b32006-10-10 01:42:59 +000025 * code for each language lives in a file by the language name under the
26 * generate/ folder, and all parse structures live in parse/
Mark Slee31985722006-05-24 21:45:31 +000027 *
Mark Slee31985722006-05-24 21:45:31 +000028 */
29
David Reissf10984b2008-03-27 21:39:52 +000030#include <cassert>
Mark Slee31985722006-05-24 21:45:31 +000031#include <stdlib.h>
32#include <stdio.h>
33#include <stdarg.h>
David Reiss5ad12602010-08-31 16:51:30 +000034#include <time.h>
Mark Slee31985722006-05-24 21:45:31 +000035#include <string>
David Reiss739cbe22008-04-15 05:44:00 +000036#include <algorithm>
Mark Sleef0712dc2006-10-25 19:03:57 +000037#include <sys/types.h>
38#include <sys/stat.h>
dweatherford65b70752007-10-31 02:18:14 +000039#include <errno.h>
David Reissab55ed52008-06-11 01:17:00 +000040#include <limits.h>
Mark Slee31985722006-05-24 21:45:31 +000041
Ben Craige9576752013-10-11 08:19:16 -050042#ifdef _WIN32
Konrad Grochowski16a23a62014-11-13 15:33:38 +010043#include <windows.h> /* for GetFullPathName */
David Reiss204420f2008-01-11 20:59:03 +000044#endif
45
Mark Sleef0712dc2006-10-25 19:03:57 +000046// Careful: must include globals first for extern definitions
dtmuller052abc32016-07-26 11:58:28 +020047#include "thrift/common.h"
48#include "thrift/globals.h"
Mark Slee31985722006-05-24 21:45:31 +000049
dtmuller052abc32016-07-26 11:58:28 +020050#include "thrift/platform.h"
51#include "thrift/main.h"
52#include "thrift/parse/t_program.h"
53#include "thrift/parse/t_scope.h"
54#include "thrift/generate/t_generator.h"
55#include "thrift/audit/t_audit.h"
Mark Slee31985722006-05-24 21:45:31 +000056
dtmuller052abc32016-07-26 11:58:28 +020057#include "thrift/version.h"
David Reissdd08f6d2008-06-30 20:24:24 +000058
Mark Slee31985722006-05-24 21:45:31 +000059using namespace std;
60
Mark Sleef5377b32006-10-10 01:42:59 +000061/**
62 * Global program tree
63 */
Mark Slee31985722006-05-24 21:45:31 +000064t_program* g_program;
65
Mark Sleef5377b32006-10-10 01:42:59 +000066/**
Mark Sleef0712dc2006-10-25 19:03:57 +000067 * Global scope
68 */
69t_scope* g_scope;
70
71/**
72 * Parent scope to also parse types
73 */
74t_scope* g_parent_scope;
75
76/**
77 * Prefix for putting types in parent scope
78 */
79string g_parent_prefix;
80
81/**
82 * Parsing pass
83 */
84PARSE_MODE g_parse_mode;
85
86/**
87 * Current directory of file being parsed
88 */
89string g_curdir;
90
91/**
92 * Current file being parsed
93 */
94string g_curpath;
95
96/**
Martin Kraemer32c66e12006-11-09 00:06:36 +000097 * Search path for inclusions
98 */
Mark Slee2329a832006-11-09 00:23:30 +000099vector<string> g_incl_searchpath;
Martin Kraemer32c66e12006-11-09 00:06:36 +0000100
101/**
Mark Sleef5377b32006-10-10 01:42:59 +0000102 * Global debug state
103 */
Mark Slee31985722006-05-24 21:45:31 +0000104int g_debug = 0;
105
Mark Sleef5377b32006-10-10 01:42:59 +0000106/**
Bryan Duxburya145b4d2009-04-03 17:29:25 +0000107 * Strictness level
108 */
109int g_strict = 127;
110
111/**
Mark Sleef0712dc2006-10-25 19:03:57 +0000112 * Warning level
113 */
114int g_warn = 1;
115
116/**
117 * Verbose output
118 */
119int g_verbose = 0;
120
121/**
Mark Sleef5377b32006-10-10 01:42:59 +0000122 * Global time string
123 */
Mark Slee31985722006-05-24 21:45:31 +0000124char* g_time_str;
125
Mark Slee31985722006-05-24 21:45:31 +0000126/**
David Reisscbd4bac2007-08-14 17:12:33 +0000127 * The last parsed doctext comment.
128 */
129char* g_doctext;
130
131/**
Jens Geyere8379b52014-01-25 00:59:45 +0100132 * The First doctext comment
133 */
134char* g_program_doctext_candidate;
Jens Geyere8379b52014-01-25 00:59:45 +0100135
David Reisscbd4bac2007-08-14 17:12:33 +0000136/**
Bryan Duxburyc7206a42011-08-17 23:17:04 +0000137 * Whether or not negative field keys are accepted.
138 */
139int g_allow_neg_field_keys;
140
141/**
Roger Meier887ff752011-08-19 11:25:39 +0000142 * Whether or not 64-bit constants will generate a warning.
143 */
144int g_allow_64bit_consts = 0;
145
146/**
Mark Sleef0712dc2006-10-25 19:03:57 +0000147 * Flags to control code generation
148 */
Mark Sleef0712dc2006-10-25 19:03:57 +0000149bool gen_recurse = false;
150
151/**
Ben Craig262cfb42015-07-08 20:37:15 -0500152 * Flags to control thrift audit
153 */
154bool g_audit = false;
155
156/**
157 * Flag to control return status
158 */
159bool g_return_failure = false;
160bool g_audit_fatal = true;
Nobuaki Sukegawa11da87e2016-09-10 14:02:19 +0900161bool g_generator_failure = false;
Ben Craig262cfb42015-07-08 20:37:15 -0500162
/**
 * Resolves a path into resolved_path.
 *
 * Win32 has no realpath, so there the full path is obtained with
 * GetFullPathNameA; if that call fails or the result does not fit into
 * MAX_PATH, the input path is copied unchanged. In both cases backslashes
 * are converted to forward slashes and resolved_path is returned.
 * On every other platform this simply forwards to realpath.
 *
 * @param path          Path to resolve
 * @param resolved_path Caller-provided buffer receiving the result
 * @return resolved_path on Win32, otherwise whatever realpath returns
 */
char* saferealpath(const char* path, char* resolved_path) {
#ifdef _WIN32
  char full_path[MAX_PATH];
  char* file_part;
  const DWORD written = GetFullPathNameA(path, MAX_PATH, full_path, &file_part);
  const bool resolved = (written != 0) && !(written > MAX_PATH - 1);
  strcpy(resolved_path, resolved ? full_path : path);

  // The rest of the compiler only understands forward slashes.
  for (char* cursor = resolved_path; *cursor != '\0'; ++cursor) {
    if (*cursor == '\\') {
      *cursor = '/';
    }
  }
  return resolved_path;
#else
  return realpath(path, resolved_path);
#endif
}
191
/**
 * Checks that dir_name exists and is a directory.
 *
 * A diagnostic is written to stderr when the path cannot be inspected or
 * when it exists but is not a directory.
 *
 * @param dir_name Path of the candidate output directory
 * @return true if dir_name is an existing directory, false otherwise
 */
bool check_is_directory(const char* dir_name) {
#ifdef _WIN32
  const DWORD attrs = ::GetFileAttributesA(dir_name);
  if (attrs == INVALID_FILE_ATTRIBUTES) {
    fprintf(stderr,
            "Output directory %s is unusable: GetLastError() = %ld\n",
            dir_name,
            GetLastError());
    return false;
  }
  const bool is_dir = (attrs & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY;
#else
  struct stat info;
  if (stat(dir_name, &info) < 0) {
    fprintf(stderr, "Output directory %s is unusable: %s\n", dir_name, strerror(errno));
    return false;
  }
  const bool is_dir = S_ISDIR(info.st_mode);
#endif

  if (!is_dir) {
    fprintf(stderr, "Output directory %s exists but is not a directory\n", dir_name);
  }
  return is_dir;
}
David Reiss204420f2008-01-11 20:59:03 +0000220
221/**
Mark Slee31985722006-05-24 21:45:31 +0000222 * Report an error to the user. This is called yyerror for historical
223 * reasons (lex and yacc expect the error reporting routine to be called
224 * this). Call this function to report any errors to the user.
225 * yyerror takes printf style arguments.
226 *
227 * @param fmt C format string followed by additional arguments
228 */
David Reiss0babe402008-06-10 22:56:12 +0000229void yyerror(const char* fmt, ...) {
Mark Slee31985722006-05-24 21:45:31 +0000230 va_list args;
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100231 fprintf(stderr, "[ERROR:%s:%d] (last token was '%s')\n", g_curpath.c_str(), yylineno, yytext);
Mark Slee31985722006-05-24 21:45:31 +0000232
233 va_start(args, fmt);
234 vfprintf(stderr, fmt, args);
235 va_end(args);
236
237 fprintf(stderr, "\n");
238}
239
240/**
241 * Prints a debug message from the parser.
242 *
243 * @param fmt C format string followed by additional arguments
244 */
David Reiss0babe402008-06-10 22:56:12 +0000245void pdebug(const char* fmt, ...) {
Mark Slee31985722006-05-24 21:45:31 +0000246 if (g_debug == 0) {
247 return;
248 }
249 va_list args;
Mark Slee30152872006-11-28 01:24:07 +0000250 printf("[PARSE:%d] ", yylineno);
Mark Sleef0712dc2006-10-25 19:03:57 +0000251 va_start(args, fmt);
252 vprintf(fmt, args);
253 va_end(args);
254 printf("\n");
255}
256
257/**
258 * Prints a verbose output mode message
259 *
260 * @param fmt C format string followed by additional arguments
261 */
David Reiss0babe402008-06-10 22:56:12 +0000262void pverbose(const char* fmt, ...) {
Mark Sleef0712dc2006-10-25 19:03:57 +0000263 if (g_verbose == 0) {
264 return;
265 }
266 va_list args;
267 va_start(args, fmt);
268 vprintf(fmt, args);
269 va_end(args);
270}
271
272/**
273 * Prints a warning message
274 *
275 * @param fmt C format string followed by additional arguments
276 */
David Reiss0babe402008-06-10 22:56:12 +0000277void pwarning(int level, const char* fmt, ...) {
Mark Sleef0712dc2006-10-25 19:03:57 +0000278 if (g_warn < level) {
279 return;
280 }
281 va_list args;
282 printf("[WARNING:%s:%d] ", g_curpath.c_str(), yylineno);
Mark Slee31985722006-05-24 21:45:31 +0000283 va_start(args, fmt);
284 vprintf(fmt, args);
285 va_end(args);
286 printf("\n");
287}
288
289/**
290 * Prints a failure message and exits
291 *
292 * @param fmt C format string followed by additional arguments
293 */
Mark Slee30152872006-11-28 01:24:07 +0000294void failure(const char* fmt, ...) {
Mark Slee2c44d202007-05-16 02:18:07 +0000295 va_list args;
Mark Sleef0712dc2006-10-25 19:03:57 +0000296 fprintf(stderr, "[FAILURE:%s:%d] ", g_curpath.c_str(), yylineno);
Mark Slee31985722006-05-24 21:45:31 +0000297 va_start(args, fmt);
298 vfprintf(stderr, fmt, args);
299 va_end(args);
300 printf("\n");
301 exit(1);
302}
303
/**
 * Converts a string filename into a thrift program name: everything up to
 * and including the last '/' is dropped, then everything from the last '.'
 * onwards is dropped.
 */
std::string program_name(std::string filename) {
  const std::string::size_type last_slash = filename.rfind('/');
  if (last_slash != std::string::npos) {
    filename.erase(0, last_slash + 1);
  }

  const std::string::size_type last_dot = filename.rfind('.');
  if (last_dot != std::string::npos) {
    filename.erase(last_dot);
  }
  return filename;
}
318
/**
 * Gets the directory path of a filename: the part before the last '/',
 * or "." when the filename contains no slash at all.
 */
std::string directory_name(std::string filename) {
  const std::string::size_type last_slash = filename.rfind('/');
  return (last_slash == std::string::npos) ? std::string(".") : filename.substr(0, last_slash);
}
330
331/**
332 * Finds the appropriate file path for the given filename
333 */
334string include_file(string filename) {
335 // Absolute path? Just try that
Martin Kraemer32c66e12006-11-09 00:06:36 +0000336 if (filename[0] == '/') {
337 // Realpath!
Ben Craige9576752013-10-11 08:19:16 -0500338 char rp[THRIFT_PATH_MAX];
Nobuaki Sukegawad479e232016-02-28 11:28:19 +0900339 // cppcheck-suppress uninitvar
zeshuai00726681fb2020-06-03 17:24:38 +0800340 if (saferealpath(filename.c_str(), rp) == nullptr) {
Martin Kraemer32c66e12006-11-09 00:06:36 +0000341 pwarning(0, "Cannot open include file %s\n", filename.c_str());
342 return std::string();
343 }
Mark Slee2c44d202007-05-16 02:18:07 +0000344
345 // Stat this file
Martin Kraemer32c66e12006-11-09 00:06:36 +0000346 struct stat finfo;
347 if (stat(rp, &finfo) == 0) {
348 return rp;
349 }
350 } else { // relative path, start searching
351 // new search path with current dir global
352 vector<string> sp = g_incl_searchpath;
353 sp.insert(sp.begin(), g_curdir);
Mark Slee2c44d202007-05-16 02:18:07 +0000354
Martin Kraemer32c66e12006-11-09 00:06:36 +0000355 // iterate through paths
356 vector<string>::iterator it;
357 for (it = sp.begin(); it != sp.end(); it++) {
358 string sfilename = *(it) + "/" + filename;
Mark Slee2c44d202007-05-16 02:18:07 +0000359
Martin Kraemer32c66e12006-11-09 00:06:36 +0000360 // Realpath!
Ben Craige9576752013-10-11 08:19:16 -0500361 char rp[THRIFT_PATH_MAX];
Nobuaki Sukegawad479e232016-02-28 11:28:19 +0900362 // cppcheck-suppress uninitvar
zeshuai00726681fb2020-06-03 17:24:38 +0800363 if (saferealpath(sfilename.c_str(), rp) == nullptr) {
Martin Kraemer32c66e12006-11-09 00:06:36 +0000364 continue;
365 }
Mark Slee2c44d202007-05-16 02:18:07 +0000366
Martin Kraemer32c66e12006-11-09 00:06:36 +0000367 // Stat this files
368 struct stat finfo;
369 if (stat(rp, &finfo) == 0) {
370 return rp;
371 }
372 }
Mark Sleef0712dc2006-10-25 19:03:57 +0000373 }
Mark Slee2c44d202007-05-16 02:18:07 +0000374
Mark Sleef0712dc2006-10-25 19:03:57 +0000375 // Uh oh
Yuxuan 'fishy' Wang438fc822024-07-25 09:56:55 -0700376 if (g_strict >= 192) {
377 // On strict mode this should be failure instead of warning
378 failure("Could not find include file %s", filename.c_str());
379 } else {
380 pwarning(0, "Could not find include file %s\n", filename.c_str());
381 }
Mark Sleef0712dc2006-10-25 19:03:57 +0000382 return std::string();
383}
384
385/**
David Reisscbd4bac2007-08-14 17:12:33 +0000386 * Clears any previously stored doctext string.
387 * Also prints a warning if we are discarding information.
388 */
389void clear_doctext() {
zeshuai00726681fb2020-06-03 17:24:38 +0800390 if (g_doctext != nullptr) {
David Reisscbd4bac2007-08-14 17:12:33 +0000391 pwarning(2, "Uncaptured doctext at on line %d.", g_doctext_lineno);
392 }
393 free(g_doctext);
zeshuai00726681fb2020-06-03 17:24:38 +0800394 g_doctext = nullptr;
David Reisscbd4bac2007-08-14 17:12:33 +0000395}
396
397/**
Jens Geyere8379b52014-01-25 00:59:45 +0100398 * Reset program doctext information after processing a file
399 */
400void reset_program_doctext_info() {
zeshuai00726681fb2020-06-03 17:24:38 +0800401 if (g_program_doctext_candidate != nullptr) {
Jens Geyere8379b52014-01-25 00:59:45 +0100402 free(g_program_doctext_candidate);
zeshuai00726681fb2020-06-03 17:24:38 +0800403 g_program_doctext_candidate = nullptr;
Jens Geyere8379b52014-01-25 00:59:45 +0100404 }
405 g_program_doctext_lineno = 0;
406 g_program_doctext_status = INVALID;
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100407 pdebug("%s", "program doctext set to INVALID");
Jens Geyere8379b52014-01-25 00:59:45 +0100408}
409
410/**
411 * We are sure the program doctext candidate is really the program doctext.
412 */
413void declare_valid_program_doctext() {
zeshuai00726681fb2020-06-03 17:24:38 +0800414 if ((g_program_doctext_candidate != nullptr) && (g_program_doctext_status == STILL_CANDIDATE)) {
Roger Meier4f4b15b2014-11-05 16:51:04 +0100415 g_program_doctext_status = ABSOLUTELY_SURE;
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100416 pdebug("%s", "program doctext set to ABSOLUTELY_SURE");
Jens Geyer813749d2014-01-31 23:42:57 +0100417 } else {
Roger Meier4f4b15b2014-11-05 16:51:04 +0100418 g_program_doctext_status = NO_PROGRAM_DOCTEXT;
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100419 pdebug("%s", "program doctext set to NO_PROGRAM_DOCTEXT");
Jens Geyere8379b52014-01-25 00:59:45 +0100420 }
421}
422
/**
 * Cleans up text commonly found in doxygen-like comments
 *
 * Carriage returns are removed, a common "<whitespace>*" prefix on the lines
 * after the first is stripped, the minimum common indentation of those lines
 * is removed, trailing whitespace is trimmed and an empty first line is
 * dropped. Every remaining line is terminated with '\n'.
 *
 * Warning: if you mix tabs and spaces in a non-uniform way,
 * you will get what you deserve.
 *
 * Ownership: doctext must point to a buffer obtained from malloc/strdup.
 * The function consumes it: the result is either written back into the same
 * buffer or, when it does not fit, the buffer is freed and replaced by a
 * new strdup'ed one. Callers must use only the returned pointer afterwards.
 *
 * @param doctext Heap-allocated, NUL-terminated raw comment text (may be null)
 * @return the cleaned text, or nullptr if doctext is null or contains
 *         nothing but whitespace (the input buffer is released in that case)
 */
char* clean_up_doctext(char* doctext) {
  if (doctext == nullptr) {
    return nullptr;
  }

  const std::string::size_type npos = std::string::npos;

  // Convert to C++ string, and remove Windows's carriage returns.
  std::string docstring = doctext;
  docstring.erase(std::remove(docstring.begin(), docstring.end(), '\r'), docstring.end());

  // Separate into lines.
  std::vector<std::string> lines;
  std::string::size_type line_start = 0;
  while (true) {
    const std::string::size_type newline = docstring.find('\n', line_start);
    if (newline == npos) {
      // First bit of cleaning. If the last line is only whitespace, drop it.
      if (docstring.find_first_not_of(" \t", line_start) != npos) {
        lines.push_back(docstring.substr(line_start));
      }
      break;
    }
    lines.push_back(docstring.substr(line_start, newline - line_start));
    line_start = newline + 1;
  }

  // A very profound docstring.
  if (lines.empty()) {
    // The buffer is consumed on every other path, so release it here as well
    // instead of leaking it.
    free(doctext);
    return nullptr;
  }

  // Clear leading whitespace from the first line.
  lines.front().erase(0, lines.front().find_first_not_of(" \t"));

  // If every nonblank line after the first has the same number of spaces/tabs,
  // then a star, remove them.
  bool have_prefix = true;
  bool found_prefix = false;
  std::string::size_type prefix_len = 0;
  std::vector<std::string>::iterator line;
  for (line = lines.begin() + 1; line != lines.end(); ++line) {
    if (line->empty()) {
      continue;
    }

    const std::string::size_type first_nonwhite = line->find_first_not_of(" \t");
    if (first_nonwhite == npos) {
      // Whitespace-only line. Truncate it.
      line->clear();
      continue;
    }
    if ((*line)[first_nonwhite] != '*') {
      // A line that does not start with a star: there is no common prefix.
      have_prefix = false;
      break;
    }
    if (!found_prefix) {
      // The first starred line defines the expected indentation.
      found_prefix = true;
      prefix_len = first_nonwhite;
    } else if (first_nonwhite != prefix_len) {
      // The pattern has been broken.
      have_prefix = false;
      break;
    }
  }

  // If our prefix survived, delete it from every line.
  if (have_prefix) {
    // Get the star too.
    prefix_len++;
    for (line = lines.begin() + 1; line != lines.end(); ++line) {
      line->erase(0, prefix_len);
    }
  }

  // Now delete the minimum amount of leading whitespace from each line.
  prefix_len = npos;
  for (line = lines.begin() + 1; line != lines.end(); ++line) {
    if (line->empty()) {
      continue;
    }
    const std::string::size_type first_nonwhite = line->find_first_not_of(" \t");
    if (first_nonwhite != npos && (prefix_len == npos || first_nonwhite < prefix_len)) {
      prefix_len = first_nonwhite;
    }
  }

  // If a common indentation was found, delete it from every line.
  if (prefix_len != npos) {
    for (line = lines.begin() + 1; line != lines.end(); ++line) {
      line->erase(0, prefix_len);
    }
  }

  // Remove trailing whitespace from every line. Lines consisting solely of
  // whitespace are left untouched here, exactly as before.
  for (line = lines.begin(); line != lines.end(); ++line) {
    const std::string::size_type last_nonwhite = line->find_last_not_of(" \t");
    if (last_nonwhite != npos) {
      line->erase(last_nonwhite + 1);
    }
  }

  // If the first line is empty, remove it.
  // Don't do this earlier because a lot of steps skip the first line.
  if (lines.front().empty()) {
    lines.erase(lines.begin());
  }

  // Now rejoin the lines and copy them back into doctext.
  docstring.clear();
  for (line = lines.begin(); line != lines.end(); ++line) {
    docstring += *line;
    docstring += '\n';
  }

  // The result is usually shorter than the input, but the appended newlines
  // can make it longer (see THRIFT-1755), in which case a new buffer is needed.
  if (docstring.length() <= strlen(doctext)) {
    strcpy(doctext, docstring.c_str());
  } else {
    free(doctext); // too short
    doctext = strdup(docstring.c_str());
  }
  return doctext;
}
556
557/** Set to true to debug docstring parsing */
558static bool dump_docs = false;
559
560/**
561 * Dumps docstrings to stdout
David Reisscdffe262007-08-14 17:12:31 +0000562 * Only works for top-level definitions and the whole program doc
563 * (i.e., not enum constants, struct fields, or functions.
David Reiss1ac05802007-07-30 22:00:27 +0000564 */
565void dump_docstrings(t_program* program) {
David Reisscdffe262007-08-14 17:12:31 +0000566 string progdoc = program->get_doc();
David Reissc2532a92007-07-30 23:46:11 +0000567 if (!progdoc.empty()) {
568 printf("Whole program doc:\n%s\n", progdoc.c_str());
569 }
David Reiss1ac05802007-07-30 22:00:27 +0000570 const vector<t_typedef*>& typedefs = program->get_typedefs();
571 vector<t_typedef*>::const_iterator t_iter;
572 for (t_iter = typedefs.begin(); t_iter != typedefs.end(); ++t_iter) {
573 t_typedef* td = *t_iter;
574 if (td->has_doc()) {
David Reisscdffe262007-08-14 17:12:31 +0000575 printf("typedef %s:\n%s\n", td->get_name().c_str(), td->get_doc().c_str());
576 }
577 }
578 const vector<t_enum*>& enums = program->get_enums();
579 vector<t_enum*>::const_iterator e_iter;
580 for (e_iter = enums.begin(); e_iter != enums.end(); ++e_iter) {
581 t_enum* en = *e_iter;
582 if (en->has_doc()) {
583 printf("enum %s:\n%s\n", en->get_name().c_str(), en->get_doc().c_str());
584 }
585 }
586 const vector<t_const*>& consts = program->get_consts();
587 vector<t_const*>::const_iterator c_iter;
588 for (c_iter = consts.begin(); c_iter != consts.end(); ++c_iter) {
589 t_const* co = *c_iter;
590 if (co->has_doc()) {
591 printf("const %s:\n%s\n", co->get_name().c_str(), co->get_doc().c_str());
592 }
593 }
594 const vector<t_struct*>& structs = program->get_structs();
595 vector<t_struct*>::const_iterator s_iter;
596 for (s_iter = structs.begin(); s_iter != structs.end(); ++s_iter) {
597 t_struct* st = *s_iter;
598 if (st->has_doc()) {
599 printf("struct %s:\n%s\n", st->get_name().c_str(), st->get_doc().c_str());
600 }
601 }
602 const vector<t_struct*>& xceptions = program->get_xceptions();
603 vector<t_struct*>::const_iterator x_iter;
604 for (x_iter = xceptions.begin(); x_iter != xceptions.end(); ++x_iter) {
605 t_struct* xn = *x_iter;
606 if (xn->has_doc()) {
607 printf("xception %s:\n%s\n", xn->get_name().c_str(), xn->get_doc().c_str());
608 }
609 }
610 const vector<t_service*>& services = program->get_services();
611 vector<t_service*>::const_iterator v_iter;
612 for (v_iter = services.begin(); v_iter != services.end(); ++v_iter) {
613 t_service* sv = *v_iter;
614 if (sv->has_doc()) {
615 printf("service %s:\n%s\n", sv->get_name().c_str(), sv->get_doc().c_str());
David Reiss1ac05802007-07-30 22:00:27 +0000616 }
617 }
618}
619
620/**
Jens Geyer6fe77e82014-03-16 16:48:53 +0200621 * Emits a warning on list<byte>, binary type is typically a much better choice.
622 */
623void check_for_list_of_bytes(t_type* list_elem_type) {
zeshuai00726681fb2020-06-03 17:24:38 +0800624 if ((g_parse_mode == PROGRAM) && (list_elem_type != nullptr) && list_elem_type->is_base_type()) {
Jens Geyer6fe77e82014-03-16 16:48:53 +0200625 t_base_type* tbase = (t_base_type*)list_elem_type;
Jens Geyer40c28d32015-10-20 23:13:02 +0200626 if (tbase->get_base() == t_base_type::TYPE_I8) {
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100627 pwarning(1, "Consider using the more efficient \"binary\" type instead of \"list<byte>\".");
Jens Geyer6fe77e82014-03-16 16:48:53 +0200628 }
629 }
630}
631
Jens Geyer40c28d32015-10-20 23:13:02 +0200632static bool g_byte_warning_emitted = false;
633
634/**
635 * Emits a one-time warning on byte type, promoting the new i8 type instead
636 */
637void emit_byte_type_warning() {
Konrad Grochowski7f4be5f2015-11-05 20:23:11 +0100638 if (!g_byte_warning_emitted) {
639 pwarning(1,
640 "The \"byte\" type is a compatibility alias for \"i8\". Use \"i8\" to emphasize the "
641 "signedness of this type.\n");
642 g_byte_warning_emitted = true;
643 }
Jens Geyer40c28d32015-10-20 23:13:02 +0200644}
645
David Reiss18bf22d2007-08-28 20:49:17 +0000646/**
Jens Geyer73880372015-11-14 15:21:57 +0100647 * Prints deprecation notice for old NS declarations that are no longer supported
zeshuai00726681fb2020-06-03 17:24:38 +0800648 * If new_form is nullptr, old_form is assumed to be a language identifier, such as "cpp"
649 * If new_form is not nullptr, both arguments are used exactly as given
Jens Geyer73880372015-11-14 15:21:57 +0100650 */
Jens Geyereb5f1172015-12-11 20:58:45 +0100651void error_unsupported_namespace_decl(const char* old_form, const char* new_form) {
652 const char* remainder = "";
zeshuai00726681fb2020-06-03 17:24:38 +0800653 if( new_form == nullptr) {
Jens Geyer73880372015-11-14 15:21:57 +0100654 new_form = old_form;
655 remainder = "_namespace";
656 }
657 failure("Unsupported declaration '%s%s'. Use 'namespace %s' instead.", old_form, remainder, new_form);
658}
659
660/**
David Reissdd08f6d2008-06-30 20:24:24 +0000661 * Prints the version number
662 */
663void version() {
Jens Geyerb5fe1db2021-02-11 22:49:49 +0100664 printf("Thrift version %s\n", THRIFT_VERSION);
David Reissdd08f6d2008-06-30 20:24:24 +0000665}
666
/**
 * Display the usage message on stderr and then exit with an error code (1).
 */
void usage() {
  fputs("Usage: thrift [options] file\n\n"
        "Use thrift -help for a list of options\n",
        stderr);
  exit(1);
}
675
676/**
677 * Diplays the help message and then exits with an error code.
678 */
679void help() {
Mark Sleeb15a68b2006-06-07 06:46:24 +0000680 fprintf(stderr, "Usage: thrift [options] file\n");
681 fprintf(stderr, "Options:\n");
David Reissdd08f6d2008-06-30 20:24:24 +0000682 fprintf(stderr, " -version Print the compiler version\n");
dweatherford65b70752007-10-31 02:18:14 +0000683 fprintf(stderr, " -o dir Set the output directory for gen-* packages\n");
684 fprintf(stderr, " (default: current directory)\n");
Bryan Duxburybdca9f62011-03-01 19:53:07 +0000685 fprintf(stderr, " -out dir Set the ouput location for generated files.\n");
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100686 fprintf(stderr, " (no gen-* folder will be created)\n");
David Reissd779cbe2007-08-31 01:42:55 +0000687 fprintf(stderr, " -I dir Add a directory to the list of directories\n");
Mark Slee227ac2c2007-03-07 05:46:50 +0000688 fprintf(stderr, " searched for include directives\n");
Mark Slee2329a832006-11-09 00:23:30 +0000689 fprintf(stderr, " -nowarn Suppress all compiler warnings (BAD!)\n");
690 fprintf(stderr, " -strict Strict compiler warnings on\n");
691 fprintf(stderr, " -v[erbose] Verbose mode\n");
692 fprintf(stderr, " -r[ecurse] Also generate included files\n");
693 fprintf(stderr, " -debug Parse debug trace to stdout\n");
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100694 fprintf(stderr,
695 " --allow-neg-keys Allow negative field keys (Used to "
Bryan Duxburyc7206a42011-08-17 23:17:04 +0000696 "preserve protocol\n");
697 fprintf(stderr, " compatibility with older .thrift files)\n");
Roger Meier887ff752011-08-19 11:25:39 +0000698 fprintf(stderr, " --allow-64bit-consts Do not print warnings about using 64-bit constants\n");
David Reissbd0db882008-02-27 01:54:51 +0000699 fprintf(stderr, " --gen STR Generate code with a dynamically-registered generator.\n");
Jens Geyere8c51ed2014-04-18 02:27:57 +0200700 fprintf(stderr, " STR has the form language[:key1=val1[,key2[,key3=val3]]].\n");
David Reissbd0db882008-02-27 01:54:51 +0000701 fprintf(stderr, " Keys and values are options passed to the generator.\n");
702 fprintf(stderr, " Many options will not require values.\n");
703 fprintf(stderr, "\n");
Ben Craig262cfb42015-07-08 20:37:15 -0500704 fprintf(stderr, "Options related to audit operation\n");
705 fprintf(stderr, " --audit OldFile Old Thrift file to be audited with 'file'\n");
706 fprintf(stderr, " -Iold dir Add a directory to the list of directories\n");
707 fprintf(stderr, " searched for include directives for old thrift file\n");
708 fprintf(stderr, " -Inew dir Add a directory to the list of directories\n");
709 fprintf(stderr, " searched for include directives for new thrift file\n");
710 fprintf(stderr, "\n");
David Reissbd0db882008-02-27 01:54:51 +0000711 fprintf(stderr, "Available generators (and options):\n");
712
713 t_generator_registry::gen_map_t gen_map = t_generator_registry::get_generator_map();
714 t_generator_registry::gen_map_t::iterator iter;
715 for (iter = gen_map.begin(); iter != gen_map.end(); ++iter) {
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100716 fprintf(stderr,
717 " %s (%s):\n",
718 iter->second->get_short_name().c_str(),
719 iter->second->get_long_name().c_str());
David Reissbd0db882008-02-27 01:54:51 +0000720 fprintf(stderr, "%s", iter->second->get_documentation().c_str());
721 }
Mark Slee31985722006-05-24 21:45:31 +0000722 exit(1);
723}
724
725/**
Mark Slee30152872006-11-28 01:24:07 +0000726 * You know, when I started working on Thrift I really thought it wasn't going
727 * to become a programming language because it was just a generator and it
728 * wouldn't need runtime type information and all that jazz. But then we
729 * decided to add constants, and all of a sudden that means runtime type
730 * validation and inference, except the "runtime" is the code generator
David Reiss3bb5e052010-01-25 19:31:31 +0000731 * runtime.
Mark Slee30152872006-11-28 01:24:07 +0000732 */
733void validate_const_rec(std::string name, t_type* type, t_const_value* value) {
734 if (type->is_void()) {
735 throw "type error: cannot declare a void const: " + name;
736 }
737
738 if (type->is_base_type()) {
739 t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
740 switch (tbase) {
741 case t_base_type::TYPE_STRING:
742 if (value->get_type() != t_const_value::CV_STRING) {
743 throw "type error: const \"" + name + "\" was declared as string";
744 }
745 break;
Jens Geyer62445c12022-06-29 00:00:00 +0200746 case t_base_type::TYPE_UUID:
747 if (value->get_type() != t_const_value::CV_STRING) {
748 throw "type error: const \"" + name + "\" was declared as uuid";
749 }
Jens Geyer0b1e9512022-10-14 21:46:37 +0200750 value->set_uuid(value->get_uuid()); // validates constant
Jens Geyer62445c12022-06-29 00:00:00 +0200751 break;
Mark Slee30152872006-11-28 01:24:07 +0000752 case t_base_type::TYPE_BOOL:
753 if (value->get_type() != t_const_value::CV_INTEGER) {
754 throw "type error: const \"" + name + "\" was declared as bool";
755 }
756 break;
Jens Geyer40c28d32015-10-20 23:13:02 +0200757 case t_base_type::TYPE_I8:
Mark Slee30152872006-11-28 01:24:07 +0000758 if (value->get_type() != t_const_value::CV_INTEGER) {
759 throw "type error: const \"" + name + "\" was declared as byte";
760 }
761 break;
762 case t_base_type::TYPE_I16:
763 if (value->get_type() != t_const_value::CV_INTEGER) {
764 throw "type error: const \"" + name + "\" was declared as i16";
765 }
766 break;
767 case t_base_type::TYPE_I32:
768 if (value->get_type() != t_const_value::CV_INTEGER) {
769 throw "type error: const \"" + name + "\" was declared as i32";
770 }
771 break;
772 case t_base_type::TYPE_I64:
773 if (value->get_type() != t_const_value::CV_INTEGER) {
774 throw "type error: const \"" + name + "\" was declared as i64";
775 }
776 break;
777 case t_base_type::TYPE_DOUBLE:
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100778 if (value->get_type() != t_const_value::CV_INTEGER
779 && value->get_type() != t_const_value::CV_DOUBLE) {
Mark Slee30152872006-11-28 01:24:07 +0000780 throw "type error: const \"" + name + "\" was declared as double";
781 }
782 break;
783 default:
David Reissdd7796f2007-08-28 21:09:06 +0000784 throw "compiler error: no const of base type " + t_base_type::t_base_name(tbase) + name;
Mark Slee30152872006-11-28 01:24:07 +0000785 }
786 } else if (type->is_enum()) {
Bryan Duxbury2d804702009-12-18 19:41:11 +0000787 if (value->get_type() != t_const_value::CV_IDENTIFIER) {
Mark Slee30152872006-11-28 01:24:07 +0000788 throw "type error: const \"" + name + "\" was declared as enum";
789 }
Bryan Duxbury2d804702009-12-18 19:41:11 +0000790
Bryan Duxbury1606f252010-11-24 00:25:57 +0000791 // see if there's a dot in the identifier
792 std::string name_portion = value->get_identifier_name();
793
Bryan Duxbury2d804702009-12-18 19:41:11 +0000794 const vector<t_enum_value*>& enum_values = ((t_enum*)type)->get_constants();
795 vector<t_enum_value*>::const_iterator c_iter;
796 bool found = false;
Bryan Duxbury9f0a7862010-09-12 14:38:36 +0000797
Bryan Duxbury1606f252010-11-24 00:25:57 +0000798 for (c_iter = enum_values.begin(); c_iter != enum_values.end(); ++c_iter) {
Bryan Duxbury9f0a7862010-09-12 14:38:36 +0000799 if ((*c_iter)->get_name() == name_portion) {
Bryan Duxbury2d804702009-12-18 19:41:11 +0000800 found = true;
801 break;
802 }
803 }
804 if (!found) {
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100805 throw "type error: const " + name + " was declared as type " + type->get_name()
806 + " which is an enum, but " + value->get_identifier()
807 + " is not a valid value for that enum";
Bryan Duxbury2d804702009-12-18 19:41:11 +0000808 }
Mark Slee30152872006-11-28 01:24:07 +0000809 } else if (type->is_struct() || type->is_xception()) {
810 if (value->get_type() != t_const_value::CV_MAP) {
811 throw "type error: const \"" + name + "\" was declared as struct/xception";
812 }
813 const vector<t_field*>& fields = ((t_struct*)type)->get_members();
814 vector<t_field*>::const_iterator f_iter;
815
Roman Sorokae58f75d2018-03-08 15:45:22 -0800816 const map<t_const_value*, t_const_value*, t_const_value::value_compare>& val = value->get_map();
817 map<t_const_value*, t_const_value*, t_const_value::value_compare>::const_iterator v_iter;
Mark Slee30152872006-11-28 01:24:07 +0000818 for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
819 if (v_iter->first->get_type() != t_const_value::CV_STRING) {
820 throw "type error: " + name + " struct key must be string";
821 }
zeshuai00726681fb2020-06-03 17:24:38 +0800822 t_type* field_type = nullptr;
Mark Slee30152872006-11-28 01:24:07 +0000823 for (f_iter = fields.begin(); f_iter != fields.end(); ++f_iter) {
824 if ((*f_iter)->get_name() == v_iter->first->get_string()) {
825 field_type = (*f_iter)->get_type();
826 }
827 }
zeshuai00726681fb2020-06-03 17:24:38 +0800828 if (field_type == nullptr) {
Mark Slee30152872006-11-28 01:24:07 +0000829 throw "type error: " + type->get_name() + " has no field " + v_iter->first->get_string();
830 }
831
832 validate_const_rec(name + "." + v_iter->first->get_string(), field_type, v_iter->second);
833 }
834 } else if (type->is_map()) {
835 t_type* k_type = ((t_map*)type)->get_key_type();
836 t_type* v_type = ((t_map*)type)->get_val_type();
Roman Sorokae58f75d2018-03-08 15:45:22 -0800837 const map<t_const_value*, t_const_value*, t_const_value::value_compare>& val = value->get_map();
838 map<t_const_value*, t_const_value*, t_const_value::value_compare>::const_iterator v_iter;
Mark Slee30152872006-11-28 01:24:07 +0000839 for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
840 validate_const_rec(name + "<key>", k_type, v_iter->first);
841 validate_const_rec(name + "<val>", v_type, v_iter->second);
Mark Slee2c44d202007-05-16 02:18:07 +0000842 }
Mark Slee30152872006-11-28 01:24:07 +0000843 } else if (type->is_list() || type->is_set()) {
844 t_type* e_type;
845 if (type->is_list()) {
846 e_type = ((t_list*)type)->get_elem_type();
847 } else {
848 e_type = ((t_set*)type)->get_elem_type();
849 }
850 const vector<t_const_value*>& val = value->get_list();
851 vector<t_const_value*>::const_iterator v_iter;
852 for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
853 validate_const_rec(name + "<elem>", e_type, *v_iter);
854 }
855 }
856}
857
858/**
Jens Geyer12c09f42013-08-25 14:16:27 +0200859 * Check simple identifier names
860 * It's easier to do it this way instead of rewriting the whole grammar etc.
861 */
862void validate_simple_identifier(const char* identifier) {
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100863 string name(identifier);
864 if (name.find(".") != string::npos) {
Jens Geyer12c09f42013-08-25 14:16:27 +0200865 yyerror("Identifier %s can't have a dot.", identifier);
866 exit(1);
867 }
868}
869
870/**
Mark Slee30152872006-11-28 01:24:07 +0000871 * Check the type of the parsed const information against its declared type
872 */
873void validate_const_type(t_const* c) {
874 validate_const_rec(c->get_name(), c->get_type(), c->get_value());
875}
876
877/**
Mark Slee7ff32452007-02-01 05:26:18 +0000878 * Check the type of a default value assigned to a field.
879 */
880void validate_field_value(t_field* field, t_const_value* cv) {
881 validate_const_rec(field->get_name(), field->get_type(), cv);
882}
883
884/**
Mark Slee91f2b7b2008-01-31 01:49:16 +0000885 * Check that all the elements of a throws block are actually exceptions.
886 */
887bool validate_throws(t_struct* throws) {
888 const vector<t_field*>& members = throws->get_members();
889 vector<t_field*>::const_iterator m_iter;
890 for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
Bryan Duxburycff83572011-08-24 20:53:03 +0000891 if (!t_generator::get_true_type((*m_iter)->get_type())->is_xception()) {
Mark Slee91f2b7b2008-01-31 01:49:16 +0000892 return false;
893 }
894 }
895 return true;
896}
897
/**
 * Skips a UTF-8 byte order mark (EF BB BF) if the stream holds one at the
 * current read position.
 *
 * @param f Stream to inspect.
 * @return true when the three BOM bytes were consumed; otherwise false, in
 *         which case the stream has been rewound to its beginning.
 */
bool skip_utf8_bom(FILE* f) {
  static const int kUtf8Bom[] = {0xEF, 0xBB, 0xBF};

  // Compare byte by byte and stop reading at the first mismatch.
  for (int expected : kUtf8Bom) {
    if (fgetc(f) != expected) {
      rewind(f);
      return false;
    }
  }
  return true;
}
915
916/**
Mark Sleef0712dc2006-10-25 19:03:57 +0000917 * Parses a program
918 */
Jens Geyer346aa812023-09-02 22:24:07 +0200919void parse(t_program* program, t_program* parent_program, std::set<std::string>& known_includes) {
Mark Sleef0712dc2006-10-25 19:03:57 +0000920 // Get scope file path
921 string path = program->get_path();
Jens Geyer346aa812023-09-02 22:24:07 +0200922 if( ! known_includes.insert(path).second) {
923 failure("Recursion detected, file: \"%s\"", path.c_str());
924 }
Mark Slee2c44d202007-05-16 02:18:07 +0000925
Mark Sleef0712dc2006-10-25 19:03:57 +0000926 // Set current dir global, which is used in the include_file function
927 g_curdir = directory_name(path);
928 g_curpath = path;
929
930 // Open the file
Jens Geyer03d49442013-09-04 22:34:41 +0200931 // skip UTF-8 BOM if there is one
Mark Sleef0712dc2006-10-25 19:03:57 +0000932 yyin = fopen(path.c_str(), "r");
933 if (yyin == 0) {
934 failure("Could not open input file: \"%s\"", path.c_str());
935 }
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100936 if (skip_utf8_bom(yyin))
Jens Geyer03d49442013-09-04 22:34:41 +0200937 pverbose("Skipped UTF-8 BOM at %s\n", path.c_str());
Roger Meier4f4b15b2014-11-05 16:51:04 +0100938
Mark Sleef0712dc2006-10-25 19:03:57 +0000939 // Create new scope and scan for includes
940 pverbose("Scanning %s for includes\n", path.c_str());
Mark Slee2c44d202007-05-16 02:18:07 +0000941 g_parse_mode = INCLUDES;
Mark Sleef0712dc2006-10-25 19:03:57 +0000942 g_program = program;
943 g_scope = program->scope();
Mark Slee30152872006-11-28 01:24:07 +0000944 try {
Mark Slee36bfa2e2007-01-19 20:09:51 +0000945 yylineno = 1;
Mark Slee30152872006-11-28 01:24:07 +0000946 if (yyparse() != 0) {
947 failure("Parser error during include pass.");
948 }
Gaurav Singh7d30e2c2020-02-02 10:56:26 -0500949 } catch (string &x) {
Mark Slee30152872006-11-28 01:24:07 +0000950 failure(x.c_str());
Mark Sleef0712dc2006-10-25 19:03:57 +0000951 }
952 fclose(yyin);
953
954 // Recursively parse all the include programs
955 vector<t_program*>& includes = program->get_includes();
956 vector<t_program*>::iterator iter;
957 for (iter = includes.begin(); iter != includes.end(); ++iter) {
Jens Geyer346aa812023-09-02 22:24:07 +0200958 parse(*iter, program, known_includes);
Mark Sleef0712dc2006-10-25 19:03:57 +0000959 }
960
Jens Geyere8379b52014-01-25 00:59:45 +0100961 // reset program doctext status before parsing a new file
962 reset_program_doctext_info();
963
David Reiss204420f2008-01-11 20:59:03 +0000964 // Parse the program file
Mark Sleef0712dc2006-10-25 19:03:57 +0000965 g_parse_mode = PROGRAM;
966 g_program = program;
967 g_scope = program->scope();
zeshuai00726681fb2020-06-03 17:24:38 +0800968 g_parent_scope = (parent_program != nullptr) ? parent_program->scope() : nullptr;
Mark Sleef0712dc2006-10-25 19:03:57 +0000969 g_parent_prefix = program->get_name() + ".";
970 g_curpath = path;
Jens Geyer03d49442013-09-04 22:34:41 +0200971
972 // Open the file
973 // skip UTF-8 BOM if there is one
Mark Sleef0712dc2006-10-25 19:03:57 +0000974 yyin = fopen(path.c_str(), "r");
975 if (yyin == 0) {
976 failure("Could not open input file: \"%s\"", path.c_str());
977 }
Konrad Grochowski16a23a62014-11-13 15:33:38 +0100978 if (skip_utf8_bom(yyin))
Jens Geyer03d49442013-09-04 22:34:41 +0200979 pverbose("Skipped UTF-8 BOM at %s\n", path.c_str());
Roger Meier4f4b15b2014-11-05 16:51:04 +0100980
Mark Sleef0712dc2006-10-25 19:03:57 +0000981 pverbose("Parsing %s for types\n", path.c_str());
Mark Slee36bfa2e2007-01-19 20:09:51 +0000982 yylineno = 1;
David Reiss877237a2007-07-27 00:40:19 +0000983 try {
984 if (yyparse() != 0) {
985 failure("Parser error during types pass.");
986 }
Gaurav Singh7d30e2c2020-02-02 10:56:26 -0500987 } catch (string &x) {
David Reiss877237a2007-07-27 00:40:19 +0000988 failure(x.c_str());
Mark Sleef0712dc2006-10-25 19:03:57 +0000989 }
990 fclose(yyin);
Jens Geyer346aa812023-09-02 22:24:07 +0200991
992 known_includes.erase(path);
Mark Sleef0712dc2006-10-25 19:03:57 +0000993}
994
995/**
996 * Generate code
997 */
David Reissbd0db882008-02-27 01:54:51 +0000998void generate(t_program* program, const vector<string>& generator_strings) {
Mark Sleef0712dc2006-10-25 19:03:57 +0000999 // Oooohh, recursive code generation, hot!!
1000 if (gen_recurse) {
Mustafa Senol Cosar3f0d4442019-03-01 18:57:09 +03001001 program->set_recursive(true);
Mark Sleef0712dc2006-10-25 19:03:57 +00001002 const vector<t_program*>& includes = program->get_includes();
cyy64750162019-02-08 13:40:59 +08001003 for (auto include : includes) {
Konrad Grochowski3b5dacb2014-11-24 10:55:31 +01001004 // Propagate output path from parent to child programs
cyy64750162019-02-08 13:40:59 +08001005 include->set_out_path(program->get_out_path(), program->is_out_path_absolute());
Mark Slee5b743072007-11-13 04:00:29 +00001006
cyy64750162019-02-08 13:40:59 +08001007 generate(include, generator_strings);
Mark Sleef0712dc2006-10-25 19:03:57 +00001008 }
1009 }
1010
1011 // Generate code!
1012 try {
1013 pverbose("Program: %s\n", program->get_path().c_str());
1014
David Reiss1ac05802007-07-30 22:00:27 +00001015 if (dump_docs) {
1016 dump_docstrings(program);
1017 }
David Reissbd0db882008-02-27 01:54:51 +00001018
Jens Geyer19f60f22022-03-16 23:26:37 +01001019 // make sure all symbolic constants are properly resolved
1020 program->scope()->resolve_all_consts();
1021
David Reissbd0db882008-02-27 01:54:51 +00001022 vector<string>::const_iterator iter;
1023 for (iter = generator_strings.begin(); iter != generator_strings.end(); ++iter) {
1024 t_generator* generator = t_generator_registry::get_generator(program, *iter);
1025
zeshuai00726681fb2020-06-03 17:24:38 +08001026 if (generator == nullptr) {
David Reissbd0db882008-02-27 01:54:51 +00001027 pwarning(1, "Unable to get a generator for \"%s\".\n", iter->c_str());
Nobuaki Sukegawa11da87e2016-09-10 14:02:19 +09001028 g_generator_failure = true;
Nobuaki Sukegawa11da87e2016-09-10 14:02:19 +09001029 } else if (generator) {
nsrtvwls014f53f2018-09-28 08:11:21 -07001030 generator->validate_input();
David Reissbd0db882008-02-27 01:54:51 +00001031 pverbose("Generating \"%s\"\n", iter->c_str());
1032 generator->generate_program();
David Reissc9342682008-03-27 21:39:49 +00001033 delete generator;
David Reissbd0db882008-02-27 01:54:51 +00001034 }
1035 }
Gaurav Singh7d30e2c2020-02-02 10:56:26 -05001036 } catch (string &s) {
Jens Geyerd4722d92016-02-13 23:25:11 +01001037 failure("Error: %s\n", s.c_str());
Mark Sleef0712dc2006-10-25 19:03:57 +00001038 } catch (const char* exc) {
Jens Geyerd4722d92016-02-13 23:25:11 +01001039 failure("Error: %s\n", exc);
Mustafa Senol Cosar3f0d4442019-03-01 18:57:09 +03001040 } catch (const std::invalid_argument& invalid_argument_exception) {
1041 failure("Error: %s\n", invalid_argument_exception.what());
Mark Sleef0712dc2006-10-25 19:03:57 +00001042 }
Mark Sleef0712dc2006-10-25 19:03:57 +00001043}
1044
Konrad Grochowski7f4be5f2015-11-05 20:23:11 +01001045void audit(t_program* new_program,
1046 t_program* old_program,
1047 string new_thrift_include_path,
1048 string old_thrift_include_path) {
Ben Craig262cfb42015-07-08 20:37:15 -05001049 vector<string> temp_incl_searchpath = g_incl_searchpath;
Konrad Grochowski7f4be5f2015-11-05 20:23:11 +01001050 if (!old_thrift_include_path.empty()) {
Ben Craig262cfb42015-07-08 20:37:15 -05001051 g_incl_searchpath.push_back(old_thrift_include_path);
1052 }
1053
Jens Geyer346aa812023-09-02 22:24:07 +02001054 std::set<std::string> old_includes;
1055 parse(old_program, nullptr, old_includes);
Ben Craig262cfb42015-07-08 20:37:15 -05001056
1057 g_incl_searchpath = temp_incl_searchpath;
Konrad Grochowski7f4be5f2015-11-05 20:23:11 +01001058 if (!new_thrift_include_path.empty()) {
Ben Craig262cfb42015-07-08 20:37:15 -05001059 g_incl_searchpath.push_back(new_thrift_include_path);
1060 }
1061
Jens Geyer346aa812023-09-02 22:24:07 +02001062 std::set<std::string> new_includes;
1063 parse(new_program, nullptr, new_includes);
Ben Craig262cfb42015-07-08 20:37:15 -05001064
1065 compare_namespace(new_program, old_program);
1066 compare_services(new_program->get_services(), old_program->get_services());
1067 compare_enums(new_program->get_enums(), old_program->get_enums());
1068 compare_structs(new_program->get_structs(), old_program->get_structs());
1069 compare_structs(new_program->get_xceptions(), old_program->get_xceptions());
1070 compare_consts(new_program->get_consts(), old_program->get_consts());
1071}
1072
Mark Sleef0712dc2006-10-25 19:03:57 +00001073/**
Mark Sleef5377b32006-10-10 01:42:59 +00001074 * Parse it up.. then spit it back out, in pretty much every language. Alright
1075 * not that many languages, but the cool ones that we care about.
Mark Slee31985722006-05-24 21:45:31 +00001076 */
1077int main(int argc, char** argv) {
1078 int i;
dweatherford65b70752007-10-31 02:18:14 +00001079 std::string out_path;
Bryan Duxburybdca9f62011-03-01 19:53:07 +00001080 bool out_path_is_absolute = false;
Mark Sleef5377b32006-10-10 01:42:59 +00001081
Mark Sleeb15a68b2006-06-07 06:46:24 +00001082 // Setup time string
zeshuai00726681fb2020-06-03 17:24:38 +08001083 time_t now = time(nullptr);
Mark Sleeb15a68b2006-06-07 06:46:24 +00001084 g_time_str = ctime(&now);
Mark Slee31985722006-05-24 21:45:31 +00001085
Mark Sleef0712dc2006-10-25 19:03:57 +00001086 // Check for necessary arguments, you gotta have at least a filename and
1087 // an output language flag
Mark Sleeb15a68b2006-06-07 06:46:24 +00001088 if (argc < 2) {
1089 usage();
1090 }
Mark Slee31985722006-05-24 21:45:31 +00001091
David Reissbd0db882008-02-27 01:54:51 +00001092 vector<string> generator_strings;
Ben Craig262cfb42015-07-08 20:37:15 -05001093 string old_thrift_include_path;
1094 string new_thrift_include_path;
1095 string old_input_file;
David Reissbd0db882008-02-27 01:54:51 +00001096
David Reiss9cc2c132008-02-27 01:54:47 +00001097 // Set the current path to a dummy value to make warning messages clearer.
1098 g_curpath = "arguments";
1099
Mark Sleef5377b32006-10-10 01:42:59 +00001100 // Hacky parameter handling... I didn't feel like using a library sorry!
Konrad Grochowski16a23a62014-11-13 15:33:38 +01001101 for (i = 1; i < argc - 1; i++) {
Mark Sleefdbee812006-09-27 18:50:48 +00001102 char* arg;
Mark Slee2329a832006-11-09 00:23:30 +00001103
Mark Sleefdbee812006-09-27 18:50:48 +00001104 arg = strtok(argv[i], " ");
zeshuai00726681fb2020-06-03 17:24:38 +08001105 while (arg != nullptr) {
Mark Slee2329a832006-11-09 00:23:30 +00001106 // Treat double dashes as single dashes
Mark Slee52cb2232006-11-10 22:32:07 +00001107 if (arg[0] == '-' && arg[1] == '-') {
Mark Slee2329a832006-11-09 00:23:30 +00001108 ++arg;
1109 }
1110
Jake Farrell2fd8a152012-09-29 00:26:36 +00001111 if (strcmp(arg, "-help") == 0) {
1112 help();
1113 } else if (strcmp(arg, "-version") == 0) {
David Reissdd08f6d2008-06-30 20:24:24 +00001114 version();
jfarrell70969422013-09-09 20:33:38 -04001115 exit(0);
David Reissdd08f6d2008-06-30 20:24:24 +00001116 } else if (strcmp(arg, "-debug") == 0) {
Mark Sleefdbee812006-09-27 18:50:48 +00001117 g_debug = 1;
Mark Slee2329a832006-11-09 00:23:30 +00001118 } else if (strcmp(arg, "-nowarn") == 0) {
Mark Sleef0712dc2006-10-25 19:03:57 +00001119 g_warn = 0;
Mark Slee2329a832006-11-09 00:23:30 +00001120 } else if (strcmp(arg, "-strict") == 0) {
Bryan Duxburya145b4d2009-04-03 17:29:25 +00001121 g_strict = 255;
Mark Sleef0712dc2006-10-25 19:03:57 +00001122 g_warn = 2;
Konrad Grochowski16a23a62014-11-13 15:33:38 +01001123 } else if (strcmp(arg, "-v") == 0 || strcmp(arg, "-verbose") == 0) {
Mark Sleef0712dc2006-10-25 19:03:57 +00001124 g_verbose = 1;
Konrad Grochowski16a23a62014-11-13 15:33:38 +01001125 } else if (strcmp(arg, "-r") == 0 || strcmp(arg, "-recurse") == 0) {
Mark Sleef0712dc2006-10-25 19:03:57 +00001126 gen_recurse = true;
Bryan Duxburyc7206a42011-08-17 23:17:04 +00001127 } else if (strcmp(arg, "-allow-neg-keys") == 0) {
1128 g_allow_neg_field_keys = true;
Roger Meier887ff752011-08-19 11:25:39 +00001129 } else if (strcmp(arg, "-allow-64bit-consts") == 0) {
1130 g_allow_64bit_consts = true;
David Reissbd0db882008-02-27 01:54:51 +00001131 } else if (strcmp(arg, "-gen") == 0) {
1132 arg = argv[++i];
zeshuai00726681fb2020-06-03 17:24:38 +08001133 if (arg == nullptr) {
Jake Farrell2fd8a152012-09-29 00:26:36 +00001134 fprintf(stderr, "Missing generator specification\n");
David Reissbd0db882008-02-27 01:54:51 +00001135 usage();
1136 }
cyy64750162019-02-08 13:40:59 +08001137 generator_strings.emplace_back(arg);
Martin Kraemer32c66e12006-11-09 00:06:36 +00001138 } else if (strcmp(arg, "-I") == 0) {
1139 // An argument of "-I\ asdf" is invalid and has unknown results
1140 arg = argv[++i];
1141
zeshuai00726681fb2020-06-03 17:24:38 +08001142 if (arg == nullptr) {
Jake Farrell2fd8a152012-09-29 00:26:36 +00001143 fprintf(stderr, "Missing Include directory\n");
Martin Kraemer32c66e12006-11-09 00:06:36 +00001144 usage();
1145 }
cyy64750162019-02-08 13:40:59 +08001146 g_incl_searchpath.emplace_back(arg);
Bryan Duxburybdca9f62011-03-01 19:53:07 +00001147 } else if ((strcmp(arg, "-o") == 0) || (strcmp(arg, "-out") == 0)) {
1148 out_path_is_absolute = (strcmp(arg, "-out") == 0) ? true : false;
Roger Meier6d7473d2013-05-06 01:08:36 +02001149 arg = argv[++i];
zeshuai00726681fb2020-06-03 17:24:38 +08001150 if (arg == nullptr) {
David Reiss9d866ac2008-06-10 22:56:19 +00001151 fprintf(stderr, "-o: missing output directory\n");
dweatherford65b70752007-10-31 02:18:14 +00001152 usage();
Mark Slee5b743072007-11-13 04:00:29 +00001153 }
dweatherford65b70752007-10-31 02:18:14 +00001154 out_path = arg;
David Reiss204420f2008-01-11 20:59:03 +00001155
Ben Craige9576752013-10-11 08:19:16 -05001156#ifdef _WIN32
Konrad Grochowski16a23a62014-11-13 15:33:38 +01001157 // strip out trailing \ on Windows
Jim King9de9b1f2015-04-30 16:03:34 -04001158 std::string::size_type last = out_path.length() - 1;
Konrad Grochowski16a23a62014-11-13 15:33:38 +01001159 if (out_path[last] == '\\') {
David Reiss204420f2008-01-11 20:59:03 +00001160 out_path.erase(last);
1161 }
1162#endif
Roger Meier061d4a22012-10-07 11:51:00 +00001163 if (!check_is_directory(out_path.c_str()))
dweatherford65b70752007-10-31 02:18:14 +00001164 return -1;
Ben Craig262cfb42015-07-08 20:37:15 -05001165 } else if (strcmp(arg, "-audit") == 0) {
1166 g_audit = true;
1167 arg = argv[++i];
zeshuai00726681fb2020-06-03 17:24:38 +08001168 if (arg == nullptr) {
Ben Craig262cfb42015-07-08 20:37:15 -05001169 fprintf(stderr, "Missing old thrift file name for audit operation\n");
1170 usage();
1171 }
1172 char old_thrift_file_rp[THRIFT_PATH_MAX];
1173
Nobuaki Sukegawad479e232016-02-28 11:28:19 +09001174 // cppcheck-suppress uninitvar
zeshuai00726681fb2020-06-03 17:24:38 +08001175 if (saferealpath(arg, old_thrift_file_rp) == nullptr) {
Ben Craig262cfb42015-07-08 20:37:15 -05001176 failure("Could not open input file with realpath: %s", arg);
1177 }
1178 old_input_file = string(old_thrift_file_rp);
Konrad Grochowski7f4be5f2015-11-05 20:23:11 +01001179 } else if (strcmp(arg, "-audit-nofatal") == 0) {
Ben Craig262cfb42015-07-08 20:37:15 -05001180 g_audit_fatal = false;
1181 } else if (strcmp(arg, "-Iold") == 0) {
1182 arg = argv[++i];
zeshuai00726681fb2020-06-03 17:24:38 +08001183 if (arg == nullptr) {
Ben Craig262cfb42015-07-08 20:37:15 -05001184 fprintf(stderr, "Missing Include directory for old thrift file\n");
1185 usage();
1186 }
1187 old_thrift_include_path = string(arg);
1188 } else if (strcmp(arg, "-Inew") == 0) {
1189 arg = argv[++i];
zeshuai00726681fb2020-06-03 17:24:38 +08001190 if (arg == nullptr) {
Konrad Grochowski7f4be5f2015-11-05 20:23:11 +01001191 fprintf(stderr, "Missing Include directory for new thrift file\n");
1192 usage();
Ben Craig262cfb42015-07-08 20:37:15 -05001193 }
1194 new_thrift_include_path = string(arg);
Mark Sleefdbee812006-09-27 18:50:48 +00001195 } else {
Jake Farrell2fd8a152012-09-29 00:26:36 +00001196 fprintf(stderr, "Unrecognized option: %s\n", arg);
Mark Sleefdbee812006-09-27 18:50:48 +00001197 usage();
1198 }
1199
1200 // Tokenize more
zeshuai00726681fb2020-06-03 17:24:38 +08001201 arg = strtok(nullptr, " ");
Mark Slee31985722006-05-24 21:45:31 +00001202 }
1203 }
Mark Slee2c44d202007-05-16 02:18:07 +00001204
Jake Farrell2fd8a152012-09-29 00:26:36 +00001205 // display help
Konrad Grochowski16a23a62014-11-13 15:33:38 +01001206 if ((strcmp(argv[argc - 1], "-help") == 0) || (strcmp(argv[argc - 1], "--help") == 0)) {
Jake Farrell2fd8a152012-09-29 00:26:36 +00001207 help();
1208 }
1209
David Reissdd08f6d2008-06-30 20:24:24 +00001210 // if you're asking for version, you have a right not to pass a file
Konrad Grochowski16a23a62014-11-13 15:33:38 +01001211 if ((strcmp(argv[argc - 1], "-version") == 0) || (strcmp(argv[argc - 1], "--version") == 0)) {
David Reissdd08f6d2008-06-30 20:24:24 +00001212 version();
jfarrell8b1799f2014-04-10 22:06:11 -04001213 exit(0);
David Reissdd08f6d2008-06-30 20:24:24 +00001214 }
1215
Mark Sleef0712dc2006-10-25 19:03:57 +00001216 // Initialize global types
Nobuaki Sukegawa11da87e2016-09-10 14:02:19 +09001217 initGlobals();
Mark Sleee8540632006-05-30 09:24:40 +00001218
Konrad Grochowski7f4be5f2015-11-05 20:23:11 +01001219 if (g_audit) {
Ben Craig262cfb42015-07-08 20:37:15 -05001220 // Audit operation
Mark Slee31985722006-05-24 21:45:31 +00001221
Ben Craig262cfb42015-07-08 20:37:15 -05001222 if (old_input_file.empty()) {
1223 fprintf(stderr, "Missing file name of old thrift file for audit\n");
1224 usage();
1225 }
David Reiss9cc2c132008-02-27 01:54:47 +00001226
Ben Craig262cfb42015-07-08 20:37:15 -05001227 char new_thrift_file_rp[THRIFT_PATH_MAX];
zeshuai00726681fb2020-06-03 17:24:38 +08001228 if (argv[i] == nullptr) {
Ben Craig262cfb42015-07-08 20:37:15 -05001229 fprintf(stderr, "Missing file name of new thrift file for audit\n");
1230 usage();
1231 }
Nobuaki Sukegawad479e232016-02-28 11:28:19 +09001232 // cppcheck-suppress uninitvar
zeshuai00726681fb2020-06-03 17:24:38 +08001233 if (saferealpath(argv[i], new_thrift_file_rp) == nullptr) {
Ben Craig262cfb42015-07-08 20:37:15 -05001234 failure("Could not open input file with realpath: %s", argv[i]);
1235 }
1236 string new_input_file(new_thrift_file_rp);
1237
1238 t_program new_program(new_input_file);
1239 t_program old_program(old_input_file);
1240
1241 audit(&new_program, &old_program, new_thrift_include_path, old_thrift_include_path);
1242
1243 } else {
1244 // Generate options
Konrad Grochowski7f4be5f2015-11-05 20:23:11 +01001245
Ben Craig262cfb42015-07-08 20:37:15 -05001246 // You gotta generate something!
1247 if (generator_strings.empty()) {
1248 fprintf(stderr, "No output language(s) specified\n");
1249 usage();
1250 }
1251
1252 // Real-pathify it
1253 char rp[THRIFT_PATH_MAX];
zeshuai00726681fb2020-06-03 17:24:38 +08001254 if (argv[i] == nullptr) {
Ben Craig262cfb42015-07-08 20:37:15 -05001255 fprintf(stderr, "Missing file name\n");
1256 usage();
1257 }
Nobuaki Sukegawad479e232016-02-28 11:28:19 +09001258 // cppcheck-suppress uninitvar
zeshuai00726681fb2020-06-03 17:24:38 +08001259 if (saferealpath(argv[i], rp) == nullptr) {
Ben Craig262cfb42015-07-08 20:37:15 -05001260 failure("Could not open input file with realpath: %s", argv[i]);
1261 }
1262 string input_file(rp);
1263
1264 // Instance of the global parse tree
1265 t_program* program = new t_program(input_file);
1266 if (out_path.size()) {
1267 program->set_out_path(out_path, out_path_is_absolute);
1268 }
1269
1270 // Compute the cpp include prefix.
1271 // infer this from the filename passed in
1272 string input_filename = argv[i];
1273 string include_prefix;
1274
1275 string::size_type last_slash = string::npos;
1276 if ((last_slash = input_filename.rfind("/")) != string::npos) {
1277 include_prefix = input_filename.substr(0, last_slash);
1278 }
1279
1280 program->set_include_prefix(include_prefix);
1281
1282 // Parse it!
Jens Geyer346aa812023-09-02 22:24:07 +02001283 std::set<std::string> known_includes;
1284 parse(program, nullptr, known_includes);
Ben Craig262cfb42015-07-08 20:37:15 -05001285
1286 // The current path is not really relevant when we are doing generation.
1287 // Reset the variable to make warning messages clearer.
1288 g_curpath = "generation";
1289 // Reset yylineno for the heck of it. Use 1 instead of 0 because
1290 // That is what shows up during argument parsing.
1291 yylineno = 1;
1292
1293 // Generate it!
1294 generate(program, generator_strings);
1295 delete program;
1296 }
Mark Sleeb15a68b2006-06-07 06:46:24 +00001297
Mark Sleef0712dc2006-10-25 19:03:57 +00001298 // Clean up. Who am I kidding... this program probably orphans heap memory
1299 // all over the place, but who cares because it is about to exit and it is
1300 // all referenced and used by this wacky parse tree up until now anyways.
Nobuaki Sukegawa11da87e2016-09-10 14:02:19 +09001301 clearGlobals();
Mark Slee31985722006-05-24 21:45:31 +00001302
1303 // Finished
Ben Craig262cfb42015-07-08 20:37:15 -05001304 if (g_return_failure && g_audit_fatal) {
1305 exit(2);
1306 }
Nobuaki Sukegawa11da87e2016-09-10 14:02:19 +09001307 if (g_generator_failure) {
1308 exit(3);
1309 }
Ben Craig262cfb42015-07-08 20:37:15 -05001310 // Finished
Mark Slee31985722006-05-24 21:45:31 +00001311 return 0;
1312}