/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/**
 * Thrift scanner.
 *
 * Tokenizes a thrift definition file.
 */
Mark Sleef5377b32006-10-10 01:42:59 +000025
Mark Slee31985722006-05-24 21:45:31 +000026%{
27
/* This is redundant with some of the flags in Makefile.am, but it works
 * when people override CXXFLAGS without being careful. The pragmas are
 * the 'right' way to do it, but they don't work on old-enough GCC (in
 * particular the GCC that ships on Mac OS X 10.6.5, *counter* to what the
 * GNU docs say).
 *
 * We should revert the Makefile.am changes once Apple ships a reasonable
 * GCC.
 */
Ben Craige9576752013-10-11 08:19:16 -050036#ifdef __GNUC__
Roger Meier3b771a12010-11-17 22:11:26 +000037#pragma GCC diagnostic ignored "-Wunused-function"
38#pragma GCC diagnostic ignored "-Wunused-label"
Ben Craige9576752013-10-11 08:19:16 -050039#endif
40
41#ifdef _MSC_VER
James E. King, III7edc8fa2017-01-20 10:11:41 -050042#pragma warning( push )
43
44// warning C4102: 'find_rule' : unreferenced label
45#pragma warning( disable : 4102 )
46
47// warning C4267: 'argument' : conversion from 'size_t' to 'int', possible loss of data
48#pragma warning( disable : 4267 )
49
50// avoid isatty redefinition
Ben Craige9576752013-10-11 08:19:16 -050051#define YY_NEVER_INTERACTIVE 1
Roger Meier86fded22015-05-15 12:01:38 +020052
53#define YY_NO_UNISTD_H 1
Ben Craige9576752013-10-11 08:19:16 -050054#endif
Roger Meier3b771a12010-11-17 22:11:26 +000055
Jens Geyer8cd3efe2013-09-16 22:17:52 +020056#include <cassert>
David Reiss82e6fc02009-03-26 23:32:36 +000057#include <string>
David Reissf1454162008-06-30 20:45:47 +000058#include <errno.h>
Roger Meier9212e792012-06-12 21:01:06 +000059#include <stdlib.h>
David Reissf1454162008-06-30 20:45:47 +000060
Ben Craige9576752013-10-11 08:19:16 -050061#ifdef _MSC_VER
dtmuller052abc32016-07-26 11:58:28 +020062#include "thrift/windows/config.h"
Roger Meier57e6de42014-07-16 10:19:59 +020063#endif
dtmuller052abc32016-07-26 11:58:28 +020064#include "thrift/main.h"
65#include "thrift/common.h"
66#include "thrift/globals.h"
67#include "thrift/parse/t_program.h"
Mark Slee31985722006-05-24 21:45:31 +000068
/**
 * Must be included AFTER parse/t_program.h, but I can't remember why anymore
 * because I wrote this a while ago.
 */
jfarrell4f54d132014-07-10 09:23:43 -040073#if defined(BISON_USE_PARSER_H_EXTENSION)
dtmuller052abc32016-07-26 11:58:28 +020074#include "thrift/thrifty.h"
jfarrell4f54d132014-07-10 09:23:43 -040075#else
dtmuller052abc32016-07-26 11:58:28 +020076#include "thrift/thrifty.hh"
jfarrell4f54d132014-07-10 09:23:43 -040077#endif
Mark Slee31985722006-05-24 21:45:31 +000078
Mario Emmenlauer695bfb22022-09-07 11:09:27 +020079void integer_overflow(const char* text) {
David Reissf1454162008-06-30 20:45:47 +000080 yyerror("This integer is too big: \"%s\"\n", text);
81 exit(1);
82}
83
Mario Emmenlauer695bfb22022-09-07 11:09:27 +020084void unexpected_token(const char* text) {
Bryan Duxbury235f8b52011-08-19 18:27:47 +000085 yyerror("Unexpected token in input: \"%s\"\n", text);
86 exit(1);
87}
88
Mario Emmenlauer695bfb22022-09-07 11:09:27 +020089void error_no_longer_supported(const char* text, const char* replace_with) {
Jens Geyer0b1eb6b2022-06-05 11:12:49 +020090 yyerror("\"%s\" is no longer supported, use \"%s\" instead. Line %d\n", text, replace_with, yylineno);
91 exit(1);
92}
93
94
Mark Slee31985722006-05-24 21:45:31 +000095%}
96
/**
 * lex-compat gives us the yylineno global, which the error and debug
 * output relies on.
 */
%option lex-compat

/**
 * Every input is a single file, so yywrap is never needed.
 */
%option noyywrap

/**
 * unput is not used here, and leaving it in triggers warnings at -Wall.
 */
%option nounput

/**
 * Named patterns used by the rules below: numeric constants, identifiers
 * (which may contain dots after the first character), whitespace, the
 * openers of the various comment forms, single-character symbols, and the
 * quote characters that open a string literal.
 */

intconstant   ([+-]?[0-9]+)
hexconstant   ([+-]?"0x"[0-9A-Fa-f]+)
dubconstant   ([+-]?[0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?)
identifier    ([a-zA-Z_](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*)
whitespace    ([ \t\r\n]*)
sillycomm     ("/*""*"*"*/")
multicm_begin ("/*")
doctext_begin ("/**")
comment       ("//"[^\n]*)
unixcomment   ("#"[^\n]*)
symbol        ([:;\,\{\}\(\)\=<>\[\]])
literal_begin (['\"])
Mark Slee31985722006-05-24 21:45:31 +0000128
129%%
130
{whitespace}         { /* whitespace produces no token */ }
{sillycomm}          { /* a comment holding only asterisks produces no token */ }
Jens Geyer775671a2016-03-06 19:02:42 +0100133
{doctext_begin} {
  /*
   * Doc comment. The opener has already been matched; the rest is read by
   * hand, one character at a time, until the closing star-slash pair.
   * The text does not show up in the parse tree. Rather, the parser grabs
   * it out of the g_doctext global.
   */
  std::string parsed("/**");
  int state = 0;  // 0 = normal, 1 = "*" seen, 2 = "*/" seen
  while (state < 2) {
    const int ch = yyinput();

    // End of input is EOF with older flex releases but 0 with flex 2.6+.
    // Checking only for EOF makes an unterminated comment spin forever on
    // newer flex. Treating 0 as the end also keeps embedded NUL bytes out
    // of `parsed`, which the C-string handling below could not cope with.
    if (ch == EOF || ch == 0) {
      yyerror("Unexpected end of file in doc-comment at %d\n", yylineno);
      exit(1);
    }

    parsed.push_back(static_cast<char>(ch));
    switch (ch) {
    case '*':
      state = 1;
      break;
    case '/':
      state = (state == 1) ? 2 : 0;
      break;
    default:
      state = 0;
      break;
    }
  }
  pdebug("doctext = \"%s\"\n", parsed.c_str());

  if (g_parse_mode == PROGRAM) {
    clear_doctext();

    // Drop the three-character opener, then overwrite the two-character
    // closer: the '*' becomes a blank and the '/' becomes the terminator.
    g_doctext = strdup(parsed.c_str() + 3);
    const size_t doc_len = strlen(g_doctext);
    assert(doc_len >= 2);
    g_doctext[doc_len - 2] = ' ';
    g_doctext[doc_len - 1] = '\0';
    g_doctext = clean_up_doctext(g_doctext);
    g_doctext_lineno = yylineno;

    // Remember this text as the program-level doctext candidate, but only
    // if none is held yet and the status is still INVALID.
    if ((g_program_doctext_candidate == nullptr) && (g_program_doctext_status == INVALID)) {
      g_program_doctext_candidate = strdup(g_doctext);
      g_program_doctext_lineno = g_doctext_lineno;
      g_program_doctext_status = STILL_CANDIDATE;
      pdebug("%s","program doctext set to STILL_CANDIDATE");
    }
  }
}
176
{multicm_begin} {
  /*
   * Ordinary multi-line comment: read up to the closing star-slash pair.
   * The text is parsed, but thrown away (it only reaches the debug output).
   */
  std::string parsed("/*");
  int state = 0;  // 0 = normal, 1 = "*" seen, 2 = "*/" seen
  while (state < 2) {
    const int ch = yyinput();

    // End of input is EOF with older flex releases but 0 with flex 2.6+.
    // Checking only for EOF makes an unterminated comment spin forever on
    // newer flex, appending to `parsed` without bound.
    if (ch == EOF || ch == 0) {
      yyerror("Unexpected end of file in multiline comment at %d\n", yylineno);
      exit(1);
    }

    parsed.push_back(static_cast<char>(ch));
    switch (ch) {
    case '*':
      state = 1;
      break;
    case '/':
      state = (state == 1) ? 2 : 0;
      break;
    default:
      state = 0;
      break;
    }
  }
  pdebug("multi_comm = \"%s\"\n", parsed.c_str());
}
201
{comment}            { /* line comment: no token */ }
{unixcomment}        { /* hash comment: no token */ }

{symbol}             { return yytext[0]; }
"*"                  { return yytext[0]; }

"false"              { /* boolean literals are integer constants */ yylval.iconst = 0; return tok_int_constant; }
"true"               { yylval.iconst = 1; return tok_int_constant; }

"namespace"          { return tok_namespace; }
"cpp_include"        { return tok_cpp_include; }
"cpp_type"           { return tok_cpp_type; }
"xsd_all"            { return tok_xsd_all; }
"xsd_optional"       { return tok_xsd_optional; }
"xsd_nillable"       { return tok_xsd_nillable; }
"xsd_attrs"          { return tok_xsd_attrs; }
"include"            { return tok_include; }
"void"               { return tok_void; }
"bool"               { return tok_bool; }
"byte"               {
  emit_byte_type_warning();
  return tok_byte;
}
"i8"                 { return tok_i8; }
"i16"                { return tok_i16; }
"i32"                { return tok_i32; }
"i64"                { return tok_i64; }
"double"             { return tok_double; }
"string"             { return tok_string; }
"binary"             { return tok_binary; }
"uuid"               { return tok_uuid; }
"map"                { return tok_map; }
"list"               { return tok_list; }
"set"                { return tok_set; }
"oneway"             { return tok_oneway; }
"typedef"            { return tok_typedef; }
"struct"             { return tok_struct; }
"union"              { return tok_union; }
"exception"          { return tok_xception; }
"extends"            { return tok_extends; }
"throws"             { return tok_throws; }
"service"            { return tok_service; }
"enum"               { return tok_enum; }
"const"              { return tok_const; }
"required"           { return tok_required; }
"optional"           { return tok_optional; }
"async"              {
  /* Still accepted, but the user is told about the newer spelling. */
  pwarning(0, "\"async\" is deprecated. It is called \"oneway\" now.\n");
  return tok_async;
}
"&"                  { return tok_reference; }
Mark Sleef0712dc2006-10-25 19:03:57 +0000250
{intconstant} {
  /*
   * Decimal integer constant. errno is cleared first so that an ERANGE
   * seen afterwards can only have come from this conversion.
   */
  errno = 0;
  yylval.iconst = strtoll(yytext, nullptr, 10);
  const bool out_of_range = (errno == ERANGE);
  if (out_of_range) {
    integer_overflow(yytext);
  }
  return tok_int_constant;
}
Mark Sleef5377b32006-10-10 01:42:59 +0000259
{hexconstant} {
  /*
   * Hexadecimal integer constant, optionally signed.
   *
   * With base 16, strtoll itself accepts an optional sign followed by an
   * optional "0x" prefix, so the whole token is handed over as-is. Parsing
   * the magnitude separately and negating it afterwards would reject the
   * most negative 64-bit value, whose magnitude does not fit in a signed
   * 64-bit integer.
   */
  errno = 0;
  yylval.iconst = strtoll(yytext, nullptr, 16);
  if (errno == ERANGE) {
    integer_overflow(yytext);
  }
  return tok_int_constant;
}
273
{identifier} {
  /* The parser receives its own strdup'ed copy of the matched text. */
  char* const name_copy = strdup(yytext);
  yylval.id = name_copy;
  return tok_identifier;
}
278
{dubconstant} {
  /* Deliberately placed after identifier, since "e10" is NOT a double literal (THRIFT-3477) */
  /*
   * strtod is used rather than atof: the two convert identically, but atof
   * has undefined behaviour when the value cannot be represented, whereas
   * strtod is well defined for out-of-range input.
   */
  yylval.dconst = strtod(yytext, nullptr);
  return tok_dub_constant;
}
284
{literal_begin} {
  /*
   * String literal. The opening quote (single or double) has been matched;
   * the body is read by hand up to the matching quote, translating the
   * supported backslash escapes. The token value is a strdup'ed copy of the
   * decoded text.
   */
  const char mark = yytext[0];
  std::string result;
  for (;;) {
    const int ch = yyinput();

    // End of input is EOF with older flex releases but 0 with flex 2.6+.
    // Checking only for EOF makes an unterminated literal spin forever on
    // newer flex. A NUL could not survive the strdup below anyway.
    if (ch == EOF || ch == 0) {
      yyerror("End of file while read string at %d\n", yylineno);
      exit(1);
    }
    if (ch == '\n') {
      // Literals may not span lines; the newline has already been counted.
      yyerror("End of line while read string at %d\n", yylineno - 1);
      exit(1);
    }
    if (ch == mark) {
      yylval.id = strdup(result.c_str());
      return tok_literal;
    }
    if (ch != '\\') {
      result.push_back(static_cast<char>(ch));
      continue;
    }

    // Backslash escape: the next character selects the replacement.
    switch (yyinput()) {
    case 'r':
      result.push_back('\r');
      break;
    case 'n':
      result.push_back('\n');
      break;
    case 't':
      result.push_back('\t');
      break;
    case '"':
      result.push_back('"');
      break;
    case '\'':
      result.push_back('\'');
      break;
    case '\\':
      result.push_back('\\');
      break;
    default:
      // Terminate like every other lexical error in this file. Returning
      // a negative value would hand the parser something that is not one
      // of its tokens.
      yyerror("Bad escape character\n");
      exit(1);
    }
  }
}
334
Mark Slee31985722006-05-24 21:45:31 +0000335
. {
  /* No earlier rule matched this character: report it (the helper exits). */
  unexpected_token(yytext);
}
339
Mark Slee31985722006-05-24 21:45:31 +0000340%%
David Reiss4a054342009-03-26 23:32:27 +0000341
James E. King, III7edc8fa2017-01-20 10:11:41 -0500342#ifdef _MSC_VER
343#pragma warning( pop )
344#endif
345
David Reiss4a054342009-03-26 23:32:27 +0000346/* vim: filetype=lex
347*/