/* Copyright (C) 2017 xaizek <xaizek@posteo.net>
*
* This file is part of zograscope.
*
* zograscope is free software: you can redistribute it and/or modify
* it under the terms of version 3 of the GNU Affero General Public License as
* published by the Free Software Foundation.
*
* zograscope is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with zograscope. If not, see <http://www.gnu.org/licenses/>.
*/
%option bison-bridge
%option bison-locations
%option reentrant
%option noyywrap
%option extra-type="struct C11LexerData *"
%option prefix="c11_"
%{
#include <iostream>
#include <locale>
#include <string>
#include "c/C11LexerData.hpp"
#include "c/C11SType.hpp"
#include "c/c11-parser.gen.hpp"
#include "TreeBuilder.hpp"
#define YYSTYPE C11_STYPE
#define YYLTYPE C11_LTYPE
/* Input hook of the generated scanner: the buffer is filled by the readInput()
 * method of the lexer data object (yyextra), and whatever that call returns is
 * stored in `result` (presumably the number of bytes read -- confirm against
 * C11LexerData). */
#define YY_INPUT(buf, result, maxSize) \
do { (result) = yyextra->readInput((buf), (maxSize)); } while (false)
/* Bookkeeping performed for every match before the action of its rule runs:
 *  - resets the text record of the semantic value and stores the offset at
 *    which the match starts together with its length;
 *  - fills in the location: first and last line are the current line, the
 *    first column is the current column and the last column is the current
 *    column plus the match length;
 *  - advances the running offset and column past the match.
 * The order matters: `from` and the location are taken before the offset and
 * the column are moved.  Line numbers are not touched here. */
#define YY_USER_ACTION \
yylval->text = { }; \
yylval->text.from = yyextra->offset; \
yylval->text.len = yyleng; \
yylloc->first_line = yyextra->line; \
yylloc->first_column = yyextra->col; \
yylloc->last_line = yyextra->line; \
yylloc->last_column = yyextra->col + yyleng; \
yyextra->offset += yyleng; \
yyextra->col += yyleng;
/* Hands the current match to the tree builder via markWithPostponed(), stores
 * the token id `t` in the text record and returns that id from the scanner.
 * Expands to two statements, so it must be used inside a braced action. */
#define TOKEN(t) \
yyextra->tb->markWithPostponed(yylval->text); \
return (yylval->text.token = (t))
/* Same as TOKEN(), but first switches the scanner back to the INITIAL start
 * condition.  Used by the keyword rules, which are also active in the
 * `beforeparen` start condition. */
#define KW(t) \
BEGIN(INITIAL); \
TOKEN(t)
/* Starts a new line: increments the line counter, resets the column to 1 and
 * records the current offset as the offset at which the new line begins. */
#define ADVANCE_LINE() \
++yyextra->line; \
yyextra->col = 1U; \
yyextra->lineoffset = yyextra->offset;
using namespace c11stypes;
static void reportError(C11_LTYPE *loc, const char text[], std::size_t len,
C11LexerData *data);
%}
%X directive dirmlcomment beforeparen slcomment mlcomment slit
/* (6.4.3) hex-quad:
* hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
*/
HEXQUAD [[:xdigit:]]{4}
/* (6.4.3) universal-character-name:
* \u hex-quad
* \U hex-quad hex-quad
*/
UCN \\u{HEXQUAD}|\\U{HEXQUAD}{2}
/* (6.4.2.1) nondigit: one of
* _ a b c d e f g h i j k l m
* n o p q r s t u v w x y z
* A B C D E F G H I J K L M
* N O P Q R S T U V W X Y Z
*/
NONDIGIT [_a-zA-Z]
/* (6.4.2.1) identifier-nondigit:
* nondigit
* universal-character-name
* other implementation-defined characters
*/
ID_NONDIGIT {NONDIGIT}|{UCN}
/* (6.4.2.1) identifier:
* identifier-nondigit
* identifier identifier-nondigit
* identifier digit
*/
ID {ID_NONDIGIT}({ID_NONDIGIT}|[[:digit:]])*
/* (6.4.4.1) octal-digit: one of */
/* 0 1 2 3 4 5 6 7 */
ODIGIT [0-7]
/* (6.4.4.4) octal-escape-sequence: */
/* \ octal-digit */
/* \ octal-digit octal-digit */
/* \ octal-digit octal-digit octal-digit */
OESC \\{ODIGIT}{1,3}
/* (6.4.4.4) hexadecimal-escape-sequence: */
/* \x hexadecimal-digit */
/* hexadecimal-escape-sequence hexadecimal-digit */
HESC \\x[[:xdigit:]]+
/* (6.4.4.4) simple-escape-sequence: one of */
/* \' \" \? \\ */
/* \a \b \f \n \r \t \v */
SESC \\['"?\\abfnrtv]
/* (6.4.4.4) escape-sequence: */
/* simple-escape-sequence */
/* octal-escape-sequence */
/* hexadecimal-escape-sequence */
/* universal-character-name */
ESEQ {SESC}|{OESC}|{HESC}|{UCN}
/* (6.4.4.4) c-char: */
/* any member of the source character set except */
/* the single-quote ', backslash \, or new-line character */
/* escape-sequence */
CCHAR [^'\\\n]|{ESEQ}
/* (6.4.4.4) c-char-sequence: */
/* c-char */
/* c-char-sequence c-char */
CCHARSEQ {CCHAR}+
/* (6.4.4.2) floating-suffix: one of */
/* f l F L */
FSUFFIX [flFL]
/* (6.4.4.2) hexadecimal-digit-sequence: */
/* hexadecimal-digit */
/* hexadecimal-digit-sequence hexadecimal-digit */
HSEQ [[:xdigit:]]+
/* (6.4.4.2) sign: one of */
/* + - */
SIGN [-+]
/* (6.4.4.2) digit-sequence: */
/* digit */
/* digit-sequence digit */
DSEQ [[:digit:]]+
/* (6.4.4.2) binary-exponent-part: */
/* p signopt digit-sequence */
/* P signopt digit-sequence */
BEXP [pP]{SIGN}?{DSEQ}
/* (6.4.4.2) hexadecimal-fractional-constant: */
/* hexadecimal-digit-sequenceopt . hexadecimal-digit-sequence */
/* hexadecimal-digit-sequence . */
/* Digits after the period are mandatory unless digits precede it. */
/* The form without a period is the second alternative of HFCONST. */
HFRAC {HSEQ}?\.{HSEQ}|{HSEQ}\.
/* (6.4.4.2) exponent-part: */
/* e signopt digit-sequence */
/* E signopt digit-sequence */
EPART [eE]{SIGN}?{DSEQ}
/* (6.4.4.2) fractional-constant: */
/* digit-sequenceopt . digit-sequence */
/* digit-sequence . */
FRACCONST {DSEQ}?\.{DSEQ}|{DSEQ}\.
/* (6.4.4.1) hexadecimal-prefix: one of */
/* 0x 0X */
HPREFIX 0[xX]
/* (6.4.4.2) hexadecimal-floating-constant: */
/* hexadecimal-prefix hexadecimal-fractional-constant binary-exponent-part floating-suffixopt */
/* hexadecimal-prefix hexadecimal-digit-sequence binary-exponent-part floating-suffixopt */
HFCONST {HPREFIX}{HFRAC}{BEXP}{FSUFFIX}?|{HPREFIX}{HSEQ}{BEXP}{FSUFFIX}?
/* (6.4.4.1) unsigned-suffix: one of */
/* u U */
USUFFIX [uU]
/* (6.4.4.1) long-suffix: one of */
/* l L */
LSUFFIX [lL]
/* (6.4.4.1) long-long-suffix: one of */
/* ll LL */
LLSUFFIX ll|LL
/* (6.4.4.1) nonzero-digit: one of */
/* 1 2 3 4 5 6 7 8 9 */
NZDIGIT [1-9]
/* (6.4.4.1) decimal-constant: */
/* nonzero-digit */
/* decimal-constant digit */
DCONST {NZDIGIT}[[:digit:]]*
/* (6.4.4.1) octal-constant: */
/* 0 */
/* octal-constant octal-digit */
OCONST 0{ODIGIT}*
/* (6.4.4.1) integer-suffix: */
/* unsigned-suffix long-suffixopt */
/* unsigned-suffix long-long-suffix */
/* long-suffix unsigned-suffixopt */
/* long-long-suffix unsigned-suffixopt */
ISUFFIX {USUFFIX}{LSUFFIX}?|{USUFFIX}{LLSUFFIX}|{LSUFFIX}{USUFFIX}?|{LLSUFFIX}{USUFFIX}?
/* (6.4.4.2) decimal-floating-constant: */
/* fractional-constant exponent-partopt floating-suffixopt */
/* digit-sequence exponent-part floating-suffixopt */
DFCONST {FRACCONST}{EPART}?{FSUFFIX}?|{DSEQ}{EPART}{FSUFFIX}?
/* (6.4.4.1) hexadecimal-constant: */
/* hexadecimal-prefix hexadecimal-digit */
/* hexadecimal-constant hexadecimal-digit */
HCONST {HPREFIX}[[:xdigit:]]+
/* (6.4.5) encoding-prefix: */
/* u8 */
/* u */
/* U */
/* L */
EPREFIX u8|u|U|L
/* (6.4.5) s-char: */
/* any member of the source character set except */
/* the double-quote ", backslash \, or new-line character */
/* escape-sequence */
SCHAR [^"\\\n]|{ESEQ}
/* (6.4.7) header-name: */
/* < h-char-sequence > */
/* " q-char-sequence " */
/* The double quotes are escaped on purpose: an unescaped pair would start a */
/* quoted flex string, in which a braced name is literal text and not a */
/* reference to a definition. */
HEADERNAME <{HCHARSEQ}>|\"{QCHARSEQ}\"
/* (6.4.7) h-char-sequence: */
/* h-char */
/* h-char-sequence h-char */
HCHARSEQ {HCHAR}+
/* (6.4.7) h-char: */
/* any member of the source character set except */
/* the new-line character and > */
HCHAR [^>\n]
/* (6.4.7) q-char-sequence: */
/* q-char */
/* q-char-sequence q-char */
QCHARSEQ {QCHAR}+
/* (6.4.7) q-char: */
/* any member of the source character set except */
/* the new-line character and " */
QCHAR [^"\n]
/* Line terminator: LF, CR or CRLF.  The scanner prefers the longest match, */
/* so a CR immediately followed by LF is taken as a single terminator. */
NL \n|\r|\r\n
%%
[ ]                     ;
\t                      {
    /* YY_USER_ACTION has already advanced the column by one for the tab
     * character.  Move it on to the next tab stop (columns 1, 1 + tabWidth,
     * 1 + 2*tabWidth, ...).  Only tabWidth - 1 is added before rounding down:
     * adding the full width on top of the automatic advance skipped an entire
     * stop whenever the tab occupied the last column before a stop.
     * Widths of 1 or less keep the plain one-column advance, which also
     * avoids a modulo by zero. */
    if (yyextra->tabWidth > 1) {
        yyextra->col += yyextra->tabWidth - 1;
        yyextra->col -= (yyextra->col - 1)%yyextra->tabWidth;
    }
}
{NL} { /* Line terminator between tokens: only the position is updated. */ ADVANCE_LINE(); }
\\{NL} {
/* Backslash followed by a line terminator.  Only the backslash is recorded:
 * the text length is forced to 1 and the location is made one column wide
 * before the piece is queued as a postponed LineGlue node.  The terminator
 * itself just starts a new line. */
yylval->text.len = 1;
yylloc->last_column = yylloc->first_column + 1;
yyextra->tb->addPostponed(yylval->text, *yylloc, +C11SType::LineGlue);
ADVANCE_LINE();
}
<INITIAL,beforeparen>{
    /* Keywords.  They are active in the default state and in the state
     * entered by the identifier-before-parenthesis lookahead; KW() always
     * switches back to INITIAL before returning the token.  The rules are
     * kept in their established order. */
    "case"              { KW(CASE); }
    "default"           { KW(DEFAULT); }
    "sizeof"            { KW(SIZEOF); }
    "return"            { KW(RETURN); }
    "_Alignof"          { KW(_ALIGNOF); }
    "_Generic"          { KW(_GENERIC); }
    "typedef"           { KW(TYPEDEF); }
    "extern"            { KW(EXTERN); }
    "static"            { KW(STATIC); }
    "_Thread_local"     { KW(_THREAD_LOCAL); }
    "auto"              { KW(AUTO); }
    "register"          { KW(REGISTER); }
    "void"              { KW(VOID); }
    "char"              { KW(CHAR); }
    "short"             { KW(SHORT); }
    "int"               { KW(INT); }
    "long"              { KW(LONG); }
    "float"             { KW(FLOAT); }
    "double"            { KW(DOUBLE); }
    "signed"            { KW(SIGNED); }
    "unsigned"          { KW(UNSIGNED); }
    "_Bool"             { KW(_BOOL); }
    "_Complex"          { KW(_COMPLEX); }
    "struct"            { KW(STRUCT); }
    "union"             { KW(UNION); }
    "enum"              { KW(ENUM); }
    "_Atomic"           { KW(_ATOMIC); }
    "const"             { KW(CONST); }
    "restrict"          { KW(RESTRICT); }
    "volatile"          { KW(VOLATILE); }
    "inline"            { KW(INLINE); }
    "_Noreturn"         { KW(_NORETURN); }
    "_Alignas"          { KW(_ALIGNAS); }
    "_Static_assert"    { KW(_STATIC_ASSERT); }
    "if"                { KW(IF); }
    "else"              { KW(ELSE); }
    "switch"            { KW(SWITCH); }
    "while"             { KW(WHILE); }
    "do"                { KW(DO); }
    "for"               { KW(FOR); }
    "break"             { KW(BREAK); }
    "continue"          { KW(CONTINUE); }
    "goto"              { KW(GOTO); }
    "asm"               { KW(ASM); }
    "__asm__"           { KW(ASM); }
    "__volatile__"      { KW(VOLATILE); }
    "__attribute__"     { KW(ATTRIBUTE); }
}
<INITIAL>{ID} { /* Ordinary identifier; keyword rules come earlier and win ties. */ TOKEN(ID); }
<beforeparen>{ID} {
/* Identifier re-scanned after the lookahead rule below matched, i.e. one
 * that is followed by an opening parenthesis.  The parser still receives
 * ID, but the token kind stored with the text is FUNCTION. */
BEGIN(INITIAL);
yyextra->tb->markWithPostponed(yylval->text);
yylval->text.token = FUNCTION;
return ID;
}
{ID}[[:space:]]*"(" {
/* Lookahead only, nothing is consumed: the offset and column advanced by
 * YY_USER_ACTION are rolled back, the whole match is pushed back with
 * yyless(0) and scanning restarts in the beforeparen state, where only
 * the keyword and identifier rules are active. */
BEGIN(beforeparen);
yyextra->offset -= yyleng;
yyextra->col -= yyleng;
yyless(0);
}
/* A.1.5 Constants */
/* (6.4.4.1) integer-constant: */
/* decimal-constant integer-suffixopt */
/* octal-constant integer-suffixopt */
/* hexadecimal-constant integer-suffixopt */
{DCONST}{ISUFFIX}?|{OCONST}{ISUFFIX}?|{HCONST}{ISUFFIX}? { /* Integer constant: decimal, octal or hexadecimal, with an optional suffix. */ TOKEN(ICONST); }
/* (6.4.4.2) floating-constant: */
/* decimal-floating-constant */
/* hexadecimal-floating-constant */
{DFCONST}|{HFCONST} { /* Floating constant in decimal or hexadecimal notation. */ TOKEN(FCONST); }
/* (6.4.4.4) character-constant: */
/* ' c-char-sequence ' */
/* L' c-char-sequence ' */
/* u' c-char-sequence ' */
/* U' c-char-sequence ' */
[LuU]?'{CCHARSEQ}' { /* Character constant with an optional L, u or U prefix. */ TOKEN(CHCONST); }
/* A.1.6 String literals */
/* (6.4.5) string-literal: */
/* encoding-prefixopt " s-char-sequenceopt " */
/* (6.4.5) s-char-sequence: */
/* s-char */
/* s-char-sequence s-char */
{EPREFIX}?\" {
/* Opening quote of a string literal, with an optional encoding prefix.
 * The token and location of the start are saved and the body is scanned
 * in the slit state; nothing is returned until the closing quote. */
yyextra->startTok = *yylval;
yyextra->startTok.text.token = SLIT;
yyextra->startLoc = *yylloc;
BEGIN(slit);
}
<slit>{SCHAR} ; /* Ordinary character or escape sequence inside the literal. */
<slit>\" {
/* Closing quote: the literal spans from the saved start offset up to the
 * current offset.  The saved token and the saved start location replace
 * the values of the closing quote before SLIT is returned. */
yyextra->startTok.text.len = yyextra->offset - yyextra->startTok.text.from;
yyextra->tb->markWithPostponed(yyextra->startTok.text);
*yylval = yyextra->startTok;
*yylloc = yyextra->startLoc;
BEGIN(INITIAL);
return SLIT;
}
<slit>\\?{NL} { /* Line terminator inside the literal, with or without a preceding backslash. */ ADVANCE_LINE(); }
<slit>. { /* Anything else inside the literal is reported; scanning of the literal continues. */ reportError(yylloc, yytext, yyleng, yyextra); }
"->" { /* First of the multi-character operators; each one returns its own token id. */ TOKEN(ARR_OP); }
"++" { TOKEN(INC_OP); }
"--" { TOKEN(DEC_OP); }
"<<" { TOKEN(LSH_OP); }
">>" { TOKEN(RSH_OP); }
"<=" { TOKEN(LTE_OP); }
">=" { TOKEN(GTE_OP); }
"==" { TOKEN(EQ_OP); }
"!=" { TOKEN(NE_OP); }
"&&" { TOKEN(AND_OP); }
"||" { TOKEN(OR_OP); }
"*=" { TOKEN(TIMESEQ_OP); }
"/=" { TOKEN(DIVEQ_OP); }
"%=" { TOKEN(MODEQ_OP); }
"+=" { TOKEN(PLUSEQ_OP); }
"-=" { TOKEN(MINUSEQ_OP); }
"<<=" { TOKEN(LSHIFTEQ_OP); }
">>=" { TOKEN(RSHIFTEQ_OP); }
"&=" { TOKEN(ANDEQ_OP); }
"^=" { TOKEN(XOREQ_OP); }
"|=" { TOKEN(OREQ_OP); }
^[[:space:]]{-}[\n\r]*# {
    /* Preprocessing directive: a hash sign preceded on its line only by
     * whitespace.  The token and location of the start are saved until the
     * end of the directive is reached. */
    yyextra->startTok = *yylval;
    yyextra->startTok.text.token = DIRECTIVE;
    yyextra->startLoc = *yylloc;
    BEGIN(directive);
}
<directive>\\{NL} {
    /* A backslash before the line terminator continues the directive. */
    ADVANCE_LINE();
}
<directive>{NL} {
    /* End of the directive.  The terminator is excluded from the recorded
     * text by subtracting yyleng rather than a fixed 1: NL also matches the
     * two-character CRLF sequence, and a carriage return must not be left
     * at the end of the directive text. */
    yyextra->startTok.text.len = yyextra->offset
                               - yyextra->startTok.text.from - yyleng;
    yyextra->startLoc.last_line = yylloc->last_line;
    yyextra->startLoc.last_column = yylloc->last_column;
    yyextra->tb->addPostponed(yyextra->startTok.text, yyextra->startLoc,
                              +C11SType::Directive);
    ADVANCE_LINE();
    BEGIN(INITIAL);
}
    /* A block comment inside a directive may span lines without ending the
     * directive; its contents are skipped. */
<directive>"/*"         BEGIN(dirmlcomment);
<dirmlcomment>"*/"      BEGIN(directive);
<dirmlcomment>{NL}      ADVANCE_LINE();
<dirmlcomment>.         ;
    /* NOTE(review): the next pattern is the literal text HEADERNAME, not a
     * reference to the header-name definition.  It is left as is because it
     * and the catch-all rule after it both discard their match; confirm the
     * intent before changing it. */
<directive>HEADERNAME   ;
<directive>.            ;
"//" {
    /* Start of a comment that runs to the end of the line. */
    yyextra->startTok = *yylval;
    yyextra->startTok.text.token = SLCOMMENT;
    yyextra->startLoc = *yylloc;
    BEGIN(slcomment);
}
<slcomment>{NL} {
    /* End of the comment.  As for directives, the whole terminator (one or
     * two characters) is excluded from the comment text. */
    yyextra->startTok.text.len = yyextra->offset
                               - yyextra->startTok.text.from - yyleng;
    yyextra->startLoc.last_line = yylloc->last_line;
    yyextra->startLoc.last_column = yylloc->last_column;
    yyextra->tb->addPostponed(yyextra->startTok.text, yyextra->startLoc,
                              +C11SType::Comment);
    ADVANCE_LINE();
    BEGIN(INITIAL);
}
<slcomment>. ; /* Any other character is part of the comment text. */
"/*" {
/* Start of a block comment: the token and location of its start are saved
 * until the closing delimiter is found. */
yyextra->startTok = *yylval;
yyextra->startTok.text.token = MLCOMMENT;
yyextra->startLoc = *yylloc;
BEGIN(mlcomment);
}
<mlcomment>"*/" {
/* End of the block comment: the recorded text runs from the saved start
 * offset to the current offset, so it includes both delimiters.  The
 * comment is queued as a postponed Comment node and no token is returned. */
yyextra->startTok.text.len = yyextra->offset - yyextra->startTok.text.from;
yyextra->startLoc.last_line = yylloc->last_line;
yyextra->startLoc.last_column = yylloc->last_column;
yyextra->tb->addPostponed(yyextra->startTok.text, yyextra->startLoc,
+C11SType::Comment);
BEGIN(INITIAL);
}
<mlcomment>{NL} { /* Keep line and column tracking correct inside the comment. */ ADVANCE_LINE(); }
<mlcomment>. ; /* Comment contents are skipped. */
"..." { /* Ellipsis. */ TOKEN(DOTS); }
"("|")"|";"|"{"|"}"|"["|"]"|"."|","|"?"|":"|"&"|"|"|"^"|"*"|"/"|"%"|"+"|"-"|"~"|"!"|"<"|">"|"=" {
/* Single-character punctuators: the character code itself is the token id. */
TOKEN(yytext[0]);
}
. { /* Fallback for input that no other rule matches. */ reportError(yylloc, yytext, yyleng, yyextra); }
%%
// Reports an unrecognized piece of input through c11_error().  The message is
// "Unknown token: " followed by the text itself (for input longer than one
// character or for a single printable character) or by the numeric code of the
// character in angle brackets when it is not printable in the global locale.
// The location passed on is a copy of *loc whose first column is recomputed
// from the offsets stored in the lexer data.
static void
reportError(C11_LTYPE *loc, const char text[], std::size_t len,
            C11LexerData *data)
{
    const std::string prefix = "Unknown token: ";

    std::string message;
    if (len > 1U) {
        message = prefix + text;
    } else {
        const char ch = text[0];
        if (std::isprint(ch, std::locale())) {
            message = prefix + ch;
        } else {
            message = prefix + '<' + std::to_string(ch) + '>';
        }
    }

    C11_LTYPE errorLoc = *loc;
    errorLoc.first_column = data->offset - data->lineoffset;
    c11_error(&errorLoc, nullptr, data->tb, data->pd, message.c_str());
}
// Exists only to reference yyunput, which would otherwise be reported as
// unused and break builds that use -Werror=unused.  Has no other effect.
void
fakeYYunputUseC11()
{
    auto *const unusedScannerHelper = &yyunput;
    static_cast<void>(unusedScannerHelper);
}