/*
C++ to LaTeX formatter.

This parser gets a C++ program and builds a LaTeX segment using a few MACROS.
You should define these MACROS in your LaTeX program:

    \PREPROCESSOR{keyword}     - preprocessor keywords
    \COMMENT text \ENDCOMMENT  - old C style remark (multi-line comment)
    \NOTE text \ENDNOTE        - single-line comment
    \NL{number}                - where number is the line number
    \KW{keyword}               - C++ or Java keyword
    \BKW{keyword}              - BNF keyword
    \ID{identifier}            - non-keyword identifier
    \STRING str \ESTRING       - string (in double quotes)
    \CHAR char(s) \ECHAR       - string (in single quotes)
    \TAB                       - tab
    \SP                        - space
    \BVAR                      - BNF variable
    \REGEXP exp \EREGEXP       - regular expression (in BNF)
*/

/* Enable Start-Conditions stack: */
%option stack

/*
Define Start Conditions.
"%s" defines a normal (inclusive) Start Condition, so rules with no start
condition are also active.
"%x" defines an exclusive Start Condition, so rules with no SC are not active.
Start conditions are used in the rules using the format "<SC>rule" or
"<SC1,SC2>rule".

The Start Conditions are:
    INITIAL       - the default flex SC -- NOT USED here.
    NOTE          - line comments (//)
    LATEX_NOTE    - (exclusive) a line comment that supports LaTeX formatting (//{})
    COMMENT       - multi-line C++/Java comments
    BCOMMENT      - BNF comment ("--" till the end of the line; supports LaTeX formatting).
    LATEX_COMMENT - (exclusive) a multi-line comment that supports LaTeX formatting.
                    It is opened by a slash-star immediately followed by "{}".
    DOC_COMMENT   - "javadoc" comments in Java code.
    ALIGN         - (exclusive) entered when a block comment opens and after each
                    newline inside it; any character it does not handle itself
                    returns control to the enclosing comment state.
    STRING        - quoted string
    BSTRING       - quoted string in BNF
    REGEXP        - regular expression (inside BNF notation)
    CHAR          - quoted character
    CPP           - block C++ mode (\CPP ... \END)
    CPP1          - inline C++ mode (\cpp{ ... })
    BNF           - block BNF mode (\BNF ... \END)
    BNF1          - inline BNF mode (\bnf{ ... })
    JAVA          - block Java mode (\JAVA ... \END)
    JAVA1         - inline Java mode (\java{ ...
}) LATEX - (exclusive) normal LaTeX processing mode -- pass all input to output MARKER - (exclusive) looking for a marked (named) range within a file INCLUDE - (exclusive) expecting a language-included filename (after encountering a \cppfile macro, for example) or a LaTeX filename (after \include or \input). */ %x LATEX_NOTE %s NOTE %s COMMENT %s BCOMMENT %x LATEX_COMMENT %s DOC_COMMENT %x ALIGN %s STRING %s BSTRING %s REGEXP %s CHAR %s CPP %s CPP1 %s BNF %s BNF1 %s JAVA %s JAVA1 %x LATEX %x MARKER %x INCLUDE %{ #include #include #include #include // platform-specific #includes: #ifndef WIN32 #include #else #include #endif // Complementing Win32 with missing functions: #ifdef WIN32 void mkstemp(char *templt) { // Win32 does not have the mkstemp function; _mktemp is similar, // but does not actually create the file. _mktemp(templt); // create the file: FILE *f = fopen(templt, "w"); fclose(f); } extern "C" int yywrap(void) { // This function is required by the code generated by lex. It is // normally taken from the lex/yacc link library, but since that // library is unavailable under Windows, we define it ourselves. return 1; // No additional input files } #endif #define PATHSIZE (1024) // Macro for identifying the STDIN input source #define STDIN "" //#define DEBUG #undef DEBUG // _: status report (to stderr) in DEBUG mode; null operation in non-DEBUG mode. #ifdef DEBUG #define _ fprintf(stderr, "%s(%d);\n", __FILE__, __LINE__) #else #define _ #endif // dump: print to stderr in debug mode, do nothing othersie #ifdef DEBUG #define dump(s) fprintf(stderr, s); fflush(stderr); #else #define dump(s) #endif // macros_file: output file containing the generated macros FILE *macros_file; /*--begin:stack--*/ // Stack: Generic stack type. // Operations: // size -- maximal stack size. // push, top, pop -- normal stack operations. // empty, full -- boolean queries. 
template class Stack { T buff[N]; int sp; public: Stack(): sp(0) {} unsigned size() const { return N; } bool empty() const { return sp <= 0; } bool full() const { return sp >= N; } void push(const T item) { buff[sp++] = item; } T top() const { return buff[sp-1]; } void pop() { sp--; } }; /*--end:stack--*/ // SIZEOF: return an array's size, in elements #define SIZEOF(a) (sizeof(a)/sizeof(a[0])) // class Language is defined below class Language; /*--begin:FileState--*/ // FileState: track an input file's state // The file state includes the current line number within the file (fileline), and // the current line number within the source block (line). This is important because // many source files include several source blocks, and their numbering is independent. // The fileline is much less important (used only for generating location-comments in // the macro file), and hence it "earned" the more cumbersome name. // // Operations: // nl - start a new line. // inc_pos - increment the position (column) within the current line. // new_block - indicate the beginning of a new block of source code within the file. // set_marker - used to indicate we're looking for a marked (named) range within the file. // error - generate error report to stderr. class FileState { public: const char * &name; const char * &tempname; const char * ▮ const int &line; const int &fileline; const int &pos; const bool inplace; bool echo_mode; Language * ⟨ FileState(const char *filename, const char *temp_filename, const bool inplace_mode): _name(strcpy(new char[strlen(filename)+1], filename)), _tempname(inplace_mode ? strcpy(new char[strlen(temp_filename)+1], temp_filename) : (char *)0), _marker((char *)0), _line(0), _pos(0), _fileline(1), _lang((Language *)0), name(_name), tempname(_tempname), marker(_marker), line(_line), fileline(_fileline), pos(_pos), lang(_lang), inplace(inplace_mode), echo_mode(inplace_mode) // by default, in inplace-mode we're handling LaTeX file; so ECHO is enabled. 
// but if we're not in inplace-mode, we're handling a source-code file -> no ECHO. { } ~FileState() { _; if (_name != (char *)0) { delete[] (char *)_name; } if (_tempname != (char *)0) { delete[] (char *)_tempname; } if (_marker != (char *)0) { delete[] (char *)_marker; } _; // lang is not allocated inside the class } void nl() { _line++; _fileline++; _pos = 0; } void set_marker(char *markername) { _marker = strcpy(new char[strlen(markername)+1], markername); } void set_lang(Language *new_lang) { _lang = new_lang; } void inc_pos(int inc) { _pos += inc; } void new_block() { _line = 0; _pos = 0; } void error(const char *fmt, int val = 0) const { (void) fprintf(stderr, "%s(%d): ", _name, _fileline); (void) fprintf(stderr, fmt, val); (void) fprintf(stderr, "\n"); } void error(const char *fmt, const char* val) const { (void) fprintf(stderr, "%s(%d): ", _name, _line); (void) fprintf(stderr, fmt, val); (void) fprintf(stderr, "\n"); } private: int _line; int _fileline; int _pos; const char *_name; const char *_tempname; const char *_marker; Language *_lang; }; // input: the FileState of currently-parsed file FileState *input = (FileState *)0; /*--end:FileState--*/ /*--begin:Sources--*/ // Sources: a global (static) class maintaining the stack of input files // being processed. For each input file, the stack keeps: // * yyout value, // * FileState, // * flex buffer state (YY_BUFFER_STATE) // Operations: // full, empty - stack queries. // push - stores the current state, and sets up the system for // processing a new input file. // pop - stop processing the current file (flushes the buffer), // and resume processing the previous file. 
class Sources { static Stack outputs; static Stack buffers; static Stack inputs; private: static FILE* open_file(const char* fname, bool inplace) { // attempt opening a file for input, ensuring it can be written // if "inplace" is true FILE* f = (FILE *)0; if (inplace) { f = fopen(fname, "r+"); // ensure it's writable if (f == (FILE *)0) { return f; // failed } (void) fclose(f); } f = fopen(fname, "r"); return f; } // make_temp: create a temporary file, storing its name in "buffer". // the temp file is stored in the same directory as "fname" is. // "buffer" must be large enough! static bool make_temp(char *buffer, const char *fname) { static const char template_base[] = "prog2tex.XXXXXX"; char *p; // set "buffer" to fname's path (sans filename): (void) strcpy(buffer, fname); p = strrchr(buffer,'/'); // last occurance of '/' in filename if (p != (char *)0) p++; // point to after last slash else p = buffer; // no slash, point to whole filename *p = '\0'; // trancate at p, removing the filename and leaving the path // append the template-base to the pathname (void) strcat(buffer, template_base); // create the temporary file, based on the template. // the resulting filename is stored in "buffer". (void) mkstemp(buffer); if (*buffer == '\0') { (void) fprintf(stderr, "mkstemp(%s) error\n", buffer); return false; } return true; } public: static bool full() { return buffers.full(); } static bool empty() { return buffers.empty(); } static bool push(const char *fname, const char *ext, bool inplace) { if (full()) { input->error("Include nested too deeply, max is %d", buffers.size()); return false; } char *tempname = (char *)0; FILE *fout = (FILE *)0; FILE *fin = (FILE *)0; char *in_fname = new char[strlen(fname) + strlen(ext) + 1]; strcpy(in_fname, fname); if (strcmp(fname, STDIN) != 0) { fin = open_file(fname, inplace); if (fin == (FILE *)0) { // open attempt failed. 
If we have an optional extension, we try opening again: if (ext != (char *)0 && strlen(ext) > 0) { char *fname2 = new char[strlen(fname) + strlen(ext) + 1]; strcpy(fname2, fname); strcat(fname2, ext); strcpy(in_fname, fname2); fin = open_file(fname2, inplace); } if (fin == (FILE *)0) { perror(fname); delete[] in_fname; return false; } } dump("CHANGED FILE: New input file: "); dump(in_fname); dump("\n"); } if (inplace) { tempname = new char[PATHSIZE]; // generate the temp file. The temp filename will be stored in "tempname". if (!make_temp(tempname, fname)) { fclose(fin); perror("Could not create temp file"); delete[] in_fname; delete[] tempname; return false; } // open the temp file for output fout = fopen(tempname, "w"); if (fout == (FILE *)0) { (void) fclose(fin); perror(tempname); delete[] in_fname; delete[] tempname; return false; } } _; buffers.push(YY_CURRENT_BUFFER); outputs.push(yyout); inputs.push(input); if (fin != (FILE *)0) { // it will remain (FILE *)0 if we're reading from stdin. In that case, no need to switch. yy_switch_to_buffer(yy_create_buffer(fin, YY_BUF_SIZE)); } input = new FileState(in_fname, tempname, inplace); if (inplace) { // if it's NOT inplace (i.e., a .cpp file, etc.), we stay with the same output file. yyout = fout; } if (tempname != (char *)0) { delete[] tempname; } if (in_fname != (char *)0) { delete[] in_fname; } return true; } static bool pop() { _; char bakname[PATHSIZE]; if (empty() || (input == (FileState *)0)) { // nothing to pop from! _; return false; } dump("POPPING FROM INPUT FILE "); dump(input->name); dump("\n"); _; // restore yyin (void) fclose(yyin); YY_BUFFER_STATE state = buffers.top(); if (state != (YY_BUFFER_STATE)0) { yy_switch_to_buffer(buffers.top()); } _; // restore yyout, if it was different (close the old one, too). FILE* new_yyout = outputs.top(); if (new_yyout != yyout) { // if we're popping to a different output file (i.e., from an included // LaTeX file), we should close the current one. 
if (yyout != (FILE *)0) { (void) fclose(yyout); } yyout = new_yyout; } // After closing the output file, replace the source file if we were doing // in-place editing if ((input->tempname != (char *)0) && input->inplace) { // we were inplace-editing. Time to replace the previous source // with the new one, stored in the file called (*tempname). // create a temp file for backup; filename stored in "bakname". // The file is instantly removed, since all we need is the filename. if (!make_temp(bakname, input->name)) { return false; } (void) remove(bakname); // keep the original input file in backup, as "bakname" if (rename(input->name, bakname) != 0) { perror(bakname); return false; } // rename the temp file (where the output is stored) so // it replaces the original file if (rename(input->tempname, input->name) != 0) { perror(bakname); return false; } // remove the temporary backup (void) remove(bakname); } _; // restore "input", the input FileState delete input; input = inputs.top(); _; // pop from all stacks: buffers.pop(); outputs.pop(); inputs.pop(); _; if (input != (FileState *)0) { _; dump("NOW BACK TO INPUT FILE "); dump(input->name); dump("\n"); } _; return true; } }; /*--end:Sources--*/ /*--begin:stack--*/ Stack Sources::outputs; Stack Sources::inputs; Stack Sources::buffers; /*--end:stack--*/ // echo, ECHO: override flex's default ECHO so it ensures yyout isn't NULL. // Note: if we're not in echo_mode mode, the input file is an included source file, // and not a LaTeX file; we should NOT be echoing its content to the "main" // source file (yyout). #define echo ((yyout != (FILE *)0) && (input->echo_mode) && (fwrite(yytext, yyleng, 1, yyout))) #define ECHO echo // SAVE: save a string to the macro file, advancing the column counter accordingly. Also // sends the string to yyout (echo). 
#define SAVE(s) _; (input->inc_pos(adds(s)), echo) // SAVE0, SAVE1, SAVE2: save a string to the macro file, advancing "pos" in // a limited manner (or not at all); SAVE1 and SAVE1 // also send the string to yyout. #define SAVE0(s) _; (adds(s)) #define SAVE1(s) _; (adds(s), input->inc_pos(1), echo) #define SAVE2(s) _; (adds(s), input->inc_pos(2), echo) // TSIZE: tab size (in spaces) #define TSIZE (4) // no_more_language_keywords: used in BNF mode static int no_more_language_keywords = 0; // the level of nesting in inlined source-code fragments int nested = 0; // adds: add a string to the macros_file static int adds(const char *s) { int len = strlen(s); fwrite(s, len, 1, macros_file); //fflush(macros_file); return len; } // addsNs: shortcut to adds(string); adds(int); adds(string) static void addsNs(char *s1,int n, char *s3) { char s2[20]; (void) sprintf(s2,"%d",n); adds(s1); adds(s2); adds(s3); } // put_id: add a macro-call to the macro output stream // Usage: put_id(macro_name, id_string) // Sample use: put_id("KW", "public") for generating "\KW{public}", etc. // Handles '_'s in the id_string correctly (so they do not result in // TeX subscript). static void put_id(const char *const macro, const char *s) { char buff[2]; adds("\\"); adds(macro); adds("{"); input->inc_pos(strlen(s)); for (;*s; s++) { if (*s == '_') adds("\\_"); else { sprintf(buff,"%c",*s); adds(buff); } } adds("}"); } // tab: emit enough spaces (\SP) to reach a tab position, then a \TAB. static void tab(void) { do { adds("\\SP "); input->inc_pos(1); } while (input->pos % TSIZE != 1); adds("\\TAB "); } // btab: emit enough bspaces (\BSP) to reach a tab position, then a \BTAB. static void btab(void) { do { adds("\\BSP "); input->inc_pos(1); } while (input->pos % TSIZE != 1); adds("\\BTAB "); } // nl: begin a new line static void nl(void) { echo; input->nl(); addsNs("\\NL{",input->line,"}\n"); } // normal(s): create a C-quoted string out of "s". 
// The result lives in a static buffer that is overwritten by the next call.
// Newline, backspace, form-feed, carriage-return and tab are replaced by their
// two-character escapes; everything else is copied as is. Input that would
// not fit in the buffer is truncated. A null "s" is returned unchanged.
char* normal(char *s) {
    static char buff[1024];

    if (s == (char*)0) {
        // handle null correctly
        return s;
    }

    char *p = buff;
    char *const limit = buff + sizeof buff - 1;    // keep room for the terminator
    for (; *s != '\0'; s++) {
        char esc = '\0';
        switch (*s) {
            case '\n': esc = 'n'; break;
            case '\b': esc = 'b'; break;
            case '\f': esc = 'f'; break;
            case '\r': esc = 'r'; break;
            case '\t': esc = 't'; break;
            default: break;
        }
        if (esc != '\0') {
            if (limit - p < 2)
                break;      // no room for a whole escape: truncate here
            *p++ = '\\';
            *p++ = esc;
        } else {
            if (limit - p < 1)
                break;
            *p++ = *s;
        }
    }
    *p = '\0';
    return buff;
}

static void yy_pop_state();
static void yy_push_state YY_PROTO(( int new_state ));

#undef _
#ifdef DEBUG
/*--begin:States--*/
// States: a debug class used for tracking the states stack.
// A single instance ("state") is used for implementing dBEGIN,
// dPUSH and dPOP while keeping track of the state stack (in
// DEBUG mode).
// NOTE(review): the template arguments of "names" are missing from this copy
// of the file; the depth of 64 is an assumption -- confirm against the original.
enum { STATE_NAMES_DEPTH = 64 };
class States {
public:
    const char *name;   // name of the current start condition
    Stack<const char *, STATE_NAMES_DEPTH> names;   // names of the pushed ones

    States(const char *s): name(s) {}

    void change(int state, const char *s) {
        printf("Changing from state %s to %s on '%s'\n", name, s, normal(yytext));
        fflush(stdout);
        name = s;
        BEGIN(state);
    }
    void push(int state, const char *s) {
        printf("Pushing state %s on top of %s on '%s'\n",s, name, normal(yytext));
        yy_push_state(state);
        names.push(name);
        name = s;
    }
    void pop() {
        printf("Popping from state %s to %s on '%s'\n", name, names.top(), normal(yytext));
        yy_pop_state();
        name = names.top();
        names.pop();
    }
};
States state("");
/*--end:States--*/

#define _ printf("%s(%d): found `%s' at line %d\n", state.name, __LINE__, normal(yytext), input->fileline);
#define dBEGIN_NAMED(s, name) state.change(s, name)
#define dBEGIN(s) dBEGIN_NAMED(s, #s)
#define dPUSH(s) state.push(s, #s)
#define dPOP() state.pop()
#else // non-DEBUG
// In non-DEBUG mode, dBEGIN, dPUSH and dPOP are simply the flex
// functions BEGIN, yy_push_state, and yy_pop_state.
#define _
#define dBEGIN_NAMED(s, name) BEGIN(s)
#define dBEGIN(s) BEGIN(s)
#define dPUSH(s) yy_push_state(s)
#define dPOP() yy_pop_state()
#endif

static void terminate_cpp();
static void terminate_bnf();
static void terminate_java();
static void begin_bnf();
static void end_bnf();
static void begin_cpp();
static void end_cpp();
static void begin_java();
static void end_java();

/*--begin:macro--*/
// Macro: bookkeeping for the \PROGxxx macros written to the macros file.
// All members are static; the class is never instantiated.
class Macro {
public:
    // open: start a new macro definition. Updates "count", the global
    // counter of macros generated.
    static void open(void) {
        count++;
        (void) fprintf(macros_file, "\\def\\%s{%% %s:%d\n",
                       PROG_macro_id(count), input->name, input->fileline);
    }

    // close: end the current macro definition.
    static void close(void) {
        fprintf(macros_file, "}%%\n");
    }

    // use: add (to yyout, the edited source file) a call to the macro.
    // Normally placed after the source-code block.
    static void use(void) {
        if (yyout != (FILE *)0)
            (void) fprintf(yyout, "\\%s{}", PROG_macro_id(count));
    }

private:
    // count: number of macros generated so far in the output file(s)
    static int count;

    // PROG_macro_id: returns a PROG macro identifier, based on
    // the macro's serial number: each decimal digit is replaced by a letter
    // (0 -> a ... 9 -> j), so 1 gives PROGb and 12 gives PROGbc.
    // The result lives in a static buffer, overwritten by the next call.
    static char *PROG_macro_id(int d) {
        static char buffer[20];
        sprintf(buffer, "PROG%d", d);
        for (char *s = buffer; *s != '\0'; s++)
            if (isdigit((unsigned char)*s))
                *s = (*s - '0') + 'a';
        return buffer;
    }
};
int Macro::count = 0;

// close_macro, use_macro: the function-style names under which the rest of
// the scanner closes and uses the current macro.
// NOTE(review): no other definition of these two names is visible in this
// part of the file -- confirm there is none further down.
static inline void close_macro(void) { Macro::close(); }
static inline void use_macro(void) { Macro::use(); }
/*--end:macro--*/
%}

/* White spaces, optional white spaces: */
WHITES      [ \t]+
OPT_WHITES  [ \t]*

/* Filename (for use in "INCLUDE" mode).
   A file is included like this: "\cppfile{filename.h}".
   A marked range within a file is included like this: "\javafile{Hello.java:Section3}".
*/
FILENAME    {OPT_WHITES}"{"{OPT_WHITES}[^ \t}]+{OPT_WHITES}"}"

/* Digit, ID, Java ID (including $), lower-case keyword (for BNF): */
DIGIT       [0-9]
ID          [A-Za-z_][_A-Za-z0-9]*
JID         [A-Za-z$_][_A-Za-z0-9$]*
LKEYWORD    [a-z][_A-Za-z0-9]*

/* Newline, Empty Line: */
NL          [\n]
EL          ^[ \t]*[\n]

/* Prog-Macro (in LaTeX source): */
PROG        \\PROG[a-j]+\{\}

/* C++ preprocessor directives: */
PREPROCESS  define|else|endif|if|ifndef|ifdef|undef|include|pragma|elif|line

/* C++ keywords, in different shapes and sizes: */
TYPES       void|bool|char|int|float|double|enum
SIZE        signed|unsigned|long|short
KIND        const|volatile|explicit|mutable
STORAGE     typedef|static|extern|auto|register
KEYWORDS1   {TYPES}|{SIZE}|{KIND}|{STORAGE}

MODIFIERS   const|volatile|inline|virtual|friend
STRUCTURE   class|struct|typename|union|template
CONTROL1    if|else|while|do|return|break|continue|for
CONTROL2    goto|switch|case|default|true|false
KEYWORDS2   {MODIFIERS}|{STRUCTURE}|{CONTROL1}|{CONTROL2}

OPERATOR    new|operator|delete|sizeof
EXCEPTION   throw|try|catch
OBSOLETE    asm|entry
VISIBILITY  private|public|protected|using|namespace
CASTS       const_cast|static_cast|dynamic_cast|reinterpret_cast
KEYWORD3    {OPERATOR}|{EXCEPTION}|{OBSOLETE}|{VISIBILITY}|{CASTS}

KW          {KEYWORDS1}|{KEYWORDS2}|{KEYWORD3}

/* Java keywords: */
JPRIMITIVES boolean|byte|char|double|int|float|long|short|void
JMODIFIERS1 abstract|final|native|private|protected|public|static
JMODIFIERS2 volatile|transient|synchronized|deprecated|strictfp
JCONSTANTS  null|true|false
JHEADERS    class|interface|package|implements|extends|throws|import
JKEYWORD1   {JPRIMITIVES}|{JMODIFIERS1}|{JMODIFIERS2}|{JCONSTANTS}|{JHEADERS}

JSTATEMENTS break|case|continue|switch|default|if|else|while|do|for|return|this|super
JEXCEPTIONS try|catch|finally|throw
JOPERATORS  instanceof|new
JRESERVED   byvalue|const|goto|cast|future|generic|inner|outer|rest|var
JKEYWORD2   {JSTATEMENTS}|{JEXCEPTIONS}|{JOPERATORS}|{JRESERVED}

JKW         {JKEYWORD1}|{JKEYWORD2}

/* Keywords inside javadoc comments (e.g., "@param"): */
DOC_KEYWORD "@"{ID}

/* BNF Keywords: */
BTYPES      INTEGER|REAL|BOOLEAN|CHARACTER|STRING|OK
BMODULES    GRAMMAR|MODULE|SYNTAX|TYPES|END|USING|PARSE|USE|EMBED
BDATA       FEATURES|RETURN|TEMP
BOTHERKW    OF|ERROR|STATIC|DYNAMIC|PRIMITIVES|NOCASE
BPRIORITIES PRIORITIES|LEFT|RIGHT
BDICTIONARY DICTIONARY|INSERT|SEARCH|FERRET
BKW         {BTYPES}|{BMODULES}|{BDATA}|{BOTHERKW}|{BDICTIONARY}|{BPRIORITIES}

/* BNF variable, starting in uppercase letter: */
BVAR        [A-Z][_A-Za-z0-9]*

/* LaTeX pseudo-macros used by prog2tex: */
END          "\\END"
BEGIN_CPP    "\\CPP"
BEGIN_CPP1   "\\cpp{"
END_CPP      "\\ECPP"
BEGIN_BNF    "\\BNF"
BEGIN_BNF1   "\\bnf{"
END_BNF      "\\EBNF"
BEGIN_JAVA   "\\JAVA"
BEGIN_JAVA1  "\\java{"
END_JAVA     "\\EJAVA"
CPP_INCLUDE  "\\CPPFILE"|"\\cppfile"
BNF_INCLUDE  "\\BNFFILE"|"\\bnffile"
JAVA_INCLUDE "\\JAVAFILE"|"\\javafile"

/* Real LaTeX macros used by prog2tex: */
LATEX_INCLUDE "\\include"|"\\input"

/* Begin/end marked (name) range within a source file: */
MARK_BEGIN  "/*--begin:"{ID}"--*/"
MARK_END    "/*--end:"{ID}"--*/"

%{
// Language: abstract class used for language definitions
// Used only for included source files, not for code
// fragments (neither normal nor inlined).
//
// begin_file() pushes the language's start condition (set_file_state) and
// opens a macro holding a \begin{@NAME} line; end_file() writes the matching
// \end{@NAME}, closes and uses the macro, and pops the start condition.
class Language {
public:
    // Subclasses are used through Language pointers, so destruction
    // through the base class must be well defined.
    virtual ~Language() {}

    virtual void begin_file() {
        set_file_state();
        begin_code();
    }
    virtual void end_file() {
        end_code();
        dPOP();
    }
    virtual char *get_default_extension() { return ext(); }

protected:
    // set_file_state: push the start condition of the language
    virtual void set_file_state() = 0;

    virtual void begin_code() {
        Macro::open();
        SAVE0("\\begin{@");
        SAVE0(name());
        SAVE0("}%\n");
    }
    virtual void end_code() {
        SAVE0("\\end{@");
        SAVE0(name());
        SAVE0("}%\n");
        close_macro();
        use_macro();
    }

    virtual char *name() = 0;   // language name
    virtual char *ext() = 0;    // default filename extension (including the ".")
};

// next_lang: used for temporary keeping record of the language of the file to be included.
Language* next_lang = (Language *)0; // Cpp: Language subclass for C++ class Cpp: public Language { public: virtual void set_file_state() { _; dPUSH(CPP); } virtual char *name() { return "CPP"; } virtual char *ext() { return ".cpp"; } }; // Singleton instance of Cpp: Cpp lang_cpp; // Bnf: Language subclass for BNF class Bnf: public Language { public: virtual void set_file_state() { _; dPUSH(BNF); } virtual char *name() { return "BNF"; } virtual char *ext() { return ".bnf"; } virtual void begin_code() { Language::begin_code(); no_more_language_keywords = 0; } }; // Singleton instance of Bnf: Bnf lang_bnf; // Java: Language subclass for Java class Java: public Language { virtual void set_file_state() { _; dPUSH(JAVA); } virtual char *name() { return "JAVA"; } virtual char *ext() { return ".java"; } }; // Singleton instance of Java: Java lang_java; %} %% {CPP_INCLUDE} { // begin macro for including a CPP file dBEGIN(INCLUDE); echo; next_lang = &lang_cpp; } {BNF_INCLUDE} { // begin macro for including a BNF file dBEGIN(INCLUDE); echo; next_lang = &lang_bnf; } {JAVA_INCLUDE} { // begin macro for including a Java file dBEGIN(INCLUDE); echo; next_lang = &lang_java; } {LATEX_INCLUDE} { // begin macro for including a TeX file dBEGIN(INCLUDE); echo; next_lang = (Language *)0; } {FILENAME} { // encountered filename in file-include mode _ echo; dBEGIN(LATEX); // quit the INCLUDE mode, so when we close the file, we'll pop to LATEX mode. 
char *temp = new char[strlen(yytext)+1]; strcpy(temp,yytext); char *included_name = strtok(yytext,"{ \t}"); // tokenize by removing non-filename characters char *marker_name = (char *)0; // if the filename is of the format "filename:markname", locate the marker name: char *colon = strchr(included_name, ':'); // locate the colon if (colon != (char *)0) { *colon = '\0'; // terminate the string marker_name = colon + 1; // the marker name begins on the next char } char *ext; bool inplace; if (next_lang != (Language *)0) { ext = next_lang->get_default_extension(); inplace = false; } else { ext = ".tex"; // LaTeX extension inplace = true; } Sources::push(included_name, ext, inplace); input->set_lang(next_lang); next_lang = (Language *)0; if (marker_name != (char *)0) { // we're looking for a marker dPUSH(MARKER); input->set_marker(marker_name); } else if (input->lang != (Language *)0) { input->lang->begin_file(); // Also PUSHes the language state } delete[] temp; } . { // filename not found in file-include mode _; input->error("Missing included file name"); dBEGIN(LATEX); // quit INCLUDE mode yyless(0); // forget about reading this character. } {MARK_BEGIN} { // marker found -- is this the one we're looking for? _; // make p point to the actual marker's name within yytext char *p = yytext + strlen("/*--begin:"); char *mark_name = new char[strlen(p) + 1]; strcpy(mark_name, p); // now mark_name is the marker + the trailing "--*/". Trancate that part. mark_name[strlen(mark_name) - strlen("--*/")] = '\0'; if (strcmp(mark_name, input->marker) == 0) { dPOP(); // quit MARKER mode input->lang->begin_file(); // PUSHes language mode } delete[] mark_name; } {MARK_END} { // end-of-marker found -- does this interest us? 
_; if (input->marker == (char *)0) { // we're not within a marker range REJECT; } // make p point to the actual marker's name within yytext char *p = yytext + strlen("/*--end:"); char *mark_name = new char[strlen(p) + 1]; strcpy(mark_name, p); // now mark_name is the marker + the trailing "--*/". Trancate that part. mark_name[strlen(mark_name) - strlen("--*/")] = '\0'; if (strcmp(mark_name, input->marker) != 0) { delete[] mark_name; REJECT; // no match; forget we ever saw this } input->lang->end_file(); // POP language mode dPUSH(MARKER); // resume MARKER mode delete[] mark_name; } <*><> { // End-Of-File, in any state _; // if the current state is MARKER, we have to pop it. if (YY_START == MARKER) { dPOP(); } else if (input->lang != (Language *)0) { while (YY_START == LATEX_NOTE || YY_START == NOTE || YY_START == COMMENT || YY_START == BCOMMENT || YY_START == LATEX_COMMENT || YY_START == DOC_COMMENT || YY_START == ALIGN || YY_START == STRING || YY_START == BSTRING || YY_START == REGEXP || YY_START == CHAR) { // if the file ended inside a comment, a string, etc. (possibly nested) end it gracefully: switch (YY_START) { case LATEX_NOTE: case NOTE: SAVE0("\\ENDNOTE "); break; case LATEX_COMMENT: case DOC_COMMENT: case COMMENT: SAVE2("\\ENDCOMMENT "); break; case BCOMMENT: SAVE0("\\ENDBCOMMENT "); break; case REGEXP: SAVE1("\\EREGEXP "); break; case STRING: case BSTRING: SAVE1("\\ESTRING "); break; case CHAR: SAVE1("\\ECHAR "); break; } dPOP(); } input->lang->end_file(); // we're popping from a source file. Also POPs the state stack } if (Sources::empty()) { // We're on the top-level file. This means we're done! return 0; } if (!Sources::pop() || (input == (FileState *)0)) { return 0; } } {PROG} { // a \PROG macro, "implanted" by a previous run of the program. 
_; /* Discard */ } {BEGIN_CPP} { // \CPP inside the LaTeX source echo; input->new_block(); dBEGIN(CPP); Macro::open(); begin_cpp(); } {BEGIN_BNF} { // \BNF inside the LaTeX source no_more_language_keywords = 0; echo; input->new_block(); dBEGIN(BNF); Macro::open(); begin_bnf(); } {BEGIN_JAVA} { // \JAVA inside the LaTeX source echo; input->new_block(); dBEGIN(JAVA); Macro::open(); begin_java(); } {BEGIN_CPP1} { // \cpp{...} echo; input->new_block(); dPUSH(CPP1); Macro::open(); begin_cpp(); } {BEGIN_BNF1} { // \bnf{...} no_more_language_keywords = 0; echo; input->new_block(); dPUSH(BNF1); Macro::open(); begin_bnf(); } {BEGIN_JAVA1} { // \java{...} echo; input->new_block(); dPUSH(JAVA1); Macro::open(); begin_java(); } \n { // end-of-line in the LaTeX source _; input->nl(); echo; } . { // any other (non-special) character in LaTeX: just store in output _; echo; } "{" { // Open-curlies in "inlined" source-code SAVE1("\\{"); nested++; } "}" { // Close-curlies in "inlined" C++ _; terminate_cpp(); } "}" { // Close-curlies in "inlined" BNF _; terminate_bnf(); } "}" { // Close-curlies in "inlined" Java _; terminate_java(); } {BEGIN_BNF1} { // inlined BNF inside C++ code _; echo; end_cpp(); begin_bnf(); dPUSH(BNF1); } {BEGIN_CPP1} { // inlined C++ inside BNF code _; echo; end_bnf(); begin_cpp(); dPUSH(CPP1); } "..." 
{ // ldots in BNF notation SAVE1("\\ldots"); } "<" { // begin REGEXP mode in BNF notation SAVE2("\\REGEXP "); dPUSH(REGEXP); } ">" { // end REGEXP mode (return to BNF) SAVE1("\\EREGEXP "); dPOP(); } /* characters in source code that need special handling for LaTeX: */ "-" SAVE1("{-}\\relax "); "{" SAVE1("\\{"); // in non-inlined mode only "}" SAVE1("\\}"); // in non-inlined mode only "%" SAVE1("\\%"); "&" SAVE1("\\&"); "#" SAVE1("\\#"); "_" SAVE1("\\_"); "\$" SAVE1("\\$"); "~" SAVE1("\\textasciitilde "); "^" SAVE1("\\textasciicircum "); "|" SAVE1("\\textbar "); "<" SAVE1("\\textless "); ">" SAVE1("\\textgreater "); "\\" SAVE1("\\textbackslash "); " " SAVE1("\\SP "); "\t" _; echo; tab(); "\\>" { SAVE2("\\textbackslash\\textgreater"); } "\\\"" { SAVE2("\\textbackslash\""); } "\\\'" { SAVE2("\\textbackslash\'"); } "\\\\" { SAVE2("\\textbackslash\\textbackslash "); } {EL} { // empty line _; echo; input->nl(); addsNs("\\EMPTYLINE{", input->line, "}"); } {NL} { // new line _; nl(); } {KW} { // C++ keyword _; echo; put_id("KW",yytext); } {JKW} { // Java keyword _; echo; put_id("KW",yytext); } {LKEYWORD}/(({OPT_WHITES}[\:\.\[\(\?])|({WHITES}"OF")) { _; echo, put_id("ID", yytext); } "@"{LKEYWORD} { _; echo; put_id("ID", yytext); } {BKW} { // BNF keyword _; echo; if (strcmp(yytext,"FEATURES") == 0) { no_more_language_keywords = 1; } if (strcmp(yytext,"END") == 0) { no_more_language_keywords = 0; } put_id("BKW",yytext); } {ID} { // C++ identifier _; echo; put_id("ID", yytext); } {JID} { // Java identifier _; echo; put_id("ID", yytext); } -> { // "arrowhead" _; SAVE2("\\der"); } {BVAR} { // BNF variable _; echo; put_id("VAR", yytext); } {LKEYWORD} { // BNF lowercase keyword _; echo; put_id( no_more_language_keywords ? 
"ID" : "LKEYWORD", yytext ); } "/**" { // begin a javadoc comment _; SAVE2("\\COMMENT "); dPUSH(DOC_COMMENT); dPUSH(ALIGN); } "/*" { // begin a block-comment _; SAVE2("\\COMMENT "); dPUSH(COMMENT); dPUSH(ALIGN); } "/*\{\}" { // begin a block-comment with LaTeX formatting support _; SAVE2("\\LTCOMMENT "); dPUSH(LATEX_COMMENT); dPUSH(ALIGN); } "//" { // begin a single-line comment _; SAVE2("\\NOTE "); dPUSH(NOTE); } "//\{\}" { // begin a single-line comment with LaTeX formatting support _; SAVE2("\\NOTE "); dPUSH(LATEX_NOTE); } "--" { // begin a BNF single-line comment _; SAVE2("\\BCOMMENT "); dPUSH(BCOMMENT); } \" { // open a string in C++/Java _; SAVE2("\\STRING "); dPUSH(STRING); } \" { // open string in BNF mode _; SAVE2("\\STRING "); dPUSH(BSTRING); } \' { // open CHAR mode in C++/Java _; SAVE2("\\CHAR "); dPUSH(CHAR); } {END} { // end C++ mode _; echo; end_cpp(); close_macro(); use_macro(); dBEGIN(LATEX); } {END} { // end BNF mode _; echo; end_bnf(); close_macro(); use_macro(); dBEGIN(LATEX); } {END} { // end Java mode _; echo; end_java(); close_macro(); use_macro(); dBEGIN(LATEX); } . { // any other char in code-mode _; echo; adds(yytext); } \" { // close a string in C++/Java/BNF mode _; SAVE1("\\ESTRING "); dPOP(); } \' { // end CHAR mode _; SAVE1("\\ECHAR "); dPOP(); } \\\' SAVE2("\\textbackslash\'"); // quoted character in CHAR mode . SAVE1(yytext); // any other character in string/char/regexp modes {NL} { // new line in block-comment mode: return to ALIGN mode _; nl(); dPUSH(ALIGN); } {DOC_KEYWORD} { // keyword (such as "@param") inside a javadoc comment _; put_id("JDOC", yytext); echo; } [ ]\" { _ SAVE2("\\BSP``"); } [\t]\" { _ btab(); SAVE1("``"); } ^\" { _ SAVE1("``"); } [ ]\' { _ SAVE2("\\BSP`"); } [\t]\' { _ btab(); SAVE1("`"); } ^\" { _ SAVE1("`"); } . 
{ _ SAVE1(yytext); } " " { _ SAVE1("\\BSP "); } "\t" { _ btab(); echo; } "*" { _ SAVE1("\\BSTAR "); } {NL} { _ nl(); _; } "*/" { // comment ended in ALIGN mode _; dPOP(); // resume previous mode yyless(0); // and let it handle things } . { // any other character indicates end-of-mode for ALIGN _; // (this includes {END} inside a comment). dPOP(); // resume previous mode yyless(0); // and let it handle things } "*/" { // end-of-comment _; SAVE2("\\ENDCOMMENT "); dPOP(); } {NL} { // BNF comment ends at end-of-line SAVE0("\\ENDBCOMMENT "); nl(); dPOP(); } {NL} { // one-line comment ends at end-of-line SAVE0("\\ENDNOTE "); nl(); dPOP(); } . SAVE1(yytext); // in LaTeX-comments mode, we just pass the characters "as are". {NL} _; nl(); _; %% #include // process: run normal yylex() processing, after flushing stdout and // setting the start-condition to the given state. bool process(int statenum, char *statename) { yytext = (char *)0; fflush(stdout); dBEGIN_NAMED(statenum, statename); fflush(stdout); return (yylex() == 0); } bool inplace(char *fname) { Sources::push(fname, ".tex", true); // "true" indicates in-place editing return process(LATEX, "LATEX"); } // create_macros_file: given a .tex filename, initializes macros_file as the // macros (.prg) file. // If the given filename is "x.tex", the macros file is called "x.prg"; otherwise, // for filename "x", it is called "x.prg". bool create_macros_file(const char *fname) { static char macros_fname[PATHSIZE]; if (strcmp(&fname[strlen(fname)-4], ".tex") == 0) { strcpy(macros_fname, fname); macros_fname[strlen(fname)-4] = '\0'; (void) sprintf(macros_fname, "%s.prg", macros_fname); } else (void) sprintf(macros_fname, "%s.prg", fname); macros_file = fopen(macros_fname, "w"); if (macros_file == (FILE *)0) { perror(macros_fname); return false; } return true; } // sprocess: start processing, given input and output file names. // input filename is optional, if not given "stdin" is used. 
bool sprocess(char *fin, char *fout) { bool result; // if no input or output files were specified, begin working. // We'll be using stdin for input and stdout (yyout) for output. if (fin == (char *)0 && fout == (char *)0) { dump("No input or output filenames specified."); create_macros_file("stdin"); Sources::push(STDIN, "", false); // no in-place editing input->echo_mode = true; // but we do want ECHO enabled result = process(LATEX, "LATEX"); fclose(macros_file); return result; } // if both input and output filenames were specified, set up // yyin and yyout accordingly, and begin processing. if (fin != (char *)0 && fout != (char *)0) { yyout = fopen(fout, "w"); if (yyout == (FILE *)0) { perror(fout); return false; } Sources::push(fin, ".tex", false); // no in-place editing input->echo_mode = true; // but we do want ECHO enabled create_macros_file(fin); result = process(LATEX, "LATEX"); (void) fclose(yyout); (void) fclose(macros_file); return result; } // we have an output filename, but no input filename. // process in-place based on output filename. // ** I believe this situation to be impossible in the current setup (TC) if (fout != (char *)0) { dump("Output filename, but no input filename."); create_macros_file(fout); result = inplace(fout); fclose(macros_file); return result; } // we have an input filename, but no output filename (normal run mode). // process in-place based on input filename. create_macros_file(fin); result = inplace(fin); fclose(macros_file); return result; } // begin_, end_: store LaTeX \begin or \end commands in the macro file, for // scope delimiting. void begin_bnf(void) { SAVE0("\\begin{@BNF}%\n"); } void end_bnf(void) { SAVE0("\\end{@BNF}%\n"); } void begin_cpp(void) { SAVE0("\\begin{@CPP}%\n"); } void end_cpp(void) { SAVE0("\\end{@CPP}%\n"); } void begin_java(void) { SAVE0("\\begin{@JAVA}%\n"); } void end_java(void) { SAVE0("\\end{@JAVA}%\n"); } // terminate_: called when close-curlies is encounted in "inlined" code fragments. 
// This is needed so we can find out if the curley closed belongs to // the inlined code, or to the enclosing environment (e.g., in cases // like: "\cpp{int f() { return this; }}"). // // NOTE: BNF and C++ environments can be nested within each other. Java environments // cannot be nested in either. static void terminate_cpp(void) { if (nested > 0) { // the close-curley belongs to the inlined C++. Just store it // in the macro file (and in yyout). nested--; SAVE1("\\}"); return; } // the close-curley belongs to the environment. echo; // send to yyout end_cpp(); // close the C++ fragment if (yy_top_state() == BNF || yy_top_state() == BNF1) { // we're returning to a BNF environment. begin_bnf(); } else { // we're returning to the external LaTeX environment. close_macro(); use_macro(); } dPOP(); // quit CPP1 mode } static void terminate_bnf(void) { if (nested > 0) { // the close-curley belongs to the inlined BNF. Just store it // in the macro file (and in yyout). nested--; SAVE1("\\}"); return; } // the close-curley belongs to the environment. echo; // send to yyout end_bnf(); // close the BNF fragment if (yy_top_state() == CPP || yy_top_state() == CPP1) { // we're returning to a C++ environment. begin_cpp(); } else { // we're returning to the external LaTeX environment. close_macro(); use_macro(); } dPOP(); // quit BNF1 mode } static void terminate_java() { if (nested > 0) { // the close-curley belongs to the inlined Java. Just store it // in the macro file (and in yyout). nested--; SAVE1("\\}"); return; } // the close-curley belongs to the environment. 
echo; // send to yyout end_java(); // close the Java fragment close_macro(); use_macro(); dPOP(); // quit JAVA1 mode } // main: usage: prog2tex [infile [outfile]] int main(int argc, char *argv[]) { char *fin = (char *)0; char *fout = (char *)0; switch (argc) { case 3: fout = argv[2]; /* Fall through */ case 2: fin = argv[1]; case 1: break; default: fprintf(stderr, "Usage: %s [ infile [outfile] ]\n",*argv); return 1; } if (!sprocess(fin, fout)) { return 1; } return 0; }