From 907aabb6b0f5863c8d97847851d8f4082dfebf8b Mon Sep 17 00:00:00 2001
From: John Kessenich <cepheus@frii.com>
Date: Tue, 20 Dec 2016 21:47:30 -0700
Subject: [PATCH] PP: Non-functional: Only use string <-> atom mapping when
 needed.

Also, eliminate the 'atom' field of TPpToken.

Parsing a real 300 line shader, through to making the AST, is about 10% faster.

Memory is slightly reduced (< 1%).

The whole google-test suite, inclusive of all testing overhead, SPIR-V generation,
etc., runs 3% faster.

Since this is a code *simplification* that leads to perf. improvement, I'm not
going to invest too much more in measuring the perf. than this. The PP code is
simply now in a better state to see how to further rationalize/improve it.
---
 glslang/Include/revision.h                    |  2 +-
 .../MachineIndependent/preprocessor/Pp.cpp    | 93 ++++++++++---------
 .../preprocessor/PpAtom.cpp                   | 25 +++--
 .../preprocessor/PpContext.h                  |  4 +-
 .../preprocessor/PpScanner.cpp                | 11 +--
 .../preprocessor/PpTokens.cpp                 |  1 -
 .../preprocessor/PpTokens.h                   |  1 -
 7 files changed, 74 insertions(+), 63 deletions(-)

diff --git a/glslang/Include/revision.h b/glslang/Include/revision.h
index 3bef93e4..68b970b3 100644
--- a/glslang/Include/revision.h
+++ b/glslang/Include/revision.h
@@ -2,5 +2,5 @@
 // For the version, it uses the latest git tag followed by the number of commits.
 // For the date, it uses the current date (when then script is run).
 
-#define GLSLANG_REVISION "Overload400-PrecQual.1713"
+#define GLSLANG_REVISION "Overload400-PrecQual.1715"
 #define GLSLANG_DATE "20-Dec-2016"
diff --git a/glslang/MachineIndependent/preprocessor/Pp.cpp b/glslang/MachineIndependent/preprocessor/Pp.cpp
index 2615f9a1..c96503e6 100644
--- a/glslang/MachineIndependent/preprocessor/Pp.cpp
+++ b/glslang/MachineIndependent/preprocessor/Pp.cpp
@@ -93,7 +93,7 @@ int TPpContext::CPPdefine(TPpToken* ppToken)
 {
     MacroSymbol mac;
 
-    // get macro name
+    // get the macro name
     int token = scanToken(ppToken);
     if (token != PpAtomIdentifier) {
         parseContext.ppError(ppToken->loc, "must be followed by macro name", "#define", "");
@@ -104,8 +104,8 @@ int TPpContext::CPPdefine(TPpToken* ppToken)
         parseContext.reservedPpErrorCheck(ppToken->loc, ppToken->name, "#define");
     }
 
-    // save the original macro name
-    const int defAtom = ppToken->atom;
+    // save the macro name
+    const int defAtom = LookUpAddString(ppToken->name);
 
     // gather parameters to the macro, between (...)
     token = scanToken(ppToken);
@@ -121,17 +121,19 @@ int TPpContext::CPPdefine(TPpToken* ppToken)
                 return token;
             }
             mac.emptyArgs = 0;
+            const int argAtom = LookUpAddString(ppToken->name);
+
             // check for duplication of parameter name
             bool duplicate = false;
             for (size_t a = 0; a < mac.args.size(); ++a) {
-                if (mac.args[a] == ppToken->atom) {
+                if (mac.args[a] == argAtom) {
                     parseContext.ppError(ppToken->loc, "duplicate macro parameter", "#define", "");
                     duplicate = true;
                     break;
                 }
             }
             if (! duplicate)
-                mac.args.push_back(ppToken->atom);
+                mac.args.push_back(argAtom);
             token = scanToken(ppToken);
         } while (token == ',');
         if (token != ')') {
@@ -199,7 +201,7 @@ int TPpContext::CPPundef(TPpToken* ppToken)
 
     parseContext.reservedPpErrorCheck(ppToken->loc, ppToken->name, "#undef");
 
-    MacroSymbol* macro = lookupMacroDef(ppToken->atom);
+    MacroSymbol* macro = lookupMacroDef(LookUpString(ppToken->name));
     if (macro != nullptr)
         macro->undef = 1;
     token = scanToken(ppToken);
@@ -234,13 +236,13 @@ int TPpContext::CPPelse(int matchelse, TPpToken* ppToken)
         if ((token = scanToken(ppToken)) != PpAtomIdentifier)
             continue;
 
-        int atom = ppToken->atom;
-        if (atom == PpAtomIf || atom == PpAtomIfdef || atom == PpAtomIfndef) {
+        int nextAtom = LookUpString(ppToken->name);
+        if (nextAtom == PpAtomIf || nextAtom == PpAtomIfdef || nextAtom == PpAtomIfndef) {
             depth++; 
             ifdepth++; 
             elsetracker++;
-        } else if (atom == PpAtomEndif) {
-            token = extraTokenCheck(atom, ppToken, scanToken(ppToken));
+        } else if (nextAtom == PpAtomEndif) {
+            token = extraTokenCheck(nextAtom, ppToken, scanToken(ppToken));
             elseSeen[elsetracker] = false;
             --elsetracker;
             if (depth == 0) {
@@ -252,12 +254,12 @@ int TPpContext::CPPelse(int matchelse, TPpToken* ppToken)
             --depth;
             --ifdepth;
         } else if (matchelse && depth == 0) {
-            if (atom == PpAtomElse) {
+            if (nextAtom == PpAtomElse) {
                 elseSeen[elsetracker] = true;
-                token = extraTokenCheck(atom, ppToken, scanToken(ppToken));
+                token = extraTokenCheck(nextAtom, ppToken, scanToken(ppToken));
                 // found the #else we are looking for
                 break;
-            } else if (atom == PpAtomElif) {
+            } else if (nextAtom == PpAtomElif) {
                 if (elseSeen[elsetracker])
                     parseContext.ppError(ppToken->loc, "#elif after #else", "#elif", "");
                 /* we decrement ifdepth here, because CPPif will increment
@@ -270,13 +272,13 @@ int TPpContext::CPPelse(int matchelse, TPpToken* ppToken)
 
                 return CPPif(ppToken);
             }
-        } else if (atom == PpAtomElse) {
+        } else if (nextAtom == PpAtomElse) {
             if (elseSeen[elsetracker])
                 parseContext.ppError(ppToken->loc, "#else after #else", "#else", "");
             else
                 elseSeen[elsetracker] = true;
-            token = extraTokenCheck(atom, ppToken, scanToken(ppToken));
-        } else if (atom == PpAtomElif) {
+            token = extraTokenCheck(nextAtom, ppToken, scanToken(ppToken));
+        } else if (nextAtom == PpAtomElif) {
             if (elseSeen[elsetracker])
                 parseContext.ppError(ppToken->loc, "#elif after #else", "#elif", "");
         }
@@ -286,21 +288,21 @@ int TPpContext::CPPelse(int matchelse, TPpToken* ppToken)
 }
 
 // Call when there should be no more tokens left on a line.
-int TPpContext::extraTokenCheck(int atom, TPpToken* ppToken, int token)
+int TPpContext::extraTokenCheck(int contextAtom, TPpToken* ppToken, int token)
 {
     if (token != '\n' && token != EndOfInput) {
         static const char* message = "unexpected tokens following directive";
 
         const char* label;
-        if (atom == PpAtomElse)
+        if (contextAtom == PpAtomElse)
             label = "#else";
-        else if (atom == PpAtomElif)
+        else if (contextAtom == PpAtomElif)
             label = "#elif";
-        else if (atom == PpAtomEndif)
+        else if (contextAtom == PpAtomEndif)
             label = "#endif";
-        else if (atom == PpAtomIf)
+        else if (contextAtom == PpAtomIf)
             label = "#if";
-        else if (atom == PpAtomLine)
+        else if (contextAtom == PpAtomLine)
             label = "#line";
         else
             label = "";
@@ -388,7 +390,7 @@ int TPpContext::eval(int token, int precedence, bool shortCircuit, int& res, boo
 {
     TSourceLoc loc = ppToken->loc;  // because we sometimes read the newline before reporting the error
     if (token == PpAtomIdentifier) {
-        if (ppToken->atom == PpAtomDefined) {
+        if (strcmp("defined", ppToken->name) == 0) {
             bool needclose = 0;
             token = scanToken(ppToken);
             if (token == '(') {
@@ -403,7 +405,7 @@ int TPpContext::eval(int token, int precedence, bool shortCircuit, int& res, boo
                 return token;
             }
 
-            MacroSymbol* macro = lookupMacroDef(ppToken->atom);
+            MacroSymbol* macro = lookupMacroDef(LookUpString(ppToken->name));
             res = macro != nullptr ? !macro->undef : 0;
             token = scanToken(ppToken);
             if (needclose) {
@@ -496,7 +498,7 @@ int TPpContext::eval(int token, int precedence, bool shortCircuit, int& res, boo
 // Expand macros, skipping empty expansions, to get to the first real token in those expansions.
 int TPpContext::evalToToken(int token, bool shortCircuit, int& res, bool& err, TPpToken* ppToken)
 {
-    while (token == PpAtomIdentifier && ppToken->atom != PpAtomDefined) {
+    while (token == PpAtomIdentifier && strcmp("defined", ppToken->name) != 0) {
         int macroReturn = MacroExpand(ppToken, true, false);
         if (macroReturn == 0) {
             parseContext.ppError(ppToken->loc, "can't evaluate expression", "preprocessor evaluation", "");
@@ -555,7 +557,7 @@ int TPpContext::CPPifdef(int defined, TPpToken* ppToken)
         else 
             parseContext.ppError(ppToken->loc, "must be followed by macro name", "#ifndef", "");
     } else {
-        MacroSymbol* macro = lookupMacroDef(ppToken->atom);
+        MacroSymbol* macro = lookupMacroDef(LookUpString(ppToken->name));
         token = scanToken(ppToken);
         if (token != '\n') {
             parseContext.ppError(ppToken->loc, "unexpected tokens following #ifdef directive - expected a newline", "#ifdef", "");
@@ -765,9 +767,10 @@ int TPpContext::CPPversion(TPpToken* ppToken)
         parseContext.notifyVersion(line, versionNumber, nullptr);
         return token;
     } else {
-        if (ppToken->atom != PpAtomCore &&
-            ppToken->atom != PpAtomCompatibility &&
-            ppToken->atom != PpAtomEs)
+        int profileAtom = LookUpString(ppToken->name);
+        if (profileAtom != PpAtomCore &&
+            profileAtom != PpAtomCompatibility &&
+            profileAtom != PpAtomEs)
             parseContext.ppError(ppToken->loc, "bad profile name; use es, core, or compatibility", "#version", "");
         parseContext.notifyVersion(line, versionNumber, ppToken->name);
         token = scanToken(ppToken);
@@ -828,7 +831,7 @@ int TPpContext::readCPPline(TPpToken* ppToken)
     int token = scanToken(ppToken);
 
     if (token == PpAtomIdentifier) {
-        switch (ppToken->atom) {
+        switch (LookUpString(ppToken->name)) {
         case PpAtomDefine:
             token = CPPdefine(ppToken);
             break;
@@ -913,17 +916,21 @@ int TPpContext::readCPPline(TPpToken* ppToken)
 // Returns nullptr if no expanded argument is created.
 TPpContext::TokenStream* TPpContext::PrescanMacroArg(TokenStream& arg, TPpToken* ppToken, bool newLineOkay)
 {
+    // pre-check, to see if anything in the argument needs to be expanded,
+    // to see if we can kick out early
     int token;
     RewindTokenStream(arg);
     do {
         token = ReadToken(arg, ppToken);
-        if (token == PpAtomIdentifier && lookupMacroDef(ppToken->atom) != nullptr)
+        if (token == PpAtomIdentifier && lookupMacroDef(LookUpString(ppToken->name)) != nullptr)
             break;
     } while (token != EndOfInput);
 
+    // if nothing needs to be expanded, kick out early
     if (token == EndOfInput)
         return nullptr;
 
+    // expand the argument
     TokenStream* expandedArg = new TokenStream;
     pushInput(new tMarkerInput(this));
     pushTokenStreamInput(arg);
@@ -985,7 +992,7 @@ int TPpContext::tMacroInput::scan(TPpToken* ppToken)
     if (token == PpAtomIdentifier) {
         int i;
         for (i = mac->args.size() - 1; i >= 0; i--)
-            if (mac->args[i] == ppToken->atom) 
+            if (strcmp(pp->GetAtomString(mac->args[i]), ppToken->name) == 0)
                 break;
         if (i >= 0) {
             TokenStream* arg = expandedArgs[i];
@@ -1053,7 +1060,8 @@ int TPpContext::tZeroInput::scan(TPpToken* ppToken)
 int TPpContext::MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOkay)
 {
     ppToken->space = false;
-    switch (ppToken->atom) {
+    int macroAtom = LookUpString(ppToken->name);
+    switch (macroAtom) {
     case PpAtomLineMacro:
         ppToken->ival = parseContext.getCurrentLoc().line;
         snprintf(ppToken->name, sizeof(ppToken->name), "%d", ppToken->ival);
@@ -1079,7 +1087,7 @@ int TPpContext::MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOka
         break;
     }
 
-    MacroSymbol* macro = lookupMacroDef(ppToken->atom);
+    MacroSymbol* macro = macroAtom == 0 ? nullptr : lookupMacroDef(macroAtom);
     int token;
     int depth = 0;
 
@@ -1101,7 +1109,6 @@ int TPpContext::MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOka
 
     TSourceLoc loc = ppToken->loc;  // in case we go to the next line before discovering the error
     in->mac = macro;
-    int atom = ppToken->atom;
     if (macro->args.size() > 0 || macro->emptyArgs) {
         token = scanToken(ppToken);
         if (newLineOkay) {
@@ -1109,10 +1116,8 @@ int TPpContext::MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOka
                 token = scanToken(ppToken);
         }
         if (token != '(') {
-            parseContext.ppError(loc, "expected '(' following", "macro expansion", GetAtomString(atom));
+            parseContext.ppError(loc, "expected '(' following", "macro expansion", GetAtomString(macroAtom));
             UngetToken(token, ppToken);
-            ppToken->atom = atom;
-
             delete in;
             return 0;
         }
@@ -1129,20 +1134,20 @@ int TPpContext::MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOka
             while (1) {
                 token = scanToken(ppToken);
                 if (token == EndOfInput) {
-                    parseContext.ppError(loc, "End of input in macro", "macro expansion", GetAtomString(atom));
+                    parseContext.ppError(loc, "End of input in macro", "macro expansion", GetAtomString(macroAtom));
                     delete in;
                     return 0;
                 }
                 if (token == '\n') {
                     if (! newLineOkay) {
-                        parseContext.ppError(loc, "End of line in macro substitution:", "macro expansion", GetAtomString(atom));
+                        parseContext.ppError(loc, "End of line in macro substitution:", "macro expansion", GetAtomString(macroAtom));
                         delete in;
                         return 0;
                     }
                     continue;
                 }
                 if (token == '#') {
-                    parseContext.ppError(ppToken->loc, "unexpected '#'", "macro expansion", GetAtomString(atom));
+                    parseContext.ppError(ppToken->loc, "unexpected '#'", "macro expansion", GetAtomString(macroAtom));
                     delete in;
                     return 0;
                 }
@@ -1167,7 +1172,7 @@ int TPpContext::MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOka
         } while (arg < in->mac->args.size());
 
         if (arg < in->mac->args.size())
-            parseContext.ppError(loc, "Too few args in Macro", "macro expansion", GetAtomString(atom));
+            parseContext.ppError(loc, "Too few args in Macro", "macro expansion", GetAtomString(macroAtom));
         else if (token != ')') {
             depth=0;
             while (token != EndOfInput && (depth > 0 || token != ')')) {
@@ -1179,11 +1184,11 @@ int TPpContext::MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOka
             }
 
             if (token == EndOfInput) {
-                parseContext.ppError(loc, "End of input in macro", "macro expansion", GetAtomString(atom));
+                parseContext.ppError(loc, "End of input in macro", "macro expansion", GetAtomString(macroAtom));
                 delete in;
                 return 0;
             }
-            parseContext.ppError(loc, "Too many args in macro", "macro expansion", GetAtomString(atom));
+            parseContext.ppError(loc, "Too many args in macro", "macro expansion", GetAtomString(macroAtom));
         }
 
         // We need both expanded and non-expanded forms of the argument, for whether or
diff --git a/glslang/MachineIndependent/preprocessor/PpAtom.cpp b/glslang/MachineIndependent/preprocessor/PpAtom.cpp
index 7d97cb9a..6d54a1c9 100644
--- a/glslang/MachineIndependent/preprocessor/PpAtom.cpp
+++ b/glslang/MachineIndependent/preprocessor/PpAtom.cpp
@@ -120,7 +120,6 @@ const struct {
     { PpAtomIncrement,      "++" },
 
     { PpAtomDefine,         "define" },
-    { PpAtomDefined,        "defined" },
     { PpAtomUndef,          "undef" },
     { PpAtomIf,             "if" },
     { PpAtomElif,           "elif" },
@@ -149,17 +148,29 @@ const struct {
 
 namespace glslang {
 
+//
+// Map an existing string to an atom.
+//
+// Return 0 if no existing string.
+//
+int TPpContext::LookUpString(const char* s)
+{
+    auto it = atomMap.find(s);
+    return it == atomMap.end() ? 0 : it->second;
+}
+
 //
 // Map a new or existing string to an atom, inventing a new atom if necessary.
 //
 int TPpContext::LookUpAddString(const char* s)
 {
-    auto it = atomMap.find(s);
-    if (it == atomMap.end()) {
-        AddAtomFixed(s, nextAtom);
-        return nextAtom++;
-    } else
-        return it->second;
+    int atom = LookUpString(s);
+    if (atom == 0) {
+        atom = nextAtom++;
+        AddAtomFixed(s, atom);
+    }
+
+    return atom;
 }
 
 //
diff --git a/glslang/MachineIndependent/preprocessor/PpContext.h b/glslang/MachineIndependent/preprocessor/PpContext.h
index 7265c0b3..77b49d2b 100644
--- a/glslang/MachineIndependent/preprocessor/PpContext.h
+++ b/glslang/MachineIndependent/preprocessor/PpContext.h
@@ -92,7 +92,7 @@ namespace glslang {
 
 class TPpToken {
 public:
-    TPpToken() : space(false), ival(0), dval(0.0), i64val(0), atom(0)
+    TPpToken() : space(false), ival(0), dval(0.0), i64val(0)
     {
         loc.init(); 
         name[0] = 0;
@@ -112,7 +112,6 @@ public:
     int    ival;
     double dval;
     long long i64val;
-    int    atom;
     char   name[MaxTokenLength + 1];
 };
 
@@ -551,6 +550,7 @@ protected:
     int nextAtom;
     void InitAtomTable();
     void AddAtomFixed(const char* s, int atom);
+    int LookUpString(const char* s);
     int LookUpAddString(const char* s);
     const char* GetAtomString(int atom);
 };
diff --git a/glslang/MachineIndependent/preprocessor/PpScanner.cpp b/glslang/MachineIndependent/preprocessor/PpScanner.cpp
index 2898c3c9..be7f4f3c 100644
--- a/glslang/MachineIndependent/preprocessor/PpScanner.cpp
+++ b/glslang/MachineIndependent/preprocessor/PpScanner.cpp
@@ -247,7 +247,6 @@ int TPpContext::lFloatConst(int len, int ch, TPpToken* ppToken)
 //
 int TPpContext::tStringInput::scan(TPpToken* ppToken)
 {
-    char* tokenText = ppToken->name;
     int AlreadyComplained = 0;
     int len = 0;
     int ch = 0;
@@ -286,7 +285,7 @@ int TPpContext::tStringInput::scan(TPpToken* ppToken)
         case 'z':
             do {
                 if (len < MaxTokenLength) {
-                    tokenText[len++] = (char)ch;
+                    ppToken->name[len++] = (char)ch;
                     ch = getch();
                 } else {
                     if (! AlreadyComplained) {
@@ -304,9 +303,8 @@ int TPpContext::tStringInput::scan(TPpToken* ppToken)
             if (len == 0)
                 continue;
 
-            tokenText[len] = '\0';
+            ppToken->name[len] = '\0';
             ungetch();
-            ppToken->atom = pp->LookUpAddString(tokenText);
             return PpAtomIdentifier;
         case '0':
             ppToken->name[len++] = (char)ch;
@@ -693,13 +691,13 @@ int TPpContext::tStringInput::scan(TPpToken* ppToken)
             ch = getch();
             while (ch != '"' && ch != '\n' && ch != EndOfInput) {
                 if (len < MaxTokenLength) {
-                    tokenText[len] = (char)ch;
+                    ppToken->name[len] = (char)ch;
                     len++;
                     ch = getch();
                 } else
                     break;
             };
-            tokenText[len] = '\0';
+            ppToken->name[len] = '\0';
             if (ch != '"') {
                 ungetch();
                 pp->parseContext.ppError(ppToken->loc, "End of line in string", "string", "");
@@ -821,7 +819,6 @@ int TPpContext::tokenPaste(int token, TPpToken& ppToken)
         if (strlen(ppToken.name) + strlen(pastedPpToken.name) > MaxTokenLength)
             parseContext.ppError(ppToken.loc, "combined tokens are too long", "##", "");
         strncat(ppToken.name, pastedPpToken.name, MaxTokenLength - strlen(ppToken.name));
-        ppToken.atom = LookUpAddString(ppToken.name);
     }
 
     return resultToken;
diff --git a/glslang/MachineIndependent/preprocessor/PpTokens.cpp b/glslang/MachineIndependent/preprocessor/PpTokens.cpp
index 7cc5f249..471f26c0 100644
--- a/glslang/MachineIndependent/preprocessor/PpTokens.cpp
+++ b/glslang/MachineIndependent/preprocessor/PpTokens.cpp
@@ -218,7 +218,6 @@ int TPpContext::ReadToken(TokenStream& pTok, TPpToken *ppToken)
 
         switch (ltoken) {
         case PpAtomIdentifier:
-            ppToken->atom = LookUpAddString(ppToken->name);
             break;
         case PpAtomConstString:
             break;
diff --git a/glslang/MachineIndependent/preprocessor/PpTokens.h b/glslang/MachineIndependent/preprocessor/PpTokens.h
index c84431d3..7a09327f 100644
--- a/glslang/MachineIndependent/preprocessor/PpTokens.h
+++ b/glslang/MachineIndependent/preprocessor/PpTokens.h
@@ -134,7 +134,6 @@ enum EFixedAtoms {
     // preprocessor "keywords"
 
     PpAtomDefine,
-    PpAtomDefined,
     PpAtomUndef,
 
     PpAtomIf,