Initial commit

Dustin Spicuzza
2020-12-28 03:35:30 -05:00
commit ef5c22972b
37 changed files with 14826 additions and 0 deletions

@@ -0,0 +1,4 @@
try:
from .version import __version__
except ImportError:
__version__ = "master"

@@ -0,0 +1,4 @@
from cxxheaderparser.dump import dumpmain
if __name__ == "__main__":
dumpmain()

cxxheaderparser/_ply/lex.py (Normal file, 902 lines)
@@ -0,0 +1,902 @@
# fmt: off
# -----------------------------------------------------------------------------
# ply: lex.py
#
# Copyright (C) 2001-2020
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Latest version: https://github.com/dabeaz/ply
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of David Beazley or Dabeaz LLC may be used to
# endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
import re
import sys
import types
import copy
import os
import inspect
# This tuple contains acceptable string types
StringTypes = (str, bytes)
# This regular expression is used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
# Exception thrown when invalid token encountered and no default error
# handler is defined.
class LexError(Exception):
def __init__(self, message, s):
self.args = (message,)
self.text = s
# Token class. This class is used to represent the tokens produced.
class LexToken(object):
def __repr__(self):
return f'LexToken({self.type},{self.value!r},{self.lineno},{self.lexpos})'
# This object is a stand-in for a logging object created by the
# logging module.
class PlyLogger(object):
def __init__(self, f):
self.f = f
def critical(self, msg, *args, **kwargs):
self.f.write((msg % args) + '\n')
def warning(self, msg, *args, **kwargs):
self.f.write('WARNING: ' + (msg % args) + '\n')
def error(self, msg, *args, **kwargs):
self.f.write('ERROR: ' + (msg % args) + '\n')
info = critical
debug = critical
# -----------------------------------------------------------------------------
# === Lexing Engine ===
#
# The following Lexer class implements the lexer runtime. There are only
# a few public methods and attributes:
#
# input() - Store a new string in the lexer
# token() - Get the next token
# clone() - Clone the lexer
#
# lineno - Current line number
# lexpos - Current position in the input string
# -----------------------------------------------------------------------------
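# A minimal usage sketch of this runtime interface (the rules module name is
# hypothetical; see lex() further below for how a Lexer instance is built):
#
#   lexer = lex(module=my_rules_module)
#   lexer.input("int x = 3;")
#   for tok in lexer:                 # iteration repeatedly calls token()
#       print(tok.type, tok.value, tok.lineno, tok.lexpos)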
class Lexer:
def __init__(self):
self.lexre = None # Master regular expression. This is a list of
# tuples (re, findex) where re is a compiled
# regular expression and findex is a list
# mapping regex group numbers to rules
self.lexretext = None # Current regular expression strings
self.lexstatere = {} # Dictionary mapping lexer states to master regexs
self.lexstateretext = {} # Dictionary mapping lexer states to regex strings
self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names
self.lexstate = 'INITIAL' # Current lexer state
self.lexstatestack = [] # Stack of lexer states
self.lexstateinfo = None # State information
self.lexstateignore = {} # Dictionary of ignored characters for each state
self.lexstateerrorf = {} # Dictionary of error functions for each state
self.lexstateeoff = {} # Dictionary of eof functions for each state
self.lexreflags = 0 # Optional re compile flags
self.lexdata = None # Actual input data (as a string)
self.lexpos = 0 # Current position in input text
self.lexlen = 0 # Length of the input text
self.lexerrorf = None # Error rule (if any)
self.lexeoff = None # EOF rule (if any)
self.lextokens = None # List of valid tokens
self.lexignore = '' # Ignored characters
self.lexliterals = '' # Literal characters that can be passed through
self.lexmodule = None # Module
self.lineno = 1 # Current line number
def clone(self, object=None):
c = copy.copy(self)
# If the object parameter has been supplied, it means we are attaching the
# lexer to a new object. In this case, we have to rebind all methods in
# the lexstatere and lexstateerrorf tables.
if object:
newtab = {}
for key, ritem in self.lexstatere.items():
newre = []
for cre, findex in ritem:
newfindex = []
for f in findex:
if not f or not f[0]:
newfindex.append(f)
continue
newfindex.append((getattr(object, f[0].__name__), f[1]))
newre.append((cre, newfindex))
newtab[key] = newre
c.lexstatere = newtab
c.lexstateerrorf = {}
for key, ef in self.lexstateerrorf.items():
c.lexstateerrorf[key] = getattr(object, ef.__name__)
c.lexmodule = object
return c
# ------------------------------------------------------------
# input() - Push a new string into the lexer
# ------------------------------------------------------------
def input(self, s):
self.lexdata = s
self.lexpos = 0
self.lexlen = len(s)
# ------------------------------------------------------------
# begin() - Changes the lexing state
# ------------------------------------------------------------
def begin(self, state):
if state not in self.lexstatere:
raise ValueError(f'Undefined state {state!r}')
self.lexre = self.lexstatere[state]
self.lexretext = self.lexstateretext[state]
self.lexignore = self.lexstateignore.get(state, '')
self.lexerrorf = self.lexstateerrorf.get(state, None)
self.lexeoff = self.lexstateeoff.get(state, None)
self.lexstate = state
# ------------------------------------------------------------
# push_state() - Changes the lexing state and saves old on stack
# ------------------------------------------------------------
def push_state(self, state):
self.lexstatestack.append(self.lexstate)
self.begin(state)
# ------------------------------------------------------------
# pop_state() - Restores the previous state
# ------------------------------------------------------------
def pop_state(self):
self.begin(self.lexstatestack.pop())
# ------------------------------------------------------------
# current_state() - Returns the current lexing state
# ------------------------------------------------------------
def current_state(self):
return self.lexstate
# ------------------------------------------------------------
# skip() - Skip ahead n characters
# ------------------------------------------------------------
def skip(self, n):
self.lexpos += n
# ------------------------------------------------------------
# token() - Return the next token from the Lexer
#
# Note: This function has been carefully implemented to be as fast
# as possible. Don't make changes unless you really know what
# you are doing
# ------------------------------------------------------------
def token(self):
# Make local copies of frequently referenced attributes
lexpos = self.lexpos
lexlen = self.lexlen
lexignore = self.lexignore
lexdata = self.lexdata
while lexpos < lexlen:
# This code provides some short-circuit code for whitespace, tabs, and other ignored characters
if lexdata[lexpos] in lexignore:
lexpos += 1
continue
# Look for a regular expression match
for lexre, lexindexfunc in self.lexre:
m = lexre.match(lexdata, lexpos)
if not m:
continue
# Create a token for return
tok = LexToken()
tok.value = m.group()
tok.lineno = self.lineno
tok.lexpos = lexpos
i = m.lastindex
func, tok.type = lexindexfunc[i]
if not func:
# If no token type was set, it's an ignored token
if tok.type:
self.lexpos = m.end()
return tok
else:
lexpos = m.end()
break
lexpos = m.end()
# If token is processed by a function, call it
tok.lexer = self # Set additional attributes useful in token rules
self.lexmatch = m
self.lexpos = lexpos
newtok = func(tok)
del tok.lexer
del self.lexmatch
# Every function must return a token; if it returns nothing, we just move on to the next token
if not newtok:
lexpos = self.lexpos # This is here in case user has updated lexpos.
lexignore = self.lexignore # This is here in case there was a state change
break
return newtok
else:
# No match, see if in literals
if lexdata[lexpos] in self.lexliterals:
tok = LexToken()
tok.value = lexdata[lexpos]
tok.lineno = self.lineno
tok.type = tok.value
tok.lexpos = lexpos
self.lexpos = lexpos + 1
return tok
# No match. Call t_error() if defined.
if self.lexerrorf:
tok = LexToken()
tok.value = self.lexdata[lexpos:]
tok.lineno = self.lineno
tok.type = 'error'
tok.lexer = self
tok.lexpos = lexpos
self.lexpos = lexpos
newtok = self.lexerrorf(tok)
if lexpos == self.lexpos:
# Error method didn't change text position at all. This is an error.
raise LexError(f"Scanning error. Illegal character {lexdata[lexpos]!r}",
lexdata[lexpos:])
lexpos = self.lexpos
if not newtok:
continue
return newtok
self.lexpos = lexpos
raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}",
lexdata[lexpos:])
if self.lexeoff:
tok = LexToken()
tok.type = 'eof'
tok.value = ''
tok.lineno = self.lineno
tok.lexpos = lexpos
tok.lexer = self
self.lexpos = lexpos
newtok = self.lexeoff(tok)
return newtok
self.lexpos = lexpos + 1
if self.lexdata is None:
raise RuntimeError('No input string given with input()')
return None
# Iterator interface
def __iter__(self):
return self
def __next__(self):
t = self.token()
if t is None:
raise StopIteration
return t
# -----------------------------------------------------------------------------
# === Lex Builder ===
#
# The functions and classes below are used to collect lexing information
# and build a Lexer object from it.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# _get_regex(func)
#
# Returns the regular expression assigned to a function either as a doc string
# or as a .regex attribute attached by the @TOKEN decorator.
# -----------------------------------------------------------------------------
def _get_regex(func):
return getattr(func, 'regex', func.__doc__)
# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack. This is used to get the environment
# associated with the lex() call if none was provided.
# -----------------------------------------------------------------------------
def get_caller_module_dict(levels):
f = sys._getframe(levels)
return { **f.f_globals, **f.f_locals }
# -----------------------------------------------------------------------------
# _form_master_re()
#
# This function takes a list of all of the regex components and attempts to
# form the master regular expression. Given limitations in the Python re
# module, it may be necessary to break the master regex into separate expressions.
# -----------------------------------------------------------------------------
def _form_master_re(relist, reflags, ldict, toknames):
if not relist:
return [], [], []
regex = '|'.join(relist)
try:
lexre = re.compile(regex, reflags)
# Build the index to function map for the matching engine
lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1)
lexindexnames = lexindexfunc[:]
for f, i in lexre.groupindex.items():
handle = ldict.get(f, None)
if type(handle) in (types.FunctionType, types.MethodType):
lexindexfunc[i] = (handle, toknames[f])
lexindexnames[i] = f
elif handle is not None:
lexindexnames[i] = f
if f.find('ignore_') > 0:
lexindexfunc[i] = (None, None)
else:
lexindexfunc[i] = (None, toknames[f])
return [(lexre, lexindexfunc)], [regex], [lexindexnames]
except Exception:
m = (len(relist) // 2) + 1
llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames)
rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames)
return (llist+rlist), (lre+rre), (lnames+rnames)
# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token. For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------
def _statetoken(s, names):
parts = s.split('_')
for i, part in enumerate(parts[1:], 1):
if part not in names and part != 'ANY':
break
if i > 1:
states = tuple(parts[1:i])
else:
states = ('INITIAL',)
if 'ANY' in states:
states = tuple(names)
tokenname = '_'.join(parts[i:])
return (states, tokenname)
# -----------------------------------------------------------------------------
# LexerReflect()
#
# This class represents information needed to build a lexer as extracted from a
# user's input file.
# -----------------------------------------------------------------------------
class LexerReflect(object):
def __init__(self, ldict, log=None, reflags=0):
self.ldict = ldict
self.error_func = None
self.tokens = []
self.reflags = reflags
self.stateinfo = {'INITIAL': 'inclusive'}
self.modules = set()
self.error = False
self.log = PlyLogger(sys.stderr) if log is None else log
# Get all of the basic information
def get_all(self):
self.get_tokens()
self.get_literals()
self.get_states()
self.get_rules()
# Validate all of the information
def validate_all(self):
self.validate_tokens()
self.validate_literals()
self.validate_rules()
return self.error
# Get the tokens map
def get_tokens(self):
tokens = self.ldict.get('tokens', None)
if not tokens:
self.log.error('No token list is defined')
self.error = True
return
if not isinstance(tokens, (list, tuple)):
self.log.error('tokens must be a list or tuple')
self.error = True
return
if not tokens:
self.log.error('tokens is empty')
self.error = True
return
self.tokens = tokens
# Validate the tokens
def validate_tokens(self):
terminals = {}
for n in self.tokens:
if not _is_identifier.match(n):
self.log.error(f"Bad token name {n!r}")
self.error = True
if n in terminals:
self.log.warning(f"Token {n!r} multiply defined")
terminals[n] = 1
# Get the literals specifier
def get_literals(self):
self.literals = self.ldict.get('literals', '')
if not self.literals:
self.literals = ''
# Validate literals
def validate_literals(self):
try:
for c in self.literals:
if not isinstance(c, StringTypes) or len(c) > 1:
self.log.error(f'Invalid literal {c!r}. Must be a single character')
self.error = True
except TypeError:
self.log.error('Invalid literals specification. literals must be a sequence of characters')
self.error = True
def get_states(self):
self.states = self.ldict.get('states', None)
# Build statemap
if self.states:
if not isinstance(self.states, (tuple, list)):
self.log.error('states must be defined as a tuple or list')
self.error = True
else:
for s in self.states:
if not isinstance(s, tuple) or len(s) != 2:
self.log.error("Invalid state specifier %r. Must be a tuple (statename,'exclusive|inclusive')", s)
self.error = True
continue
name, statetype = s
if not isinstance(name, StringTypes):
self.log.error('State name %r must be a string', name)
self.error = True
continue
if not (statetype == 'inclusive' or statetype == 'exclusive'):
self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name)
self.error = True
continue
if name in self.stateinfo:
self.log.error("State %r already defined", name)
self.error = True
continue
self.stateinfo[name] = statetype
# Get all of the symbols with a t_ prefix and sort them into various
# categories (functions, strings, error functions, and ignore characters)
def get_rules(self):
tsymbols = [f for f in self.ldict if f[:2] == 't_']
# Now build up a list of functions and a list of strings
self.toknames = {} # Mapping of symbols to token names
self.funcsym = {} # Symbols defined as functions
self.strsym = {} # Symbols defined as strings
self.ignore = {} # Ignore strings by state
self.errorf = {} # Error functions by state
self.eoff = {} # EOF functions by state
for s in self.stateinfo:
self.funcsym[s] = []
self.strsym[s] = []
if len(tsymbols) == 0:
self.log.error('No rules of the form t_rulename are defined')
self.error = True
return
for f in tsymbols:
t = self.ldict[f]
states, tokname = _statetoken(f, self.stateinfo)
self.toknames[f] = tokname
if hasattr(t, '__call__'):
if tokname == 'error':
for s in states:
self.errorf[s] = t
elif tokname == 'eof':
for s in states:
self.eoff[s] = t
elif tokname == 'ignore':
line = t.__code__.co_firstlineno
file = t.__code__.co_filename
self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__)
self.error = True
else:
for s in states:
self.funcsym[s].append((f, t))
elif isinstance(t, StringTypes):
if tokname == 'ignore':
for s in states:
self.ignore[s] = t
if '\\' in t:
self.log.warning("%s contains a literal backslash '\\'", f)
elif tokname == 'error':
self.log.error("Rule %r must be defined as a function", f)
self.error = True
else:
for s in states:
self.strsym[s].append((f, t))
else:
self.log.error('%s not defined as a function or string', f)
self.error = True
# Sort the functions by line number
for f in self.funcsym.values():
f.sort(key=lambda x: x[1].__code__.co_firstlineno)
# Sort the strings by regular expression length
for s in self.strsym.values():
s.sort(key=lambda x: len(x[1]), reverse=True)
# Validate all of the t_rules collected
def validate_rules(self):
for state in self.stateinfo:
# Validate all rules defined by functions
for fname, f in self.funcsym[state]:
line = f.__code__.co_firstlineno
file = f.__code__.co_filename
module = inspect.getmodule(f)
self.modules.add(module)
tokname = self.toknames[fname]
if isinstance(f, types.MethodType):
reqargs = 2
else:
reqargs = 1
nargs = f.__code__.co_argcount
if nargs > reqargs:
self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__)
self.error = True
continue
if nargs < reqargs:
self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__)
self.error = True
continue
if not _get_regex(f):
self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__)
self.error = True
continue
try:
c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags)
if c.match(''):
self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__)
self.error = True
except re.error as e:
self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e)
if '#' in _get_regex(f):
self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__)
self.error = True
# Validate all rules defined by strings
for name, r in self.strsym[state]:
tokname = self.toknames[name]
if tokname == 'error':
self.log.error("Rule %r must be defined as a function", name)
self.error = True
continue
if tokname not in self.tokens and tokname.find('ignore_') < 0:
self.log.error("Rule %r defined for an unspecified token %s", name, tokname)
self.error = True
continue
try:
c = re.compile('(?P<%s>%s)' % (name, r), self.reflags)
if (c.match('')):
self.log.error("Regular expression for rule %r matches empty string", name)
self.error = True
except re.error as e:
self.log.error("Invalid regular expression for rule %r. %s", name, e)
if '#' in r:
self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name)
self.error = True
if not self.funcsym[state] and not self.strsym[state]:
self.log.error("No rules defined for state %r", state)
self.error = True
# Validate the error function
efunc = self.errorf.get(state, None)
if efunc:
f = efunc
line = f.__code__.co_firstlineno
file = f.__code__.co_filename
module = inspect.getmodule(f)
self.modules.add(module)
if isinstance(f, types.MethodType):
reqargs = 2
else:
reqargs = 1
nargs = f.__code__.co_argcount
if nargs > reqargs:
self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__)
self.error = True
if nargs < reqargs:
self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__)
self.error = True
for module in self.modules:
self.validate_module(module)
# -----------------------------------------------------------------------------
# validate_module()
#
# This checks to see if there are duplicated t_rulename() functions or strings
# in the parser input file. This is done using a simple regular expression
# match on each line in the source code of the given module.
# -----------------------------------------------------------------------------
def validate_module(self, module):
try:
lines, linen = inspect.getsourcelines(module)
except IOError:
return
fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
counthash = {}
linen += 1
for line in lines:
m = fre.match(line)
if not m:
m = sre.match(line)
if m:
name = m.group(1)
prev = counthash.get(name)
if not prev:
counthash[name] = linen
else:
filename = inspect.getsourcefile(module)
self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev)
self.error = True
linen += 1
# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
def lex(*, module=None, object=None, debug=False,
reflags=int(re.VERBOSE), debuglog=None, errorlog=None):
global lexer
ldict = None
stateinfo = {'INITIAL': 'inclusive'}
lexobj = Lexer()
global token, input
if errorlog is None:
errorlog = PlyLogger(sys.stderr)
if debug:
if debuglog is None:
debuglog = PlyLogger(sys.stderr)
# Get the module dictionary used for the lexer
if object:
module = object
# Get the module dictionary used for the parser
if module:
_items = [(k, getattr(module, k)) for k in dir(module)]
ldict = dict(_items)
# If no __file__ attribute is available, try to obtain it from the __module__ instead
if '__file__' not in ldict:
ldict['__file__'] = sys.modules[ldict['__module__']].__file__
else:
ldict = get_caller_module_dict(2)
# Collect parser information from the dictionary
linfo = LexerReflect(ldict, log=errorlog, reflags=reflags)
linfo.get_all()
if linfo.validate_all():
raise SyntaxError("Can't build lexer")
# Dump some basic debugging information
if debug:
debuglog.info('lex: tokens = %r', linfo.tokens)
debuglog.info('lex: literals = %r', linfo.literals)
debuglog.info('lex: states = %r', linfo.stateinfo)
# Build a dictionary of valid token names
lexobj.lextokens = set()
for n in linfo.tokens:
lexobj.lextokens.add(n)
# Get literals specification
if isinstance(linfo.literals, (list, tuple)):
lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
else:
lexobj.lexliterals = linfo.literals
lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals)
# Get the stateinfo dictionary
stateinfo = linfo.stateinfo
regexs = {}
# Build the master regular expressions
for state in stateinfo:
regex_list = []
# Add rules defined by functions first
for fname, f in linfo.funcsym[state]:
regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f)))
if debug:
debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state)
# Now add all of the simple rules
for name, r in linfo.strsym[state]:
regex_list.append('(?P<%s>%s)' % (name, r))
if debug:
debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state)
regexs[state] = regex_list
# Build the master regular expressions
if debug:
debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====')
for state in regexs:
lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames)
lexobj.lexstatere[state] = lexre
lexobj.lexstateretext[state] = re_text
lexobj.lexstaterenames[state] = re_names
if debug:
for i, text in enumerate(re_text):
debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text)
# For inclusive states, we need to add the regular expressions from the INITIAL state
for state, stype in stateinfo.items():
if state != 'INITIAL' and stype == 'inclusive':
lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])
lexobj.lexstateinfo = stateinfo
lexobj.lexre = lexobj.lexstatere['INITIAL']
lexobj.lexretext = lexobj.lexstateretext['INITIAL']
lexobj.lexreflags = reflags
# Set up ignore variables
lexobj.lexstateignore = linfo.ignore
lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '')
# Set up error functions
lexobj.lexstateerrorf = linfo.errorf
lexobj.lexerrorf = linfo.errorf.get('INITIAL', None)
if not lexobj.lexerrorf:
errorlog.warning('No t_error rule is defined')
# Set up eof functions
lexobj.lexstateeoff = linfo.eoff
lexobj.lexeoff = linfo.eoff.get('INITIAL', None)
# Check state information for ignore and error rules
for s, stype in stateinfo.items():
if stype == 'exclusive':
if s not in linfo.errorf:
errorlog.warning("No error rule is defined for exclusive state %r", s)
if s not in linfo.ignore and lexobj.lexignore:
errorlog.warning("No ignore rule is defined for exclusive state %r", s)
elif stype == 'inclusive':
if s not in linfo.errorf:
linfo.errorf[s] = linfo.errorf.get('INITIAL', None)
if s not in linfo.ignore:
linfo.ignore[s] = linfo.ignore.get('INITIAL', '')
# Create global versions of the token() and input() functions
token = lexobj.token
input = lexobj.input
lexer = lexobj
return lexobj
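# A sketch of the module contents that lex() reflects over (the rules below are
# a hypothetical example following the tokens/t_ conventions validated above;
# cxxheaderparser itself passes a Lexer instance via lex.lex(module=inst)):
#
#   tokens = ("NUMBER", "PLUS")
#   t_ignore = " \t"
#   t_PLUS = r"\+"
#   def t_NUMBER(t):
#       r"\d+"
#       return t
#   def t_error(t):
#       t.lexer.skip(1)
#   lexer = lex(module=sys.modules[__name__])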
# -----------------------------------------------------------------------------
# runmain()
#
# This runs the lexer as a main program
# -----------------------------------------------------------------------------
def runmain(lexer=None, data=None):
if not data:
try:
filename = sys.argv[1]
with open(filename) as f:
data = f.read()
except IndexError:
sys.stdout.write('Reading from standard input (type EOF to end):\n')
data = sys.stdin.read()
if lexer:
_input = lexer.input
else:
_input = input
_input(data)
if lexer:
_token = lexer.token
else:
_token = token
while True:
tok = _token()
if not tok:
break
sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n')
# -----------------------------------------------------------------------------
# @TOKEN(regex)
#
# This decorator function can be used to set the regular expression on a function
# when its docstring might need to be set in an alternative way
# -----------------------------------------------------------------------------
def TOKEN(r):
def set_regex(f):
if hasattr(r, '__call__'):
f.regex = _get_regex(r)
else:
f.regex = r
return f
return set_regex
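# A brief usage sketch for @TOKEN (the identifier rule below is a hypothetical
# example, not part of this commit):
#
#   identifier = r"[A-Za-z_][A-Za-z0-9_]*"
#
#   @TOKEN(identifier)
#   def t_ID(t):
#       return t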

cxxheaderparser/dump.py (Normal file, 53 lines)
@@ -0,0 +1,53 @@
import argparse
import dataclasses
import json
import pprint
import subprocess
import sys
from .options import ParserOptions
from .simple import parse_file
def dumpmain():
parser = argparse.ArgumentParser()
parser.add_argument("header")
parser.add_argument(
"-w",
"--width",
default=80,
type=int,
help="Width of output when in pprint mode",
)
parser.add_argument("-v", "--verbose", default=False, action="store_true")
parser.add_argument(
"--mode", choices=["json", "pprint", "repr", "brepr"], default="pprint"
)
args = parser.parse_args()
options = ParserOptions(verbose=args.verbose)
data = parse_file(args.header, options=options)
if args.mode == "pprint":
ddata = dataclasses.asdict(data)
pprint.pprint(ddata, width=args.width, compact=True)
elif args.mode == "json":
ddata = dataclasses.asdict(data)
json.dump(ddata, sys.stdout, indent=2)
elif args.mode == "brepr":
stmt = repr(data)
stmt = subprocess.check_output(
["black", "-", "-q"], input=stmt.encode("utf-8")
).decode("utf-8")
print(stmt)
elif args.mode == "repr":
print(data)
else:
parser.error("Invalid mode")
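# A usage sketch: dumpmain() is wired up as the package entry point (see the
# __main__ module above), so a hypothetical invocation looks like:
#
#   python -m cxxheaderparser some_header.h --mode json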

cxxheaderparser/errors.py (Normal file, 12 lines)
@@ -0,0 +1,12 @@
import typing
from .lexer import LexToken
class CxxParseError(Exception):
"""
Exception raised when a parsing error occurs
"""
def __init__(self, msg: str, tok: typing.Optional[LexToken] = None) -> None:
Exception.__init__(self, msg)
self.tok = tok

@@ -0,0 +1,98 @@
import argparse
import dataclasses
import inspect
import subprocess
from .options import ParserOptions
from .simple import parse_string
def nondefault_repr(data):
"""
Similar to the default dataclass repr, but excludes any
default parameters or parameters with compare=False
"""
is_dataclass = dataclasses.is_dataclass
get_fields = dataclasses.fields
MISSING = dataclasses.MISSING
def _inner_repr(o) -> str:
if is_dataclass(o):
vals = []
for f in get_fields(o):
if f.repr and f.compare:
v = getattr(o, f.name)
if f.default_factory is not MISSING:
default = f.default_factory()
else:
default = f.default
if v != default:
vals.append(f"{f.name}={_inner_repr(v)}")
return f"{o.__class__.__qualname__ }({', '.join(vals)})"
elif isinstance(o, list):
return f"[{','.join(_inner_repr(l) for l in o)}]"
elif isinstance(o, dict):
vals = []
for k, v in o.items():
vals.append(f'"{k}": {_inner_repr(v)}')
return "{" + ",".join(vals) + "}"
else:
return repr(o)
return _inner_repr(data)
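# A small sketch of the intended behaviour (Point is a hypothetical dataclass):
#
#   @dataclasses.dataclass
#   class Point:
#       x: int = 0
#       y: int = 0
#
#   nondefault_repr(Point(x=3))   # -> "Point(x=3)"; the defaulted y is omitted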
def gentest(infile: str, name: str, outfile: str, verbose: bool):
# Goal is to allow making a unit test as easy as running this dumper
# on a file and copy/pasting this into a test
with open(infile, "r") as fp:
content = fp.read()
options = ParserOptions(verbose=verbose)
data = parse_string(content, options=options)
stmt = nondefault_repr(data)
content = content.replace("\n", "\n ")
stmt = inspect.cleandoc(
f'''
def test_{name}():
content = """
{content}
"""
data = parse_string(content, cleandoc=True)
assert data == {stmt}
'''
)
# format it with black
stmt = subprocess.check_output(
["black", "-", "-q"], input=stmt.encode("utf-8")
).decode("utf-8")
if outfile == "-":
print(stmt)
else:
with open(outfile, "w") as fp:
fp.write(stmt)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("header")
parser.add_argument("name", nargs="?", default="TODO")
parser.add_argument("-v", "--verbose", default=False, action="store_true")
parser.add_argument("-o", "--output", default="-")
args = parser.parse_args()
gentest(args.header, args.name, args.output, args.verbose)

cxxheaderparser/lexer.py (Normal file, 425 lines)
@@ -0,0 +1,425 @@
from collections import deque
import re
import typing
import sys
from ._ply import lex
if sys.version_info >= (3, 8):
Protocol = typing.Protocol
else:
Protocol = object
_line_re = re.compile(r'^#line (\d+) "(.*)"')
_multicomment_re = re.compile("\n[\\s]+\\*")
class Location(typing.NamedTuple):
"""
Location that something was found at, takes #line directives into account
"""
filename: str
lineno: int
class LexToken(Protocol):
"""
Token as emitted by PLY and modified by our lexer
"""
#: Lexer type for this token
type: str
#: Raw value for this token
value: str
lineno: int
lexpos: int
#: Location token was found at
location: Location
class Lexer:
keywords = {
"__attribute__",
"alignas",
"alignof",
"asm",
"auto",
"bool",
"break",
"case",
"catch",
"char",
"char8_t",
"char16_t",
"char32_t",
"class",
"const",
"constexpr",
"const_cast",
"continue",
"decltype",
"__declspec",
"default",
"delete",
"do",
"double",
"dynamic_cast",
"else",
"enum",
"explicit",
"export",
"extern",
"false",
"final",
"float",
"for",
"friend",
"goto",
"if",
"inline",
"int",
"long",
"mutable",
"namespace",
"new",
"noexcept",
"nullptr",
"nullptr_t", # not a keyword, but makes things easier
"operator",
"private",
"protected",
"public",
"register",
"reinterpret_cast",
"return",
"short",
"signed",
"sizeof",
"static",
"static_assert",
"static_cast",
"struct",
"switch",
"template",
"this",
"thread_local",
"throw",
"true",
"try",
"typedef",
"typeid",
"typename",
"union",
"unsigned",
"using",
"virtual",
"void",
"volatile",
"wchar_t",
"while",
}
tokens = [
"NUMBER",
"FLOAT_NUMBER",
"NAME",
"COMMENT_SINGLELINE",
"COMMENT_MULTILINE",
"PRECOMP_MACRO",
"DIVIDE",
"CHAR_LITERAL",
"STRING_LITERAL",
"NEWLINE",
"ELLIPSIS",
"DBL_LBRACKET",
"DBL_RBRACKET",
"DBL_COLON",
"DBL_AMP",
"SHIFT_LEFT",
] + list(keywords)
literals = [
"<",
">",
"(",
")",
"{",
"}",
"[",
"]",
";",
":",
",",
"\\",
"|",
"%",
"^",
"!",
"*",
"-",
"+",
"&",
"=",
"'",
".",
]
t_ignore = " \t\r?@\f"
t_NUMBER = r"[0-9][0-9XxA-Fa-f]*"
t_FLOAT_NUMBER = r"[-+]?[0-9]*\.[0-9]+([eE][-+]?[0-9]+)?"
def t_NAME(self, t):
r"[A-Za-z_~][A-Za-z0-9_]*"
if t.value in self.keywords:
t.type = t.value
return t
def t_PRECOMP_MACRO(self, t):
r"\#.*"
m = _line_re.match(t.value)
if m:
filename = m.group(2)
if filename not in self._filenames_set:
self.filenames.append(filename)
self._filenames_set.add(filename)
self.filename = filename
self.line_offset = 1 + self.lex.lineno - int(m.group(1))
else:
return t
def t_COMMENT_SINGLELINE(self, t):
r"\/\/.*\n?"
if t.value.startswith("///") or t.value.startswith("//!"):
self.comments.append(t.value.lstrip("\t ").rstrip("\n"))
t.lexer.lineno += t.value.count("\n")
return t
t_DIVIDE = r"/(?!/)"
t_CHAR_LITERAL = "'.'"
t_ELLIPSIS = r"\.\.\."
t_DBL_LBRACKET = r"\[\["
t_DBL_RBRACKET = r"\]\]"
t_DBL_COLON = r"::"
t_DBL_AMP = r"&&"
t_SHIFT_LEFT = r"<<"
# SHIFT_RIGHT introduces ambiguity
# found at http://wordaligned.org/articles/string-literals-and-regular-expressions
# TODO: This does not work with the string "bla \" bla"
t_STRING_LITERAL = r'"([^"\\]|\\.)*"'
# Found at http://ostermiller.org/findcomment.html
def t_COMMENT_MULTILINE(self, t):
r"/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/\n?"
if t.value.startswith("/**") or t.value.startswith("/*!"):
# not sure why, but get double new lines
v = t.value.replace("\n\n", "\n")
# strip prefixing whitespace
v = _multicomment_re.sub("\n*", v)
self.comments = v.splitlines()
t.lexer.lineno += t.value.count("\n")
return t
def t_NEWLINE(self, t):
r"\n+"
t.lexer.lineno += len(t.value)
del self.comments[:]
return t
def t_error(self, v):
print("Lex error: ", v)
_lexer = None
def __new__(cls, *args, **kwargs):
# only build the lexer once
inst = super().__new__(cls)
if cls._lexer is None:
cls._lexer = lex.lex(module=inst)
inst.lex = cls._lexer.clone(inst)
return inst
def __init__(self, filename: typing.Optional[str] = None):
self.input = self.lex.input
# For tracking current file/line position
self.filename = filename
self.line_offset = 0
self.filenames = []
self._filenames_set = set()
if self.filename:
self.filenames.append(filename)
self._filenames_set.add(filename)
# Doxygen comments
self.comments = []
self.lookahead = typing.Deque[LexToken]()
def current_location(self) -> Location:
if self.lookahead:
return self.lookahead[0].location
return Location(self.filename, self.lex.lineno - self.line_offset)
def get_doxygen(self) -> typing.Optional[str]:
"""
This should be called after the first element of something has
been consumed.
It will look ahead for comments that come after the item, if prior
comments don't exist.
"""
# Assumption: This function is either called at the beginning of a
# statement or at the end of a statement
if self.comments:
comments = self.comments
else:
comments = []
# only look for comments until a newline (including lookahead)
for tok in self.lookahead:
if tok.type == "NEWLINE":
return None
while True:
tok = self.lex.token()
comments.extend(self.comments)
if tok is None:
break
tok.location = Location(self.filename, tok.lineno - self.line_offset)
ttype = tok.type
if ttype == "NEWLINE":
self.lookahead.append(tok)
break
if ttype not in self._discard_types:
self.lookahead.append(tok)
if ttype == "NAME":
break
del self.comments[:]
comments = "\n".join(comments)
del self.comments[:]
if comments:
return comments
return None
_discard_types = {"NEWLINE", "COMMENT_SINGLELINE", "COMMENT_MULTILINE"}
def token(self) -> LexToken:
tok = None
while self.lookahead:
tok = self.lookahead.popleft()
if tok.type not in self._discard_types:
return tok
while True:
tok = self.lex.token()
if tok is None:
raise EOFError("unexpected end of file")
if tok.type not in self._discard_types:
tok.location = Location(self.filename, tok.lineno - self.line_offset)
break
return tok
def token_eof_ok(self) -> typing.Optional[LexToken]:
tok = None
while self.lookahead:
tok = self.lookahead.popleft()
if tok.type not in self._discard_types:
return tok
while True:
tok = self.lex.token()
if tok is None:
break
if tok.type not in self._discard_types:
tok.location = Location(self.filename, tok.lineno - self.line_offset)
break
return tok
def token_if(self, *types: str) -> typing.Optional[LexToken]:
tok = self.token_eof_ok()
if tok is None:
return None
if tok.type not in types:
# put it back on the left in case it was retrieved
# from the lookahead buffer
self.lookahead.appendleft(tok)
return None
return tok
def token_if_in_set(self, types: typing.Set[str]) -> typing.Optional[LexToken]:
tok = self.token_eof_ok()
if tok is None:
return None
if tok.type not in types:
# put it back on the left in case it was retrieved
# from the lookahead buffer
self.lookahead.appendleft(tok)
return None
return tok
def token_if_val(self, *vals: str) -> typing.Optional[LexToken]:
tok = self.token_eof_ok()
if tok is None:
return None
if tok.value not in vals:
# put it back on the left in case it was retrieved
# from the lookahead buffer
self.lookahead.appendleft(tok)
return None
return tok
def token_if_not(self, *types: str) -> typing.Optional[LexToken]:
tok = self.token_eof_ok()
if tok is None:
return None
if tok.type in types:
# put it back on the left in case it was retrieved
# from the lookahead buffer
self.lookahead.appendleft(tok)
return None
return tok
def token_peek_if(self, *types: str) -> bool:
tok = self.token_eof_ok()
if not tok:
return False
self.lookahead.appendleft(tok)
return tok.type in types
def return_token(self, tok: LexToken) -> None:
self.lookahead.appendleft(tok)
def return_tokens(self, toks: typing.Iterable[LexToken]) -> None:
self.lookahead.extendleft(reversed(toks))
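# A small usage sketch of this Lexer (filename and input text are arbitrary):
#
#   lexer = Lexer("example.h")
#   lexer.input("int x;")
#   tok = lexer.token()     # first token: the "int" keyword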
if __name__ == "__main__":
try:
lex.runmain(lexer=Lexer(None))
except EOFError:
pass

@@ -0,0 +1,14 @@
from dataclasses import dataclass
@dataclass
class ParserOptions:
"""
Options that control parsing behaviors
"""
#: If true, prints out verbose parsing information
verbose: bool = False
#: If true, converts a single void parameter to zero parameters
convert_void_to_zero_params: bool = True
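# A usage sketch (the header path is hypothetical; cxxheaderparser.dump and
# cxxheaderparser.simple show the real call sites):
#
#   options = ParserOptions(verbose=True)
#   data = parse_file("my_header.h", options=options)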

cxxheaderparser/parser.py (Normal file, 2082 lines)

File diff suppressed because it is too large

@@ -0,0 +1,114 @@
import typing
if typing.TYPE_CHECKING:
from .visitor import CxxVisitor
from .errors import CxxParseError
from .lexer import LexToken, Location
from .types import ClassDecl, NamespaceDecl
class ParsedTypeModifiers(typing.NamedTuple):
vars: typing.Dict[str, LexToken] # only found on variables
both: typing.Dict[str, LexToken] # found on either variables or functions
meths: typing.Dict[str, LexToken] # only found on methods
def validate(self, *, var_ok: bool, meth_ok: bool, msg: str) -> None:
# Almost there! Do any checks the caller asked for
if not var_ok and self.vars:
for tok in self.vars.values():
raise CxxParseError(f"{msg}: unexpected '{tok.value}'")
if not meth_ok and self.meths:
for tok in self.meths.values():
raise CxxParseError(f"{msg}: unexpected '{tok.value}'")
if not meth_ok and not var_ok and self.both:
for tok in self.both.values():
raise CxxParseError(f"{msg}: unexpected '{tok.value}'")
class State:
#: parent state
parent: typing.Optional["State"]
def __init__(self, parent: typing.Optional["State"]) -> None:
self.parent = parent
def _finish(self, visitor: "CxxVisitor") -> None:
pass
class BlockState(State):
#: Approximate location that the parsed element was found at
location: Location
class EmptyBlockState(BlockState):
def _finish(self, visitor: "CxxVisitor") -> None:
visitor.on_empty_block_end(self)
class ExternBlockState(BlockState):
#: The linkage for this extern block
linkage: str
def __init__(self, parent: typing.Optional[State], linkage: str) -> None:
super().__init__(parent)
self.linkage = linkage
def _finish(self, visitor: "CxxVisitor"):
visitor.on_extern_block_end(self)
class NamespaceBlockState(BlockState):
#: The incremental namespace for this block
namespace: NamespaceDecl
def __init__(
self, parent: typing.Optional[State], namespace: NamespaceDecl
) -> None:
super().__init__(parent)
self.namespace = namespace
def _finish(self, visitor: "CxxVisitor") -> None:
visitor.on_namespace_end(self)
class ClassBlockState(BlockState):
#: class decl block being processed
class_decl: ClassDecl
#: Current access level for items encountered
access: str
#: Currently parsing as a typedef
typedef: bool
#: modifiers to apply to following variables
mods: ParsedTypeModifiers
def __init__(
self,
parent: typing.Optional[State],
class_decl: ClassDecl,
access: str,
typedef: bool,
mods: ParsedTypeModifiers,
) -> None:
super().__init__(parent)
self.class_decl = class_decl
self.access = access
self.typedef = typedef
self.mods = mods
def _set_access(self, access: str) -> None:
self.access = access
def _finish(self, visitor: "CxxVisitor") -> None:
visitor.on_class_end(self)

cxxheaderparser/simple.py (Normal file, 294 lines)
@@ -0,0 +1,294 @@
"""
The simple parser/collector iterates over the C++ file and returns a data
structure with all elements in it. Not quite as flexible as implementing
your own parser listener, but you can accomplish most things with it.
cxxheaderparser's unit tests predominantly use the simple API for parsing,
so you can expect it to be pretty stable.
"""
import inspect
import typing
from dataclasses import dataclass, field
from .types import (
ClassDecl,
EnumDecl,
Field,
ForwardDecl,
FriendDecl,
Function,
Method,
Typedef,
UsingAlias,
UsingDecl,
Variable,
)
from .parserstate import (
State,
EmptyBlockState,
ClassBlockState,
ExternBlockState,
NamespaceBlockState,
)
from .parser import CxxParser
from .options import ParserOptions
#
# Data structure
#
@dataclass
class ClassScope:
class_decl: ClassDecl
#: Nested classes
classes: typing.List["ClassScope"] = field(default_factory=list)
enums: typing.List[EnumDecl] = field(default_factory=list)
fields: typing.List[Field] = field(default_factory=list)
friends: typing.List[FriendDecl] = field(default_factory=list)
methods: typing.List[Method] = field(default_factory=list)
typedefs: typing.List[Typedef] = field(default_factory=list)
forward_decls: typing.List[ForwardDecl] = field(default_factory=list)
using: typing.List[UsingDecl] = field(default_factory=list)
using_alias: typing.List[UsingAlias] = field(default_factory=list)
@dataclass
class NamespaceScope:
name: str = ""
classes: typing.List["ClassScope"] = field(default_factory=list)
enums: typing.List[EnumDecl] = field(default_factory=list)
functions: typing.List[Method] = field(default_factory=list)
typedefs: typing.List[Typedef] = field(default_factory=list)
variables: typing.List[Variable] = field(default_factory=list)
forward_decls: typing.List[ForwardDecl] = field(default_factory=list)
using: typing.List[UsingDecl] = field(default_factory=list)
using_ns: typing.List[UsingDecl] = field(default_factory=list)
using_alias: typing.List[UsingAlias] = field(default_factory=list)
#: Child namespaces
namespaces: typing.Dict[str, "NamespaceScope"] = field(default_factory=dict)
Block = typing.Union[ClassScope, NamespaceScope]
@dataclass
class Define:
content: str
@dataclass
class Pragma:
content: str
@dataclass
class Include:
#: The filename includes the surrounding ``<>`` or ``"``
filename: str
@dataclass
class UsingNamespace:
ns: str
@dataclass
class ParsedData:
namespace: NamespaceScope = field(default_factory=lambda: NamespaceScope())
defines: typing.List[Define] = field(default_factory=list)
pragmas: typing.List[Pragma] = field(default_factory=list)
includes: typing.List[Include] = field(default_factory=list)
#
# Visitor implementation
#
class SimpleCxxVisitor:
"""
A simple visitor that stores all of the C++ elements passed to it
in an "easy" to use data structure
.. warning:: Names are not resolved, so items are stored in the scope that
they are found. For example:
.. code-block:: c++
namespace N {
class C;
}
class N::C {
void fn();
};
The 'C' class would be a forward declaration in the 'N' namespace,
but the ClassDecl for 'C' would be stored in the global
namespace instead of the 'N' namespace.
"""
data: ParsedData
namespace: NamespaceScope
block: Block
def __init__(self):
self.namespace = NamespaceScope("")
self.block = self.namespace
self.ns_stack = typing.Deque[NamespaceScope]()
self.block_stack = typing.Deque[Block]()
self.data = ParsedData(self.namespace)
def on_define(self, state: State, content: str) -> None:
self.data.defines.append(Define(content))
def on_pragma(self, state: State, content: str) -> None:
self.data.pragmas.append(Pragma(content))
def on_include(self, state: State, filename: str) -> None:
self.data.includes.append(Include(filename))
def on_empty_block_start(self, state: EmptyBlockState) -> None:
# this matters for some scope/resolving purposes, but you're
# probably going to want to use clang if you care about that
# level of detail
pass
def on_empty_block_end(self, state: EmptyBlockState) -> None:
pass
def on_extern_block_start(self, state: ExternBlockState) -> None:
pass # TODO
def on_extern_block_end(self, state: ExternBlockState) -> None:
pass
def on_namespace_start(self, state: NamespaceBlockState) -> None:
parent_ns = self.namespace
self.block_stack.append(parent_ns)
self.ns_stack.append(parent_ns)
ns = None
names = state.namespace.names
if not names:
# all anonymous namespaces in a translation unit are the same
names = [""]
for name in names:
ns = parent_ns.namespaces.get(name)
if ns is None:
ns = NamespaceScope(name)
parent_ns.namespaces[name] = ns
parent_ns = ns
self.block = ns
self.namespace = ns
def on_namespace_end(self, state: NamespaceBlockState) -> None:
self.block = self.block_stack.pop()
self.namespace = self.ns_stack.pop()
def on_forward_decl(self, state: State, fdecl: ForwardDecl) -> None:
self.block.forward_decls.append(fdecl)
def on_variable(self, state: State, v: Variable) -> None:
self.block.variables.append(v)
def on_function(self, state: State, fn: Function) -> None:
self.block.functions.append(fn)
def on_typedef(self, state: State, typedef: Typedef) -> None:
self.block.typedefs.append(typedef)
def on_using_namespace(self, state: State, namespace: typing.List[str]) -> None:
ns = UsingNamespace("::".join(namespace))
self.block.using_ns.append(ns)
def on_using_alias(self, state: State, using: UsingAlias):
self.block.using_alias.append(using)
def on_using_declaration(self, state: State, using: UsingDecl) -> None:
self.block.using.append(using)
#
# Enums
#
def on_enum(self, state: State, enum: EnumDecl) -> None:
self.block.enums.append(enum)
#
# Class/union/struct
#
def on_class_start(self, state: ClassBlockState) -> None:
block = ClassScope(state.class_decl)
self.block.classes.append(block)
self.block_stack.append(self.block)
self.block = block
def on_class_field(self, state: State, f: Field) -> None:
self.block.fields.append(f)
def on_class_method(self, state: ClassBlockState, method: Method) -> None:
self.block.methods.append(method)
def on_class_friend(self, state: ClassBlockState, friend: FriendDecl):
self.block.friends.append(friend)
def on_class_end(self, state: ClassBlockState) -> None:
self.block = self.block_stack.pop()
def parse_string(
content: str,
*,
filename="<str>",
options: typing.Optional[ParserOptions] = None,
cleandoc: bool = False,
) -> ParsedData:
"""
Simple function to parse a header and return a data structure
"""
if cleandoc:
content = inspect.cleandoc(content)
visitor = SimpleCxxVisitor()
parser = CxxParser(filename, content, visitor, options)
parser.parse()
return visitor.data
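# A quick usage sketch of the simple API (the snippet parsed here is arbitrary):
#
#   from cxxheaderparser.simple import parse_string
#   data = parse_string("int x = 0;")
#   # data.namespace is the global NamespaceScope; ``x`` ends up in
#   # data.namespace.variables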
def parse_file(
filename: str,
encoding: typing.Optional[str] = None,
*,
options: typing.Optional[ParserOptions] = None,
) -> ParsedData:
"""
Simple function to parse a header from a file and return a data structure
"""
with open(filename, encoding=encoding) as fp:
content = fp.read()
return parse_string(content, filename=filename, options=options)

cxxheaderparser/tokfmt.py (Normal file, 74 lines)
@@ -0,0 +1,74 @@
import typing
from .lexer import Lexer
from .types import Token
# key: token type, value: (left spacing, right spacing)
_want_spacing = {
"NUMBER": (2, 2),
"FLOAT_NUMBER": (2, 2),
"NAME": (2, 2),
"CHAR_LITERAL": (2, 2),
"STRING_LITERAL": (2, 2),
"ELLIPSIS": (2, 2),
">": (0, 2),
")": (0, 1),
"(": (1, 0),
",": (0, 3),
"*": (1, 2),
"&": (0, 2),
}
_want_spacing.update(dict.fromkeys(Lexer.keywords, (2, 2)))
def tokfmt(toks: typing.List[Token]) -> str:
"""
Helper function that takes a list of tokens and converts them to a string
"""
last = 0
vals = []
default = (0, 0)
ws = _want_spacing
for tok in toks:
value = tok.value
# special case
if value == "operator":
l, r = 2, 0
else:
l, r = ws.get(tok.type, default)
if l + last >= 3:
vals.append(" ")
last = r
vals.append(value)
return "".join(vals)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("header")
args = parser.parse_args()
lexer = Lexer(args.header)
with open(lexer.filename) as fp:
lexer.input(fp.read())
toks = []
while True:
tok = lexer.token_eof_ok()
if not tok:
break
if tok.type == ";":
print(toks)
print(tokfmt(toks))
toks = []
else:
toks.append(tok)
print(toks)
print(tokfmt(toks))

cxxheaderparser/types.py (Normal file, 650 lines)
@@ -0,0 +1,650 @@
import typing
from dataclasses import dataclass, field
@dataclass
class Token:
"""
In an ideal world, this Token class would not be exposed via the user
visible API. Unfortunately, getting to that point would take a significant
amount of effort.
It is not expected that these will change, but they might.
At the moment, the only supported use of Token objects is in conjunction
with the ``tokfmt`` function. As this library matures, we'll try to clarify
the expectations around these. File an issue on github if you have ideas!
"""
#: Raw value of the token
value: str
#: Lex type of the token
type: str = field(repr=False, compare=False, default="")
@dataclass
class Value:
"""
An unparsed list of tokens
.. code-block:: c++
int x = 0x1337;
~~~~~~
"""
#: Tokens corresponding to the value
tokens: typing.List[Token]
@dataclass
class NamespaceDecl:
"""
Namespace declarations
.. code-block:: c++
namespace foo::bar {}
~~~~~~~~
"""
#: These are the names (split by ::) for this namespace declaration,
#: but do not include any parent namespace names
#:
#: An anonymous namespace is an empty list
names: typing.List[str]
inline: bool = False
@dataclass
class DecltypeSpecifier:
"""
Contents of a decltype (inside the parentheses)
.. code-block:: c++
decltype(Foo::Bar)
~~~~~~~~
"""
#: Unparsed tokens within the decltype
tokens: typing.List[Token]
@dataclass
class FundamentalSpecifier:
"""
A specifier that only contains fundamental types
"""
name: str
@dataclass
class NameSpecifier:
"""
An individual segment of a type name
.. code-block:: c++
Foo::Bar
~~~
"""
name: str
specialization: typing.Optional[typing.List["TemplateSpecialization"]] = None
@dataclass
class AutoSpecifier:
"""
Used for an auto return type
"""
name: str = "auto"
@dataclass
class AnonymousName:
"""
A name for an anonymous class, such as in a typedef. There is no string
associated with this name, only an integer id. Things that share the same
anonymous name have anonymous name instances with the same id
"""
#: Unique id associated with this name (only unique per parser instance!)
id: int
PQNameSegment = typing.Union[
AnonymousName, FundamentalSpecifier, NameSpecifier, DecltypeSpecifier, AutoSpecifier
]
@dataclass
class PQName:
"""
Possibly qualified name of a C++ type.
"""
#: All of the segments of the name. This is always guaranteed to have at
#: least one element in it. Name is segmented by '::'
#:
#: If a name refers to the global namespace, the first segment will be an
#: empty NameSpecifier
segments: typing.List[PQNameSegment]
#: Set if the name starts with class/enum/struct
classkey: typing.Optional[str] = None
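# A rough sketch of how a qualified name decomposes (hand-written, not actual
# parser output):
#
#   Foo::Bar  ->  PQName(segments=[NameSpecifier(name="Foo"), NameSpecifier(name="Bar")])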
@dataclass
class Enumerator:
"""
An individual value of an enumeration
"""
#: The enumerator key name
name: str
#: None if not explicitly specified
value: typing.Optional[Value] = None
#: Documentation if present
doxygen: typing.Optional[str] = None
@dataclass
class EnumDecl:
"""
An enumeration type
"""
typename: PQName
values: typing.List[Enumerator]
base: typing.Optional[PQName] = None
#: Documentation if present
doxygen: typing.Optional[str] = None
#: If within a class, the access level for this decl
access: typing.Optional[str] = None
@dataclass
class TemplateArgument:
"""
A single argument for a template specialization
.. code-block:: c++
Foo<int, Bar...>
~~~
"""
#: This contains unparsed arbitrary expressions, including additional
#: specializations or decltypes or whatever
tokens: typing.List[Token]
@dataclass
class TemplateSpecialization:
"""
Contains the arguments of a template specialization
.. code-block:: c++
Foo<int, Bar...>
~~~~~~~~~~~
"""
args: typing.List[TemplateArgument]
#: If True, indicates a parameter pack (...) on the last parameter
param_pack: bool = False
@dataclass
class FunctionType:
"""
A function type, currently only used in a function pointer
.. note:: There can only be one of FunctionType or Type in a DecoratedType
chain
"""
return_type: "DecoratedType"
parameters: typing.List["Parameter"]
#: If a member function pointer
# TODO classname: typing.Optional[PQName]
#: Set to True if ends with ``...``
vararg: bool = False
@dataclass
class Type:
""""""
typename: PQName
const: bool = False
volatile: bool = False
def get_type(self) -> "Type":
return self
@dataclass
class Array:
"""
Information about an array. Multidimensional arrays are represented as
an array of arrays.
"""
#: The type that this is an array of
array_of: typing.Union["Array", "Pointer", Type]
#: Size of the array
#:
#: .. code-block:: c++
#:
#: int x[10];
#: ~~
size: typing.Optional[Value]
def get_type(self) -> Type:
return self.array_of.get_type()
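# A rough sketch for the docstring example above (hand-written, not actual
# parser output; the int specifier is assumed to be a FundamentalSpecifier):
#
#   int x[10]  ->  Array(
#       array_of=Type(typename=PQName(segments=[FundamentalSpecifier("int")])),
#       size=Value(tokens=[Token("10")]),
#   )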
@dataclass
class Pointer:
"""
A pointer
"""
#: Thing that this points to
ptr_to: typing.Union[Array, FunctionType, "Pointer", Type]
const: bool = False
volatile: bool = False
def get_type(self) -> Type:
return self.ptr_to.get_type()
@dataclass
class Reference:
"""
An lvalue (``&``) reference
"""
ref_to: typing.Union[Array, Pointer, Type]
def get_type(self) -> Type:
return self.ref_to.get_type()
@dataclass
class MoveReference:
"""
An rvalue (``&&``) reference
"""
moveref_to: typing.Union[Array, Pointer, Type]
def get_type(self) -> Type:
return self.moveref_to.get_type()
#: A type or function type that is decorated with various things
#:
#: .. note:: There can only be one of FunctionType or Type in a DecoratedType
#: chain
DecoratedType = typing.Union[Array, Pointer, MoveReference, Reference, Type]
@dataclass
class TemplateNonTypeParam:
"""
.. code-block:: c++
template <int T>
~~~~~
template <class T, typename T::type* U>
~~~~~~~~~~~~~~~~~~~
template <auto T>
~~~~~~
"""
type: DecoratedType
name: typing.Optional[str] = None
default: typing.Optional[Value] = None
#: Contains a ``...``
param_pack: bool = False
@dataclass
class TemplateTypeParam:
"""
.. code-block:: c++
template <typename T>
~~~~~~~~~~
"""
#: 'typename' or 'class'
typekey: str
name: typing.Optional[str] = None
param_pack: bool = False
default: typing.Optional[Value] = None
#: A template-template param
template: typing.Optional["TemplateDecl"] = None
#: A parameter for a template declaration
#:
#: .. code-block:: c++
#:
#: template <typename T>
#: ~~~~~~~~~~
TemplateParam = typing.Union[TemplateNonTypeParam, TemplateTypeParam]
@dataclass
class TemplateDecl:
"""
Template declaration for a function or class
.. code-block:: c++
template <typename T>
class Foo {};
template <typename T>
T fn();
"""
params: typing.List[TemplateParam] = field(default_factory=list)
@dataclass
class ForwardDecl:
"""
Represents a forward declaration of a user defined type
"""
typename: PQName
template: typing.Optional[TemplateDecl] = None
doxygen: typing.Optional[str] = None
#: Set if this is a forward declaration of an enum and it has a base
enum_base: typing.Optional[PQName] = None
#: If within a class, the access level for this decl
access: typing.Optional[str] = None
@dataclass
class BaseClass:
"""
Base class declarations for a class
"""
#: access specifier for this base
access: str
#: possibly qualified type name for the base
typename: PQName
#: Virtual inheritance
virtual: bool = False
#: Contains a ``...``
param_pack: bool = False
@dataclass
class ClassDecl:
"""
A class is a user defined type (class/struct/union)
"""
typename: PQName
bases: typing.List[BaseClass] = field(default_factory=list)
template: typing.Optional[TemplateDecl] = None
explicit: bool = False
final: bool = False
doxygen: typing.Optional[str] = None
#: If within a class, the access level for this decl
access: typing.Optional[str] = None
@property
def classkey(self) -> str:
return self.typename.classkey
@dataclass
class Parameter:
"""
A parameter of a function/method
"""
type: DecoratedType
name: typing.Optional[str] = None
default: typing.Optional[Value] = None
param_pack: bool = False
@dataclass
class Function:
"""
A function declaration, potentially with the function body
"""
return_type: DecoratedType
name: PQName
parameters: typing.List[Parameter]
#: Set to True if ends with ``...``
vararg: bool = False
doxygen: typing.Optional[str] = None
constexpr: bool = False
extern: typing.Union[bool, str] = False
static: bool = False
inline: bool = False
#: If true, the body of the function is present
has_body: bool = False
template: typing.Optional[TemplateDecl] = None
throw: typing.Optional[Value] = None
noexcept: typing.Optional[Value] = None
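# Illustrative sketch (not part of this commit): the declaration
# ``int add(int x, int y);`` expressed with Function/Parameter. The parser
# would produce separate Type instances; one shared instance is used here
# only for brevity. PQName/NameSpecifier field names are assumed.
_int_t = Type(typename=PQName(segments=[NameSpecifier("int")]))
_example_function = Function(
    return_type=_int_t,
    name=PQName(segments=[NameSpecifier("add")]),
    parameters=[
        Parameter(type=_int_t, name="x"),
        Parameter(type=_int_t, name="y"),
    ],
)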
@dataclass
class Method(Function):
"""
A method declaration, potentially with the method body
"""
#: constructors and destructors don't have a return type
return_type: typing.Optional[DecoratedType]
access: str = ""
const: bool = False
volatile: bool = False
#: ref-qualifier for this method, either lvalue (&) or rvalue (&&)
#:
#: .. code-block:: c++
#:
#: void foo() &&;
#: ~~
#:
ref_qualifier: typing.Optional[str] = None
constructor: bool = False
explicit: bool = False
default: bool = False
deleted: bool = False
destructor: bool = False
pure_virtual: bool = False
virtual: bool = False
final: bool = False
override: bool = False
@dataclass
class Operator(Method):
operator: str = ""
@dataclass
class FriendDecl:
"""
Represents a friend declaration -- friends can only be classes or functions
"""
cls: typing.Optional[ForwardDecl] = None
fn: typing.Optional[Function] = None
@dataclass
class Typedef:
"""
A typedef specifier. A unique typedef specifier is created for each alias
created by the typedef.
.. code-block:: c++
typedef type name, *pname;
"""
#: The aliased type
#:
#: .. code-block:: c++
#:
#: typedef type *pname;
#: ~~~~~~
type: DecoratedType
#: The alias introduced for the specified type
#:
#: .. code-block:: c++
#:
#: typedef type *pname;
#: ~~~~~
name: str
#: If within a class, the access level for this decl
access: typing.Optional[str] = None
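# Illustrative sketch (not part of this commit): ``typedef int T, *PT;``
# produces one Typedef per alias, the second wrapping the type in a Pointer.
# The shared Type instance is only for brevity; PQName/NameSpecifier field
# names are assumed from earlier in this file.
_typedef_base = Type(typename=PQName(segments=[NameSpecifier("int")]))
_example_typedefs = [
    Typedef(type=_typedef_base, name="T"),
    Typedef(type=Pointer(ptr_to=_typedef_base), name="PT"),
]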
@dataclass
class Variable:
"""
A variable declaration
"""
name: PQName
type: DecoratedType
value: typing.Optional[Value] = None
constexpr: bool = False
extern: typing.Union[bool, str] = False
static: bool = False
inline: bool = False
    #: Can occur for a static variable of a templated class
template: typing.Optional[TemplateDecl] = None
doxygen: typing.Optional[str] = None
@dataclass
class Field:
"""
A field of a class
"""
#: public/private/protected
access: str
type: DecoratedType
name: typing.Optional[str] = None
value: typing.Optional[Value] = None
bits: typing.Optional[int] = None
constexpr: bool = False
mutable: bool = False
static: bool = False
doxygen: typing.Optional[str] = None
@dataclass
class UsingDecl:
"""
.. code-block:: c++
using NS::ClassName;
"""
typename: PQName
#: If within a class, the access level for this decl
access: typing.Optional[str] = None
@dataclass
class UsingAlias:
"""
.. code-block:: c++
using foo = int;
template <typename T>
using VectorT = std::vector<T>;
"""
alias: str
type: DecoratedType
template: typing.Optional[TemplateDecl] = None
#: If within a class, the access level for this decl
access: typing.Optional[str] = None

197
cxxheaderparser/visitor.py Normal file
View File

@@ -0,0 +1,197 @@
import sys
import typing
if sys.version_info >= (3, 8):
Protocol = typing.Protocol
else:
Protocol = object
from .types import (
EnumDecl,
Field,
ForwardDecl,
FriendDecl,
Function,
Method,
Typedef,
UsingAlias,
UsingDecl,
Variable,
)
from .parserstate import (
State,
EmptyBlockState,
ClassBlockState,
ExternBlockState,
NamespaceBlockState,
)
class CxxVisitor(Protocol):
"""
Defines the interface used by the parser to emit events
"""
def on_define(self, state: State, content: str) -> None:
"""
.. warning:: cxxheaderparser intentionally does not have a C preprocessor
implementation. If you are parsing code with macros in it,
use a conforming preprocessor like ``pcpp``
"""
def on_pragma(self, state: State, content: str) -> None:
"""
Called once for each ``#pragma`` directive encountered
"""
def on_include(self, state: State, filename: str) -> None:
"""
Called once for each ``#include`` directive encountered
"""
def on_empty_block_start(self, state: EmptyBlockState) -> None:
"""
Called when a ``{`` is encountered that isn't associated with or
consumed by other declarations.
.. code-block:: c++
{
// stuff
}
"""
def on_empty_block_end(self, state: EmptyBlockState) -> None:
...
def on_extern_block_start(self, state: ExternBlockState) -> None:
"""
.. code-block:: c++
extern "C" {
}
"""
def on_extern_block_end(self, state: ExternBlockState) -> None:
...
def on_namespace_start(self, state: NamespaceBlockState) -> None:
"""
Called when a ``namespace`` directive is encountered
"""
def on_namespace_end(self, state: NamespaceBlockState) -> None:
"""
Called at the end of a ``namespace`` block
"""
def on_forward_decl(self, state: State, fdecl: ForwardDecl) -> None:
"""
Called when a forward declaration is encountered
"""
def on_variable(self, state: State, v: Variable) -> None:
...
def on_function(self, state: State, fn: Function) -> None:
...
def on_typedef(self, state: State, typedef: Typedef) -> None:
"""
Called for each typedef instance encountered. For example:
.. code-block:: c++
typedef int T, *PT;
Will result in ``on_typedef`` being called twice, once for ``T`` and
once for ``*PT``
"""
def on_using_namespace(self, state: State, namespace: typing.List[str]) -> None:
"""
.. code-block:: c++
using namespace std;
"""
    def on_using_alias(self, state: State, using: UsingAlias) -> None:
        """
        .. code-block:: c++

            using foo = int;

            template <typename T>
            using VectorT = std::vector<T>;
        """
def on_using_declaration(self, state: State, using: UsingDecl) -> None:
"""
.. code-block:: c++
using NS::ClassName;
"""
#
# Enums
#
def on_enum(self, state: State, enum: EnumDecl) -> None:
"""
Called after an enum is encountered
"""
#
# Class/union/struct
#
def on_class_start(self, state: ClassBlockState) -> None:
"""
Called when a class/struct/union is encountered
When part of a typedef:
.. code-block:: c++
typedef struct { } X;
This is called first, followed by on_typedef for each typedef instance
encountered. The compound type object is passed as the type to the
typedef.
"""
def on_class_field(self, state: ClassBlockState, f: Field) -> None:
"""
Called when a field of a class is encountered
"""
    def on_class_friend(self, state: ClassBlockState, friend: FriendDecl) -> None:
"""
Called when a friend declaration is encountered
"""
def on_class_method(self, state: ClassBlockState, method: Method) -> None:
"""
Called when a method of a class is encountered
"""
def on_class_end(self, state: ClassBlockState) -> None:
"""
Called when the end of a class/struct/union is encountered.
When a variable like this is declared:
.. code-block:: c++
struct X {
} x;
Then ``on_class_start``, .. ``on_class_end`` are emitted, along with
``on_variable`` for each instance declared.
"""