From 0c9c49b7e3bea5533161affaddc9259e7c7f616a Mon Sep 17 00:00:00 2001
From: Dustin Spicuzza
Date: Wed, 30 Dec 2020 02:51:00 -0500
Subject: [PATCH] Add lexer support for parsing a specified group of tokens

---
 cxxheaderparser/errors.py |  6 +++--
 cxxheaderparser/lexer.py  | 52 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/cxxheaderparser/errors.py b/cxxheaderparser/errors.py
index c92e177..d457f51 100644
--- a/cxxheaderparser/errors.py
+++ b/cxxheaderparser/errors.py
@@ -1,5 +1,7 @@
 import typing
-from .lexer import LexToken
+
+if typing.TYPE_CHECKING:
+    from .lexer import LexToken
 
 
 class CxxParseError(Exception):
@@ -7,6 +9,6 @@ class CxxParseError(Exception):
     Exception raised when a parsing error occurs
     """
 
-    def __init__(self, msg: str, tok: typing.Optional[LexToken] = None) -> None:
+    def __init__(self, msg: str, tok: typing.Optional["LexToken"] = None) -> None:
         Exception.__init__(self, msg)
         self.tok = tok
diff --git a/cxxheaderparser/lexer.py b/cxxheaderparser/lexer.py
index 93bb3e7..d982caa 100644
--- a/cxxheaderparser/lexer.py
+++ b/cxxheaderparser/lexer.py
@@ -1,9 +1,11 @@
+import contextlib
 from collections import deque
 
 import re
 import typing
 import sys
 
+from .errors import CxxParseError
 from ._ply import lex
 
 
@@ -43,6 +45,13 @@ class LexToken(Protocol):
     location: Location
 
 
+PhonyEnding = lex.LexToken()
+PhonyEnding.type = "PLACEHOLDER"
+PhonyEnding.value = ""
+PhonyEnding.lineno = 0
+PhonyEnding.lexpos = 0
+
+
 class Lexer:
 
     keywords = {
@@ -268,6 +277,10 @@ class Lexer:
 
         self.lookahead = typing.Deque[LexToken]()
 
+        # For 'set_group_of_tokens' support
+        self._get_token = self.lex.token
+        self.lookahead_stack = typing.Deque[typing.Deque[LexToken]]()
+
     def current_location(self) -> Location:
         if self.lookahead:
             return self.lookahead[0].location
@@ -295,7 +308,7 @@
             return None
 
         while True:
-            tok = self.lex.token()
+            tok = self._get_token()
             comments.extend(self.comments)
 
             if tok is None:
@@ -324,6 +337,39 @@
 
     _discard_types = {"NEWLINE", "COMMENT_SINGLELINE", "COMMENT_MULTILINE"}
 
+    def _token_limit_exceeded(self):
+        raise CxxParseError("no more tokens left in this group")
+
+    @contextlib.contextmanager
+    def set_group_of_tokens(self, toks: typing.List[LexToken]):
+        # intended for use when you have a set of tokens that you know
+        # must be consumed, such as a paren grouping or some type of
+        # lookahead case
+
+        stack = self.lookahead_stack
+        restore_fn = False
+
+        if not stack:
+            restore_fn = True
+            self._get_token = self._token_limit_exceeded
+
+        this_buf = typing.Deque[LexToken](toks)
+        prev_buf = self.lookahead
+        stack.append(prev_buf)
+        self.lookahead = this_buf
+
+        try:
+            yield this_buf
+        finally:
+            buf = stack.pop()
+            if prev_buf is not buf:
+                raise ValueError("internal error")
+
+            self.lookahead = prev_buf
+
+            if restore_fn:
+                self._get_token = self.lex.token
+
     def token(self) -> LexToken:
         tok = None
         while self.lookahead:
@@ -332,7 +378,7 @@
             return tok
 
         while True:
-            tok = self.lex.token()
+            tok = self._get_token()
             if tok is None:
                 raise EOFError("unexpected end of file")
 
@@ -350,7 +396,7 @@
             return tok
 
         while True:
-            tok = self.lex.token()
+            tok = self._get_token()
             if tok is None:
                 break
 
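
Note on the errors.py hunks: lexer.py now imports CxxParseError at module
level, so a runtime "from .lexer import LexToken" in errors.py would create
an import cycle. Guarding the import behind typing.TYPE_CHECKING and quoting
the annotation keeps the name visible to type checkers without executing the
import at runtime. A minimal standalone sketch of the same pattern (the
function name here is illustrative, not from the patch):

    import typing

    if typing.TYPE_CHECKING:
        # Seen only by static type checkers, never executed at runtime,
        # so it cannot participate in an import cycle.
        from .lexer import LexToken

    def describe(tok: "LexToken") -> str:  # quoted: resolved lazily
        return tok.value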
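
Note on set_group_of_tokens: while the context manager is active, token()
pops only from the supplied deque; once that deque is exhausted, the swapped
token source (_token_limit_exceeded) raises CxxParseError instead of reading
past the group, and lookahead_stack lets groups nest. PhonyEnding is defined
but not yet used in this patch; it appears to be a sentinel that callers can
append to a group. A usage sketch under those assumptions, where "lexer" is
an initialized Lexer and "group" is a list of LexToken previously collected
from it (for example, the tokens between a matched pair of parentheses):

    # Reparse an already-collected token run in isolation.
    with lexer.set_group_of_tokens(group) as buf:
        while buf:
            tok = lexer.token()  # served from 'buf', not the raw stream
            ...  # parse the grouped tokens here
    # On exit, the previous lookahead buffer and token source are restored.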