Add lexer support for parsing a specified group of tokens

Dustin Spicuzza 2020-12-30 02:51:00 -05:00
parent 2de33946c2
commit 0c9c49b7e3
2 changed files with 53 additions and 5 deletions

View File

@@ -1,5 +1,7 @@
 import typing
-from .lexer import LexToken
+if typing.TYPE_CHECKING:
+    from .lexer import LexToken
 class CxxParseError(Exception):
@@ -7,6 +9,6 @@ class CxxParseError(Exception):
     Exception raised when a parsing error occurs
     """
-    def __init__(self, msg: str, tok: typing.Optional[LexToken] = None) -> None:
+    def __init__(self, msg: str, tok: typing.Optional["LexToken"] = None) -> None:
         Exception.__init__(self, msg)
         self.tok = tok

View File

@@ -1,9 +1,11 @@
+import contextlib
 from collections import deque
 import re
 import typing
 import sys
+from .errors import CxxParseError
 from ._ply import lex
@@ -43,6 +45,13 @@ class LexToken(Protocol):
     location: Location

+PhonyEnding = lex.LexToken()
+PhonyEnding.type = "PLACEHOLDER"
+PhonyEnding.value = ""
+PhonyEnding.lineno = 0
+PhonyEnding.lexpos = 0

 class Lexer:
     keywords = {
@@ -268,6 +277,10 @@ class Lexer:
         self.lookahead = typing.Deque[LexToken]()

+        # For 'set_group_of_tokens' support
+        self._get_token = self.lex.token
+        self.lookahead_stack = typing.Deque[typing.Deque[LexToken]]()

     def current_location(self) -> Location:
         if self.lookahead:
             return self.lookahead[0].location
@@ -295,7 +308,7 @@
                return None

            while True:
-                tok = self.lex.token()
+                tok = self._get_token()
                comments.extend(self.comments)

                if tok is None:
@@ -324,6 +337,39 @@
     _discard_types = {"NEWLINE", "COMMENT_SINGLELINE", "COMMENT_MULTILINE"}

+    def _token_limit_exceeded(self):
+        raise CxxParseError("no more tokens left in this group")
+
+    @contextlib.contextmanager
+    def set_group_of_tokens(self, toks: typing.List[LexToken]):
+        # intended for use when you have a set of tokens that you know
+        # must be consumed, such as a paren grouping or some type of
+        # lookahead case
+        stack = self.lookahead_stack
+        restore_fn = False
+
+        if not stack:
+            restore_fn = True
+            self._get_token = self._token_limit_exceeded
+
+        this_buf = typing.Deque[LexToken](toks)
+        prev_buf = self.lookahead
+        stack.append(prev_buf)
+        self.lookahead = this_buf
+        try:
+            yield this_buf
+        finally:
+            buf = stack.pop()
+            if prev_buf is not buf:
+                raise ValueError("internal error")
+
+            self.lookahead = prev_buf
+
+            if restore_fn:
+                self._get_token = self.lex.token
+
     def token(self) -> LexToken:
         tok = None
         while self.lookahead:
@@ -332,7 +378,7 @@
                 return tok

         while True:
-            tok = self.lex.token()
+            tok = self._get_token()
             if tok is None:
                 raise EOFError("unexpected end of file")
@@ -350,7 +396,7 @@
                 return tok

         while True:
-            tok = self.lex.token()
+            tok = self._get_token()
             if tok is None:
                 break
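Usage sketch of the new set_group_of_tokens context manager (hypothetical, not part of this commit): inside the with block the lexer's lookahead buffer is replaced by the supplied tokens and _get_token is swapped for _token_limit_exceeded, so reading past the group raises CxxParseError instead of silently pulling more input. The helper name consume_group, the lexer instance, and the import path are assumptions.

# Hypothetical usage sketch. `lexer` is an already-constructed Lexer and
# `group_toks` is a list of LexToken the caller has already read, e.g.
# the tokens between a matched '(' and ')'. The import path assumes the
# package is installed as cxxheaderparser.
from cxxheaderparser.errors import CxxParseError

def consume_group(lexer, group_toks):
    values = []
    with lexer.set_group_of_tokens(group_toks) as buf:
        try:
            while True:
                # token() now pops only from `buf`
                values.append(lexer.token().value)
        except CxxParseError:
            # raised by the swapped-in _token_limit_exceeded once the
            # group is exhausted
            pass
    # the previous lookahead buffer and token source are restored here
    return values

Because _get_token is only swapped on the outermost call (when lookahead_stack is empty), groups can nest: each nested call pushes the current buffer onto lookahead_stack and the finally block restores it on exit.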