Add support for parsing user defined literals

This commit is contained in:
Dustin Spicuzza 2022-12-15 02:55:07 -05:00
parent 1eaa85ae8d
commit e5295070a0
3 changed files with 83 additions and 4 deletions

View File

@ -179,6 +179,7 @@ class PlyLexer:
# misc
"DIVIDE",
"NEWLINE",
"WHITESPACE",
"ELLIPSIS",
"DBL_LBRACKET",
"DBL_RBRACKET",
@ -329,7 +330,8 @@ class PlyLexer:
+ "[FfLl]?)"
)
t_ignore = " \t\r?@\f"
t_WHITESPACE = "[ \t]+"
t_ignore = "\r"
# The following floating and integer constants are defined as
# functions to impose a strict order (otherwise, decimal
@ -531,7 +533,12 @@ class TokenStream:
"""
raise NotImplementedError
_discard_types = {"NEWLINE", "COMMENT_SINGLELINE", "COMMENT_MULTILINE"}
_discard_types = {
"NEWLINE",
"COMMENT_SINGLELINE",
"COMMENT_MULTILINE",
"WHITESPACE",
}
def token(self) -> LexToken:
tokbuf = self.tokbuf
@ -610,6 +617,27 @@ class LexerTokenStream(TokenStream):
Provides tokens from using PlyLexer on the given input text
"""
_user_defined_literal_start = {
"FLOAT_CONST",
"HEX_FLOAT_CONST",
"INT_CONST_HEX",
"INT_CONST_BIN",
"INT_CONST_OCT",
"INT_CONST_DEC",
"INT_CONST_CHAR",
"CHAR_CONST",
"WCHAR_CONST",
"U8CHAR_CONST",
"U16CHAR_CONST",
"U32CHAR_CONST",
# String literals
"STRING_LITERAL",
"WSTRING_LITERAL",
"U8STRING_LITERAL",
"U16STRING_LITERAL",
"U32STRING_LITERAL",
}
def __init__(self, filename: typing.Optional[str], content: str) -> None:
self._lex = PlyLexer(filename)
self._lex.input(content)
@ -623,6 +651,8 @@ class LexerTokenStream(TokenStream):
if tok is None:
return False
udl_start = self._user_defined_literal_start
while True:
tok.location = self._lex.current_location()
tokbuf.append(tok)
@ -630,6 +660,19 @@ class LexerTokenStream(TokenStream):
if tok.type == "NEWLINE":
break
# detect/combine user defined literals
if tok.type in udl_start:
tok2 = get_token()
if tok2 is None:
break
if tok2.type != "NAME" or tok2.value[0] != "_":
tok = tok2
continue
tok.value = tok.value + tok2.value
tok.type = f"UD_{tok.type}"
tok = get_token()
if tok is None:
break
@ -659,6 +702,8 @@ class LexerTokenStream(TokenStream):
tok = tokbuf.popleft()
if tok.type == "NEWLINE":
comments.clear()
elif tok.type == "WHITESPACE":
pass
elif tok.type in ("COMMENT_SINGLELINE", "COMMENT_MULTILINE"):
comments.append(tok)
else:
@ -693,6 +738,8 @@ class LexerTokenStream(TokenStream):
tok = tokbuf.popleft()
if tok.type == "NEWLINE":
break
elif tok.type == "WHITESPACE":
new_tokbuf.append(tok)
elif tok.type in ("COMMENT_SINGLELINE", "COMMENT_MULTILINE"):
comments.append(tok)
else:

View File

@ -236,3 +236,34 @@ def test_final() -> None:
],
)
)
#
# User defined literals
#
def test_user_defined_literal() -> None:
    # A numeric literal followed by a _-prefixed suffix (e.g. 1_V) must be
    # lexed as a single user-defined-literal token and survive as one Value
    # token in the parsed variable initializer.
    src = """
    units::volt_t v = 1_V;
    """

    expected = ParsedData(
        namespace=NamespaceScope(
            variables=[
                Variable(
                    name=PQName(segments=[NameSpecifier(name="v")]),
                    type=Type(
                        typename=PQName(
                            segments=[
                                NameSpecifier(name="units"),
                                NameSpecifier(name="volt_t"),
                            ]
                        )
                    ),
                    value=Value(tokens=[Token(value="1_V")]),
                )
            ]
        )
    )

    assert parse_string(src, cleandoc=True) == expected

View File

@ -1,6 +1,6 @@
import pytest
from cxxheaderparser.lexer import PlyLexer
from cxxheaderparser.lexer import PlyLexer, LexerTokenStream
from cxxheaderparser.tokfmt import tokfmt
from cxxheaderparser.types import Token
@ -48,6 +48,7 @@ def test_tokfmt(instr: str) -> None:
if not tok:
break
if tok.type not in LexerTokenStream._discard_types:
toks.append(Token(tok.value, tok.type))
assert tokfmt(toks) == instr