diff --git a/cxxheaderparser/lexer.py b/cxxheaderparser/lexer.py
index 096882a..075e2cf 100644
--- a/cxxheaderparser/lexer.py
+++ b/cxxheaderparser/lexer.py
@@ -179,6 +179,7 @@ class PlyLexer:
         # misc
         "DIVIDE",
         "NEWLINE",
+        "WHITESPACE",
         "ELLIPSIS",
         "DBL_LBRACKET",
         "DBL_RBRACKET",
@@ -329,7 +330,8 @@ class PlyLexer:
         + "[FfLl]?)"
     )
 
-    t_ignore = " \t\r?@\f"
+    t_WHITESPACE = "[ \t]+"
+    t_ignore = "\r"
 
     # The following floating and integer constants are defined as
     # functions to impose a strict order (otherwise, decimal
@@ -531,7 +533,12 @@ class TokenStream:
         """
         raise NotImplementedError
 
-    _discard_types = {"NEWLINE", "COMMENT_SINGLELINE", "COMMENT_MULTILINE"}
+    _discard_types = {
+        "NEWLINE",
+        "COMMENT_SINGLELINE",
+        "COMMENT_MULTILINE",
+        "WHITESPACE",
+    }
 
     def token(self) -> LexToken:
         tokbuf = self.tokbuf
@@ -610,6 +617,27 @@ class LexerTokenStream(TokenStream):
     Provides tokens from using PlyLexer on the given input text
     """
 
+    _user_defined_literal_start = {
+        "FLOAT_CONST",
+        "HEX_FLOAT_CONST",
+        "INT_CONST_HEX",
+        "INT_CONST_BIN",
+        "INT_CONST_OCT",
+        "INT_CONST_DEC",
+        "INT_CONST_CHAR",
+        "CHAR_CONST",
+        "WCHAR_CONST",
+        "U8CHAR_CONST",
+        "U16CHAR_CONST",
+        "U32CHAR_CONST",
+        # String literals
+        "STRING_LITERAL",
+        "WSTRING_LITERAL",
+        "U8STRING_LITERAL",
+        "U16STRING_LITERAL",
+        "U32STRING_LITERAL",
+    }
+
     def __init__(self, filename: typing.Optional[str], content: str) -> None:
         self._lex = PlyLexer(filename)
         self._lex.input(content)
@@ -623,6 +651,8 @@ class LexerTokenStream(TokenStream):
         if tok is None:
             return False
 
+        udl_start = self._user_defined_literal_start
+
         while True:
             tok.location = self._lex.current_location()
             tokbuf.append(tok)
@@ -630,6 +660,19 @@ class LexerTokenStream(TokenStream):
             if tok.type == "NEWLINE":
                 break
 
+            # detect/combine user defined literals
+            if tok.type in udl_start:
+                tok2 = get_token()
+                if tok2 is None:
+                    break
+
+                if tok2.type != "NAME" or tok2.value[0] != "_":
+                    tok = tok2
+                    continue
+
+                tok.value = tok.value + tok2.value
+                tok.type = f"UD_{tok.type}"
+
             tok = get_token()
             if tok is None:
                 break
@@ -659,6 +702,8 @@ class LexerTokenStream(TokenStream):
                 tok = tokbuf.popleft()
                 if tok.type == "NEWLINE":
                     comments.clear()
+                elif tok.type == "WHITESPACE":
+                    pass
                 elif tok.type in ("COMMENT_SINGLELINE", "COMMENT_MULTILINE"):
                     comments.append(tok)
                 else:
@@ -693,6 +738,8 @@ class LexerTokenStream(TokenStream):
                 tok = tokbuf.popleft()
                 if tok.type == "NEWLINE":
                     break
+                elif tok.type == "WHITESPACE":
+                    new_tokbuf.append(tok)
                 elif tok.type in ("COMMENT_SINGLELINE", "COMMENT_MULTILINE"):
                     comments.append(tok)
                 else:
diff --git a/tests/test_misc.py b/tests/test_misc.py
index b90fd96..81d4a0e 100644
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@@ -236,3 +236,34 @@ def test_final() -> None:
             ],
         )
     )
+
+
+#
+# User defined literals
+#
+
+
+def test_user_defined_literal() -> None:
+    content = """
+      units::volt_t v = 1_V;
+    """
+    data = parse_string(content, cleandoc=True)
+
+    assert data == ParsedData(
+        namespace=NamespaceScope(
+            variables=[
+                Variable(
+                    name=PQName(segments=[NameSpecifier(name="v")]),
+                    type=Type(
+                        typename=PQName(
+                            segments=[
+                                NameSpecifier(name="units"),
+                                NameSpecifier(name="volt_t"),
+                            ]
+                        )
+                    ),
+                    value=Value(tokens=[Token(value="1_V")]),
+                )
+            ]
+        )
+    )
diff --git a/tests/test_tokfmt.py b/tests/test_tokfmt.py
index 758b9f6..cc0b379 100644
--- a/tests/test_tokfmt.py
+++ b/tests/test_tokfmt.py
@@ -1,6 +1,6 @@
 import pytest
 
-from cxxheaderparser.lexer import PlyLexer
+from cxxheaderparser.lexer import PlyLexer, LexerTokenStream
 from cxxheaderparser.tokfmt import tokfmt
 from cxxheaderparser.types import Token
 
@@ -48,6 +48,7 @@ def test_tokfmt(instr: str) -> None:
         if not tok:
             break
 
-        toks.append(Token(tok.value, tok.type))
+        if tok.type not in LexerTokenStream._discard_types:
+            toks.append(Token(tok.value, tok.type))
 
     assert tokfmt(toks) == instr
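
Reviewer note, not part of the patch: a minimal sketch of the observable effect of the lexer change, assuming the cxxheaderparser.simple.parse_string API that tests/test_misc.py already uses and reusing the illustrative units::volt_t / _V names from the new test.

```python
# Minimal usage sketch (assumed API: cxxheaderparser.simple.parse_string,
# as used by the test suite; units::volt_t and _V are illustrative names).
from cxxheaderparser.simple import parse_string

content = """
  units::volt_t v = 1_V;
"""
data = parse_string(content, cleandoc=True)

# With the WHITESPACE/UDL changes, "1" (INT_CONST_DEC) followed by the "_V"
# suffix is emitted as a single UD_INT_CONST_DEC token, so the initializer
# round-trips as one "1_V" token instead of being split in two.
v = data.namespace.variables[0]
assert v.value is not None and v.value.tokens[0].value == "1_V"
```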