Add support for parsing user defined literals

parent 1eaa85ae8d
commit e5295070a0
@@ -179,6 +179,7 @@ class PlyLexer:
         # misc
         "DIVIDE",
         "NEWLINE",
+        "WHITESPACE",
         "ELLIPSIS",
         "DBL_LBRACKET",
         "DBL_RBRACKET",
@@ -329,7 +330,8 @@ class PlyLexer:
         + "[FfLl]?)"
     )

-    t_ignore = " \t\r?@\f"
+    t_WHITESPACE = "[ \t]+"
+    t_ignore = "\r"

     # The following floating and integer constants are defined as
     # functions to impose a strict order (otherwise, decimal
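Note on the hunk above: in PLY, characters listed in t_ignore are skipped one at
a time without producing tokens, while a string-valued t_<TYPE> attribute is a
regex that emits a real token. A minimal standalone sketch of the distinction
(the WORD token and the input are hypothetical, not part of this diff):

import ply.lex as lex

tokens = ("WHITESPACE", "WORD")

t_WHITESPACE = r"[ \t]+"  # runs of spaces/tabs now surface as tokens
t_ignore = "\r"           # bare carriage returns are still dropped silently
t_WORD = r"[A-Za-z_]\w*"  # hypothetical token for this sketch


def t_error(t):
    t.lexer.skip(1)


lexer = lex.lex()
lexer.input("a b\r")
print([tok.type for tok in lexer])  # ['WORD', 'WHITESPACE', 'WORD']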
@@ -531,7 +533,12 @@ class TokenStream:
         """
         raise NotImplementedError

-    _discard_types = {"NEWLINE", "COMMENT_SINGLELINE", "COMMENT_MULTILINE"}
+    _discard_types = {
+        "NEWLINE",
+        "COMMENT_SINGLELINE",
+        "COMMENT_MULTILINE",
+        "WHITESPACE",
+    }

     def token(self) -> LexToken:
         tokbuf = self.tokbuf
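Because WHITESPACE joins _discard_types, the parser-facing token stream is
unchanged; only code that drives PlyLexer directly has to filter, the same way
the test_tokfmt hunk at the end of this diff does. A sketch of that pattern
(assumes PlyLexer accepts the optional filename the way LexerTokenStream passes
it below):

from cxxheaderparser.lexer import PlyLexer, LexerTokenStream

lexer = PlyLexer(None)
lexer.input("a + b")

toks = []
while True:
    tok = lexer.token()
    if not tok:
        break
    # drop NEWLINE/comment/WHITESPACE tokens, keeping parser-relevant ones
    if tok.type not in LexerTokenStream._discard_types:
        toks.append((tok.type, tok.value))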
@@ -610,6 +617,27 @@ class LexerTokenStream(TokenStream):
     Provides tokens from using PlyLexer on the given input text
     """

+    _user_defined_literal_start = {
+        "FLOAT_CONST",
+        "HEX_FLOAT_CONST",
+        "INT_CONST_HEX",
+        "INT_CONST_BIN",
+        "INT_CONST_OCT",
+        "INT_CONST_DEC",
+        "INT_CONST_CHAR",
+        "CHAR_CONST",
+        "WCHAR_CONST",
+        "U8CHAR_CONST",
+        "U16CHAR_CONST",
+        "U32CHAR_CONST",
+        # String literals
+        "STRING_LITERAL",
+        "WSTRING_LITERAL",
+        "U8STRING_LITERAL",
+        "U16STRING_LITERAL",
+        "U32STRING_LITERAL",
+    }
+
     def __init__(self, filename: typing.Optional[str], content: str) -> None:
         self._lex = PlyLexer(filename)
         self._lex.input(content)
@@ -623,6 +651,8 @@ class LexerTokenStream(TokenStream):
         if tok is None:
             return False

+        udl_start = self._user_defined_literal_start
+
         while True:
             tok.location = self._lex.current_location()
             tokbuf.append(tok)
@@ -630,6 +660,19 @@ class LexerTokenStream(TokenStream):
             if tok.type == "NEWLINE":
                 break

+            # detect/combine user defined literals
+            if tok.type in udl_start:
+                tok2 = get_token()
+                if tok2 is None:
+                    break
+
+                if tok2.type != "NAME" or tok2.value[0] != "_":
+                    tok = tok2
+                    continue
+
+                tok.value = tok.value + tok2.value
+                tok.type = f"UD_{tok.type}"
+
             tok = get_token()
             if tok is None:
                 break
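The effect of the combining block above: a numeric, character, or string
literal token followed immediately by a NAME beginning with "_" is merged into
a single token whose type gains a UD_ prefix. A quick sketch using the stream
API from this diff (assumes a build with this change; token()'s exact EOF
behavior isn't shown here, so only the first token is read):

from cxxheaderparser.lexer import LexerTokenStream

stream = LexerTokenStream(None, "1.5_mV;\n")
tok = stream.token()
print(tok.type, tok.value)  # UD_FLOAT_CONST 1.5_mV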
@@ -659,6 +702,8 @@ class LexerTokenStream(TokenStream):
             tok = tokbuf.popleft()
             if tok.type == "NEWLINE":
                 comments.clear()
+            elif tok.type == "WHITESPACE":
+                pass
             elif tok.type in ("COMMENT_SINGLELINE", "COMMENT_MULTILINE"):
                 comments.append(tok)
             else:
@@ -693,6 +738,8 @@ class LexerTokenStream(TokenStream):
             tok = tokbuf.popleft()
             if tok.type == "NEWLINE":
                 break
+            elif tok.type == "WHITESPACE":
+                new_tokbuf.append(tok)
             elif tok.type in ("COMMENT_SINGLELINE", "COMMENT_MULTILINE"):
                 comments.append(tok)
             else:
@@ -236,3 +236,34 @@ def test_final() -> None:
             ],
         )
     )
+
+
+#
+# User defined literals
+#
+
+
+def test_user_defined_literal() -> None:
+    content = """
+      units::volt_t v = 1_V;
+    """
+    data = parse_string(content, cleandoc=True)
+
+    assert data == ParsedData(
+        namespace=NamespaceScope(
+            variables=[
+                Variable(
+                    name=PQName(segments=[NameSpecifier(name="v")]),
+                    type=Type(
+                        typename=PQName(
+                            segments=[
+                                NameSpecifier(name="units"),
+                                NameSpecifier(name="volt_t"),
+                            ]
+                        )
+                    ),
+                    value=Value(tokens=[Token(value="1_V")]),
+                )
+            ]
+        )
+    )
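For reference, the same behavior through the public API: the UDL comes back as
a single token in the variable's default value (a sketch, assuming a build that
includes this change):

from cxxheaderparser.simple import parse_string

data = parse_string("units::volt_t v = 1_V;")
v = data.namespace.variables[0]
print(v.value.tokens[0].value)  # 1_V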
@@ -1,6 +1,6 @@
 import pytest

-from cxxheaderparser.lexer import PlyLexer
+from cxxheaderparser.lexer import PlyLexer, LexerTokenStream
 from cxxheaderparser.tokfmt import tokfmt
 from cxxheaderparser.types import Token

@@ -48,6 +48,7 @@ def test_tokfmt(instr: str) -> None:
         if not tok:
             break

-        toks.append(Token(tok.value, tok.type))
+        if tok.type not in LexerTokenStream._discard_types:
+            toks.append(Token(tok.value, tok.type))

     assert tokfmt(toks) == instr