From 8a0568c0f52ebbd1cbc070273dbe18cfb54625f8 Mon Sep 17 00:00:00 2001
From: Dustin Spicuzza
Date: Fri, 9 Dec 2022 03:26:16 -0500
Subject: [PATCH] Change balanced token handling to allow mismatched gt/lt tokens

- These can be used for math, so we just assume the code is doing that
---
NOTE(review): the line breaks and indentation of this patch were rebuilt from
a whitespace-collapsed copy. Every hunk was cross-checked against its @@ line
counts and against the diffstat below, but the indentation of the context
lines in cxxheaderparser/parser.py is inferred from the code's nesting --
confirm against the target file before applying, or apply with
`git apply --ignore-whitespace`. The author address was absent from the copy
and has not been reconstructed.

 cxxheaderparser/parser.py | 26 +++++++++-----
 tests/test_var.py         | 75 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 91 insertions(+), 10 deletions(-)

diff --git a/cxxheaderparser/parser.py b/cxxheaderparser/parser.py
index 6bd8ba5..c5c35ae 100644
--- a/cxxheaderparser/parser.py
+++ b/cxxheaderparser/parser.py
@@ -230,16 +230,24 @@ class CxxParser:
             if tok.type in self._end_balanced_tokens:
                 expected = match_stack.pop()
                 if tok.type != expected:
-                    # hack: ambiguous right-shift issues here, really
-                    # should be looking at the context
-                    if tok.type == ">":
-                        tok = self.lex.token_if(">")
-                        if tok:
-                            consumed.append(tok)
-                            match_stack.append(expected)
-                            continue
+                    # hack: we only claim to parse correct code, so if this
+                    # is less than or greater than, assume that the code is
+                    # doing math and so this unexpected item is correct.
+                    #
+                    # If one of the other items on the stack match, pop back
+                    # to that. Otherwise, ignore it and hope for the best
+                    if tok.type != ">" and expected != ">":
+                        raise self._parse_error(tok, expected)
+
+                    for i, maybe in enumerate(reversed(match_stack)):
+                        if tok.type == maybe:
+                            for _ in range(i + 1):
+                                match_stack.pop()
+                            break
+                    else:
+                        match_stack.append(expected)
+                        continue
 
-                    raise self._parse_error(tok, expected)
 
                 if len(match_stack) == 0:
                     return consumed
diff --git a/tests/test_var.py b/tests/test_var.py
index 488ad10..5567d6a 100644
--- a/tests/test_var.py
+++ b/tests/test_var.py
@@ -1,6 +1,6 @@
 # Note: testcases generated via `python -m cxxheaderparser.gentest`
 
-
+from cxxheaderparser.errors import CxxParseError
 from cxxheaderparser.types import (
     Array,
     ClassDecl,
@@ -21,6 +21,9 @@ from cxxheaderparser.types import (
 )
 from cxxheaderparser.simple import ClassScope, NamespaceScope, ParsedData, parse_string
 
+import pytest
+import re
+
 
 def test_var_unixwiz_ridiculous() -> None:
     # http://unixwiz.net/techtips/reading-cdecl.html
@@ -766,3 +769,73 @@ def test_var_extern() -> None:
             ]
         )
     )
+
+
+def test_balanced_with_gt() -> None:
+    """Tests _consume_balanced_tokens handling of mismatched gt tokens"""
+    content = """
+      int x = (1 >> 2);
+    """
+    data = parse_string(content, cleandoc=True)
+
+    assert data == ParsedData(
+        namespace=NamespaceScope(
+            variables=[
+                Variable(
+                    name=PQName(segments=[NameSpecifier(name="x")]),
+                    type=Type(
+                        typename=PQName(segments=[FundamentalSpecifier(name="int")])
+                    ),
+                    value=Value(
+                        tokens=[
+                            Token(value="("),
+                            Token(value="1"),
+                            Token(value=">"),
+                            Token(value=">"),
+                            Token(value="2"),
+                            Token(value=")"),
+                        ]
+                    ),
+                )
+            ]
+        )
+    )
+
+
+def test_balanced_with_lt() -> None:
+    """Tests _consume_balanced_tokens handling of mismatched lt tokens"""
+    content = """
+      bool z = (i < 4);
+    """
+    data = parse_string(content, cleandoc=True)
+
+    assert data == ParsedData(
+        namespace=NamespaceScope(
+            variables=[
+                Variable(
+                    name=PQName(segments=[NameSpecifier(name="z")]),
+                    type=Type(
+                        typename=PQName(segments=[FundamentalSpecifier(name="bool")])
+                    ),
+                    value=Value(
+                        tokens=[
+                            Token(value="("),
+                            Token(value="i"),
+                            Token(value="<"),
+                            Token(value="4"),
+                            Token(value=")"),
+                        ]
+                    ),
+                )
+            ]
+        )
+    )
+
+
+def test_balanced_bad_mismatch() -> None:
+    content = """
+      bool z = (12 ]);
+    """
+    err = ":1: parse error evaluating ']': unexpected ']', expected ')'"
+    with pytest.raises(CxxParseError, match=re.escape(err)):
+        parse_string(content, cleandoc=True)