From 4ab7b3fd1665b41af8ce0e1d6770064758e7bd09 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sat, 2 Sep 2023 21:08:26 -0400 Subject: [PATCH 1/4] Adjust simple parse_file to accept a Path or string as filename --- cxxheaderparser/simple.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cxxheaderparser/simple.py b/cxxheaderparser/simple.py index 6d1d1c1..1670e6d 100644 --- a/cxxheaderparser/simple.py +++ b/cxxheaderparser/simple.py @@ -24,6 +24,7 @@ See below for the contents of the returned :class:`ParsedData`. """ +import os import sys import inspect import typing @@ -344,7 +345,7 @@ def parse_string( def parse_file( - filename: str, + filename: typing.Union[str, os.PathLike], encoding: typing.Optional[str] = None, *, options: typing.Optional[ParserOptions] = None, @@ -352,6 +353,7 @@ def parse_file( """ Simple function to parse a header from a file and return a data structure """ + filename = os.fsdecode(filename) if encoding is None: encoding = "utf-8-sig" From a60bb7fd18a9649eeb5913360716ff7cee79fc66 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sat, 2 Sep 2023 19:47:46 -0400 Subject: [PATCH 2/4] Add basic preprocessor test --- .github/workflows/dist.yml | 2 +- cxxheaderparser/gentest.py | 21 +++++++++++++++++---- tests/test_preprocessor.py | 27 +++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 tests/test_preprocessor.py diff --git a/.github/workflows/dist.yml b/.github/workflows/dist.yml index 9db0a53..218ce61 100644 --- a/.github/workflows/dist.yml +++ b/.github/workflows/dist.yml @@ -81,7 +81,7 @@ jobs: run: python setup.py bdist_wheel - name: Install test dependencies - run: python -m pip --disable-pip-version-check install pytest + run: python -m pip --disable-pip-version-check install pytest pcpp - name: Test wheel shell: bash diff --git a/cxxheaderparser/gentest.py b/cxxheaderparser/gentest.py index b192cea..ace24d7 100644 --- a/cxxheaderparser/gentest.py +++ 
b/cxxheaderparser/gentest.py @@ -6,6 +6,7 @@ import subprocess import typing from .errors import CxxParseError +from .preprocessor import make_pcpp_preprocessor from .options import ParserOptions from .simple import parse_string, ParsedData @@ -49,14 +50,23 @@ def nondefault_repr(data: ParsedData) -> str: return _inner_repr(data) -def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) -> None: +def gentest( + infile: str, name: str, outfile: str, verbose: bool, fail: bool, pcpp: bool +) -> None: # Goal is to allow making a unit test as easy as running this dumper # on a file and copy/pasting this into a test with open(infile, "r") as fp: content = fp.read() + maybe_options = "" + popt = "" + options = ParserOptions(verbose=verbose) + if pcpp: + options.preprocessor = make_pcpp_preprocessor() + maybe_options = "options = ParserOptions(preprocessor=make_pcpp_preprocessor())" + popt = ", options=options" try: data = parse_string(content, options=options) @@ -74,15 +84,17 @@ def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) -> if not fail: stmt = nondefault_repr(data) stmt = f""" - data = parse_string(content, cleandoc=True) + {maybe_options} + data = parse_string(content, cleandoc=True{popt}) assert data == {stmt} """ else: stmt = f""" + {maybe_options} err = {repr(err)} with pytest.raises(CxxParseError, match=re.escape(err)): - parse_string(content, cleandoc=True) + parse_string(content, cleandoc=True{popt}) """ content = ("\n" + content.strip()).replace("\n", "\n ") @@ -113,6 +125,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("header") parser.add_argument("name", nargs="?", default="TODO") + parser.add_argument("--pcpp", default=False, action="store_true") parser.add_argument("-v", "--verbose", default=False, action="store_true") parser.add_argument("-o", "--output", default="-") parser.add_argument( @@ -120,4 +133,4 @@ if __name__ == "__main__": ) args = parser.parse_args() 
- gentest(args.header, args.name, args.output, args.verbose, args.fail) + gentest(args.header, args.name, args.output, args.verbose, args.fail, args.pcpp) diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py new file mode 100644 index 0000000..35ed0ab --- /dev/null +++ b/tests/test_preprocessor.py @@ -0,0 +1,27 @@ +from cxxheaderparser.options import ParserOptions +from cxxheaderparser.preprocessor import make_pcpp_preprocessor +from cxxheaderparser.simple import NamespaceScope, ParsedData, parse_string +from cxxheaderparser.types import FundamentalSpecifier, NameSpecifier, PQName, Token, Type, Value, Variable + + +def test_basic_preprocessor() -> None: + content = """ + #define X 1 + int x = X; + """ + options = ParserOptions(preprocessor=make_pcpp_preprocessor()) + data = parse_string(content, cleandoc=True, options=options) + + assert data == ParsedData( + namespace=NamespaceScope( + variables=[ + Variable( + name=PQName(segments=[NameSpecifier(name="x")]), + type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + value=Value(tokens=[Token(value="1")]), + ) + ] + ) + ) \ No newline at end of file From de4d06defed5cb5d0b65ff2bf45d9a285d1c5eaf Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sat, 2 Sep 2023 21:03:15 -0400 Subject: [PATCH 3/4] Fix preprocessor option to retain content --- cxxheaderparser/preprocessor.py | 15 ++++------- tests/test_preprocessor.py | 45 ++++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/cxxheaderparser/preprocessor.py b/cxxheaderparser/preprocessor.py index a89c4b1..f732ef2 100644 --- a/cxxheaderparser/preprocessor.py +++ b/cxxheaderparser/preprocessor.py @@ -3,6 +3,7 @@ Contains optional preprocessor support via pcpp """ import io +import os from os.path import relpath import typing from .options import PreprocessorFunction @@ -17,7 +18,7 @@ class PreprocessorError(Exception): class _CustomPreprocessor(Preprocessor): def __init__(self): 
Preprocessor.__init__(self) - self.errors = [] + self.errors: typing.List[str] = [] def on_error(self, file, line, msg): self.errors.append(f"{file}:{line} error: {msg}") @@ -34,21 +35,15 @@ def _filter_self(fname: str, fp: typing.TextIO) -> str: # isn't what a typical user of cxxheaderparser would want, so we strip out # the line directives and any content that isn't in our original file - # Compute the filename to match based on how pcpp does it - try: - relfname = relpath(fname) - except Exception: - relfname = fname - relfname = relfname.replace("\\", "/") - - relfname += '"\n' + # pcpp always emits line directives that match whatever is passed in to it + line_ending = f'{fname}"\n' new_output = io.StringIO() keep = True for line in fp: if line.startswith("#line"): - keep = line.endswith(relfname) + keep = line.endswith(line_ending) if keep: new_output.write(line) diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py index 35ed0ab..8e9ae02 100644 --- a/tests/test_preprocessor.py +++ b/tests/test_preprocessor.py @@ -1,7 +1,17 @@ +import pathlib + from cxxheaderparser.options import ParserOptions from cxxheaderparser.preprocessor import make_pcpp_preprocessor -from cxxheaderparser.simple import NamespaceScope, ParsedData, parse_string -from cxxheaderparser.types import FundamentalSpecifier, NameSpecifier, PQName, Token, Type, Value, Variable +from cxxheaderparser.simple import NamespaceScope, ParsedData, parse_file, parse_string +from cxxheaderparser.types import ( + FundamentalSpecifier, + NameSpecifier, + PQName, + Token, + Type, + Value, + Variable, +) def test_basic_preprocessor() -> None: @@ -24,4 +34,33 @@ def test_basic_preprocessor() -> None: ) ] ) - ) \ No newline at end of file + ) + + +def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None: + """Ensure that content in other headers is omitted""" + h_content = '#include "t2.h"' "\n" "int x = X;\n" + h2_content = "#define X 2\n" "int omitted = 1;\n" + + with open(tmp_path / 
"t1.h", "w") as fp: + fp.write(h_content) + + with open(tmp_path / "t2.h", "w") as fp: + fp.write(h2_content) + + options = ParserOptions(preprocessor=make_pcpp_preprocessor()) + data = parse_file(tmp_path / "t1.h", options=options) + + assert data == ParsedData( + namespace=NamespaceScope( + variables=[ + Variable( + name=PQName(segments=[NameSpecifier(name="x")]), + type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + value=Value(tokens=[Token(value="2")]), + ) + ] + ) + ) From a13cdf4f67749435f5c5acf35a4a100c87983853 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Fri, 1 Sep 2023 20:37:14 -0400 Subject: [PATCH 4/4] Provide mechanism to specify preprocessor file encoding --- cxxheaderparser/dump.py | 7 +++++-- cxxheaderparser/preprocessor.py | 8 ++++++-- tests/test_preprocessor.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/cxxheaderparser/dump.py b/cxxheaderparser/dump.py index d4da431..296993e 100644 --- a/cxxheaderparser/dump.py +++ b/cxxheaderparser/dump.py @@ -26,6 +26,9 @@ def dumpmain() -> None: parser.add_argument( "--pcpp", default=False, action="store_true", help="Use pcpp preprocessor" ) + parser.add_argument( + "--encoding", default=None, help="Use this encoding to open the file" + ) args = parser.parse_args() @@ -33,10 +36,10 @@ def dumpmain() -> None: if args.pcpp: from .preprocessor import make_pcpp_preprocessor - preprocessor = make_pcpp_preprocessor() + preprocessor = make_pcpp_preprocessor(encoding=args.encoding) options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor) - data = parse_file(args.header, options=options) + data = parse_file(args.header, encoding=args.encoding, options=options) if args.mode == "pprint": ddata = dataclasses.asdict(data) diff --git a/cxxheaderparser/preprocessor.py b/cxxheaderparser/preprocessor.py index f732ef2..4b6aabe 100644 --- a/cxxheaderparser/preprocessor.py +++ b/cxxheaderparser/preprocessor.py @@ -16,9 
+16,10 @@ class PreprocessorError(Exception): class _CustomPreprocessor(Preprocessor): - def __init__(self): + def __init__(self, encoding: typing.Optional[str]): Preprocessor.__init__(self) self.errors: typing.List[str] = [] + self.assume_encoding = encoding def on_error(self, file, line, msg): self.errors.append(f"{file}:{line} error: {msg}") @@ -57,11 +58,14 @@ def make_pcpp_preprocessor( defines: typing.List[str] = [], include_paths: typing.List[str] = [], retain_all_content: bool = False, + encoding: typing.Optional[str] = None, ) -> PreprocessorFunction: """ Creates a preprocessor function that uses pcpp (which must be installed separately) to preprocess the input text. + :param encoding: If specified any include files are opened with this encoding + .. code-block:: python pp = make_pcpp_preprocessor() @@ -72,7 +76,7 @@ def make_pcpp_preprocessor( """ def _preprocess_file(filename: str, content: str) -> str: - pp = _CustomPreprocessor() + pp = _CustomPreprocessor(encoding) if include_paths: for p in include_paths: pp.add_path(p) diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py index 8e9ae02..b0d87e9 100644 --- a/tests/test_preprocessor.py +++ b/tests/test_preprocessor.py @@ -64,3 +64,33 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None: ] ) ) + + +def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None: + """Ensure we can handle alternate encodings""" + h_content = b"// \xa9 2023 someone\n" b'#include "t2.h"' b"\n" b"int x = X;\n" + + h2_content = b"// \xa9 2023 someone\n" b"#define X 3\n" b"int omitted = 1;\n" + + with open(tmp_path / "t1.h", "wb") as fp: + fp.write(h_content) + + with open(tmp_path / "t2.h", "wb") as fp: + fp.write(h2_content) + + options = ParserOptions(preprocessor=make_pcpp_preprocessor(encoding="cp1252")) + data = parse_file(tmp_path / "t1.h", options=options, encoding="cp1252") + + assert data == ParsedData( + namespace=NamespaceScope( + variables=[ + Variable( + 
name=PQName(segments=[NameSpecifier(name="x")]), + type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + value=Value(tokens=[Token(value="3")]), + ) + ] + ) + )