diff --git a/.github/workflows/dist.yml b/.github/workflows/dist.yml index 9db0a53..218ce61 100644 --- a/.github/workflows/dist.yml +++ b/.github/workflows/dist.yml @@ -81,7 +81,7 @@ jobs: run: python setup.py bdist_wheel - name: Install test dependencies - run: python -m pip --disable-pip-version-check install pytest + run: python -m pip --disable-pip-version-check install pytest pcpp - name: Test wheel shell: bash diff --git a/cxxheaderparser/dump.py b/cxxheaderparser/dump.py index d4da431..296993e 100644 --- a/cxxheaderparser/dump.py +++ b/cxxheaderparser/dump.py @@ -26,6 +26,9 @@ def dumpmain() -> None: parser.add_argument( "--pcpp", default=False, action="store_true", help="Use pcpp preprocessor" ) + parser.add_argument( + "--encoding", default=None, help="Use this encoding to open the file" + ) args = parser.parse_args() @@ -33,10 +36,10 @@ def dumpmain() -> None: if args.pcpp: from .preprocessor import make_pcpp_preprocessor - preprocessor = make_pcpp_preprocessor() + preprocessor = make_pcpp_preprocessor(encoding=args.encoding) options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor) - data = parse_file(args.header, options=options) + data = parse_file(args.header, encoding=args.encoding, options=options) if args.mode == "pprint": ddata = dataclasses.asdict(data) diff --git a/cxxheaderparser/gentest.py b/cxxheaderparser/gentest.py index b192cea..ace24d7 100644 --- a/cxxheaderparser/gentest.py +++ b/cxxheaderparser/gentest.py @@ -6,6 +6,7 @@ import subprocess import typing from .errors import CxxParseError +from .preprocessor import make_pcpp_preprocessor from .options import ParserOptions from .simple import parse_string, ParsedData @@ -49,14 +50,23 @@ def nondefault_repr(data: ParsedData) -> str: return _inner_repr(data) -def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) -> None: +def gentest( + infile: str, name: str, outfile: str, verbose: bool, fail: bool, pcpp: bool +) -> None: # Goal is to allow making a unit 
test as easy as running this dumper # on a file and copy/pasting this into a test with open(infile, "r") as fp: content = fp.read() + maybe_options = "" + popt = "" + options = ParserOptions(verbose=verbose) + if pcpp: + options.preprocessor = make_pcpp_preprocessor() + maybe_options = "options = ParserOptions(preprocessor=make_pcpp_preprocessor())" + popt = ", options=options" try: data = parse_string(content, options=options) @@ -74,15 +84,17 @@ def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) -> if not fail: stmt = nondefault_repr(data) stmt = f""" - data = parse_string(content, cleandoc=True) + {maybe_options} + data = parse_string(content, cleandoc=True{popt}) assert data == {stmt} """ else: stmt = f""" + {maybe_options} err = {repr(err)} with pytest.raises(CxxParseError, match=re.escape(err)): - parse_string(content, cleandoc=True) + parse_string(content, cleandoc=True{popt}) """ content = ("\n" + content.strip()).replace("\n", "\n ") @@ -113,6 +125,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("header") parser.add_argument("name", nargs="?", default="TODO") + parser.add_argument("--pcpp", default=False, action="store_true") parser.add_argument("-v", "--verbose", default=False, action="store_true") parser.add_argument("-o", "--output", default="-") parser.add_argument( @@ -120,4 +133,4 @@ if __name__ == "__main__": ) args = parser.parse_args() - gentest(args.header, args.name, args.output, args.verbose, args.fail) + gentest(args.header, args.name, args.output, args.verbose, args.fail, args.pcpp) diff --git a/cxxheaderparser/preprocessor.py b/cxxheaderparser/preprocessor.py index a89c4b1..4b6aabe 100644 --- a/cxxheaderparser/preprocessor.py +++ b/cxxheaderparser/preprocessor.py @@ -3,6 +3,7 @@ Contains optional preprocessor support via pcpp """ import io +import os from os.path import relpath import typing from .options import PreprocessorFunction @@ -15,9 +16,10 @@ class
PreprocessorError(Exception): class _CustomPreprocessor(Preprocessor): - def __init__(self): + def __init__(self, encoding: typing.Optional[str]): Preprocessor.__init__(self) - self.errors = [] + self.errors: typing.List[str] = [] + self.assume_encoding = encoding def on_error(self, file, line, msg): self.errors.append(f"{file}:{line} error: {msg}") @@ -34,21 +36,15 @@ def _filter_self(fname: str, fp: typing.TextIO) -> str: # isn't what a typical user of cxxheaderparser would want, so we strip out # the line directives and any content that isn't in our original file - # Compute the filename to match based on how pcpp does it - try: - relfname = relpath(fname) - except Exception: - relfname = fname - relfname = relfname.replace("\\", "/") - - relfname += '"\n' + # pcpp always emits line directives that match whatever is passed in to it + line_ending = f'{fname}"\n' new_output = io.StringIO() keep = True for line in fp: if line.startswith("#line"): - keep = line.endswith(relfname) + keep = line.endswith(line_ending) if keep: new_output.write(line) @@ -62,11 +58,14 @@ def make_pcpp_preprocessor( defines: typing.List[str] = [], include_paths: typing.List[str] = [], retain_all_content: bool = False, + encoding: typing.Optional[str] = None, ) -> PreprocessorFunction: """ Creates a preprocessor function that uses pcpp (which must be installed separately) to preprocess the input text. + :param encoding: If specified any include files are opened with this encoding + .. 
code-block:: python pp = make_pcpp_preprocessor() @@ -77,7 +76,7 @@ def make_pcpp_preprocessor( """ def _preprocess_file(filename: str, content: str) -> str: - pp = _CustomPreprocessor() + pp = _CustomPreprocessor(encoding) if include_paths: for p in include_paths: pp.add_path(p) diff --git a/cxxheaderparser/simple.py b/cxxheaderparser/simple.py index 6d1d1c1..1670e6d 100644 --- a/cxxheaderparser/simple.py +++ b/cxxheaderparser/simple.py @@ -24,6 +24,7 @@ See below for the contents of the returned :class:`ParsedData`. """ +import os import sys import inspect import typing @@ -344,7 +345,7 @@ def parse_string( def parse_file( - filename: str, + filename: typing.Union[str, os.PathLike], encoding: typing.Optional[str] = None, *, options: typing.Optional[ParserOptions] = None, @@ -352,6 +353,7 @@ def parse_file( """ Simple function to parse a header from a file and return a data structure """ + filename = os.fsdecode(filename) if encoding is None: encoding = "utf-8-sig" diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py new file mode 100644 index 0000000..b0d87e9 --- /dev/null +++ b/tests/test_preprocessor.py @@ -0,0 +1,96 @@ +import pathlib + +from cxxheaderparser.options import ParserOptions +from cxxheaderparser.preprocessor import make_pcpp_preprocessor +from cxxheaderparser.simple import NamespaceScope, ParsedData, parse_file, parse_string +from cxxheaderparser.types import ( + FundamentalSpecifier, + NameSpecifier, + PQName, + Token, + Type, + Value, + Variable, +) + + +def test_basic_preprocessor() -> None: + content = """ + #define X 1 + int x = X; + """ + options = ParserOptions(preprocessor=make_pcpp_preprocessor()) + data = parse_string(content, cleandoc=True, options=options) + + assert data == ParsedData( + namespace=NamespaceScope( + variables=[ + Variable( + name=PQName(segments=[NameSpecifier(name="x")]), + type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + value=Value(tokens=[Token(value="1")]), + ) + ] + 
) + ) + + +def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None: + """Ensure that content in other headers is omitted""" + h_content = '#include "t2.h"' "\n" "int x = X;\n" + h2_content = "#define X 2\n" "int omitted = 1;\n" + + with open(tmp_path / "t1.h", "w") as fp: + fp.write(h_content) + + with open(tmp_path / "t2.h", "w") as fp: + fp.write(h2_content) + + options = ParserOptions(preprocessor=make_pcpp_preprocessor()) + data = parse_file(tmp_path / "t1.h", options=options) + + assert data == ParsedData( + namespace=NamespaceScope( + variables=[ + Variable( + name=PQName(segments=[NameSpecifier(name="x")]), + type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + value=Value(tokens=[Token(value="2")]), + ) + ] + ) + ) + + +def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None: + """Ensure we can handle alternate encodings""" + h_content = b"// \xa9 2023 someone\n" b'#include "t2.h"' b"\n" b"int x = X;\n" + + h2_content = b"// \xa9 2023 someone\n" b"#define X 3\n" b"int omitted = 1;\n" + + with open(tmp_path / "t1.h", "wb") as fp: + fp.write(h_content) + + with open(tmp_path / "t2.h", "wb") as fp: + fp.write(h2_content) + + options = ParserOptions(preprocessor=make_pcpp_preprocessor(encoding="cp1252")) + data = parse_file(tmp_path / "t1.h", options=options, encoding="cp1252") + + assert data == ParsedData( + namespace=NamespaceScope( + variables=[ + Variable( + name=PQName(segments=[NameSpecifier(name="x")]), + type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + value=Value(tokens=[Token(value="3")]), + ) + ] + ) + )