From a13cdf4f67749435f5c5acf35a4a100c87983853 Mon Sep 17 00:00:00 2001
From: Dustin Spicuzza
Date: Fri, 1 Sep 2023 20:37:14 -0400
Subject: [PATCH] Provide mechanism to specify preprocessor file encoding

---
Reviewer notes (free-form text in this position is not part of the commit
message and is skipped when the patch is applied):

  * cxxheaderparser/dump.py: adds an optional `--encoding` argument
    (default None). Its value is passed to parse_file() and, when `--pcpp`
    is given, to make_pcpp_preprocessor().
  * cxxheaderparser/preprocessor.py: make_pcpp_preprocessor() gains an
    `encoding` keyword (default None). It is handed to _CustomPreprocessor,
    whose __init__ now takes the encoding and stores it as
    `self.assume_encoding`. Per the docstring added below, the intent is
    that include files are opened with this encoding; the attribute itself
    is presumably consumed by the pcpp base class -- confirm against pcpp.
  * tests/test_preprocessor.py: adds test_preprocessor_encoding, which
    writes t1.h and t2.h as raw bytes containing 0xA9, parses t1.h with
    encoding="cp1252" for both the parser and the preprocessor, and expects
    a single variable `x` whose value token is "3" (from `#define X 3` in
    the included t2.h).

 cxxheaderparser/dump.py         |  7 +++++--
 cxxheaderparser/preprocessor.py |  8 ++++++--
 tests/test_preprocessor.py      | 30 ++++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/cxxheaderparser/dump.py b/cxxheaderparser/dump.py
index d4da431..296993e 100644
--- a/cxxheaderparser/dump.py
+++ b/cxxheaderparser/dump.py
@@ -26,6 +26,9 @@ def dumpmain() -> None:
     parser.add_argument(
         "--pcpp", default=False, action="store_true", help="Use pcpp preprocessor"
     )
+    parser.add_argument(
+        "--encoding", default=None, help="Use this encoding to open the file"
+    )
 
     args = parser.parse_args()
 
@@ -33,10 +36,10 @@ def dumpmain() -> None:
     if args.pcpp:
         from .preprocessor import make_pcpp_preprocessor
 
-        preprocessor = make_pcpp_preprocessor()
+        preprocessor = make_pcpp_preprocessor(encoding=args.encoding)
 
     options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor)
-    data = parse_file(args.header, options=options)
+    data = parse_file(args.header, encoding=args.encoding, options=options)
 
     if args.mode == "pprint":
         ddata = dataclasses.asdict(data)
diff --git a/cxxheaderparser/preprocessor.py b/cxxheaderparser/preprocessor.py
index f732ef2..4b6aabe 100644
--- a/cxxheaderparser/preprocessor.py
+++ b/cxxheaderparser/preprocessor.py
@@ -16,9 +16,10 @@ class PreprocessorError(Exception):
 
 
 class _CustomPreprocessor(Preprocessor):
-    def __init__(self):
+    def __init__(self, encoding: typing.Optional[str]):
         Preprocessor.__init__(self)
         self.errors: typing.List[str] = []
+        self.assume_encoding = encoding
 
     def on_error(self, file, line, msg):
         self.errors.append(f"{file}:{line} error: {msg}")
@@ -57,11 +58,14 @@ def make_pcpp_preprocessor(
     defines: typing.List[str] = [],
     include_paths: typing.List[str] = [],
     retain_all_content: bool = False,
+    encoding: typing.Optional[str] = None,
 ) -> PreprocessorFunction:
     """
     Creates a preprocessor function that uses pcpp (which must be installed
     separately) to preprocess the input text.
 
+    :param encoding: If specified any include files are opened with this encoding
+
     .. code-block:: python
 
         pp = make_pcpp_preprocessor()
@@ -72,7 +76,7 @@ def make_pcpp_preprocessor(
     """
 
     def _preprocess_file(filename: str, content: str) -> str:
-        pp = _CustomPreprocessor()
+        pp = _CustomPreprocessor(encoding)
         if include_paths:
             for p in include_paths:
                 pp.add_path(p)
diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py
index 8e9ae02..b0d87e9 100644
--- a/tests/test_preprocessor.py
+++ b/tests/test_preprocessor.py
@@ -64,3 +64,33 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
             ]
         )
     )
+
+
+def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
+    """Ensure we can handle alternate encodings"""
+    h_content = b"// \xa9 2023 someone\n" b'#include "t2.h"' b"\n" b"int x = X;\n"
+
+    h2_content = b"// \xa9 2023 someone\n" b"#define X 3\n" b"int omitted = 1;\n"
+
+    with open(tmp_path / "t1.h", "wb") as fp:
+        fp.write(h_content)
+
+    with open(tmp_path / "t2.h", "wb") as fp:
+        fp.write(h2_content)
+
+    options = ParserOptions(preprocessor=make_pcpp_preprocessor(encoding="cp1252"))
+    data = parse_file(tmp_path / "t1.h", options=options, encoding="cp1252")
+
+    assert data == ParsedData(
+        namespace=NamespaceScope(
+            variables=[
+                Variable(
+                    name=PQName(segments=[NameSpecifier(name="x")]),
+                    type=Type(
+                        typename=PQName(segments=[FundamentalSpecifier(name="int")])
+                    ),
+                    value=Value(tokens=[Token(value="3")]),
+                )
+            ]
+        )
+    )