From 9dd573e43321d3c9989b1da94bc43cdb2e1142ef Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 8 Oct 2023 00:56:25 -0400 Subject: [PATCH 1/2] Make pcpp more optional --- cxxheaderparser/preprocessor.py | 44 ++++++++++++++++----------- tests/test_preprocessor.py | 53 +++++++++++++++++++++++++-------- 2 files changed, 66 insertions(+), 31 deletions(-) diff --git a/cxxheaderparser/preprocessor.py b/cxxheaderparser/preprocessor.py index e1c720c..30f0fbc 100644 --- a/cxxheaderparser/preprocessor.py +++ b/cxxheaderparser/preprocessor.py @@ -8,32 +8,37 @@ import os import typing from .options import PreprocessorFunction -from pcpp import Preprocessor, OutputDirective, Action - class PreprocessorError(Exception): pass -class _CustomPreprocessor(Preprocessor): - def __init__( - self, - encoding: typing.Optional[str], - passthru_includes: typing.Optional["re.Pattern"], - ): - Preprocessor.__init__(self) - self.errors: typing.List[str] = [] - self.assume_encoding = encoding - self.passthru_includes = passthru_includes +try: + import pcpp + from pcpp import Preprocessor, OutputDirective, Action - def on_error(self, file, line, msg): - self.errors.append(f"{file}:{line} error: {msg}") + class _CustomPreprocessor(Preprocessor): + def __init__( + self, + encoding: typing.Optional[str], + passthru_includes: typing.Optional["re.Pattern"], + ): + Preprocessor.__init__(self) + self.errors: typing.List[str] = [] + self.assume_encoding = encoding + self.passthru_includes = passthru_includes - def on_include_not_found(self, *ignored): - raise OutputDirective(Action.IgnoreAndPassThrough) + def on_error(self, file, line, msg): + self.errors.append(f"{file}:{line} error: {msg}") - def on_comment(self, *ignored): - return True + def on_include_not_found(self, *ignored): + raise OutputDirective(Action.IgnoreAndPassThrough) + + def on_comment(self, *ignored): + return True + +except ImportError: + pcpp = None def _filter_self(fname: str, fp: typing.TextIO) -> str: @@ -82,6 +87,9 @@ 
def make_pcpp_preprocessor( """ + if pcpp is None: + raise PreprocessorError("pcpp is not installed") + def _preprocess_file(filename: str, content: str) -> str: pp = _CustomPreprocessor(encoding, passthru_includes) if include_paths: diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py index 2e0e591..f7d775b 100644 --- a/tests/test_preprocessor.py +++ b/tests/test_preprocessor.py @@ -1,9 +1,13 @@ import os import pathlib +import pytest import re +import shutil +import subprocess +import typing -from cxxheaderparser.options import ParserOptions -from cxxheaderparser.preprocessor import make_pcpp_preprocessor +from cxxheaderparser.options import ParserOptions, PreprocessorFunction +from cxxheaderparser import preprocessor from cxxheaderparser.simple import ( NamespaceScope, ParsedData, @@ -22,12 +26,26 @@ from cxxheaderparser.types import ( ) -def test_basic_preprocessor() -> None: +@pytest.fixture(params=["pcpp"]) +def make_pp(request) -> typing.Callable[..., PreprocessorFunction]: + param = request.param + if param == "pcpp": + if preprocessor.pcpp is None: + pytest.skip("pcpp not installed") + return preprocessor.make_pcpp_preprocessor + else: + assert False + + +def test_basic_preprocessor( + make_pp: typing.Callable[..., PreprocessorFunction] +) -> None: content = """ #define X 1 int x = X; """ - options = ParserOptions(preprocessor=make_pcpp_preprocessor()) + + options = ParserOptions(preprocessor=make_pp()) data = parse_string(content, cleandoc=True, options=options) assert data == ParsedData( @@ -45,7 +63,10 @@ def test_basic_preprocessor() -> None: ) -def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None: +def test_preprocessor_omit_content( + make_pp: typing.Callable[..., PreprocessorFunction], + tmp_path: pathlib.Path, +) -> None: """Ensure that content in other headers is omitted""" h_content = '#include "t2.h"' "\n" "int x = X;\n" h2_content = "#define X 2\n" "int omitted = 1;\n" @@ -56,7 +77,7 @@ def 
test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None: with open(tmp_path / "t2.h", "w") as fp: fp.write(h2_content) - options = ParserOptions(preprocessor=make_pcpp_preprocessor()) + options = ParserOptions(preprocessor=make_pp()) data = parse_file(tmp_path / "t1.h", options=options) assert data == ParsedData( @@ -74,7 +95,10 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None: ) -def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None: +def test_preprocessor_omit_content2( + make_pp: typing.Callable[..., PreprocessorFunction], + tmp_path: pathlib.Path, +) -> None: """ Ensure that content in other headers is omitted while handling pcpp relative path quirk @@ -91,9 +115,7 @@ def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None: with open(tmp_path2 / "t2.h", "w") as fp: fp.write(h2_content) - options = ParserOptions( - preprocessor=make_pcpp_preprocessor(include_paths=[str(tmp_path)]) - ) + options = ParserOptions(preprocessor=make_pp(include_paths=[str(tmp_path)])) # Weirdness happens here os.chdir(tmp_path) @@ -114,7 +136,9 @@ def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None: ) -def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None: +def test_preprocessor_encoding( + make_pp: typing.Callable[..., PreprocessorFunction], tmp_path: pathlib.Path +) -> None: """Ensure we can handle alternate encodings""" h_content = b"// \xa9 2023 someone\n" b'#include "t2.h"' b"\n" b"int x = X;\n" @@ -126,7 +150,7 @@ def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None: with open(tmp_path / "t2.h", "wb") as fp: fp.write(h2_content) - options = ParserOptions(preprocessor=make_pcpp_preprocessor(encoding="cp1252")) + options = ParserOptions(preprocessor=make_pp(encoding="cp1252")) data = parse_file(tmp_path / "t1.h", options=options, encoding="cp1252") assert data == ParsedData( @@ -144,6 +168,7 @@ def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None: ) 
+@pytest.mark.skipif(preprocessor.pcpp is None, reason="pcpp not installed") def test_preprocessor_passthru_includes(tmp_path: pathlib.Path) -> None: """Ensure that all #include pass through""" h_content = '#include "t2.h"\n' @@ -155,7 +180,9 @@ def test_preprocessor_passthru_includes(tmp_path: pathlib.Path) -> None: fp.write("") options = ParserOptions( - preprocessor=make_pcpp_preprocessor(passthru_includes=re.compile(".+")) + preprocessor=preprocessor.make_pcpp_preprocessor( + passthru_includes=re.compile(".+") + ) ) data = parse_file(tmp_path / "t1.h", options=options) From 8f9e8626af75bd252a125a6ca4a636b045a014e1 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 8 Oct 2023 01:00:55 -0400 Subject: [PATCH 2/2] Add GCC compatible preprocessing function --- cxxheaderparser/lexer.py | 6 +- cxxheaderparser/preprocessor.py | 107 +++++++++++++++++++++++++++++++- tests/test_preprocessor.py | 11 +++- 3 files changed, 116 insertions(+), 8 deletions(-) diff --git a/cxxheaderparser/lexer.py b/cxxheaderparser/lexer.py index af7769f..341ef76 100644 --- a/cxxheaderparser/lexer.py +++ b/cxxheaderparser/lexer.py @@ -17,7 +17,7 @@ if sys.version_info >= (3, 8): else: Protocol = object -_line_re = re.compile(r'^\#[\t ]*line (\d+) "(.*)"') +_line_re = re.compile(r'^\#[\t ]*(line)? 
(\d+) "(.*)"') _multicomment_re = re.compile("\n[\\s]+\\*") @@ -448,8 +448,8 @@ class PlyLexer: # handle line macros m = _line_re.match(t.value) if m: - self.filename = m.group(2) - self.line_offset = 1 + self.lex.lineno - int(m.group(1)) + self.filename = m.group(3) + self.line_offset = 1 + self.lex.lineno - int(m.group(2)) return None # ignore C++23 warning directive if t.value.startswith("#warning"): diff --git a/cxxheaderparser/preprocessor.py b/cxxheaderparser/preprocessor.py index 30f0fbc..1e5719b 100644 --- a/cxxheaderparser/preprocessor.py +++ b/cxxheaderparser/preprocessor.py @@ -1,11 +1,14 @@ """ -Contains optional preprocessor support via pcpp +Contains optional preprocessor support functions """ import io import re import os +import subprocess +import sys import typing + from .options import PreprocessorFunction @@ -13,6 +16,97 @@ class PreprocessorError(Exception): pass +# +# GCC preprocessor support +# + + +def _gcc_filter(fname: str, fp: typing.TextIO) -> str: + new_output = io.StringIO() + keep = True + fname = fname.replace("\\", "\\\\") + + for line in fp: + if line.startswith("# "): + last_quote = line.rfind('"') + if last_quote != -1: + keep = line[:last_quote].endswith(fname) + + if keep: + new_output.write(line) + + new_output.seek(0) + return new_output.read() + + +def make_gcc_preprocessor( + *, + defines: typing.List[str] = [], + include_paths: typing.List[str] = [], + retain_all_content: bool = False, + encoding: typing.Optional[str] = None, + gcc_args: typing.List[str] = ["g++"], + print_cmd: bool = True, +) -> PreprocessorFunction: + """ + Creates a preprocessor function that uses g++ to preprocess the input text. + + gcc is a high performance and accurate precompiler, but if an #include + directive can't be resolved or other oddity exists in your input it will + throw an error. 
+ + :param defines: list of #define macros specified as "key value" + :param include_paths: list of directories to search for included files + :param retain_all_content: If False, only the parsed file content will be retained + :param encoding: If specified any include files are opened with this encoding + :param gcc_args: This is the path to G++ and any extra args you might want + :param print_cmd: Prints the gcc command as it's executed + + .. code-block:: python + + pp = make_gcc_preprocessor() + options = ParserOptions(preprocessor=pp) + + parse_file(filename, options=options) + + """ + + if not encoding: + encoding = "utf-8" + + def _preprocess_file(filename: str, content: str) -> str: + cmd = gcc_args + ["-w", "-E", "-C"] + + for p in include_paths: + cmd.append(f"-I{p}") + for d in defines: + cmd.append(f"-D{d.replace(' ', '=')}") + + kwargs = {"encoding": encoding} + if filename == "<str>": + cmd.append("-") + filename = "<stdin>" + kwargs["input"] = content + else: + cmd.append(filename) + + if print_cmd: + print("+", " ".join(cmd), file=sys.stderr) + + result: str = subprocess.check_output(cmd, **kwargs) # type: ignore + if not retain_all_content: + result = _gcc_filter(filename, io.StringIO(result)) + + return result + + return _preprocess_file + + +# +# PCPP preprocessor support (not installed by default) +# + + try: import pcpp from pcpp import Preprocessor, OutputDirective, Action @@ -41,7 +135,7 @@ except ImportError: pcpp = None -def _filter_self(fname: str, fp: typing.TextIO) -> str: +def _pcpp_filter(fname: str, fp: typing.TextIO) -> str: # the output of pcpp includes the contents of all the included files, which # isn't what a typical user of cxxheaderparser would want, so we strip out # the line directives and any content that isn't in our original file @@ -74,6 +168,13 @@ def make_pcpp_preprocessor( Creates a preprocessor function that uses pcpp (which must be installed separately) to preprocess the input text. 
+ If missing #include files are encountered, this preprocessor will ignore the + error. This preprocessor is pure python so it's very portable, and is a good + choice if performance isn't critical. + + :param defines: list of #define macros specified as "key value" + :param include_paths: list of directories to search for included files + :param retain_all_content: If False, only the parsed file content will be retained :param encoding: If specified any include files are opened with this encoding :param passthru_includes: If specified any #include directives that match the compiled regex pattern will be part of the output. @@ -127,6 +228,6 @@ def make_pcpp_preprocessor( filename = filename.replace(os.sep, "/") break - return _filter_self(filename, fp) + return _pcpp_filter(filename, fp) return _preprocess_file diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py index f7d775b..e54f86e 100644 --- a/tests/test_preprocessor.py +++ b/tests/test_preprocessor.py @@ -26,10 +26,17 @@ from cxxheaderparser.types import ( ) -@pytest.fixture(params=["pcpp"]) +@pytest.fixture(params=["gcc", "pcpp"]) def make_pp(request) -> typing.Callable[..., PreprocessorFunction]: param = request.param - if param == "pcpp": + if param == "gcc": + gcc_path = shutil.which("g++") + if not gcc_path: + pytest.skip("g++ not found") + + subprocess.run([gcc_path, "--version"]) + return preprocessor.make_gcc_preprocessor + elif param == "pcpp": if preprocessor.pcpp is None: pytest.skip("pcpp not installed") return preprocessor.make_pcpp_preprocessor