Merge pull request #63 from robotpy/pp-encoding

Variety of preprocessor related fixes
This commit is contained in:
Dustin Spicuzza 2023-09-03 18:54:28 -04:00 committed by GitHub
commit a110a5508b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 133 additions and 20 deletions

View File

@ -81,7 +81,7 @@ jobs:
run: python setup.py bdist_wheel run: python setup.py bdist_wheel
- name: Install test dependencies - name: Install test dependencies
run: python -m pip --disable-pip-version-check install pytest run: python -m pip --disable-pip-version-check install pytest pcpp
- name: Test wheel - name: Test wheel
shell: bash shell: bash

View File

@ -26,6 +26,9 @@ def dumpmain() -> None:
parser.add_argument( parser.add_argument(
"--pcpp", default=False, action="store_true", help="Use pcpp preprocessor" "--pcpp", default=False, action="store_true", help="Use pcpp preprocessor"
) )
parser.add_argument(
"--encoding", default=None, help="Use this encoding to open the file"
)
args = parser.parse_args() args = parser.parse_args()
@ -33,10 +36,10 @@ def dumpmain() -> None:
if args.pcpp: if args.pcpp:
from .preprocessor import make_pcpp_preprocessor from .preprocessor import make_pcpp_preprocessor
preprocessor = make_pcpp_preprocessor() preprocessor = make_pcpp_preprocessor(encoding=args.encoding)
options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor) options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor)
data = parse_file(args.header, options=options) data = parse_file(args.header, encoding=args.encoding, options=options)
if args.mode == "pprint": if args.mode == "pprint":
ddata = dataclasses.asdict(data) ddata = dataclasses.asdict(data)

View File

@ -6,6 +6,7 @@ import subprocess
import typing import typing
from .errors import CxxParseError from .errors import CxxParseError
from .preprocessor import make_pcpp_preprocessor
from .options import ParserOptions from .options import ParserOptions
from .simple import parse_string, ParsedData from .simple import parse_string, ParsedData
@ -49,14 +50,23 @@ def nondefault_repr(data: ParsedData) -> str:
return _inner_repr(data) return _inner_repr(data)
def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) -> None: def gentest(
infile: str, name: str, outfile: str, verbose: bool, fail: bool, pcpp: bool
) -> None:
# Goal is to allow making a unit test as easy as running this dumper # Goal is to allow making a unit test as easy as running this dumper
# on a file and copy/pasting this into a test # on a file and copy/pasting this into a test
with open(infile, "r") as fp: with open(infile, "r") as fp:
content = fp.read() content = fp.read()
maybe_options = ""
popt = ""
options = ParserOptions(verbose=verbose) options = ParserOptions(verbose=verbose)
if pcpp:
options.preprocessor = make_pcpp_preprocessor()
maybe_options = "options = ParserOptions(preprocessor=make_pcpp_preprocessor())"
popt = ", options=options"
try: try:
data = parse_string(content, options=options) data = parse_string(content, options=options)
@ -74,15 +84,17 @@ def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) ->
if not fail: if not fail:
stmt = nondefault_repr(data) stmt = nondefault_repr(data)
stmt = f""" stmt = f"""
data = parse_string(content, cleandoc=True) {maybe_options}
data = parse_string(content, cleandoc=True{popt})
assert data == {stmt} assert data == {stmt}
""" """
else: else:
stmt = f""" stmt = f"""
{maybe_options}
err = {repr(err)} err = {repr(err)}
with pytest.raises(CxxParseError, match=re.escape(err)): with pytest.raises(CxxParseError, match=re.escape(err)):
parse_string(content, cleandoc=True) parse_string(content, cleandoc=True{popt})
""" """
content = ("\n" + content.strip()).replace("\n", "\n ") content = ("\n" + content.strip()).replace("\n", "\n ")
@ -113,6 +125,7 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("header") parser.add_argument("header")
parser.add_argument("name", nargs="?", default="TODO") parser.add_argument("name", nargs="?", default="TODO")
parser.add_argument("--pcpp", default=False, action="store_true")
parser.add_argument("-v", "--verbose", default=False, action="store_true") parser.add_argument("-v", "--verbose", default=False, action="store_true")
parser.add_argument("-o", "--output", default="-") parser.add_argument("-o", "--output", default="-")
parser.add_argument( parser.add_argument(
@ -120,4 +133,4 @@ if __name__ == "__main__":
) )
args = parser.parse_args() args = parser.parse_args()
gentest(args.header, args.name, args.output, args.verbose, args.fail) gentest(args.header, args.name, args.output, args.verbose, args.fail, args.pcpp)

View File

@ -3,6 +3,7 @@ Contains optional preprocessor support via pcpp
""" """
import io import io
import os
from os.path import relpath from os.path import relpath
import typing import typing
from .options import PreprocessorFunction from .options import PreprocessorFunction
@ -15,9 +16,10 @@ class PreprocessorError(Exception):
class _CustomPreprocessor(Preprocessor): class _CustomPreprocessor(Preprocessor):
def __init__(self): def __init__(self, encoding: typing.Optional[str]):
Preprocessor.__init__(self) Preprocessor.__init__(self)
self.errors = [] self.errors: typing.List[str] = []
self.assume_encoding = encoding
def on_error(self, file, line, msg): def on_error(self, file, line, msg):
self.errors.append(f"{file}:{line} error: {msg}") self.errors.append(f"{file}:{line} error: {msg}")
@ -34,21 +36,15 @@ def _filter_self(fname: str, fp: typing.TextIO) -> str:
# isn't what a typical user of cxxheaderparser would want, so we strip out # isn't what a typical user of cxxheaderparser would want, so we strip out
# the line directives and any content that isn't in our original file # the line directives and any content that isn't in our original file
# Compute the filename to match based on how pcpp does it # pcpp always emits line directives that match whatever is passed in to it
try: line_ending = f'{fname}"\n'
relfname = relpath(fname)
except Exception:
relfname = fname
relfname = relfname.replace("\\", "/")
relfname += '"\n'
new_output = io.StringIO() new_output = io.StringIO()
keep = True keep = True
for line in fp: for line in fp:
if line.startswith("#line"): if line.startswith("#line"):
keep = line.endswith(relfname) keep = line.endswith(line_ending)
if keep: if keep:
new_output.write(line) new_output.write(line)
@ -62,11 +58,14 @@ def make_pcpp_preprocessor(
defines: typing.List[str] = [], defines: typing.List[str] = [],
include_paths: typing.List[str] = [], include_paths: typing.List[str] = [],
retain_all_content: bool = False, retain_all_content: bool = False,
encoding: typing.Optional[str] = None,
) -> PreprocessorFunction: ) -> PreprocessorFunction:
""" """
Creates a preprocessor function that uses pcpp (which must be installed Creates a preprocessor function that uses pcpp (which must be installed
separately) to preprocess the input text. separately) to preprocess the input text.
:param encoding: If specified, any include files are opened with this encoding
.. code-block:: python .. code-block:: python
pp = make_pcpp_preprocessor() pp = make_pcpp_preprocessor()
@ -77,7 +76,7 @@ def make_pcpp_preprocessor(
""" """
def _preprocess_file(filename: str, content: str) -> str: def _preprocess_file(filename: str, content: str) -> str:
pp = _CustomPreprocessor() pp = _CustomPreprocessor(encoding)
if include_paths: if include_paths:
for p in include_paths: for p in include_paths:
pp.add_path(p) pp.add_path(p)

View File

@ -24,6 +24,7 @@ See below for the contents of the returned :class:`ParsedData`.
""" """
import os
import sys import sys
import inspect import inspect
import typing import typing
@ -344,7 +345,7 @@ def parse_string(
def parse_file( def parse_file(
filename: str, filename: typing.Union[str, os.PathLike],
encoding: typing.Optional[str] = None, encoding: typing.Optional[str] = None,
*, *,
options: typing.Optional[ParserOptions] = None, options: typing.Optional[ParserOptions] = None,
@ -352,6 +353,7 @@ def parse_file(
""" """
Simple function to parse a header from a file and return a data structure Simple function to parse a header from a file and return a data structure
""" """
filename = os.fsdecode(filename)
if encoding is None: if encoding is None:
encoding = "utf-8-sig" encoding = "utf-8-sig"

View File

@ -0,0 +1,96 @@
import pathlib
from cxxheaderparser.options import ParserOptions
from cxxheaderparser.preprocessor import make_pcpp_preprocessor
from cxxheaderparser.simple import NamespaceScope, ParsedData, parse_file, parse_string
from cxxheaderparser.types import (
FundamentalSpecifier,
NameSpecifier,
PQName,
Token,
Type,
Value,
Variable,
)
def test_basic_preprocessor() -> None:
    """A ``#define`` in the input is expanded before the declaration is parsed."""
    source = "\n" "#define X 1\n" "int x = X;\n"
    pcpp_options = ParserOptions(preprocessor=make_pcpp_preprocessor())

    # The macro X must be substituted, so the variable's value token is "1".
    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    expected_x = Variable(
        name=PQName(segments=[NameSpecifier(name="x")]),
        type=int_type,
        value=Value(tokens=[Token(value="1")]),
    )
    expected = ParsedData(namespace=NamespaceScope(variables=[expected_x]))

    parsed = parse_string(source, cleandoc=True, options=pcpp_options)
    assert parsed == expected
def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
    """Declarations pulled in from an included header must not appear in the result."""
    # t1.h includes t2.h; t2.h supplies the macro X plus a variable that
    # is expected to be absent from the parsed output.
    header_texts = (
        ("t1.h", '#include "t2.h"\nint x = X;\n'),
        ("t2.h", "#define X 2\nint omitted = 1;\n"),
    )
    for header_name, text in header_texts:
        with open(tmp_path / header_name, "w") as out:
            out.write(text)

    pcpp_options = ParserOptions(preprocessor=make_pcpp_preprocessor())
    parsed = parse_file(tmp_path / "t1.h", options=pcpp_options)

    # Only `x` is expected, carrying the expansion of X defined in t2.h.
    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    expected_x = Variable(
        name=PQName(segments=[NameSpecifier(name="x")]),
        type=int_type,
        value=Value(tokens=[Token(value="2")]),
    )
    assert parsed == ParsedData(namespace=NamespaceScope(variables=[expected_x]))
def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
    """Headers in a non-default encoding parse when that encoding is supplied."""
    codec = "cp1252"

    # Both headers start with a comment holding the raw byte 0xA9, so they are
    # written in binary mode to keep that byte exactly as given.
    header_bytes = (
        ("t1.h", b'// \xa9 2023 someone\n#include "t2.h"\nint x = X;\n'),
        ("t2.h", b"// \xa9 2023 someone\n#define X 3\nint omitted = 1;\n"),
    )
    for header_name, raw in header_bytes:
        with open(tmp_path / header_name, "wb") as out:
            out.write(raw)

    # The same encoding is handed to the preprocessor (for the included file)
    # and to parse_file (for the top-level header).
    pcpp_options = ParserOptions(preprocessor=make_pcpp_preprocessor(encoding=codec))
    parsed = parse_file(tmp_path / "t1.h", options=pcpp_options, encoding=codec)

    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    expected_x = Variable(
        name=PQName(segments=[NameSpecifier(name="x")]),
        type=int_type,
        value=Value(tokens=[Token(value="3")]),
    )
    assert parsed == ParsedData(namespace=NamespaceScope(variables=[expected_x]))