Merge pull request #63 from robotpy/pp-encoding

Variety of preprocessor related fixes
This commit is contained in:
Dustin Spicuzza 2023-09-03 18:54:28 -04:00 committed by GitHub
commit a110a5508b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 133 additions and 20 deletions

View File

@ -81,7 +81,7 @@ jobs:
run: python setup.py bdist_wheel
- name: Install test dependencies
run: python -m pip --disable-pip-version-check install pytest
run: python -m pip --disable-pip-version-check install pytest pcpp
- name: Test wheel
shell: bash

View File

@ -26,6 +26,9 @@ def dumpmain() -> None:
parser.add_argument(
"--pcpp", default=False, action="store_true", help="Use pcpp preprocessor"
)
parser.add_argument(
"--encoding", default=None, help="Use this encoding to open the file"
)
args = parser.parse_args()
@ -33,10 +36,10 @@ def dumpmain() -> None:
if args.pcpp:
from .preprocessor import make_pcpp_preprocessor
preprocessor = make_pcpp_preprocessor()
preprocessor = make_pcpp_preprocessor(encoding=args.encoding)
options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor)
data = parse_file(args.header, options=options)
data = parse_file(args.header, encoding=args.encoding, options=options)
if args.mode == "pprint":
ddata = dataclasses.asdict(data)

View File

@ -6,6 +6,7 @@ import subprocess
import typing
from .errors import CxxParseError
from .preprocessor import make_pcpp_preprocessor
from .options import ParserOptions
from .simple import parse_string, ParsedData
@ -49,14 +50,23 @@ def nondefault_repr(data: ParsedData) -> str:
return _inner_repr(data)
def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) -> None:
def gentest(
infile: str, name: str, outfile: str, verbose: bool, fail: bool, pcpp: bool
) -> None:
# Goal is to allow making a unit test as easy as running this dumper
# on a file and copy/pasting this into a test
with open(infile, "r") as fp:
content = fp.read()
# Snippets spliced into the generated test source: `maybe_options` becomes a
# standalone statement and `popt` is appended to the parse_string() call.
# Both stay empty unless the preprocessor is requested.
maybe_options = ""
popt = ""
options = ParserOptions(verbose=verbose)
# Gate on the caller's `pcpp` flag, not on `options`: the options object was
# constructed on the line above, so testing it says nothing about whether the
# preprocessor was requested and would leave the `pcpp` argument unused.
if pcpp:
    options.preprocessor = make_pcpp_preprocessor()
    maybe_options = "options = ParserOptions(preprocessor=make_pcpp_preprocessor())"
    popt = ", options=options"
try:
data = parse_string(content, options=options)
@ -74,15 +84,17 @@ def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) ->
if not fail:
stmt = nondefault_repr(data)
stmt = f"""
data = parse_string(content, cleandoc=True)
{maybe_options}
data = parse_string(content, cleandoc=True{popt})
assert data == {stmt}
"""
else:
stmt = f"""
{maybe_options}
err = {repr(err)}
with pytest.raises(CxxParseError, match=re.escape(err)):
parse_string(content, cleandoc=True)
parse_string(content, cleandoc=True{popt})
"""
content = ("\n" + content.strip()).replace("\n", "\n ")
@ -113,6 +125,7 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("header")
parser.add_argument("name", nargs="?", default="TODO")
parser.add_argument("--pcpp", default=False, action="store_true")
parser.add_argument("-v", "--verbose", default=False, action="store_true")
parser.add_argument("-o", "--output", default="-")
parser.add_argument(
@ -120,4 +133,4 @@ if __name__ == "__main__":
)
args = parser.parse_args()
gentest(args.header, args.name, args.output, args.verbose, args.fail)
gentest(args.header, args.name, args.output, args.verbose, args.fail, args.pcpp)

View File

@ -3,6 +3,7 @@ Contains optional preprocessor support via pcpp
"""
import io
import os
from os.path import relpath
import typing
from .options import PreprocessorFunction
@ -15,9 +16,10 @@ class PreprocessorError(Exception):
class _CustomPreprocessor(Preprocessor):
def __init__(self):
def __init__(self, encoding: typing.Optional[str]):
Preprocessor.__init__(self)
self.errors = []
self.errors: typing.List[str] = []
self.assume_encoding = encoding
def on_error(self, file, line, msg):
self.errors.append(f"{file}:{line} error: {msg}")
@ -34,21 +36,15 @@ def _filter_self(fname: str, fp: typing.TextIO) -> str:
# isn't what a typical user of cxxheaderparser would want, so we strip out
# the line directives and any content that isn't in our original file
# Compute the filename to match based on how pcpp does it
try:
relfname = relpath(fname)
except Exception:
relfname = fname
relfname = relfname.replace("\\", "/")
relfname += '"\n'
# pcpp always emits line directives that match whatever is passed in to it
line_ending = f'{fname}"\n'
new_output = io.StringIO()
keep = True
for line in fp:
if line.startswith("#line"):
keep = line.endswith(relfname)
keep = line.endswith(line_ending)
if keep:
new_output.write(line)
@ -62,11 +58,14 @@ def make_pcpp_preprocessor(
defines: typing.List[str] = [],
include_paths: typing.List[str] = [],
retain_all_content: bool = False,
encoding: typing.Optional[str] = None,
) -> PreprocessorFunction:
"""
Creates a preprocessor function that uses pcpp (which must be installed
separately) to preprocess the input text.
:param encoding: If specified, any include files are opened with this encoding
.. code-block:: python
pp = make_pcpp_preprocessor()
@ -77,7 +76,7 @@ def make_pcpp_preprocessor(
"""
def _preprocess_file(filename: str, content: str) -> str:
pp = _CustomPreprocessor()
pp = _CustomPreprocessor(encoding)
if include_paths:
for p in include_paths:
pp.add_path(p)

View File

@ -24,6 +24,7 @@ See below for the contents of the returned :class:`ParsedData`.
"""
import os
import sys
import inspect
import typing
@ -344,7 +345,7 @@ def parse_string(
def parse_file(
filename: str,
filename: typing.Union[str, os.PathLike],
encoding: typing.Optional[str] = None,
*,
options: typing.Optional[ParserOptions] = None,
@ -352,6 +353,7 @@ def parse_file(
"""
Simple function to parse a header from a file and return a data structure
"""
filename = os.fsdecode(filename)
if encoding is None:
encoding = "utf-8-sig"

View File

@ -0,0 +1,96 @@
import pathlib
from cxxheaderparser.options import ParserOptions
from cxxheaderparser.preprocessor import make_pcpp_preprocessor
from cxxheaderparser.simple import NamespaceScope, ParsedData, parse_file, parse_string
from cxxheaderparser.types import (
FundamentalSpecifier,
NameSpecifier,
PQName,
Token,
Type,
Value,
Variable,
)
def test_basic_preprocessor() -> None:
    """A macro defined in the parsed text is substituted before parsing."""
    content = (
        "\n"
        "#define X 1\n"
        "int x = X;\n"
    )

    pp_options = ParserOptions(preprocessor=make_pcpp_preprocessor())
    data = parse_string(content, cleandoc=True, options=pp_options)

    # `X` must have been replaced by its definition, so the initializer of
    # `x` is the literal token "1".
    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    expected_variable = Variable(
        name=PQName(segments=[NameSpecifier(name="x")]),
        type=int_type,
        value=Value(tokens=[Token(value="1")]),
    )
    expected = ParsedData(namespace=NamespaceScope(variables=[expected_variable]))

    assert data == expected
def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
    """Ensure that content in other headers is omitted"""
    main_header = tmp_path / "t1.h"
    included_header = tmp_path / "t2.h"

    # t1.h pulls in t2.h, which supplies the macro and one extra declaration
    # that is expected to be absent from the parsed result.
    main_header.write_text('#include "t2.h"\nint x = X;\n')
    included_header.write_text("#define X 2\nint omitted = 1;\n")

    pp_options = ParserOptions(preprocessor=make_pcpp_preprocessor())
    # Pass the path object itself (not a str) to parse_file.
    data = parse_file(main_header, options=pp_options)

    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    expected_variable = Variable(
        name=PQName(segments=[NameSpecifier(name="x")]),
        type=int_type,
        value=Value(tokens=[Token(value="2")]),
    )
    # Only `x` is expected; `omitted` lives in the included header.
    expected = ParsedData(namespace=NamespaceScope(variables=[expected_variable]))

    assert data == expected
def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
    """Ensure we can handle alternate encodings"""
    source_encoding = "cp1252"
    main_header = tmp_path / "t1.h"
    included_header = tmp_path / "t2.h"

    # Both headers start with a comment containing the raw byte 0xA9, written
    # in binary mode so the bytes land on disk exactly as given.
    main_header.write_bytes(b'// \xa9 2023 someone\n#include "t2.h"\nint x = X;\n')
    included_header.write_bytes(b"// \xa9 2023 someone\n#define X 3\nint omitted = 1;\n")

    # The same encoding is given to the preprocessor and to parse_file.
    preprocessor = make_pcpp_preprocessor(encoding=source_encoding)
    pp_options = ParserOptions(preprocessor=preprocessor)
    data = parse_file(main_header, options=pp_options, encoding=source_encoding)

    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    expected_variable = Variable(
        name=PQName(segments=[NameSpecifier(name="x")]),
        type=int_type,
        value=Value(tokens=[Token(value="3")]),
    )
    expected = ParsedData(namespace=NamespaceScope(variables=[expected_variable]))

    assert data == expected