Provide mechanism to specify preprocessor file encoding

This commit is contained in:
Dustin Spicuzza 2023-09-01 20:37:14 -04:00
parent de4d06defe
commit a13cdf4f67
3 changed files with 41 additions and 4 deletions

View File

@ -26,6 +26,9 @@ def dumpmain() -> None:
parser.add_argument( parser.add_argument(
"--pcpp", default=False, action="store_true", help="Use pcpp preprocessor" "--pcpp", default=False, action="store_true", help="Use pcpp preprocessor"
) )
parser.add_argument(
"--encoding", default=None, help="Use this encoding to open the file"
)
args = parser.parse_args() args = parser.parse_args()
@ -33,10 +36,10 @@ def dumpmain() -> None:
if args.pcpp: if args.pcpp:
from .preprocessor import make_pcpp_preprocessor from .preprocessor import make_pcpp_preprocessor
preprocessor = make_pcpp_preprocessor() preprocessor = make_pcpp_preprocessor(encoding=args.encoding)
options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor) options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor)
data = parse_file(args.header, options=options) data = parse_file(args.header, encoding=args.encoding, options=options)
if args.mode == "pprint": if args.mode == "pprint":
ddata = dataclasses.asdict(data) ddata = dataclasses.asdict(data)

View File

@ -16,9 +16,10 @@ class PreprocessorError(Exception):
class _CustomPreprocessor(Preprocessor): class _CustomPreprocessor(Preprocessor):
def __init__(self, encoding: typing.Optional[str] = None) -> None:
    """Initialize the pcpp preprocessor base and per-instance state.

    :param encoding: Value stored on ``assume_encoding``. Defaults to
        ``None`` so constructing the class without arguments keeps
        working as it did before this parameter existed.
    """
    super().__init__()
    # Formatted messages are appended here by on_error()
    self.errors: typing.List[str] = []
    self.assume_encoding = encoding
def on_error(self, file, line, msg): def on_error(self, file, line, msg):
self.errors.append(f"{file}:{line} error: {msg}") self.errors.append(f"{file}:{line} error: {msg}")
@ -57,11 +58,14 @@ def make_pcpp_preprocessor(
defines: typing.List[str] = [], defines: typing.List[str] = [],
include_paths: typing.List[str] = [], include_paths: typing.List[str] = [],
retain_all_content: bool = False, retain_all_content: bool = False,
encoding: typing.Optional[str] = None,
) -> PreprocessorFunction: ) -> PreprocessorFunction:
""" """
Creates a preprocessor function that uses pcpp (which must be installed Creates a preprocessor function that uses pcpp (which must be installed
separately) to preprocess the input text. separately) to preprocess the input text.
:param encoding: If specified any include files are opened with this encoding
.. code-block:: python .. code-block:: python
pp = make_pcpp_preprocessor() pp = make_pcpp_preprocessor()
@ -72,7 +76,7 @@ def make_pcpp_preprocessor(
""" """
def _preprocess_file(filename: str, content: str) -> str: def _preprocess_file(filename: str, content: str) -> str:
pp = _CustomPreprocessor() pp = _CustomPreprocessor(encoding)
if include_paths: if include_paths:
for p in include_paths: for p in include_paths:
pp.add_path(p) pp.add_path(p)

View File

@ -64,3 +64,33 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
] ]
) )
) )
def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
    """Parse a cp1252-encoded header that includes another cp1252 header.

    Both files begin with a comment holding the single byte 0xA9, and the
    same encoding name is handed to the pcpp preprocessor and to
    ``parse_file``. Only ``int x = 3`` is expected in the parsed result.
    """
    # Header fixtures are written as raw bytes so no implicit text encoding
    # is applied when creating them.
    fixtures = {
        "t1.h": (
            b"// \xa9 2023 someone\n"
            b'#include "t2.h"'
            b"\n"
            b"int x = X;\n"
        ),
        "t2.h": (
            b"// \xa9 2023 someone\n"
            b"#define X 3\n"
            b"int omitted = 1;\n"
        ),
    }
    for header_name, raw_bytes in fixtures.items():
        (tmp_path / header_name).write_bytes(raw_bytes)

    pcpp = make_pcpp_preprocessor(encoding="cp1252")
    options = ParserOptions(preprocessor=pcpp)
    data = parse_file(tmp_path / "t1.h", options=options, encoding="cp1252")

    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    expected = ParsedData(
        namespace=NamespaceScope(
            variables=[
                Variable(
                    name=PQName(segments=[NameSpecifier(name="x")]),
                    type=int_type,
                    value=Value(tokens=[Token(value="3")]),
                )
            ]
        )
    )
    assert data == expected