@@ -174,6 +174,41 @@ def _is_printable_ascii(argstr: str) -> bool:
174
174
return all (32 <= ord (c ) <= 126 for c in argstr )
175
175
176
176
177
+ def _contains_apostrophe_or_backtick (argstr : str ) -> bool :
178
+ """
179
+ Check if a string contains apostrophe (') or backtick (`).
180
+
181
+ For typographical reasons, apostrophe (') and backtick (`) are mapped to left and
182
+ right single quotation marks (‘ and ’) in Adobe ISOLatin1+ encoding. To ensure that
183
+ what you type is what you get (issue #3476), they need special handling in the
184
+ ``_check_encoding`` and ``non_ascii_to_octal`` functions. More specifically, a
185
+ string containing printable ASCII characters with apostrophe (') and backtick (`)
186
+ will not be considered as "ascii" encoding.
187
+
188
+ Parameters
189
+ ----------
190
+ argstr
191
+ The string to be checked.
192
+
193
+ Returns
194
+ -------
195
+ ``True`` if the string contains apostrophe (') or backtick (`). Otherwise, return
196
+ ``False``.
197
+
198
+ Examples
199
+ --------
200
+ >>> _contains_apostrophe_or_backtick("12AB±β①②")
201
+ False
202
+ >>> _contains_apostrophe_or_backtick("12AB`")
203
+ True
204
+ >>> _contains_apostrophe_or_backtick("12AB'")
205
+ True
206
+ >>> _contains_apostrophe_or_backtick("12AB'`")
207
+ True
208
+ """ # noqa: RUF002
209
+ return "'" in argstr or "`" in argstr
210
+
211
+
177
212
def _check_encoding (argstr : str ) -> Encoding :
178
213
"""
179
214
Check the charset encoding of a string.
@@ -206,8 +241,9 @@ def _check_encoding(argstr: str) -> Encoding:
206
241
>>> _check_encoding("123AB中文") # Characters not in any charset encoding
207
242
'ISOLatin1+'
208
243
"""
209
- # Return "ascii" if the string only contains printable ASCII characters.
210
- if _is_printable_ascii (argstr ):
244
+ # Return "ascii" if the string only contains printable ASCII characters, excluding
245
+ # apostrophe (') and backtick (`).
246
+ if _is_printable_ascii (argstr ) and not _contains_apostrophe_or_backtick (argstr ):
211
247
return "ascii"
212
248
# Loop through all supported encodings and check if all characters in the string
213
249
# are in the charset of the encoding. If all characters are in the charset, return
@@ -402,9 +438,14 @@ def non_ascii_to_octal(argstr: str, encoding: Encoding = "ISOLatin1+") -> str:
402
438
'ABC \\261120\\260 DEF @~\\141@~ @%34%\\252@%%'
403
439
>>> non_ascii_to_octal("12ABāáâãäåβ①②", encoding="ISO-8859-4")
404
440
'12AB\\340\\341\\342\\343\\344\\345@~\\142@~@%34%\\254@%%@%34%\\255@%%'
441
+ >>> non_ascii_to_octal("'‘’\"“”")
442
+ '\\234\\140\\047"\\216\\217'
405
443
""" # noqa: RUF002
406
- # Return the input string if it only contains printable ASCII characters.
407
- if encoding == "ascii" or _is_printable_ascii (argstr ):
444
+ # Return the input string if it only contains printable ASCII characters, excluding
445
+ # apostrophe (') and backtick (`).
446
+ if encoding == "ascii" or (
447
+ _is_printable_ascii (argstr ) and not _contains_apostrophe_or_backtick (argstr )
448
+ ):
408
449
return argstr
409
450
410
451
# Dictionary mapping non-ASCII characters to octal codes
@@ -420,6 +461,11 @@ def non_ascii_to_octal(argstr: str, encoding: Encoding = "ISOLatin1+") -> str:
420
461
421
462
# Remove any printable characters.
422
463
mapping = {k : v for k , v in mapping .items () if k not in string .printable }
464
+
465
+ if encoding == "ISOLatin1+" :
466
+ # Map apostrophe (') and backtick (`) to correct octal codes.
467
+ # See _contains_apostrophe_or_backtick() for explanations.
468
+ mapping .update ({"'" : "\\ 234" , "`" : "\\ 221" })
423
469
return argstr .translate (str .maketrans (mapping ))
424
470
425
471
@@ -465,16 +511,12 @@ def build_arg_list( # noqa: PLR0912
465
511
['-A', '-D0', '-E200', '-F', '-G1/2/3/4']
466
512
>>> build_arg_list(dict(A="1/2/3/4", B=["xaf", "yaf", "WSen"], C=("1p", "2p")))
467
513
['-A1/2/3/4', '-BWSen', '-Bxaf', '-Byaf', '-C1p', '-C2p']
468
- >>> print(
469
- ... build_arg_list(
470
- ... dict(
471
- ... B=["af", "WSne+tBlank Space"],
472
- ... F='+t"Empty Spaces"',
473
- ... l="'Void Space'",
474
- ... )
475
- ... )
476
- ... )
477
- ['-BWSne+tBlank Space', '-Baf', '-F+t"Empty Spaces"', "-l'Void Space'"]
514
+ >>> build_arg_list(dict(B=["af", "WSne+tBlank Space"]))
515
+ ['-BWSne+tBlank Space', '-Baf']
516
+ >>> build_arg_list(dict(F='+t"Empty Spaces"'))
517
+ ['-F+t"Empty Spaces"']
518
+ >>> build_arg_list(dict(l="'Void Space'"))
519
+ ['-l\\234Void Space\\234', '--PS_CHAR_ENCODING=ISOLatin1+']
478
520
>>> print(
479
521
... build_arg_list(
480
522
... dict(A="0", B=True, C="rainbow"),
0 commit comments