From ca5124c2ae5bc59b4c8987980b9395b6499bd4c8 Mon Sep 17 00:00:00 2001 From: strayge Date: Tue, 25 Jun 2024 00:32:00 +0400 Subject: [PATCH] add --cp option for custom codepages --- README.md | 1 + pylnk3/cli.py | 8 ++++++-- pylnk3/helpers.py | 18 ++++++++++++------ pylnk3/structures/id_list/path.py | 15 +++++++++------ pylnk3/structures/link_info.py | 19 ++++++++++--------- pylnk3/structures/lnk.py | 16 +++++++++------- pylnk3/utils/read_write.py | 10 +++++----- 7 files changed, 52 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 8693b99..a96247a 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ pylnk3 create 'c:\\dir\\file.txt' text.lnk -m Minimized -d "Description" **dev** add `--json` format for `parse` command `Lnk` class now has separated constructors `Lnk.from_bytes()` and `Lnk.from_file()` +add `--cp` option to specify codepage for ANSI strings [FIX] strings with 2-bytes unicode characters now correctly written on save [FIX] allow read/write links with only `This PC` part in target [FIX] allow read completely unknown `ExtraData` blocks diff --git a/pylnk3/cli.py b/pylnk3/cli.py index 8257105..afb8eae 100644 --- a/pylnk3/cli.py +++ b/pylnk3/cli.py @@ -36,6 +36,7 @@ def main() -> None: parser_parse.add_argument('filename', help='lnk filename to read') parser_parse.add_argument('props', nargs='*', help='props path to read') parser_parse.add_argument('--json', '-j', action='store_true', help='output as json') + parser_parse.add_argument('--cp', nargs='?', help='codepage for ANSI strings') parser_create = subparsers.add_parser('create', aliases=['c'], help='create new lnk file') parser_create.add_argument('target', help='target path') @@ -50,10 +51,12 @@ def main() -> None: parser_create.add_argument( '--directory', action='store_true', help='threat target as directory (by default guessed by dot in target)', ) + parser_create.add_argument('--cp', nargs='?', help='codepage for ANSI strings') parser_dup = subparsers.add_parser('duplicate', aliases=['d'], help='read and write lnk file') parser_dup.add_argument('filename', help='lnk filename to read') parser_dup.add_argument('new_filename', help='new filename to write') + parser_dup.add_argument('--cp', nargs='?', help='codepage for ANSI strings') args = parser.parse_args() if args.help or not args.action: @@ -72,9 +75,10 @@ def main() -> None: icon_index=args.icon_index, work_dir=args.workdir, window_mode=args.mode, is_file=is_file, + cp=args.cp, ) elif args.action in ('parse', 'p'): - lnk = parse(args.filename) + lnk = parse(args.filename, cp=args.cp) props = args.props if len(props) == 0: print(json.dumps(lnk.json(), indent=4) if args.json else lnk.text()) @@ -82,7 +86,7 @@ def main() -> None: for prop in props: print(get_prop(lnk, prop.split('.'))) elif args.action in ('d', 'duplicate'): - lnk = parse(args.filename) + lnk = parse(args.filename, cp=args.cp) new_filename = args.new_filename print(lnk) lnk.save(new_filename) diff --git a/pylnk3/helpers.py b/pylnk3/helpers.py index c0a4b70..d05f2cb 100644 --- a/pylnk3/helpers.py +++ b/pylnk3/helpers.py @@ -37,8 +37,8 @@ def is_drive(data: Union[str, Any]) -> bool: return p.match(data) is not None -def parse(lnk: str) -> Lnk: - return Lnk.from_file(lnk) +def parse(lnk: str, cp: Optional[str] = None) -> Lnk: + return Lnk.from_file(lnk, cp=cp) def for_file( @@ -51,13 +51,14 @@ def for_file( work_dir: Optional[str] = None, window_mode: Optional[str] = None, is_file: Optional[bool] = None, + cp: Optional[str] = None, ) -> Lnk: - lnk = Lnk() + lnk = Lnk(cp=cp) lnk.link_flags.IsUnicode = True lnk.link_info = None if target_file.startswith('\\\\'): # remote link - lnk.link_info = LinkInfo() + lnk.link_info = LinkInfo(cp=cp) lnk.link_info.remote = 1 # extract server + share name from full path path_parts = target_file.split('\\') @@ -80,7 +81,11 @@ def for_file( for level in levels[1:]: is_last_level = level == levels[-1] # consider all segments before last as directory - segment = PathSegmentFileOrFolderEntry.create_for_path(level, is_file=is_file if is_last_level else False) + segment = PathSegmentFileOrFolderEntry.create_for_path( + path=level, + is_file=is_file if is_last_level else False, + cp=cp, + ) elements.append(segment) lnk.shell_item_id_list = LinkTargetIDList() lnk.shell_item_id_list.items = elements @@ -108,6 +113,7 @@ def for_file( def from_segment_list( data: List[Union[str, Dict[str, Any]]], lnk_name: Optional[str] = None, + cp: Optional[str] = None, ) -> Lnk: """ Creates a lnk file from a list of path segments. @@ -137,7 +143,7 @@ def from_segment_list( """ if not isinstance(data, (list, tuple)): raise ValueError("Invalid data format, list or tuple expected") - lnk = Lnk() + lnk = Lnk(cp=cp) entries: List[IDListEntry] = [] if is_drive(data[0]): assert isinstance(data[0], str) diff --git a/pylnk3/structures/id_list/path.py b/pylnk3/structures/id_list/path.py index b51f309..26ef161 100644 --- a/pylnk3/structures/id_list/path.py +++ b/pylnk3/structures/id_list/path.py @@ -74,7 +74,8 @@ def json(self) -> dict: class PathSegmentFileOrFolderEntry(PathSegmentEntry): - def __init__(self, bytes: Optional[bytes] = None) -> None: + def __init__(self, bytes: Optional[bytes] = None, cp: Optional[str] = None) -> None: + self.cp = cp self.type = 'FILE_OR_FOLDER' self.flags = FileOrFolderEntryFlags() @@ -103,7 +104,7 @@ def __init__(self, bytes: Optional[bytes] = None) -> None: if self.flags.IsUnicode: self.short_name = read_cunicode(buf) else: - self.short_name = read_cstring(buf, padding=True) + self.short_name = read_cstring(buf, padding=True, cp=self.cp) extra_size = read_short(buf) extra_version = read_short(buf) extra_signature = read_int(buf) @@ -133,7 +134,7 @@ def __init__(self, bytes: Optional[bytes] = None) -> None: if extra_version >= 7: self.localized_name = read_cunicode(buf) else: - self.localized_name = read_cstring(buf) + self.localized_name = read_cstring(buf, cp=self.cp) version_offset = read_short(buf) @property @@ -165,7 +166,7 @@ def bytes(self) -> bytes: if self.flags.IsUnicode: write_cunicode(self.short_name, out) else: - write_cstring(self.short_name, out, padding=True) + write_cstring(self.short_name, out, padding=True, cp=self.cp) version = 3 # just hardcode some version # structures below compatible with versions 3 and 9 in case someone needs it @@ -220,8 +221,10 @@ def _validate(self) -> None: self.short_name = self.full_name @classmethod - def create_for_path(cls, path: str, is_file: Optional[bool] = None) -> 'PathSegmentFileOrFolderEntry': - entry = cls() + def create_for_path( + cls, path: str, is_file: Optional[bool] = None, cp: Optional[str] = None, + ) -> 'PathSegmentFileOrFolderEntry': + entry = cls(cp=cp) fs_stat = None try: diff --git a/pylnk3/structures/link_info.py b/pylnk3/structures/link_info.py index 04256a3..668c683 100644 --- a/pylnk3/structures/link_info.py +++ b/pylnk3/structures/link_info.py @@ -32,7 +32,8 @@ class LinkInfo(Serializable): - def __init__(self, lnk: Optional[BufferedIOBase] = None) -> None: + def __init__(self, lnk: Optional[BufferedIOBase] = None, cp: Optional[str] = None) -> None: + self.cp = cp self.offs_local_base_path_unicode = 0 self.offs_local_base_path_suffix_unicode = 0 self.local_base_path_unicode: str = '' @@ -74,17 +75,17 @@ def _parse_path_elements(self, lnk: BufferedIOBase) -> None: if self.remote: # 20 is the offset of the network share name lnk.seek(self.start + self.offs_network_volume_table + 20) - self.network_share_name = read_cstring(lnk) + self.network_share_name = read_cstring(lnk, cp=self.cp) lnk.seek(self.start + self.offs_base_name) - self.base_name = read_cstring(lnk) + self.base_name = read_cstring(lnk, cp=self.cp) if self.local: lnk.seek(self.start + self.offs_local_volume_table + 4) self.drive_type = _DRIVE_TYPES.get(read_int(lnk)) self.drive_serial = read_int(lnk) lnk.read(4) # volume name offset (10h) - self.volume_label = read_cstring(lnk) + self.volume_label = read_cstring(lnk, cp=self.cp) lnk.seek(self.start + self.offs_local_base_path) - self.local_base_path = read_cstring(lnk) + self.local_base_path = read_cstring(lnk, cp=self.cp) if self.offs_local_base_path_unicode: lnk.seek(self.start + self.offs_local_base_path_unicode) self.local_base_path_unicode = read_cunicode(lnk) @@ -121,11 +122,11 @@ def write(self, lnk: BufferedIOBase) -> None: if self.remote: self._write_network_volume_table(lnk) - write_cstring(self.base_name, lnk, padding=False) + write_cstring(self.base_name, lnk, padding=False, cp=self.cp) return self._write_local_volume_table(lnk) - write_cstring(self.local_base_path, lnk, padding=False) + write_cstring(self.local_base_path, lnk, padding=False, cp=self.cp) if self.local_base_path_unicode: write_cunicode(self.local_base_path_unicode, lnk) write_cunicode(self.local_base_path_suffix_unicode, lnk) @@ -179,7 +180,7 @@ def _write_network_volume_table(self, buf: BufferedIOBase) -> None: write_int(20, buf) # size of Network Volume Table write_int(0, buf) # ? write_int(131072, buf) # ? - write_cstring(self.network_share_name, buf) + write_cstring(self.network_share_name, buf, cp=self.cp) def _write_local_volume_table(self, buf: BufferedIOBase) -> None: write_int(self.size_local_volume_table, buf) @@ -189,7 +190,7 @@ def _write_local_volume_table(self, buf: BufferedIOBase) -> None: write_int(drive_type, buf) write_int(self.drive_serial, buf) write_int(16, buf) # volume name offset - write_cstring(self.volume_label, buf) + write_cstring(self.volume_label, buf, cp=self.cp) @property def path(self) -> str: diff --git a/pylnk3/structures/lnk.py b/pylnk3/structures/lnk.py index 44a0c1e..321a8c4 100644 --- a/pylnk3/structures/lnk.py +++ b/pylnk3/structures/lnk.py @@ -53,7 +53,8 @@ def assert_lnk_signature(f: BufferedIOBase) -> None: class Lnk(Serializable): - def __init__(self) -> None: + def __init__(self, cp: Optional[str] = None) -> None: + self.cp = cp self.link_flags = LinkFlags() self.file_flags = FileFlags() self.creation_time = datetime.now() @@ -63,7 +64,7 @@ def __init__(self) -> None: self.icon_index = 0 self._show_command = WINDOW_NORMAL self.hot_key: Optional[str] = None - self._link_info = LinkInfo() + self._link_info = LinkInfo(cp=self.cp) self.description = None self.relative_path = None self.work_dir = None @@ -118,7 +119,7 @@ def _parse_lnk_file(self, lnk: BufferedIOBase) -> None: # LINKINFO (HasLinkInfo) if self.link_flags.HasLinkInfo and not self.link_flags.ForceNoLinkInfo: - self._link_info = LinkInfo(lnk) + self._link_info = LinkInfo(lnk, cp=self.cp) lnk.seek(self._link_info.start + self._link_info.size) # STRING_DATA = [NAME_STRING] [RELATIVE_PATH] [WORKING_DIR] [COMMAND_LINE_ARGUMENTS] [ICON_LOCATION] @@ -305,10 +306,10 @@ def specify_remote_location(self, network_share_name: str, base_name: str) -> No self._link_info.make_path() @classmethod - def from_bytes(cls, data: bytes) -> 'Lnk': + def from_bytes(cls, data: bytes, cp: Optional[str] = None) -> 'Lnk': f = BytesIO(data) assert_lnk_signature(f) - lnk = cls() + lnk = cls(cp=cp) lnk._parse_lnk_file(f) return lnk @@ -342,15 +343,16 @@ def from_file( cls, filename: Optional[str] = None, file: Optional[BufferedReader] = None, + cp: Optional[str] = None, ) -> 'Lnk': """Create Lnk object from file (by filename or file-like object).""" if filename: if not os.path.exists(filename): filename += ".lnk" with open(filename, 'rb') as f: - return cls.from_bytes(f.read()) + return cls.from_bytes(f.read(), cp=cp) elif file: - return cls.from_bytes(file.read()) + return cls.from_bytes(file.read(), cp=cp) raise ValueError("Either filename or file must be specified") def __str__(self) -> str: diff --git a/pylnk3/utils/read_write.py b/pylnk3/utils/read_write.py index 4e8c48e..cebff70 100644 --- a/pylnk3/utils/read_write.py +++ b/pylnk3/utils/read_write.py @@ -1,7 +1,7 @@ from datetime import datetime from io import BufferedIOBase from struct import pack, unpack -from typing import Union +from typing import Optional, Union DEFAULT_CHARSET = 'cp1251' @@ -31,7 +31,7 @@ def read_cunicode(buf: BufferedIOBase) -> str: return s.decode('utf-16-le') -def read_cstring(buf: BufferedIOBase, padding: bool = False) -> str: +def read_cstring(buf: BufferedIOBase, padding: bool = False, cp: Optional[str] = None) -> str: s = b"" b = buf.read(1) while b != b'\x00': @@ -40,7 +40,7 @@ def read_cstring(buf: BufferedIOBase, padding: bool = False) -> str: if padding and not len(s) % 2: buf.read(1) # make length + terminator even # TODO: encoding is not clear, unicode-escape has been necessary sometimes - return s.decode(DEFAULT_CHARSET) + return s.decode(cp or DEFAULT_CHARSET) def read_sized_string(buf: BufferedIOBase, string: bool = True) -> Union[str, bytes]: @@ -90,9 +90,9 @@ def write_double(val: int, buf: BufferedIOBase) -> None: buf.write(pack(' None: +def write_cstring(val: str, buf: BufferedIOBase, padding: bool = False, cp: Optional[str] = None) -> None: # val = val.encode('unicode-escape').replace('\\\\', '\\') - val_bytes = val.encode(DEFAULT_CHARSET) + val_bytes = val.encode(cp or DEFAULT_CHARSET) buf.write(val_bytes + b'\x00') if padding and not len(val_bytes) % 2: buf.write(b'\x00')