From 136dc9db74d8b02891f1343ea815588f6e8ea35b Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Thu, 2 Nov 2023 22:51:06 +1300 Subject: [PATCH 01/11] WIP write out windows import library --- src/archive.rs | 287 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 283 insertions(+), 4 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 5a29bc18d..dbd634d6a 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -1,3 +1,4 @@ +use std::fs; use std::path::{Path, PathBuf}; use rustc_codegen_ssa::back::archive::{ @@ -15,11 +16,289 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { fn create_dll_import_lib( &self, _sess: &Session, - _lib_name: &str, - _dll_imports: &[rustc_session::cstore::DllImport], - _tmpdir: &Path, + lib_name: &str, + dll_imports: &[rustc_session::cstore::DllImport], + tmpdir: &Path, _is_direct_dependency: bool, ) -> PathBuf { - unimplemented!("creating dll imports is not yet supported"); + let mut import_names = Vec::new(); + for dll_import in dll_imports { + import_names.push(dll_import.name.as_str()); + } + let lib_path = tmpdir.join(format!("{}.lib", lib_name)); + // todo: emit session error instead of expects + fs::write(&lib_path, windows_import_lib::generate(lib_name, &import_names)) + .expect("failed to write import library"); + + lib_path + } +} + +// todo: pull out to a proper location. Really should be in `object` crate! +// todo: support ordinals +// todo: support name types (e.g. verbatim+) +// todo: support long member names +// todo: support windows-gnu flavor? +// todo: provide machine +// todo: remove any panics, nice errors +mod windows_import_lib { + // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format + // + // Windows .lib files are System-V (aka. GUN) flavored ar files with a couple of extra lookup + // members. 
+ // + // An archive is the 8 bytes b"!<arch>\n" + // followed by a sequence of 60 byte member headers: + // 0: name: [u8; 16], // member name, terminated with "/". If it is longer than 15, then + // // use "/n" where "n" is a decimal for the offset in bytes into + // // the longnames ("//") member contents. + // 16: date: [u8; 12], // ASCII decimal seconds since UNIX epoch - always -1 for MSVC + // 28: uid: [u8; 6], // ASCII decimal user id. Always blank for MSVC + // 34: gid: [u8; 6], // ditto for group id. + // 40: mode: [u8; 8], // ASCII octal UNIX mode. 0 for MSVC + // 48: size: [u8; 10], // ASCII decimal data size. + // 58: end: b"`\n", + // then size bytes of payload. If payload is odd sized, pad + // to an even offset with \n. + // + // You must store two extra members at the start, a legacy member lookup table member + // and the current member lookup and symbol table, both with empty ("/") names. + // + // The legacy table member has the name "/" with following contents, using big-endian numbers: + // count: u32, // number of indexed symbols + // offsets: [u32, count], // file offsets to the header of the member that contains + // // that symbol. + // names: * // sequence of null terminated symbol names. + // + // The current table member also has the name "/", and has the following contents, using + // little-endian numbers: + // member_count: u32, // number of members + // member_offsets: [u32; member_count], // file offsets to each member header + // symbol_count: u32, // number of symbols + // symbol_member: [u16; symbol_count], // *1-based* index of the member that contains + // // each symbol + // symbol_names: * // sequence of null terminated symbol names + // + // Then the long names member ("//") as with regular GNU ar files, just a sequence of + // null terminated strings indexed by members using the long name format "/n" as described + // above. + // + // Then regular members follow. 
+ // + // This library emits only import libraries, that is, libraries with a short import object + // describing an import from a dll. That means each member contains exactly one symbol. The member + // name doesn't seem to matter, including duplicates, we use the dll name since that's what's in the + // files generated by MSVC tools. + // + // The short import object has the form: + // header: + // sig1: 0u16 + // sig2: 0xFFFFu16 + // version: u16, // normally 0 + // machine: u16, // IMAGE_MACHINE_* value, e.g. 0x8664 for AMD64 + // time_date_stamp: u32, // normally 0 + // size_of_data: u32, // size following the header + // ordinal_or_hint: u16, // depending on flag + // object_type: u2, // IMPORT_OBJECT_{CODE,DATA,CONST} = 0, 1, 2 + // name_type: u3, // IMPORT_OBJECT_{ORDINAL,NAME,NAME_NO_PREFIX,NAME_UNDECORATE,NAME_EXPORTAS} = 0, 1, 2, 3, 4 + // reserved: u11, + // data: // size_of_data bytes + // name: * // import name; null terminated string + // dll_name: * // dll name; null terminated string + pub fn generate(dll_name: &str, import_names: &[&str]) -> Vec { + assert!(dll_name.len() < 16, "long member names not supported yet"); + assert!(import_names.len() <= 0xFFFF, "too many import names"); + // number of symbols, and members containing symbols for symbol lookup members + let symbol_count = import_names.len(); + + let mut writer = Writer::new(); + + // legacy symbol directory + let mut legacy_symbol_directory = writer.start_member_raw(); + legacy_symbol_directory.set_raw_name(b"/"); + legacy_symbol_directory.write_u32_be(symbol_count as u32); + // reserve space for offsets. 
+ let legacy_member_table_offset = legacy_symbol_directory.reserve_bytes(symbol_count * 4); + // string table + for name in import_names { + legacy_symbol_directory.write_c_str(name); + } + // done with legacy symbol directory + drop(legacy_symbol_directory); + + // current symbol directory + let mut current_symbol_directory = writer.start_member_raw(); + current_symbol_directory.set_raw_name(b"/"); + // member count: same as symbol count for import library + current_symbol_directory.write_u32_le(symbol_count as u32); + // reserve space for member offsets + let current_member_table_offset = current_symbol_directory.reserve_bytes(symbol_count * 4); + // symbol count + current_symbol_directory.write_u32_le(symbol_count as u32); + // we assume symbol members are already in order + for index in 0..import_names.len() as u16 { + current_symbol_directory.write_u16_le(1 + index); + } + // string table again (could just copy from legacy string table above?) + for name in import_names { + current_symbol_directory.write_c_str(name); + } + // done with current symbol directory + drop(current_symbol_directory); + + // long names member not supported yet + + // import members + for (index, name) in import_names.iter().enumerate() { + let mut member = writer.start_member(dll_name); + // update member offsets + let member_offset = member.header_offset as u32; + member.data[legacy_member_table_offset + index * 4..][..4] + .copy_from_slice(&member_offset.to_be_bytes()); + member.data[current_member_table_offset + index * 4..][..4] + .copy_from_slice(&member_offset.to_le_bytes()); + // write import object: + // signature + member.write_u16_le(0); + member.write_u16_le(0xFFFF); + // version + member.write_u16_le(0); + // machine = AMD64 + member.write_u16_le(0x8664); + // time_date_stamp + member.write_u32_le(0); + // size_of_data + member.write_u32_le((dll_name.len() + 1 + name.len() + 1) as u32); + // ordinal_or_hint + member.write_u16_le(0); + // object_type | name_type = 
IMPORT_OBJECT_CODE | IMPORT_OBJECT_NAME + member.write_u16_le(1 << 2 | 0); + // data: + // name + member.write_c_str(name); + // dll_name + member.write_c_str(dll_name); + + drop(member); + } + + writer.data + } + + struct Writer { + data: Vec, + } + + impl Writer { + fn new() -> Self { + Self { data: Vec::from(*b"!\n") } + } + + fn start_member_raw(&mut self) -> Member<'_> { + let header_offset = self.data.len(); + // fill the header with blanks... + self.data.resize(header_offset + Member::HEADER_SIZE - 2, b' '); + // except for end marker + self.data.extend_from_slice(b"`\n"); + + let mut member = Member::new(&mut self.data, header_offset); + // init date, mode to default values as produced by MSVC tools + member.set_time_date_stamp(-1); + member.set_mode(0); + member + } + + fn start_member(&mut self, name: &str) -> Member<'_> { + let mut member = self.start_member_raw(); + member.set_name(name); + member + } + } + + struct Member<'a> { + data: &'a mut Vec, + header_offset: usize, + } + + impl<'a> Member<'a> { + const HEADER_SIZE: usize = 60; + + fn new(data: &'a mut Vec, header_offset: usize) -> Self { + Self { data, header_offset } + } + + fn header_slice(&mut self, offset: usize, len: usize) -> &mut [u8] { + &mut self.data[self.header_offset + offset..][..len] + } + + fn set_name(&mut self, name: &str) { + assert!(name.len() < 16, "long member names not supported yet"); + self.set_raw_name(name.as_bytes()); + self.data[self.header_offset + name.len()] = b'/'; + } + + fn set_raw_name(&mut self, raw_name: &[u8]) { + assert!(raw_name.len() <= 16, "raw name must be <= 16 bytes"); + self.header_slice(0, raw_name.len()).copy_from_slice(raw_name); + } + + fn set_time_date_stamp(&mut self, value: i32) { + self.set_decimal_field(16, 12, value); + } + + fn set_uid(&mut self, value: i32) { + self.set_decimal_field(28, 6, value); + } + + fn set_gid(&mut self, value: i32) { + self.set_decimal_field(34, 6, value); + } + + fn set_mode(&mut self, value: i32) { + use 
std::io::Write; + write!(std::io::Cursor::new(self.header_slice(40, 8)), "{value:o}") + .expect("value too large"); + } + + fn set_decimal_field(&mut self, offset: usize, size: usize, value: i32) { + use std::io::Write; + write!(std::io::Cursor::new(self.header_slice(offset, size)), "{value}") + .expect("value too large"); + } + + fn write_c_str(&mut self, data: &str) { + self.data.extend_from_slice(data.as_bytes()); + self.data.push(0); + } + + fn write_u16_le(&mut self, data: u16) { + self.data.extend_from_slice(&data.to_le_bytes()); + } + + fn write_u32_be(&mut self, data: u32) { + self.data.extend_from_slice(&data.to_be_bytes()); + } + + fn write_u32_le(&mut self, data: u32) { + self.data.extend_from_slice(&data.to_le_bytes()); + } + + fn reserve_bytes(&mut self, count: usize) -> usize { + let offset = self.data.len(); + self.data.resize(offset + count, 0); + offset + } + } + + impl<'a> Drop for Member<'a> { + fn drop(&mut self) { + let data_size = self.data.len() - self.header_offset - Self::HEADER_SIZE; + assert!(data_size < i32::MAX as usize); + self.set_decimal_field(48, 10, data_size as i32); + // pad to even address + if data_size % 2 == 1 { + self.data.push(b'\n'); + } + } } } From 86726894b243f34c26a466346a3502203cccb7eb Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Fri, 3 Nov 2023 00:02:26 +1300 Subject: [PATCH 02/11] WIP hack avoid long member names. 
--- src/archive.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index dbd634d6a..97c5eaa43 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -106,13 +106,15 @@ mod windows_import_lib { // name: * // import name; null terminated string // dll_name: * // dll name; null terminated string pub fn generate(dll_name: &str, import_names: &[&str]) -> Vec { - assert!(dll_name.len() < 16, "long member names not supported yet"); assert!(import_names.len() <= 0xFFFF, "too many import names"); // number of symbols, and members containing symbols for symbol lookup members let symbol_count = import_names.len(); let mut writer = Writer::new(); + // hack: trim dll name to 15 characters to avoid long member names + let member_name = &dll_name[..dll_name.len().min(15)]; + // legacy symbol directory let mut legacy_symbol_directory = writer.start_member_raw(); legacy_symbol_directory.set_raw_name(b"/"); @@ -150,7 +152,7 @@ mod windows_import_lib { // import members for (index, name) in import_names.iter().enumerate() { - let mut member = writer.start_member(dll_name); + let mut member = writer.start_member(member_name); // update member offsets let member_offset = member.header_offset as u32; member.data[legacy_member_table_offset + index * 4..][..4] From f46537eb578094ad5d1968b4e2f6ade168f40ffc Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Sun, 5 Nov 2023 01:07:01 +1300 Subject: [PATCH 03/11] WIP support long names, discovery of short import objects, partial import descriptor object. 
--- src/archive.rs | 704 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 588 insertions(+), 116 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 97c5eaa43..e9cd70d17 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -10,7 +10,7 @@ pub(crate) struct ArArchiveBuilderBuilder; impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box + 'a> { - Box::new(ArArchiveBuilder::new(sess, get_native_object_symbols)) + Box::new(ArArchiveBuilder::new(sess, get_import_or_native_object_symbols)) } fn create_dll_import_lib( @@ -25,7 +25,7 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { for dll_import in dll_imports { import_names.push(dll_import.name.as_str()); } - let lib_path = tmpdir.join(format!("{}.lib", lib_name)); + let lib_path = tmpdir.join(format!("{lib_name}_import.lib")); // todo: emit session error instead of expects fs::write(&lib_path, windows_import_lib::generate(lib_name, &import_names)) .expect("failed to write import library"); @@ -34,17 +34,36 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { } } +fn get_import_or_native_object_symbols( + buf: &[u8], + f: &mut dyn FnMut(&[u8]) -> std::io::Result<()>, +) -> std::io::Result { + let sig1 = u16::from_le_bytes([buf[0], buf[1]]); + let sig2 = u16::from_le_bytes([buf[2], buf[3]]); + if sig1 == 0 && sig2 == 0xFFFF { + let data = &buf[20..]; + let name_end = + data.iter().position(|&c| c == b'\0').expect("import object missing name terminator"); + let name = &data[..name_end]; + f(name)?; + Ok(true) + } else { + get_native_object_symbols(buf, f) + } +} + // todo: pull out to a proper location. Really should be in `object` crate! // todo: support ordinals // todo: support name types (e.g. verbatim+) -// todo: support long member names // todo: support windows-gnu flavor? 
// todo: provide machine // todo: remove any panics, nice errors mod windows_import_lib { + use std::ops::{Deref, DerefMut}; + // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format // - // Windows .lib files are System-V (aka. GUN) flavored ar files with a couple of extra lookup + // Windows .lib files are System-V (aka. GNU) flavored ar files with a couple of extra lookup // members. // // An archive is the 8 bytes b"!<arch>\n" @@ -61,17 +80,19 @@ mod windows_import_lib { // then size bytes of payload. If payload is odd sized, pad // to an even offset with \n. // - // You must store two extra members at the start, a legacy member lookup table member - // and the current member lookup and symbol table, both with empty ("/") names. + // You must store two extra members at the start, a standard System-V / GNU symbol lookup table + // member (the "first linker member" in the linked documentation), and the Windows symbol table, + // (the "second linker member") both with empty ("/") names. // - // The legacy table member has the name "/" with following contents, using big-endian numbers: + // The standard System-V/GNU symbol table member has the name "/" with following contents, + // using big-endian numbers: // count: u32, // number of indexed symbols // offsets: [u32, count], // file offsets to the header of the member that contains // // that symbol. // names: * // sequence of null terminated symbol names. 
// - // The current table member also has the name "/", and has the following contents, using - // little-endian numbers: + // The Windows table member also has the name "/", and has the following contents, using + // little-endian numbers, where the symbol_members must be sorted to allow binary search: // member_count: u32, // number of members // member_offsets: [u32; member_count], // file offsets to each member header // symbol_count: u32, // number of symbols @@ -86,95 +107,143 @@ mod windows_import_lib { // Then regular members follow. // // This library emits only import libraries, that is, libraries with a short import object - // describing an import from a dll. That means each member contains exactly one symbol. The member - // name doesn't seem to matter, including duplicates, we use the dll name since that's what's in the - // files generated by MSVC tools. + // describing an import from a dll. That means each member contains exactly one import + // definition, although to be usable from MSVC __declspec(dllimport) it must include an alias + // with an `__imp_` prefix. + // + // The member name doesn't seem to matter, including duplicates, we will use the dll name since + // that's what's in the files generated by MSVC tools. // // The short import object has the form: // header: - // sig1: 0u16 - // sig2: 0xFFFFu16 - // version: u16, // normally 0 - // machine: u16, // IMAGE_MACHINE_* value, e.g. 0x8664 for AMD64 - // time_date_stamp: u32, // normally 0 - // size_of_data: u32, // size following the header - // ordinal_or_hint: u16, // depending on flag - // object_type: u2, // IMPORT_OBJECT_{CODE,DATA,CONST} = 0, 1, 2 - // name_type: u3, // IMPORT_OBJECT_{ORDINAL,NAME,NAME_NO_PREFIX,NAME_UNDECORATE,NAME_EXPORTAS} = 0, 1, 2, 3, 4 - // reserved: u11, - // data: // size_of_data bytes + // 0: sig1: 0u16 + // 2: sig2: 0xFFFFu16 + // 4: version: u16, // normally 0 + // 6: machine: u16, // IMAGE_MACHINE_* value, e.g. 
0x8664 for AMD64 + // 8: time_date_stamp: u32, // normally 0 + // 12: size_of_data: u32, // size following the header + // 16: ordinal_or_hint: u16, // depending on flag + // 18: object_type: u2, // IMPORT_OBJECT_{CODE,DATA,CONST} = 0, 1, 2 + // name_type: u3, // IMPORT_OBJECT_{ORDINAL,NAME,NAME_NO_PREFIX,NAME_UNDECORATE,NAME_EXPORTAS} = 0, 1, 2, 3, 4 + // reserved: u11, + // 20: data: // size_of_data bytes // name: * // import name; null terminated string // dll_name: * // dll name; null terminated string pub fn generate(dll_name: &str, import_names: &[&str]) -> Vec { - assert!(import_names.len() <= 0xFFFF, "too many import names"); - // number of symbols, and members containing symbols for symbol lookup members - let symbol_count = import_names.len(); + // member count: one for each import_name in argument order, followed by the import + // descriptor. + let member_count = import_names.len() + 1; + + assert!(member_count <= 0xFFFF, "too many import names"); + + // foo.dll => foo so we can construct the import descriptor symbol. + // At least for the Windows system dlls, don't seem to need any further + // escaping, e.g. "api-ms-win-appmodel-runtime-l1-1-1.dll" => + // "__IMPORT_DESCRIPTOR_api-ms-win-appmodel-runtime-l1-1-1" + // Not using std::path to avoid having to handle non-unicode paths. + let mut dll_basename = String::from(dll_name); + if let Some(index) = dll_basename.rfind('.') { + dll_basename.truncate(index); + } - let mut writer = Writer::new(); + // Identify the target of a symbol + #[derive(Copy, Clone)] + enum SymbolValue { + // a short import object, specifically for import_names[.0] + Import(usize), + // the __IMPORT_DESCRIPTOR_{dll_basename} used to build the final .idata section. 
+ Descriptor, + } - // hack: trim dll name to 15 characters to avoid long member names - let member_name = &dll_name[..dll_name.len().min(15)]; + // Note we are using the behavior of BTee* that it keeps its keys in sorted order: this + // is required by the MSVC symbol table so the linker can use binary search. + let mut symbols = + std::collections::BTreeMap::, SymbolValue>::new(); - // legacy symbol directory - let mut legacy_symbol_directory = writer.start_member_raw(); - legacy_symbol_directory.set_raw_name(b"/"); - legacy_symbol_directory.write_u32_be(symbol_count as u32); - // reserve space for offsets. - let legacy_member_table_offset = legacy_symbol_directory.reserve_bytes(symbol_count * 4); - // string table - for name in import_names { - legacy_symbol_directory.write_c_str(name); + for (index, &name) in import_names.iter().enumerate() { + symbols.insert(name.into(), SymbolValue::Import(index)); + symbols.insert(format!("__imp_{name}").into(), SymbolValue::Import(index)); + } + + let import_descriptor_symbol = format!("__IMPORT_DESCRIPTOR_{dll_basename}"); + symbols.insert(import_descriptor_symbol.as_str().into(), SymbolValue::Descriptor); + + let symbol_count = symbols.len(); + + let mut writer = Writer::new(); + + // member names are all the dll_name with the MSVC tools. + let member_name = writer.member_name(dll_name); + + // Standard System-V / GNU symbol table member + let mut gnu_symbols = writer.start_member(MemberName::SymbolTable); + // member table: one entry per symbol (duplicates allowed for aliasing) + gnu_symbols.write_u32_be(symbol_count as u32); + // reserve space for member offsets. 
+ let gnu_member_table_offset = gnu_symbols.reserve_bytes(symbol_count * 4); + // symbol string table + for name in symbols.keys() { + gnu_symbols.write_c_str(name); } // done with legacy symbol directory - drop(legacy_symbol_directory); - - // current symbol directory - let mut current_symbol_directory = writer.start_member_raw(); - current_symbol_directory.set_raw_name(b"/"); - // member count: same as symbol count for import library - current_symbol_directory.write_u32_le(symbol_count as u32); - // reserve space for member offsets - let current_member_table_offset = current_symbol_directory.reserve_bytes(symbol_count * 4); - // symbol count - current_symbol_directory.write_u32_le(symbol_count as u32); - // we assume symbol members are already in order - for index in 0..import_names.len() as u16 { - current_symbol_directory.write_u16_le(1 + index); + drop(gnu_symbols); + + // MSVC tools symbol table member + let mut ms_symbols = writer.start_member(MemberName::SymbolTable); + // member offset table + ms_symbols.write_u32_le(member_count as u32); + let ms_member_table_offset = ms_symbols.reserve_bytes(member_count * 4); + // symbol table + ms_symbols.write_u32_le(symbol_count as u32); + // member index we assume symbols are in the same order as the member table. + for &value in symbols.values() { + let member_index = match value { + SymbolValue::Import(index) => index, + SymbolValue::Descriptor => import_names.len(), + }; + // Yep, it's a 1-based index. Who knows why. + // cast to u16 should be safe due to assert!() on member_count above. + ms_symbols.write_u16_le(1 + member_index as u16); } // string table again (could just copy from legacy string table above?) 
- for name in import_names { - current_symbol_directory.write_c_str(name); + for name in symbols.keys() { + ms_symbols.write_c_str(name); } // done with current symbol directory - drop(current_symbol_directory); + drop(ms_symbols); - // long names member not supported yet + writer.write_long_names(); + // can't use writer.member_name() from here - // import members + // short import object members for (index, name) in import_names.iter().enumerate() { let mut member = writer.start_member(member_name); // update member offsets let member_offset = member.header_offset as u32; - member.data[legacy_member_table_offset + index * 4..][..4] - .copy_from_slice(&member_offset.to_be_bytes()); - member.data[current_member_table_offset + index * 4..][..4] - .copy_from_slice(&member_offset.to_le_bytes()); + // fixme: also update the GNU alias symbol entry + member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); + + member.set_u32_le(ms_member_table_offset + index * 4, member_offset); // write import object: // signature - member.write_u16_le(0); - member.write_u16_le(0xFFFF); + member.write_u16_le(object::pe::IMAGE_FILE_MACHINE_UNKNOWN); + member.write_u16_le(object::pe::IMPORT_OBJECT_HDR_SIG2); // version member.write_u16_le(0); // machine = AMD64 - member.write_u16_le(0x8664); + member.write_u16_le(object::pe::IMAGE_FILE_MACHINE_AMD64); // time_date_stamp member.write_u32_le(0); // size_of_data - member.write_u32_le((dll_name.len() + 1 + name.len() + 1) as u32); + member.write_u32_le((name.len() + 1 + dll_name.len() + 1) as u32); // ordinal_or_hint member.write_u16_le(0); // object_type | name_type = IMPORT_OBJECT_CODE | IMPORT_OBJECT_NAME - member.write_u16_le(1 << 2 | 0); + member.write_u16_le({ + use object::pe::*; + IMPORT_OBJECT_CODE << IMPORT_OBJECT_TYPE_SHIFT + | IMPORT_OBJECT_NAME << IMPORT_OBJECT_NAME_SHIFT + }); // data: // name member.write_c_str(name); @@ -184,64 +253,493 @@ mod windows_import_lib { drop(member); } - writer.data + // import 
descriptor member + let mut import_descriptor = writer.start_member(member_name); + let member_offset = import_descriptor.header_offset as u32; + import_descriptor + .set_u32_be(gnu_member_table_offset + import_names.len() * 4, member_offset); + import_descriptor + .set_u32_le(ms_member_table_offset + import_names.len() * 4, member_offset); + // This is a COFF object containing two sections: + // .idata$2: import directory entry: + // 20 bytes, all 0 on disk, an Import Directory Table entry + // filled out by the linker with relocations. + // .idata$6: DLL name: + // The null terminated file name of the dll + // The import directory entry has three relocations: + // 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 + // 12: [2] name rva => points to DLL name section .idata$6 + // 16: [4] import address table rva => points to UNDEF symbol .idata$5 + // The COFF symbol table contains the following symbols: + // [0]: external __IMPORT_DESCRIPTOR_{dll_basename} => section 1 + // [1]: section .idata$2 => section 1 + // [2]: static .idata$6 => section 2 + // [3]: section .idata$4 => undef + // [4]: section .idata$5 => undef + // Unfortunately, the object crate doesn't support writing COFF objects. + // For now, continue to use dumb explicit writer code, but this should be cleaned up. 
+ // + // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#coff-file-header-object-and-image + // + // COFF file: + // 0: header: + // 0: machine: u16, // an IMAGE_MACHINE_* value + // 2: number_of_sections: u16, + // 4: time_date_stamp: u32, // 0 in MSVC tools + // 8: pointer_to_symbol_table: u32, // offset in COFF file to COFF symbol table + // 12: number_of_symbols: u32, // number of entries in symbol table + // 16: size_of_optional_header: u16, // 0 for object file + // 18: characteristics: u16, // union of IMAGE_FILE_* flags; 0 for our output + // 20: sections table: + // 0: name: [u8; 8], // null padded section name + // 8: virtual_size: u32, // 0 for object files + // 12: virtual_address: u32, // 0 for object files + // 16: size_of_raw_data: u32, // size of section on disk + // 20: pointer_to_raw_data: u32, // section data COFF file offset + // 24: pointer_to_relocations: u32, // relocation table COFF file offset + // 28: pointer_to_line_number: u32, // 0 + // 32: number_of_relocations: u16, // number of entries in relocation tables + // 34: number_of_line_numbers: u16, // 0 + // 36: characteristics: u32, // union of IMAGE_SCN_* flags + // section in sections: + // section.pointer_to_raw_data: raw_data: [u8; section.size_of_raw_data] + // section.pointer_to_relocations: + // 0: virtual_address: u32, // rva of relocation + // 4: symbol_table_index: u32, // index into COFF symbol table + // 8: type: u16, // IMAGE_REL_* value + // pointer_to_symbol_table: + // i in 0..number_of_symbols: + // 0: name: [u8; 8] | { 0u32; offset_in_string_table: u32 }; + // 8: value: u32, + // 12: section_number: u16, // IMAGE_SYM_* or 1-based section index + // 14: base_type: u8, // IMAGE_SYM_TYPE_*, always NULL + // 15: complex_type: u8, // IMAGE_SYM_DTYPE_*, always NULL for our output + // 16: storage_class: u8, // IMAGE_SYM_CLASS_* + // 17: number_of_aux_symbols: u8, // 0 for our output + // pointer_to_symbol_table + number_of_symbols * 18: string_table: + // 
sequence of null-terminated strings + + // COFF File header: + import_descriptor.write_u16_le(object::pe::IMAGE_FILE_MACHINE_AMD64); + // number_of_sections + import_descriptor.write_u16_le(2); + // time_date_stamp + import_descriptor.write_u32_le(0); + // pointer_to_symbol_table + let pointer_to_symbol_table_offset = import_descriptor.reserve_bytes(4); + // number_of_symbols + import_descriptor.write_u32_le(3); + // size_of_optional_header: + import_descriptor.write_u16_le(0); + // characteristics: + import_descriptor.write_u16_le(0); + + // Section table: + // [0] .idata$2: import directory entry + // name: + import_descriptor.write_pad_len(".idata$2".as_bytes(), 0u8, 8); + // virtual_size: + import_descriptor.write_u32_le(0); + // virtual_address: + import_descriptor.write_u32_le(0); + // size_of_raw_data: + import_descriptor.write_u32_le(20); + // pointer_to_raw_data: + let import_directory_entry_section_pointer_to_raw_data_offset = + import_descriptor.reserve_bytes(4); + // pointer_to_relocations: + let import_directory_entry_section_pointer_to_relocations_offset = + import_descriptor.reserve_bytes(4); + // pointer_to_line_number: + import_descriptor.write_u32_le(0); + // number_of_relocations: + import_descriptor.write_u16_le(3); + // number_of_line_numbers: + import_descriptor.write_u16_le(0); + // characteristics: + import_descriptor.write_u32_le( + object::pe::IMAGE_SCN_ALIGN_4BYTES + | object::pe::IMAGE_SCN_CNT_INITIALIZED_DATA + | object::pe::IMAGE_SCN_MEM_READ + | object::pe::IMAGE_SCN_MEM_WRITE, + ); + + // [1] .idata$6: dll name + // name: + import_descriptor.write_pad_len(b".idata$6", 0u8, 8); + // virtual_size: + import_descriptor.write_u32_le(0); + // virtual_address: + import_descriptor.write_u32_le(0); + // size_of_raw_data: (padding to alignment shouldn't be needed, but this matches MSVC) + import_descriptor.write_u32_le((dll_name.len() as u32 + 1).next_multiple_of(2)); + // pointer_to_raw_data: + let dll_name_section_pointer_to_raw_data_offset 
= import_descriptor.reserve_bytes(4); + // pointer_to_relocations: + import_descriptor.write_u32_le(0); + // pointer_to_line_number: + import_descriptor.write_u32_le(0); + // number_of_relocations: + import_descriptor.write_u16_le(0); + // number_of_line_numbers: + import_descriptor.write_u16_le(0); + // characteristics: + import_descriptor.write_u32_le( + object::pe::IMAGE_SCN_ALIGN_2BYTES + | object::pe::IMAGE_SCN_CNT_INITIALIZED_DATA + | object::pe::IMAGE_SCN_MEM_READ + | object::pe::IMAGE_SCN_MEM_WRITE, + ); + + // [0] section .idata$2 data + let import_directory_entry_section_pointer_to_raw_data = + import_descriptor.data.len() - import_descriptor.header_offset; + import_descriptor.set_u32_le( + import_directory_entry_section_pointer_to_raw_data_offset, + import_directory_entry_section_pointer_to_raw_data as u32, + ); + import_descriptor.reserve_bytes(20); + // [0] section .idata$2 relocations + let import_directory_entry_section_pointer_to_relocations = + import_descriptor.data.len() - import_descriptor.header_offset; + import_descriptor.set_u32_le( + import_directory_entry_section_pointer_to_relocations_offset, + import_directory_entry_section_pointer_to_relocations as u32, + ); + // relocation 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 + import_descriptor.write_u32_le(0); + import_descriptor.write_u32_le(3); + import_descriptor.write_u16_le(object::pe::IMAGE_REL_AMD64_ADDR32NB); + // relocation 1: [2] name rva => points to DLL name section .idata$6 + import_descriptor.write_u32_le(12); + import_descriptor.write_u32_le(2); + import_descriptor.write_u16_le(object::pe::IMAGE_REL_AMD64_ADDR32NB); + // relocation 2: [4] import address table rva => points to UNDEF symbol .idata$5 + import_descriptor.write_u32_le(16); + import_descriptor.write_u32_le(4); + import_descriptor.write_u16_le(object::pe::IMAGE_REL_AMD64_ADDR32NB); + + // [1] section .idata$6 data + let dll_name_section_pointer_to_raw_data = + import_descriptor.data.len() - 
import_descriptor.header_offset; + import_descriptor.set_u32_le( + dll_name_section_pointer_to_raw_data_offset, + dll_name_section_pointer_to_raw_data as u32, + ); + import_descriptor.write_c_str(dll_name); + // pad to even offset + import_descriptor.align(2, 0u8); + + let mut symbol_string_table = vec![]; + // COFF symbol table: + let pointer_to_symbol_table = + import_descriptor.data.len() - import_descriptor.header_offset; + import_descriptor + .set_u32_le(pointer_to_symbol_table_offset, (pointer_to_symbol_table) as u32); + // [0] external __IMPORT_DESCRIPTOR_{dll_basename} => section 1 + // name: + // import_descriptor_symbol is definitely longer than 8, so use the long name format. + symbol_string_table.extend_from_slice(import_descriptor_symbol.as_bytes()); + symbol_string_table.push(0); + import_descriptor.write_u32_le(0); + import_descriptor.write_u32_le(0); + // value: + import_descriptor.write_u32_le(0); + // section_number: + import_descriptor.write_u16_le(1); + // base_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); + // complex_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); + // storage_class: + import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_EXTERNAL); + // number_of_aux_symbols: + import_descriptor.write_u8(0); + // [1] section .idata$2 => section 1 + // name: + // ".idata$2" is definitely shorter than 8, so use the short name format. 
+ import_descriptor.write_pad_len(b".idata$2", 0u8, 8); + // value: + import_descriptor.write_u32_le(0); + // section_number: + import_descriptor.write_u16_le(1); + // base_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); + // complex_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); + // storage_class: + import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_SECTION); + // number_of_aux_symbols: + import_descriptor.write_u8(0); + // [2] static .idata$6 => section 2 + // name: + // ".idata$6" is definitely shorter than 8, so use the short name format. + import_descriptor.write_pad_len(b".idata$6", 0u8, 8); + // value: + import_descriptor.write_u32_le(0); + // section_number: + import_descriptor.write_u16_le(2); + // base_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); + // complex_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); + // storage_class: + import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_STATIC); + // number_of_aux_symbols: + import_descriptor.write_u8(0); + // [3] section .idata$4 => undef + // name: + import_descriptor.write_pad_len(b".idata$4", 0u8, 8); + // value: + import_descriptor.write_u32_le(0); + // section_number: + import_descriptor.write_i16_le(object::pe::IMAGE_SYM_UNDEFINED as i16); + // base_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); + // complex_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); + // storage_class: + import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_SECTION); + // number_of_aux_symbols: + import_descriptor.write_u8(0); + // [4] section .idata$5 => undef + // name: + import_descriptor.write_pad_len(b".idata$5", 0u8, 8); + // value: + import_descriptor.write_u32_le(0); + // section_number: + import_descriptor.write_i16_le(object::pe::IMAGE_SYM_UNDEFINED as i16); + // base_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); + // 
complex_type: + import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); + // storage_class: + import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_SECTION); + // number_of_aux_symbols: + import_descriptor.write_u8(0); + + // COFF symbol string table + import_descriptor.write(&symbol_string_table); + + drop(import_descriptor); + + writer.data.data } - struct Writer { + #[derive(Copy, Clone)] + enum MemberName { + SymbolTable, // "/" + LongNames, // "//" + Short([u8; 16]), // "{0}/" + Long(usize), // "/{0}" + } + + struct Data { data: Vec, } + impl Data { + fn new() -> Self { + Self { data: vec![] } + } + + fn len(&self) -> usize { + self.data.len() + } + + fn write(&mut self, data: &[u8]) { + self.data.extend_from_slice(data); + } + + fn slice(&mut self, offset: usize, len: usize) -> &mut [u8] { + &mut self.data[offset..offset + len] + } + + fn write_c_str(&mut self, data: &str) { + self.data.extend_from_slice(data.as_bytes()); + self.data.push(0); + } + + fn write_pad_len(&mut self, data: &[u8], pad: u8, len: usize) { + assert!(data.len() <= len); + let offset = self.data.len(); + self.data.extend_from_slice(data); + self.data.resize(offset + len, pad); + } + + fn write_u8(&mut self, data: u8) { + self.data.push(data); + } + + fn write_i16_le(&mut self, data: i16) { + self.data.extend_from_slice(&data.to_le_bytes()); + } + + fn write_u16_le(&mut self, data: u16) { + self.data.extend_from_slice(&data.to_le_bytes()); + } + + fn write_u32_be(&mut self, data: u32) { + self.data.extend_from_slice(&data.to_be_bytes()); + } + + fn write_u32_le(&mut self, data: u32) { + self.data.extend_from_slice(&data.to_le_bytes()); + } + + fn reserve_bytes(&mut self, count: usize) -> usize { + let offset = self.data.len(); + self.data.resize(offset + count, 0); + offset + } + + fn set_u32_be(&mut self, offset: usize, data: u32) { + self.data[offset..][..4].copy_from_slice(&data.to_be_bytes()); + } + + fn set_u32_le(&mut self, offset: usize, data: u32) { + 
self.data[offset..][..4].copy_from_slice(&data.to_le_bytes()); + } + + fn align(&mut self, alignment: usize, pad: u8) { + let offset = self.data.len(); + self.data.resize(offset.next_multiple_of(alignment), pad); + } + } + + struct Writer { + data: Data, + long_names: Option>, + } + + impl Deref for Writer { + type Target = Data; + + fn deref(&self) -> &Self::Target { + &self.data + } + } + + impl DerefMut for Writer { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.data + } + } + impl Writer { fn new() -> Self { - Self { data: Vec::from(*b"!\n") } + let long_names = Some(vec![]); + let mut data = Data::new(); + data.write(b"!\n"); + Self { data, long_names } + } + + fn member_name(&mut self, name: &str) -> MemberName { + let Some(ref mut long_buf) = self.long_names else { + panic!("already wrote long names member"); + }; + + if name.len() < 16 { + let mut buf = [0u8; 16]; + buf[..name.len()].copy_from_slice(name.as_bytes()); + buf[name.len()] = b'/'; + buf[name.len() + 1..].fill(b' '); + MemberName::Short(buf) + } else { + let name = std::ffi::CString::new(name).expect("names cannot contain \\0"); + let name = name.as_bytes_with_nul(); + + // Find the name *including the null terminator* in the existing long names buffer. + // Note, this could find "bar\0" in "foobar\0", but that seems to be fine according + // to the spec? It still counts as a null terminated "bar" string. + let offset = long_buf + .windows(name.len()) + .position(|window| window == name) + .unwrap_or_else(|| { + // Didn't already have it, so add it to the end. + let offset = long_buf.len(); + long_buf.extend_from_slice(name); + offset + }); + MemberName::Long(offset) + } } - fn start_member_raw(&mut self) -> Member<'_> { + fn start_member(&mut self, name: MemberName) -> Member<'_> { let header_offset = self.data.len(); // fill the header with blanks... 
- self.data.resize(header_offset + Member::HEADER_SIZE - 2, b' '); + self.data.data.resize(header_offset + Member::HEADER_SIZE - 2, b' '); // except for end marker - self.data.extend_from_slice(b"`\n"); + self.data.write(b"`\n"); let mut member = Member::new(&mut self.data, header_offset); - // init date, mode to default values as produced by MSVC tools + member.set_name(name); + // init date, mode to default values as produced by MSVC tools. + // uid, gid are already defaulted to blank. member.set_time_date_stamp(-1); member.set_mode(0); member } - fn start_member(&mut self, name: &str) -> Member<'_> { - let mut member = self.start_member_raw(); - member.set_name(name); - member + fn write_long_names(&mut self) { + let data = self.long_names.take().expect("already wrote long names member"); + let mut member = self.start_member(MemberName::LongNames); + member.write(&data); + drop(member); } } struct Member<'a> { - data: &'a mut Vec, + data: &'a mut Data, header_offset: usize, } + impl Deref for Member<'_> { + type Target = Data; + + fn deref(&self) -> &Self::Target { + self.data + } + } + + impl DerefMut for Member<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.data + } + } + impl<'a> Member<'a> { const HEADER_SIZE: usize = 60; - fn new(data: &'a mut Vec, header_offset: usize) -> Self { + fn new(data: &'a mut Data, header_offset: usize) -> Self { Self { data, header_offset } } fn header_slice(&mut self, offset: usize, len: usize) -> &mut [u8] { - &mut self.data[self.header_offset + offset..][..len] - } - - fn set_name(&mut self, name: &str) { - assert!(name.len() < 16, "long member names not supported yet"); - self.set_raw_name(name.as_bytes()); - self.data[self.header_offset + name.len()] = b'/'; + self.data.slice(self.header_offset + offset, len) } - fn set_raw_name(&mut self, raw_name: &[u8]) { - assert!(raw_name.len() <= 16, "raw name must be <= 16 bytes"); - self.header_slice(0, raw_name.len()).copy_from_slice(raw_name); + fn set_name(&mut 
self, name: MemberName) { + let mut field = self.header_slice(0, 16); + match name { + MemberName::SymbolTable => { + field[0..1].copy_from_slice(b"/"); + field[1..].fill(b' '); + } + MemberName::LongNames => { + field[0..2].copy_from_slice(b"//"); + field[2..].fill(b' '); + } + MemberName::Short(name) => { + field.copy_from_slice(&name); + // already includes trailing / and spaces + } + MemberName::Long(offset) => { + use std::io::Write; + field.fill(b' '); + write!(field, "/{offset}").expect("writing long name should not fail"); + } + } } fn set_time_date_stamp(&mut self, value: i32) { @@ -267,29 +765,6 @@ mod windows_import_lib { write!(std::io::Cursor::new(self.header_slice(offset, size)), "{value}") .expect("value too large"); } - - fn write_c_str(&mut self, data: &str) { - self.data.extend_from_slice(data.as_bytes()); - self.data.push(0); - } - - fn write_u16_le(&mut self, data: u16) { - self.data.extend_from_slice(&data.to_le_bytes()); - } - - fn write_u32_be(&mut self, data: u32) { - self.data.extend_from_slice(&data.to_be_bytes()); - } - - fn write_u32_le(&mut self, data: u32) { - self.data.extend_from_slice(&data.to_le_bytes()); - } - - fn reserve_bytes(&mut self, count: usize) -> usize { - let offset = self.data.len(); - self.data.resize(offset + count, 0); - offset - } } impl<'a> Drop for Member<'a> { @@ -297,10 +772,7 @@ mod windows_import_lib { let data_size = self.data.len() - self.header_offset - Self::HEADER_SIZE; assert!(data_size < i32::MAX as usize); self.set_decimal_field(48, 10, data_size as i32); - // pad to even address - if data_size % 2 == 1 { - self.data.push(b'\n'); - } + self.align(2, b'\n'); } } } From 984ed7ef23beda881f6031678dd681cf15871a8e Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Sun, 5 Nov 2023 20:37:45 +1300 Subject: [PATCH 04/11] WIP move to using object::pe::* types to write, fix __IMPORT_DESCRIPTOR_* export --- src/archive.rs | 432 +++++++++++++++++++++++-------------------------- 1 file changed, 203 insertions(+), 229 
deletions(-) diff --git a/src/archive.rs b/src/archive.rs index e9cd70d17..da4a670bb 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -52,15 +52,27 @@ fn get_import_or_native_object_symbols( } } -// todo: pull out to a proper location. Really should be in `object` crate! -// todo: support ordinals -// todo: support name types (e.g. verbatim+) -// todo: support windows-gnu flavor? -// todo: provide machine -// todo: remove any panics, nice errors mod windows_import_lib { + // todo: pull out to a proper location. Really should be in `object` crate! + // todo: support ordinals + // todo: support name types (e.g. verbatim+) + // todo: support windows-gnu flavor? + // todo: provide machine + // todo: remove any panics, nice errors + use std::ops::{Deref, DerefMut}; + use object::pe::*; + use object::{LittleEndian as LE, U16Bytes, U32Bytes, U16, U32}; + + fn u16(value: u16) -> U16 { + U16Bytes::new(LE, value) + } + + fn u32(value: u32) -> U32 { + U32Bytes::new(LE, value) + } + // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format // // Windows .lib files are System-V (aka. GNU) flavored ar files with a couple of extra lookup @@ -106,11 +118,6 @@ mod windows_import_lib { // // Then regular members follow. // - // This library emits only import libraries, that is, libraries with a short import object - // describing an import from a dll. That means each member contains exactly one import - // definition, although to be usable from MSVC __declspec(dllimport) it must include an alias - // with an `__imp_` prefix. - // // The member name doesn't seem to matter, including duplicates, we will use the dll name since // that's what's in the files generated by MSVC tools. 
// @@ -129,7 +136,8 @@ mod windows_import_lib { // 20: data: // size_of_data bytes // name: * // import name; null terminated string // dll_name: * // dll name; null terminated string - pub fn generate(dll_name: &str, import_names: &[&str]) -> Vec { + + pub(crate) fn generate(dll_name: &str, import_names: &[&str]) -> Vec { // member count: one for each import_name in argument order, followed by the import // descriptor. let member_count = import_names.len() + 1; @@ -220,34 +228,30 @@ mod windows_import_lib { let mut member = writer.start_member(member_name); // update member offsets let member_offset = member.header_offset as u32; - // fixme: also update the GNU alias symbol entry - member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); + // Updating GNU symbol table is a bit messy with the aliases + for (member_index, symbol) in symbols.values().enumerate() { + if let SymbolValue::Import(symbol_index) = symbol { + if *symbol_index == index { + member + .set_u32_be(gnu_member_table_offset + member_index * 4, member_offset); + } + } + } member.set_u32_le(ms_member_table_offset + index * 4, member_offset); - // write import object: - // signature - member.write_u16_le(object::pe::IMAGE_FILE_MACHINE_UNKNOWN); - member.write_u16_le(object::pe::IMPORT_OBJECT_HDR_SIG2); - // version - member.write_u16_le(0); - // machine = AMD64 - member.write_u16_le(object::pe::IMAGE_FILE_MACHINE_AMD64); - // time_date_stamp - member.write_u32_le(0); - // size_of_data - member.write_u32_le((name.len() + 1 + dll_name.len() + 1) as u32); - // ordinal_or_hint - member.write_u16_le(0); - // object_type | name_type = IMPORT_OBJECT_CODE | IMPORT_OBJECT_NAME - member.write_u16_le({ - use object::pe::*; - IMPORT_OBJECT_CODE << IMPORT_OBJECT_TYPE_SHIFT - | IMPORT_OBJECT_NAME << IMPORT_OBJECT_NAME_SHIFT + + member.write_pod(&ImportObjectHeader { + sig1: u16(IMAGE_FILE_MACHINE_UNKNOWN), + sig2: u16(IMPORT_OBJECT_HDR_SIG2), + version: u16(0), + machine: 
u16(IMAGE_FILE_MACHINE_AMD64), + time_date_stamp: u32(0), + size_of_data: u32((name.len() + 1 + dll_name.len() + 1) as u32), + ordinal_or_hint: u16(0), + name_type: u16(IMPORT_OBJECT_CODE << IMPORT_OBJECT_TYPE_SHIFT + | IMPORT_OBJECT_NAME << IMPORT_OBJECT_NAME_SHIFT), }); - // data: - // name member.write_c_str(name); - // dll_name member.write_c_str(dll_name); drop(member); @@ -260,17 +264,17 @@ mod windows_import_lib { .set_u32_be(gnu_member_table_offset + import_names.len() * 4, member_offset); import_descriptor .set_u32_le(ms_member_table_offset + import_names.len() * 4, member_offset); - // This is a COFF object containing two sections: + // This is a COFF object containing 2 sections: // .idata$2: import directory entry: // 20 bytes, all 0 on disk, an Import Directory Table entry // filled out by the linker with relocations. // .idata$6: DLL name: // The null terminated file name of the dll - // The import directory entry has three relocations: + // The import directory entry has 3 relocations: // 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 // 12: [2] name rva => points to DLL name section .idata$6 // 16: [4] import address table rva => points to UNDEF symbol .idata$5 - // The COFF symbol table contains the following symbols: + // The COFF symbol table contains 5 symbols: // [0]: external __IMPORT_DESCRIPTOR_{dll_basename} => section 1 // [1]: section .idata$2 => section 1 // [2]: static .idata$6 => section 2 @@ -317,209 +321,187 @@ mod windows_import_lib { // 16: storage_class: u8, // IMAGE_SYM_CLASS_* // 17: number_of_aux_symbols: u8, // 0 for our output // pointer_to_symbol_table + number_of_symbols * 18: string_table: - // sequence of null-terminated strings + // 0: string_table_size: u32, // including this size + // 4: sequence of null-terminated strings // COFF File header: - import_descriptor.write_u16_le(object::pe::IMAGE_FILE_MACHINE_AMD64); - // number_of_sections - import_descriptor.write_u16_le(2); - // time_date_stamp - 
import_descriptor.write_u32_le(0); - // pointer_to_symbol_table - let pointer_to_symbol_table_offset = import_descriptor.reserve_bytes(4); - // number_of_symbols - import_descriptor.write_u32_le(3); - // size_of_optional_header: - import_descriptor.write_u16_le(0); - // characteristics: - import_descriptor.write_u16_le(0); + let coff_file_offset = import_descriptor.data.len(); + + import_descriptor.write_pod(&ImageFileHeader { + machine: u16(IMAGE_FILE_MACHINE_AMD64), + number_of_sections: u16(2), + time_date_stamp: u32(0), + pointer_to_symbol_table: u32(0), // filled out later + number_of_symbols: u32(5), + size_of_optional_header: u16(0), + characteristics: u16(0), + }); // Section table: // [0] .idata$2: import directory entry - // name: - import_descriptor.write_pad_len(".idata$2".as_bytes(), 0u8, 8); - // virtual_size: - import_descriptor.write_u32_le(0); - // virtual_address: - import_descriptor.write_u32_le(0); - // size_of_raw_data: - import_descriptor.write_u32_le(20); - // pointer_to_raw_data: - let import_directory_entry_section_pointer_to_raw_data_offset = - import_descriptor.reserve_bytes(4); - // pointer_to_relocations: - let import_directory_entry_section_pointer_to_relocations_offset = - import_descriptor.reserve_bytes(4); - // pointer_to_line_number: - import_descriptor.write_u32_le(0); - // number_of_relocations: - import_descriptor.write_u16_le(3); - // number_of_line_numbers: - import_descriptor.write_u16_le(0); - // characteristics: - import_descriptor.write_u32_le( - object::pe::IMAGE_SCN_ALIGN_4BYTES - | object::pe::IMAGE_SCN_CNT_INITIALIZED_DATA - | object::pe::IMAGE_SCN_MEM_READ - | object::pe::IMAGE_SCN_MEM_WRITE, - ); + let import_directory_entry_section_offset = import_descriptor.data.len(); + import_descriptor.write_pod(&ImageSectionHeader { + name: *b".idata$2", + virtual_size: u32(0), + virtual_address: u32(0), + size_of_raw_data: u32(20), + pointer_to_raw_data: u32(0), // filled out later + pointer_to_relocations: u32(0), // filled 
out later + pointer_to_linenumbers: u32(0), // no COFF linenumbers + number_of_relocations: u16(3), + number_of_linenumbers: u16(0), + characteristics: u32(IMAGE_SCN_ALIGN_4BYTES + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE), + }); // [1] .idata$6: dll name - // name: - import_descriptor.write_pad_len(b".idata$6", 0u8, 8); - // virtual_size: - import_descriptor.write_u32_le(0); - // virtual_address: - import_descriptor.write_u32_le(0); - // size_of_raw_data: (padding to alignment shouldn't be needed, but this matches MSVC) - import_descriptor.write_u32_le((dll_name.len() as u32 + 1).next_multiple_of(2)); - // pointer_to_raw_data: - let dll_name_section_pointer_to_raw_data_offset = import_descriptor.reserve_bytes(4); - // pointer_to_relocations: - import_descriptor.write_u32_le(0); - // pointer_to_line_number: - import_descriptor.write_u32_le(0); - // number_of_relocations: - import_descriptor.write_u16_le(0); - // number_of_line_numbers: - import_descriptor.write_u16_le(0); - // characteristics: - import_descriptor.write_u32_le( - object::pe::IMAGE_SCN_ALIGN_2BYTES - | object::pe::IMAGE_SCN_CNT_INITIALIZED_DATA - | object::pe::IMAGE_SCN_MEM_READ - | object::pe::IMAGE_SCN_MEM_WRITE, - ); + let dll_name_section_offset = import_descriptor.data.len(); + import_descriptor.write_pod(&ImageSectionHeader { + name: *b".idata$6", + virtual_size: u32(0), + virtual_address: u32(0), + size_of_raw_data: u32((dll_name.len() as u32 + 1).next_multiple_of(2)), + pointer_to_raw_data: u32(0), // filled out later + pointer_to_relocations: u32(0), // no relocations + pointer_to_linenumbers: u32(0), + number_of_relocations: u16(0), + number_of_linenumbers: u16(0), + characteristics: u32(IMAGE_SCN_ALIGN_2BYTES + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE), + }); // [0] section .idata$2 data - let import_directory_entry_section_pointer_to_raw_data = - import_descriptor.data.len() - import_descriptor.header_offset; - 
import_descriptor.set_u32_le( - import_directory_entry_section_pointer_to_raw_data_offset, - import_directory_entry_section_pointer_to_raw_data as u32, + import_descriptor.data.set_u32_le( + import_directory_entry_section_offset + 20, // .pointer_to_raw_data + (import_descriptor.data.len() - coff_file_offset) as u32, ); import_descriptor.reserve_bytes(20); // [0] section .idata$2 relocations - let import_directory_entry_section_pointer_to_relocations = - import_descriptor.data.len() - import_descriptor.header_offset; - import_descriptor.set_u32_le( - import_directory_entry_section_pointer_to_relocations_offset, - import_directory_entry_section_pointer_to_relocations as u32, + import_descriptor.data.set_u32_le( + import_directory_entry_section_offset + 24, // .pointer_to_relocations + (import_descriptor.data.len() - coff_file_offset) as u32, ); // relocation 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 - import_descriptor.write_u32_le(0); - import_descriptor.write_u32_le(3); - import_descriptor.write_u16_le(object::pe::IMAGE_REL_AMD64_ADDR32NB); + import_descriptor.write_pod(&ImageRelocation { + virtual_address: u32(0), + symbol_table_index: u32(3), + typ: u16(IMAGE_REL_AMD64_ADDR32NB), + }); // relocation 1: [2] name rva => points to DLL name section .idata$6 - import_descriptor.write_u32_le(12); - import_descriptor.write_u32_le(2); - import_descriptor.write_u16_le(object::pe::IMAGE_REL_AMD64_ADDR32NB); + import_descriptor.write_pod(&ImageRelocation { + virtual_address: u32(12), + symbol_table_index: u32(2), + typ: u16(IMAGE_REL_AMD64_ADDR32NB), + }); // relocation 2: [4] import address table rva => points to UNDEF symbol .idata$5 - import_descriptor.write_u32_le(16); - import_descriptor.write_u32_le(4); - import_descriptor.write_u16_le(object::pe::IMAGE_REL_AMD64_ADDR32NB); + import_descriptor.write_pod(&ImageRelocation { + virtual_address: u32(16), + symbol_table_index: u32(4), + typ: u16(IMAGE_REL_AMD64_ADDR32NB), + }); // [1] section 
.idata$6 data - let dll_name_section_pointer_to_raw_data = - import_descriptor.data.len() - import_descriptor.header_offset; - import_descriptor.set_u32_le( - dll_name_section_pointer_to_raw_data_offset, - dll_name_section_pointer_to_raw_data as u32, + import_descriptor.data.set_u32_le( + dll_name_section_offset + 20, // .pointer_to_raw_data + (import_descriptor.data.len() - coff_file_offset) as u32, ); import_descriptor.write_c_str(dll_name); - // pad to even offset import_descriptor.align(2, 0u8); - let mut symbol_string_table = vec![]; + struct CoffStringTable(Vec); + + impl CoffStringTable { + fn new() -> Self { + Self(vec![]) + } + + fn get_symbol_raw_name(&mut self, value: &str) -> [u8; 8] { + let mut result = [0u8; 8]; + if value.len() > 8 { + // add 4 for the string table length + let offset = 4 + self + .0 + .windows(value.len()) + .position(|window| window == value.as_bytes()) + .unwrap_or_else(|| { + let offset = self.0.len(); + self.0.extend_from_slice(value.as_bytes()); + self.0.push(0); + offset + }); + result[4..].copy_from_slice(&u32::to_le_bytes(offset as u32)); + } else { + result.copy_from_slice(value.as_bytes()) + } + result + } + } + + let mut coff_string_table = CoffStringTable::new(); + // COFF symbol table: - let pointer_to_symbol_table = - import_descriptor.data.len() - import_descriptor.header_offset; - import_descriptor - .set_u32_le(pointer_to_symbol_table_offset, (pointer_to_symbol_table) as u32); + import_descriptor.data.set_u32_le( + coff_file_offset + 8, + (import_descriptor.data.len() - coff_file_offset) as u32, + ); + + // A complicated way to say 0. + let null_type = IMAGE_SYM_DTYPE_NULL << 8 | IMAGE_SYM_TYPE_NULL; + // [0] external __IMPORT_DESCRIPTOR_{dll_basename} => section 1 - // name: - // import_descriptor_symbol is definitely longer than 8, so use the long name format. 
- symbol_string_table.extend_from_slice(import_descriptor_symbol.as_bytes()); - symbol_string_table.push(0); - import_descriptor.write_u32_le(0); - import_descriptor.write_u32_le(0); - // value: - import_descriptor.write_u32_le(0); - // section_number: - import_descriptor.write_u16_le(1); - // base_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); - // complex_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); - // storage_class: - import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_EXTERNAL); - // number_of_aux_symbols: - import_descriptor.write_u8(0); + import_descriptor.write_pod(&ImageSymbol { + name: coff_string_table.get_symbol_raw_name(&import_descriptor_symbol), + value: u32(0), + section_number: u16(1), + typ: u16(null_type), + storage_class: IMAGE_SYM_CLASS_EXTERNAL, + number_of_aux_symbols: 0, + }); // [1] section .idata$2 => section 1 - // name: - // ".idata$2" is definitely shorter than 8, so use the short name format. - import_descriptor.write_pad_len(b".idata$2", 0u8, 8); - // value: - import_descriptor.write_u32_le(0); - // section_number: - import_descriptor.write_u16_le(1); - // base_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); - // complex_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); - // storage_class: - import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_SECTION); - // number_of_aux_symbols: - import_descriptor.write_u8(0); + import_descriptor.write_pod(&ImageSymbol { + name: coff_string_table.get_symbol_raw_name(".idata$2"), + value: u32(0), + section_number: u16(1), + typ: u16(null_type), + storage_class: IMAGE_SYM_CLASS_SECTION, + number_of_aux_symbols: 0, + }); // [2] static .idata$6 => section 2 - // name: - // ".idata$6" is definitely shorter than 8, so use the short name format. 
- import_descriptor.write_pad_len(b".idata$6", 0u8, 8); - // value: - import_descriptor.write_u32_le(0); - // section_number: - import_descriptor.write_u16_le(2); - // base_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); - // complex_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); - // storage_class: - import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_STATIC); - // number_of_aux_symbols: - import_descriptor.write_u8(0); + import_descriptor.write_pod(&ImageSymbol { + name: coff_string_table.get_symbol_raw_name(".idata$6"), + value: u32(0), + section_number: u16(2), + typ: u16(null_type), + storage_class: IMAGE_SYM_CLASS_STATIC, + number_of_aux_symbols: 0, + }); // [3] section .idata$4 => undef - // name: - import_descriptor.write_pad_len(b".idata$4", 0u8, 8); - // value: - import_descriptor.write_u32_le(0); - // section_number: - import_descriptor.write_i16_le(object::pe::IMAGE_SYM_UNDEFINED as i16); - // base_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); - // complex_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); - // storage_class: - import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_SECTION); - // number_of_aux_symbols: - import_descriptor.write_u8(0); + import_descriptor.write_pod(&ImageSymbol { + name: coff_string_table.get_symbol_raw_name(".idata$4"), + value: u32(0), + section_number: u16(IMAGE_SYM_UNDEFINED as u16), + typ: u16(null_type), + storage_class: IMAGE_SYM_CLASS_SECTION, + number_of_aux_symbols: 0, + }); // [4] section .idata$5 => undef - // name: - import_descriptor.write_pad_len(b".idata$5", 0u8, 8); - // value: - import_descriptor.write_u32_le(0); - // section_number: - import_descriptor.write_i16_le(object::pe::IMAGE_SYM_UNDEFINED as i16); - // base_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_TYPE_NULL as u8); - // complex_type: - import_descriptor.write_u8(object::pe::IMAGE_SYM_DTYPE_NULL as u8); - // 
storage_class: - import_descriptor.write_u8(object::pe::IMAGE_SYM_CLASS_SECTION); - // number_of_aux_symbols: - import_descriptor.write_u8(0); - - // COFF symbol string table - import_descriptor.write(&symbol_string_table); + import_descriptor.write_pod(&ImageSymbol { + name: coff_string_table.get_symbol_raw_name(".idata$5"), + value: u32(0), + section_number: u16(IMAGE_SYM_UNDEFINED as u16), + typ: u16(null_type), + storage_class: IMAGE_SYM_CLASS_SECTION, + number_of_aux_symbols: 0, + }); + + // COFF string table: found by looking for the end of the symbol table + // length of entire string table, including the length + import_descriptor.write_u32_le(4 + coff_string_table.0.len() as u32); + import_descriptor.write(&coff_string_table.0); drop(import_descriptor); @@ -551,6 +533,13 @@ mod windows_import_lib { self.data.extend_from_slice(data); } + fn write_pod(&mut self, value: &T) + where + T: object::pod::Pod, + { + self.data.extend_from_slice(object::bytes_of(value)); + } + fn slice(&mut self, offset: usize, len: usize) -> &mut [u8] { &mut self.data[offset..offset + len] } @@ -560,21 +549,6 @@ mod windows_import_lib { self.data.push(0); } - fn write_pad_len(&mut self, data: &[u8], pad: u8, len: usize) { - assert!(data.len() <= len); - let offset = self.data.len(); - self.data.extend_from_slice(data); - self.data.resize(offset + len, pad); - } - - fn write_u8(&mut self, data: u8) { - self.data.push(data); - } - - fn write_i16_le(&mut self, data: i16) { - self.data.extend_from_slice(&data.to_le_bytes()); - } - fn write_u16_le(&mut self, data: u16) { self.data.extend_from_slice(&data.to_le_bytes()); } From 15f8053dc7af0c30f9c68a6e46678c97468a6d6b Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Sun, 5 Nov 2023 20:54:14 +1300 Subject: [PATCH 05/11] WIP fix U16/U16Bytes split. 
--- src/archive.rs | 58 ++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index da4a670bb..da3af2295 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -66,10 +66,18 @@ mod windows_import_lib { use object::{LittleEndian as LE, U16Bytes, U32Bytes, U16, U32}; fn u16(value: u16) -> U16 { - U16Bytes::new(LE, value) + U16::new(LE, value) } fn u32(value: u32) -> U32 { + U32::new(LE, value) + } + + fn u16b(value: u16) -> U16Bytes { + U16Bytes::new(LE, value) + } + + fn u32b(value: u32) -> U32Bytes { U32Bytes::new(LE, value) } @@ -387,21 +395,21 @@ mod windows_import_lib { ); // relocation 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 import_descriptor.write_pod(&ImageRelocation { - virtual_address: u32(0), - symbol_table_index: u32(3), - typ: u16(IMAGE_REL_AMD64_ADDR32NB), + virtual_address: u32b(0), + symbol_table_index: u32b(3), + typ: u16b(IMAGE_REL_AMD64_ADDR32NB), }); // relocation 1: [2] name rva => points to DLL name section .idata$6 import_descriptor.write_pod(&ImageRelocation { - virtual_address: u32(12), - symbol_table_index: u32(2), - typ: u16(IMAGE_REL_AMD64_ADDR32NB), + virtual_address: u32b(12), + symbol_table_index: u32b(2), + typ: u16b(IMAGE_REL_AMD64_ADDR32NB), }); // relocation 2: [4] import address table rva => points to UNDEF symbol .idata$5 import_descriptor.write_pod(&ImageRelocation { - virtual_address: u32(16), - symbol_table_index: u32(4), - typ: u16(IMAGE_REL_AMD64_ADDR32NB), + virtual_address: u32b(16), + symbol_table_index: u32b(4), + typ: u16b(IMAGE_REL_AMD64_ADDR32NB), }); // [1] section .idata$6 data @@ -455,45 +463,45 @@ mod windows_import_lib { // [0] external __IMPORT_DESCRIPTOR_{dll_basename} => section 1 import_descriptor.write_pod(&ImageSymbol { name: coff_string_table.get_symbol_raw_name(&import_descriptor_symbol), - value: u32(0), - section_number: u16(1), - typ: u16(null_type), + value: u32b(0), + section_number: 
u16b(1), + typ: u16b(null_type), storage_class: IMAGE_SYM_CLASS_EXTERNAL, number_of_aux_symbols: 0, }); // [1] section .idata$2 => section 1 import_descriptor.write_pod(&ImageSymbol { name: coff_string_table.get_symbol_raw_name(".idata$2"), - value: u32(0), - section_number: u16(1), - typ: u16(null_type), + value: u32b(0), + section_number: u16b(1), + typ: u16b(null_type), storage_class: IMAGE_SYM_CLASS_SECTION, number_of_aux_symbols: 0, }); // [2] static .idata$6 => section 2 import_descriptor.write_pod(&ImageSymbol { name: coff_string_table.get_symbol_raw_name(".idata$6"), - value: u32(0), - section_number: u16(2), - typ: u16(null_type), + value: u32b(0), + section_number: u16b(2), + typ: u16b(null_type), storage_class: IMAGE_SYM_CLASS_STATIC, number_of_aux_symbols: 0, }); // [3] section .idata$4 => undef import_descriptor.write_pod(&ImageSymbol { name: coff_string_table.get_symbol_raw_name(".idata$4"), - value: u32(0), - section_number: u16(IMAGE_SYM_UNDEFINED as u16), - typ: u16(null_type), + value: u32b(0), + section_number: u16b(IMAGE_SYM_UNDEFINED as u16), + typ: u16b(null_type), storage_class: IMAGE_SYM_CLASS_SECTION, number_of_aux_symbols: 0, }); // [4] section .idata$5 => undef import_descriptor.write_pod(&ImageSymbol { name: coff_string_table.get_symbol_raw_name(".idata$5"), - value: u32(0), - section_number: u16(IMAGE_SYM_UNDEFINED as u16), - typ: u16(null_type), + value: u32b(0), + section_number: u16b(IMAGE_SYM_UNDEFINED as u16), + typ: u16b(null_type), storage_class: IMAGE_SYM_CLASS_SECTION, number_of_aux_symbols: 0, }); From 08dcc1c52a51b40d054a81ca77d13896445b2196 Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Wed, 8 Nov 2023 00:50:15 +1300 Subject: [PATCH 06/11] Implement remaining required symbols and objects. And break out code into dll_import_lib module. 
--- src/archive.rs | 709 +---------------------------- src/dll_import_lib/ar.rs | 234 ++++++++++ src/dll_import_lib/coff.rs | 464 +++++++++++++++++++ src/dll_import_lib/data.rs | 73 +++ src/dll_import_lib/mod.rs | 194 ++++++++ src/dll_import_lib/string_table.rs | 28 ++ src/lib.rs | 1 + 7 files changed, 995 insertions(+), 708 deletions(-) create mode 100644 src/dll_import_lib/ar.rs create mode 100644 src/dll_import_lib/coff.rs create mode 100644 src/dll_import_lib/data.rs create mode 100644 src/dll_import_lib/mod.rs create mode 100644 src/dll_import_lib/string_table.rs diff --git a/src/archive.rs b/src/archive.rs index da3af2295..630d21765 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -27,7 +27,7 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { } let lib_path = tmpdir.join(format!("{lib_name}_import.lib")); // todo: emit session error instead of expects - fs::write(&lib_path, windows_import_lib::generate(lib_name, &import_names)) + fs::write(&lib_path, crate::dll_import_lib::generate(lib_name, &import_names)) .expect("failed to write import library"); lib_path @@ -51,710 +51,3 @@ fn get_import_or_native_object_symbols( get_native_object_symbols(buf, f) } } - -mod windows_import_lib { - // todo: pull out to a proper location. Really should be in `object` crate! - // todo: support ordinals - // todo: support name types (e.g. verbatim+) - // todo: support windows-gnu flavor? 
- // todo: provide machine - // todo: remove any panics, nice errors - - use std::ops::{Deref, DerefMut}; - - use object::pe::*; - use object::{LittleEndian as LE, U16Bytes, U32Bytes, U16, U32}; - - fn u16(value: u16) -> U16 { - U16::new(LE, value) - } - - fn u32(value: u32) -> U32 { - U32::new(LE, value) - } - - fn u16b(value: u16) -> U16Bytes { - U16Bytes::new(LE, value) - } - - fn u32b(value: u32) -> U32Bytes { - U32Bytes::new(LE, value) - } - - // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format - // - // Windows .lib files are System-V (aka. GNU) flavored ar files with a couple of extra lookup - // members. - // - // An archive is the 8 bytes b"!\n" - // followed by a sequence of 60 byte member headers: - // 0: name: [u8; 16], // member name, terminated with "/". If it is longer than 15, then - // // use "/n" where "n" is a decimal for the offset in bytes into - // // the longnames ("//") member contents. - // 16: date: [u8; 12], // ASCII decimal seconds since UNIX epoch - always -1 for MSVC - // 28: uid: [u8; 6], // ASCII decimal user id. Always blank for MSVC - // 34: gid: [u8; 6], // ditto for group id. - // 40: mode: [u8; 8], // ASCII octal UNIX mode. 0 for MSVC - // 48: size: [u8; 10], // ASCII decimal data size. - // 58: end: b"`\n", - // then size bytes of payload. If payload is odd sized, pad - // to an even offset with \n. - // - // You must store two extra members at the start, a standard System-V / GNU symbol lookup table - // member (the "first linker member" in the linked documentation), and the Windows symbol table, - // (the "second linker member") both with empty ("/") names. - // - // The standard System-V/GNU symbol table member has the name "/" with following contents, - // using big-endian numbers: - // count: u32, // number of indexed symbols - // offsets: [u32, count], // file offsets to the header of the member that contains - // // that symbol. 
- // names: * // sequence of null terminated symbol names. - // - // The Windows table member also has the name "/", and has the following contents, using - // little-endian numbers, where the symbol_members must be sorted to allow binary search: - // member_count: u32, // number of members - // member_offsets: [u32; member_count], // file offsets to each member header - // symbol_count: u32, // number of symbols - // symbol_member: [u16; symbol_count], // *1-based* index of the member that contains - // // each symbol - // symbol_names: * // sequence of null terminated symbol names - // - // Then the long names member ("//") as with regular GNU ar files, just a sequence of - // null terminated strings indexed by members using the long name format "/n" as described - // above. - // - // Then regular members follow. - // - // The member name doesn't seem to matter, including duplicates, we will use the dll name since - // that's what's in the files generated by MSVC tools. - // - // The short import object has the form: - // header: - // 0: sig1: 0u16 - // 2: sig2: 0xFFFFu16 - // 4: version: u16, // normally 0 - // 6: machine: u16, // IMAGE_MACHINE_* value, e.g. 0x8664 for AMD64 - // 8: time_date_stamp: u32, // normally 0 - // 12: size_of_data: u32, // size following the header - // 16: ordinal_or_hint: u16, // depending on flag - // 18: object_type: u2, // IMPORT_OBJECT_{CODE,DATA,CONST} = 0, 1, 2 - // name_type: u3, // IMPORT_OBJECT_{ORDINAL,NAME,NAME_NO_PREFIX,NAME_UNDECORATE,NAME_EXPORTAS} = 0, 1, 2, 3, 4 - // reserved: u11, - // 20: data: // size_of_data bytes - // name: * // import name; null terminated string - // dll_name: * // dll name; null terminated string - - pub(crate) fn generate(dll_name: &str, import_names: &[&str]) -> Vec { - // member count: one for each import_name in argument order, followed by the import - // descriptor. 
- let member_count = import_names.len() + 1; - - assert!(member_count <= 0xFFFF, "too many import names"); - - // foo.dll => foo so we can construct the import descriptor symbol. - // At least for the Windows system dlls, don't seem to need any further - // escaping, e.g. "api-ms-win-appmodel-runtime-l1-1-1.dll" => - // "__IMPORT_DESCRIPTOR_api-ms-win-appmodel-runtime-l1-1-1" - // Not using std::path to avoid having to handle non-unicode paths. - let mut dll_basename = String::from(dll_name); - if let Some(index) = dll_basename.rfind('.') { - dll_basename.truncate(index); - } - - // Identify the target of a symbol - #[derive(Copy, Clone)] - enum SymbolValue { - // a short import object, specifically for import_names[.0] - Import(usize), - // the __IMPORT_DESCRIPTOR_{dll_basename} used to build the final .idata section. - Descriptor, - } - - // Note we are using the behavior of BTee* that it keeps its keys in sorted order: this - // is required by the MSVC symbol table so the linker can use binary search. - let mut symbols = - std::collections::BTreeMap::, SymbolValue>::new(); - - for (index, &name) in import_names.iter().enumerate() { - symbols.insert(name.into(), SymbolValue::Import(index)); - symbols.insert(format!("__imp_{name}").into(), SymbolValue::Import(index)); - } - - let import_descriptor_symbol = format!("__IMPORT_DESCRIPTOR_{dll_basename}"); - symbols.insert(import_descriptor_symbol.as_str().into(), SymbolValue::Descriptor); - - let symbol_count = symbols.len(); - - let mut writer = Writer::new(); - - // member names are all the dll_name with the MSVC tools. - let member_name = writer.member_name(dll_name); - - // Standard System-V / GNU symbol table member - let mut gnu_symbols = writer.start_member(MemberName::SymbolTable); - // member table: one entry per symbol (duplicates allowed for aliasing) - gnu_symbols.write_u32_be(symbol_count as u32); - // reserve space for member offsets. 
- let gnu_member_table_offset = gnu_symbols.reserve_bytes(symbol_count * 4); - // symbol string table - for name in symbols.keys() { - gnu_symbols.write_c_str(name); - } - // done with legacy symbol directory - drop(gnu_symbols); - - // MSVC tools symbol table member - let mut ms_symbols = writer.start_member(MemberName::SymbolTable); - // member offset table - ms_symbols.write_u32_le(member_count as u32); - let ms_member_table_offset = ms_symbols.reserve_bytes(member_count * 4); - // symbol table - ms_symbols.write_u32_le(symbol_count as u32); - // member index we assume symbols are in the same order as the member table. - for &value in symbols.values() { - let member_index = match value { - SymbolValue::Import(index) => index, - SymbolValue::Descriptor => import_names.len(), - }; - // Yep, it's a 1-based index. Who knows why. - // cast to u16 should be safe due to assert!() on member_count above. - ms_symbols.write_u16_le(1 + member_index as u16); - } - // string table again (could just copy from legacy string table above?) 
- for name in symbols.keys() { - ms_symbols.write_c_str(name); - } - // done with current symbol directory - drop(ms_symbols); - - writer.write_long_names(); - // can't use writer.member_name() from here - - // short import object members - for (index, name) in import_names.iter().enumerate() { - let mut member = writer.start_member(member_name); - // update member offsets - let member_offset = member.header_offset as u32; - - // Updating GNU symbol table is a bit messy with the aliases - for (member_index, symbol) in symbols.values().enumerate() { - if let SymbolValue::Import(symbol_index) = symbol { - if *symbol_index == index { - member - .set_u32_be(gnu_member_table_offset + member_index * 4, member_offset); - } - } - } - member.set_u32_le(ms_member_table_offset + index * 4, member_offset); - - member.write_pod(&ImportObjectHeader { - sig1: u16(IMAGE_FILE_MACHINE_UNKNOWN), - sig2: u16(IMPORT_OBJECT_HDR_SIG2), - version: u16(0), - machine: u16(IMAGE_FILE_MACHINE_AMD64), - time_date_stamp: u32(0), - size_of_data: u32((name.len() + 1 + dll_name.len() + 1) as u32), - ordinal_or_hint: u16(0), - name_type: u16(IMPORT_OBJECT_CODE << IMPORT_OBJECT_TYPE_SHIFT - | IMPORT_OBJECT_NAME << IMPORT_OBJECT_NAME_SHIFT), - }); - member.write_c_str(name); - member.write_c_str(dll_name); - - drop(member); - } - - // import descriptor member - let mut import_descriptor = writer.start_member(member_name); - let member_offset = import_descriptor.header_offset as u32; - import_descriptor - .set_u32_be(gnu_member_table_offset + import_names.len() * 4, member_offset); - import_descriptor - .set_u32_le(ms_member_table_offset + import_names.len() * 4, member_offset); - // This is a COFF object containing 2 sections: - // .idata$2: import directory entry: - // 20 bytes, all 0 on disk, an Import Directory Table entry - // filled out by the linker with relocations. 
- // .idata$6: DLL name: - // The null terminated file name of the dll - // The import directory entry has 3 relocations: - // 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 - // 12: [2] name rva => points to DLL name section .idata$6 - // 16: [4] import address table rva => points to UNDEF symbol .idata$5 - // The COFF symbol table contains 5 symbols: - // [0]: external __IMPORT_DESCRIPTOR_{dll_basename} => section 1 - // [1]: section .idata$2 => section 1 - // [2]: static .idata$6 => section 2 - // [3]: section .idata$4 => undef - // [4]: section .idata$5 => undef - // Unfortunately, the object crate doesn't support writing COFF objects. - // For now, continue to use dumb explicit writer code, but this should be cleaned up. - // - // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#coff-file-header-object-and-image - // - // COFF file: - // 0: header: - // 0: machine: u16, // an IMAGE_MACHINE_* value - // 2: number_of_sections: u16, - // 4: time_date_stamp: u32, // 0 in MSVC tools - // 8: pointer_to_symbol_table: u32, // offset in COFF file to COFF symbol table - // 12: number_of_symbols: u32, // number of entries in symbol table - // 16: size_of_optional_header: u16, // 0 for object file - // 18: characteristics: u16, // union of IMAGE_FILE_* flags; 0 for our output - // 20: sections table: - // 0: name: [u8; 8], // null padded section name - // 8: virtual_size: u32, // 0 for object files - // 12: virtual_address: u32, // 0 for object files - // 16: size_of_raw_data: u32, // size of section on disk - // 20: pointer_to_raw_data: u32, // section data COFF file offset - // 24: pointer_to_relocations: u32, // relocation table COFF file offset - // 28: pointer_to_line_number: u32, // 0 - // 32: number_of_relocations: u16, // number of entries in relocation tables - // 34: number_of_line_numbers: u16, // 0 - // 36: characteristics: u32, // union of IMAGE_SCN_* flags - // section in sections: - // section.pointer_to_raw_data: raw_data: 
[u8; section.size_of_raw_data] - // section.pointer_to_relocations: - // 0: virtual_address: u32, // rva of relocation - // 4: symbol_table_index: u32, // index into COFF symbol table - // 8: type: u16, // IMAGE_REL_* value - // pointer_to_symbol_table: - // i in 0..number_of_symbols: - // 0: name: [u8; 8] | { 0u32; offset_in_string_table: u32 }; - // 8: value: u32, - // 12: section_number: u16, // IMAGE_SYM_* or 1-based section index - // 14: base_type: u8, // IMAGE_SYM_TYPE_*, always NULL - // 15: complex_type: u8, // IMAGE_SYM_DTYPE_*, always NULL for our output - // 16: storage_class: u8, // IMAGE_SYM_CLASS_* - // 17: number_of_aux_symbols: u8, // 0 for our output - // pointer_to_symbol_table + number_of_symbols * 18: string_table: - // 0: string_table_size: u32, // including this size - // 4: sequence of null-terminated strings - - // COFF File header: - let coff_file_offset = import_descriptor.data.len(); - - import_descriptor.write_pod(&ImageFileHeader { - machine: u16(IMAGE_FILE_MACHINE_AMD64), - number_of_sections: u16(2), - time_date_stamp: u32(0), - pointer_to_symbol_table: u32(0), // filled out later - number_of_symbols: u32(5), - size_of_optional_header: u16(0), - characteristics: u16(0), - }); - - // Section table: - // [0] .idata$2: import directory entry - let import_directory_entry_section_offset = import_descriptor.data.len(); - import_descriptor.write_pod(&ImageSectionHeader { - name: *b".idata$2", - virtual_size: u32(0), - virtual_address: u32(0), - size_of_raw_data: u32(20), - pointer_to_raw_data: u32(0), // filled out later - pointer_to_relocations: u32(0), // filled out later - pointer_to_linenumbers: u32(0), // no COFF linenumbers - number_of_relocations: u16(3), - number_of_linenumbers: u16(0), - characteristics: u32(IMAGE_SCN_ALIGN_4BYTES - | IMAGE_SCN_CNT_INITIALIZED_DATA - | IMAGE_SCN_MEM_READ - | IMAGE_SCN_MEM_WRITE), - }); - - // [1] .idata$6: dll name - let dll_name_section_offset = import_descriptor.data.len(); - 
import_descriptor.write_pod(&ImageSectionHeader { - name: *b".idata$6", - virtual_size: u32(0), - virtual_address: u32(0), - size_of_raw_data: u32((dll_name.len() as u32 + 1).next_multiple_of(2)), - pointer_to_raw_data: u32(0), // filled out later - pointer_to_relocations: u32(0), // no relocations - pointer_to_linenumbers: u32(0), - number_of_relocations: u16(0), - number_of_linenumbers: u16(0), - characteristics: u32(IMAGE_SCN_ALIGN_2BYTES - | IMAGE_SCN_CNT_INITIALIZED_DATA - | IMAGE_SCN_MEM_READ - | IMAGE_SCN_MEM_WRITE), - }); - - // [0] section .idata$2 data - import_descriptor.data.set_u32_le( - import_directory_entry_section_offset + 20, // .pointer_to_raw_data - (import_descriptor.data.len() - coff_file_offset) as u32, - ); - import_descriptor.reserve_bytes(20); - // [0] section .idata$2 relocations - import_descriptor.data.set_u32_le( - import_directory_entry_section_offset + 24, // .pointer_to_relocations - (import_descriptor.data.len() - coff_file_offset) as u32, - ); - // relocation 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 - import_descriptor.write_pod(&ImageRelocation { - virtual_address: u32b(0), - symbol_table_index: u32b(3), - typ: u16b(IMAGE_REL_AMD64_ADDR32NB), - }); - // relocation 1: [2] name rva => points to DLL name section .idata$6 - import_descriptor.write_pod(&ImageRelocation { - virtual_address: u32b(12), - symbol_table_index: u32b(2), - typ: u16b(IMAGE_REL_AMD64_ADDR32NB), - }); - // relocation 2: [4] import address table rva => points to UNDEF symbol .idata$5 - import_descriptor.write_pod(&ImageRelocation { - virtual_address: u32b(16), - symbol_table_index: u32b(4), - typ: u16b(IMAGE_REL_AMD64_ADDR32NB), - }); - - // [1] section .idata$6 data - import_descriptor.data.set_u32_le( - dll_name_section_offset + 20, // .pointer_to_raw_data - (import_descriptor.data.len() - coff_file_offset) as u32, - ); - import_descriptor.write_c_str(dll_name); - import_descriptor.align(2, 0u8); - - struct CoffStringTable(Vec); - - 
impl CoffStringTable { - fn new() -> Self { - Self(vec![]) - } - - fn get_symbol_raw_name(&mut self, value: &str) -> [u8; 8] { - let mut result = [0u8; 8]; - if value.len() > 8 { - // add 4 for the string table length - let offset = 4 + self - .0 - .windows(value.len()) - .position(|window| window == value.as_bytes()) - .unwrap_or_else(|| { - let offset = self.0.len(); - self.0.extend_from_slice(value.as_bytes()); - self.0.push(0); - offset - }); - result[4..].copy_from_slice(&u32::to_le_bytes(offset as u32)); - } else { - result.copy_from_slice(value.as_bytes()) - } - result - } - } - - let mut coff_string_table = CoffStringTable::new(); - - // COFF symbol table: - import_descriptor.data.set_u32_le( - coff_file_offset + 8, - (import_descriptor.data.len() - coff_file_offset) as u32, - ); - - // A complicated way to say 0. - let null_type = IMAGE_SYM_DTYPE_NULL << 8 | IMAGE_SYM_TYPE_NULL; - - // [0] external __IMPORT_DESCRIPTOR_{dll_basename} => section 1 - import_descriptor.write_pod(&ImageSymbol { - name: coff_string_table.get_symbol_raw_name(&import_descriptor_symbol), - value: u32b(0), - section_number: u16b(1), - typ: u16b(null_type), - storage_class: IMAGE_SYM_CLASS_EXTERNAL, - number_of_aux_symbols: 0, - }); - // [1] section .idata$2 => section 1 - import_descriptor.write_pod(&ImageSymbol { - name: coff_string_table.get_symbol_raw_name(".idata$2"), - value: u32b(0), - section_number: u16b(1), - typ: u16b(null_type), - storage_class: IMAGE_SYM_CLASS_SECTION, - number_of_aux_symbols: 0, - }); - // [2] static .idata$6 => section 2 - import_descriptor.write_pod(&ImageSymbol { - name: coff_string_table.get_symbol_raw_name(".idata$6"), - value: u32b(0), - section_number: u16b(2), - typ: u16b(null_type), - storage_class: IMAGE_SYM_CLASS_STATIC, - number_of_aux_symbols: 0, - }); - // [3] section .idata$4 => undef - import_descriptor.write_pod(&ImageSymbol { - name: coff_string_table.get_symbol_raw_name(".idata$4"), - value: u32b(0), - section_number: 
u16b(IMAGE_SYM_UNDEFINED as u16), - typ: u16b(null_type), - storage_class: IMAGE_SYM_CLASS_SECTION, - number_of_aux_symbols: 0, - }); - // [4] section .idata$5 => undef - import_descriptor.write_pod(&ImageSymbol { - name: coff_string_table.get_symbol_raw_name(".idata$5"), - value: u32b(0), - section_number: u16b(IMAGE_SYM_UNDEFINED as u16), - typ: u16b(null_type), - storage_class: IMAGE_SYM_CLASS_SECTION, - number_of_aux_symbols: 0, - }); - - // COFF string table: found by looking for the end of the symbol table - // length of entire string table, including the length - import_descriptor.write_u32_le(4 + coff_string_table.0.len() as u32); - import_descriptor.write(&coff_string_table.0); - - drop(import_descriptor); - - writer.data.data - } - - #[derive(Copy, Clone)] - enum MemberName { - SymbolTable, // "/" - LongNames, // "//" - Short([u8; 16]), // "{0}/" - Long(usize), // "/{0}" - } - - struct Data { - data: Vec, - } - - impl Data { - fn new() -> Self { - Self { data: vec![] } - } - - fn len(&self) -> usize { - self.data.len() - } - - fn write(&mut self, data: &[u8]) { - self.data.extend_from_slice(data); - } - - fn write_pod(&mut self, value: &T) - where - T: object::pod::Pod, - { - self.data.extend_from_slice(object::bytes_of(value)); - } - - fn slice(&mut self, offset: usize, len: usize) -> &mut [u8] { - &mut self.data[offset..offset + len] - } - - fn write_c_str(&mut self, data: &str) { - self.data.extend_from_slice(data.as_bytes()); - self.data.push(0); - } - - fn write_u16_le(&mut self, data: u16) { - self.data.extend_from_slice(&data.to_le_bytes()); - } - - fn write_u32_be(&mut self, data: u32) { - self.data.extend_from_slice(&data.to_be_bytes()); - } - - fn write_u32_le(&mut self, data: u32) { - self.data.extend_from_slice(&data.to_le_bytes()); - } - - fn reserve_bytes(&mut self, count: usize) -> usize { - let offset = self.data.len(); - self.data.resize(offset + count, 0); - offset - } - - fn set_u32_be(&mut self, offset: usize, data: u32) { - 
self.data[offset..][..4].copy_from_slice(&data.to_be_bytes()); - } - - fn set_u32_le(&mut self, offset: usize, data: u32) { - self.data[offset..][..4].copy_from_slice(&data.to_le_bytes()); - } - - fn align(&mut self, alignment: usize, pad: u8) { - let offset = self.data.len(); - self.data.resize(offset.next_multiple_of(alignment), pad); - } - } - - struct Writer { - data: Data, - long_names: Option>, - } - - impl Deref for Writer { - type Target = Data; - - fn deref(&self) -> &Self::Target { - &self.data - } - } - - impl DerefMut for Writer { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.data - } - } - - impl Writer { - fn new() -> Self { - let long_names = Some(vec![]); - let mut data = Data::new(); - data.write(b"!\n"); - Self { data, long_names } - } - - fn member_name(&mut self, name: &str) -> MemberName { - let Some(ref mut long_buf) = self.long_names else { - panic!("already wrote long names member"); - }; - - if name.len() < 16 { - let mut buf = [0u8; 16]; - buf[..name.len()].copy_from_slice(name.as_bytes()); - buf[name.len()] = b'/'; - buf[name.len() + 1..].fill(b' '); - MemberName::Short(buf) - } else { - let name = std::ffi::CString::new(name).expect("names cannot contain \\0"); - let name = name.as_bytes_with_nul(); - - // Find the name *including the null terminator* in the existing long names buffer. - // Note, this could find "bar\0" in "foobar\0", but that seems to be fine according - // to the spec? It still counts as a null terminated "bar" string. - let offset = long_buf - .windows(name.len()) - .position(|window| window == name) - .unwrap_or_else(|| { - // Didn't already have it, so add it to the end. - let offset = long_buf.len(); - long_buf.extend_from_slice(name); - offset - }); - MemberName::Long(offset) - } - } - - fn start_member(&mut self, name: MemberName) -> Member<'_> { - let header_offset = self.data.len(); - // fill the header with blanks... 
- self.data.data.resize(header_offset + Member::HEADER_SIZE - 2, b' '); - // except for end marker - self.data.write(b"`\n"); - - let mut member = Member::new(&mut self.data, header_offset); - member.set_name(name); - // init date, mode to default values as produced by MSVC tools. - // uid, gid are already defaulted to blank. - member.set_time_date_stamp(-1); - member.set_mode(0); - member - } - - fn write_long_names(&mut self) { - let data = self.long_names.take().expect("already wrote long names member"); - let mut member = self.start_member(MemberName::LongNames); - member.write(&data); - drop(member); - } - } - - struct Member<'a> { - data: &'a mut Data, - header_offset: usize, - } - - impl Deref for Member<'_> { - type Target = Data; - - fn deref(&self) -> &Self::Target { - self.data - } - } - - impl DerefMut for Member<'_> { - fn deref_mut(&mut self) -> &mut Self::Target { - self.data - } - } - - impl<'a> Member<'a> { - const HEADER_SIZE: usize = 60; - - fn new(data: &'a mut Data, header_offset: usize) -> Self { - Self { data, header_offset } - } - - fn header_slice(&mut self, offset: usize, len: usize) -> &mut [u8] { - self.data.slice(self.header_offset + offset, len) - } - - fn set_name(&mut self, name: MemberName) { - let mut field = self.header_slice(0, 16); - match name { - MemberName::SymbolTable => { - field[0..1].copy_from_slice(b"/"); - field[1..].fill(b' '); - } - MemberName::LongNames => { - field[0..2].copy_from_slice(b"//"); - field[2..].fill(b' '); - } - MemberName::Short(name) => { - field.copy_from_slice(&name); - // already includes trailing / and spaces - } - MemberName::Long(offset) => { - use std::io::Write; - field.fill(b' '); - write!(field, "/{offset}").expect("writing long name should not fail"); - } - } - } - - fn set_time_date_stamp(&mut self, value: i32) { - self.set_decimal_field(16, 12, value); - } - - fn set_uid(&mut self, value: i32) { - self.set_decimal_field(28, 6, value); - } - - fn set_gid(&mut self, value: i32) { - 
self.set_decimal_field(34, 6, value); - } - - fn set_mode(&mut self, value: i32) { - use std::io::Write; - write!(std::io::Cursor::new(self.header_slice(40, 8)), "{value:o}") - .expect("value too large"); - } - - fn set_decimal_field(&mut self, offset: usize, size: usize, value: i32) { - use std::io::Write; - write!(std::io::Cursor::new(self.header_slice(offset, size)), "{value}") - .expect("value too large"); - } - } - - impl<'a> Drop for Member<'a> { - fn drop(&mut self) { - let data_size = self.data.len() - self.header_offset - Self::HEADER_SIZE; - assert!(data_size < i32::MAX as usize); - self.set_decimal_field(48, 10, data_size as i32); - self.align(2, b'\n'); - } - } -} diff --git a/src/dll_import_lib/ar.rs b/src/dll_import_lib/ar.rs new file mode 100644 index 000000000..3211c6a44 --- /dev/null +++ b/src/dll_import_lib/ar.rs @@ -0,0 +1,234 @@ +//! https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format +//! +//! Windows .lib files are System-V (aka. GNU) flavored ar files with an additional MSVC-specific +//! symbol lookup member after the standard one. +//! +//! An ar archive is the 8 bytes `b"!\n"` followed by a sequence of 60 byte member headers: +//! +//! ```plaintext +//! 0: name: [u8; 16], // member name, terminated with "/". If it is longer than 15, then +//! // use "/n" where "n" is a decimal for the offset in bytes into +//! // the longnames ("//") member contents. +//! 16: date: [u8; 12], // ASCII decimal seconds since UNIX epoch - always -1 for MSVC +//! 28: uid: [u8; 6], // ASCII decimal user id. Always blank for MSVC +//! 34: gid: [u8; 6], // ditto for group id. +//! 40: mode: [u8; 8], // ASCII octal UNIX mode. 0 for MSVC +//! 48: size: [u8; 10], // ASCII decimal data size. +//! 58: end: b"`\n", +//! ``` +//! +//! then `size` bytes of member payload data. If payload is odd sized, it must be padded to an even +//! offset with `\n`. +//! +//! 
Standard archives have an initial member with the raw name `/` containing a table with the +//! offsets of the members containing exported symbols, with big-endian encoding: +//! +//! ```plaintext +//! count: u32_be, // number of indexed symbols +//! offsets: [u32_be, count], // file offsets to the header of the member that contains +//! // that symbol. +//! names: * // sequence of null terminated symbol names. +//! ``` +//! +//! MSVC lib archives then have an additional table member that also has the name `/`, and stores +//! the same information. This uses little-endian encoding, separates the member offset table from +//! the symbol table, and requires symbols to be sorted to allow binary search lookups. +//! +//! ```plaintext +//! member_count: u32, // number of members +//! member_offsets: [u32; member_count], // file offsets to each member header +//! symbol_count: u32, // number of symbols +//! symbol_member: [u16; symbol_count], // *1-based* index of the member that contains +//! // each symbol +//! symbol_names: * // sequence of null terminated symbol names in the same +//! // order as symbol_member. +//! ``` +//! +//! Then the standard long names member (`//`), which stores just a sequence of null terminated +//! strings indexed by members using the long name format `/n` as described above. This is not +//! required for MSVC if there are no long names. +//! +//! Then content members follow. +//! +//! The member name doesn't seem to matter, including duplicates, for import libraries MSVC uses +//! the dll name for every member. +//! +//! The short import object has the form: +//! +//! ```plaintext +//! 0: header: +//! 0: sig1: 0u16 +//! 2: sig2: 0xFFFFu16 +//! 4: version: u16, // normally 0 +//! 6: machine: u16, // IMAGE_MACHINE_* value, e.g. 0x8664 for AMD64 +//! 8: time_date_stamp: u32, // normally 0 +//! 12: size_of_data: u32, // size following the header +//! 16: ordinal_or_hint: u16, // depending on flag +//! 
18: object_type: u2, // IMPORT_OBJECT_{CODE,DATA,CONST} = 0, 1, 2 +//! name_type: u3, // IMPORT_OBJECT_{ORDINAL,NAME,NAME_NO_PREFIX,NAME_UNDECORATE,NAME_EXPORTAS} = 0, 1, 2, 3, 4 +//! reserved: u11, +//! 20: data: // size_of_data bytes +//! name: * // import name; null terminated string +//! dll_name: * // dll name; null terminated string +//! ``` + +use std::io::Write; +use std::ops::{Deref, DerefMut}; + +use super::string_table::StringTable; +use super::DataWriter; + +#[derive(Copy, Clone)] +pub(crate) struct MemberName(pub [u8; 16]); + +impl MemberName { + pub(crate) const SYMBOL_TABLE: Self = MemberName(*b"/ "); + pub(crate) const LONG_NAMES: Self = MemberName(*b"// "); +} + +pub(crate) struct Writer { + data: DataWriter, + long_names: Option, +} + +impl Writer { + #[allow(clippy::new_without_default)] // Default should probably not write a signature? + pub(crate) fn new() -> Self { + let long_names = Some(StringTable::new()); + let mut data = DataWriter::new(); + data.write(b"!\n"); + Self { data, long_names } + } + + pub(crate) fn member_name(&mut self, name: &str) -> MemberName { + let Some(ref mut long_buf) = self.long_names else { + panic!("already wrote long names member"); + }; + + if name.len() < 16 { + let mut buf = [b' '; 16]; + buf[..name.len()].copy_from_slice(name.as_bytes()); + buf[name.len()] = b'/'; + MemberName(buf) + } else { + let offset = long_buf.find_or_insert(name); + let mut buf = [b' '; 16]; + write!(&mut buf[..], "/{offset}").expect("writing long name should not fail"); + MemberName(buf) + } + } + + pub(crate) fn start_member(&mut self, name: MemberName) -> Member<'_> { + Member::new(&mut self.data, name) + } + + pub(crate) fn write_long_names(&mut self) { + let string_table = self.long_names.take().expect("already wrote long names member"); + let mut member = self.start_member(MemberName::LONG_NAMES); + member.write(string_table.data()); + drop(member); + } + + pub(crate) fn into_data(self) -> Vec { + self.data.into_data() + } +} + 
+impl Deref for Writer { + type Target = DataWriter; + + fn deref(&self) -> &Self::Target { + &self.data + } +} + +impl DerefMut for Writer { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.data + } +} + +pub(crate) struct Member<'data> { + pub(crate) data: &'data mut DataWriter, + pub(crate) header_offset: usize, +} + +impl<'data> Member<'data> { + const HEADER_SIZE: usize = std::mem::size_of::(); + + fn new(data: &'data mut DataWriter, name: MemberName) -> Self { + // fill the header MSVC defaults. + let header_offset = data.write_pod(&object::archive::Header { + name: name.0, + date: *b"-1 ", + uid: [b' '; 6], + gid: [b' '; 6], + mode: *b"0 ", + size: [b' '; 10], // filled out in Drop + terminator: object::archive::TERMINATOR, + }); + + Self { data, header_offset } + } + + pub(crate) fn header_mut(&mut self) -> &mut object::archive::Header { + self.data.get_pod_mut(self.header_offset) + } + + pub(crate) fn set_name(&mut self, name: MemberName) { + self.header_mut().name = name.0; + } + + pub(crate) fn set_time_date_stamp(&mut self, value: i32) -> std::io::Result<()> { + let header = self.header_mut(); + write!(&mut header.date[..], "{value:<12}") + } + + pub(crate) fn set_uid(&mut self, value: Option) -> std::io::Result<()> { + let header = self.header_mut(); + if let Some(value) = value { + write!(&mut header.uid[..], "{value:<6}") + } else { + header.uid.fill(b' '); + Ok(()) + } + } + + pub(crate) fn set_gid(&mut self, value: Option) -> std::io::Result<()> { + let header = self.header_mut(); + if let Some(value) = value { + write!(&mut header.gid[..], "{value:<6}") + } else { + header.gid.fill(b' '); + Ok(()) + } + } + + pub(crate) fn set_mode(&mut self, value: u16) -> std::io::Result<()> { + write!(&mut self.header_mut().mode[..], "{value:o<8}") + } +} + +impl Deref for Member<'_> { + type Target = DataWriter; + + fn deref(&self) -> &Self::Target { + self.data + } +} + +impl DerefMut for Member<'_> { + fn deref_mut(&mut self) -> &mut Self::Target 
{ + self.data + } +} + +impl<'a> Drop for Member<'a> { + fn drop(&mut self) { + let data_start = self.header_offset + Self::HEADER_SIZE; + let data_size = self.data.len() - data_start; + write!(&mut self.header_mut().size[..], "{data_size}") + .expect("data size should always fit in 10 bytes"); + self.data.align(2, b'\n'); + } +} diff --git a/src/dll_import_lib/coff.rs b/src/dll_import_lib/coff.rs new file mode 100644 index 000000000..46decf7a6 --- /dev/null +++ b/src/dll_import_lib/coff.rs @@ -0,0 +1,464 @@ +// Unfortunately, the object crate doesn't support writing COFF objects. +// This should probably be moved upstream at some point. +// +// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#coff-file-header-object-and-image +// +// COFF file: +// 0: header: +// 0: machine: u16, // an IMAGE_MACHINE_* value +// 2: number_of_sections: u16, +// 4: time_date_stamp: u32, // 0 in MSVC tools +// 8: pointer_to_symbol_table: u32, // offset in COFF file to COFF symbol table +// 12: number_of_symbols: u32, // number of entries in symbol table +// 16: size_of_optional_header: u16, // 0 for object file +// 18: characteristics: u16, // union of IMAGE_FILE_* flags; 0 for our output +// 20: sections table: +// 0: name: [u8; 8], // null padded section name +// 8: virtual_size: u32, // 0 for object files +// 12: virtual_address: u32, // 0 for object files +// 16: size_of_raw_data: u32, // size of section on disk +// 20: pointer_to_raw_data: u32, // section data COFF file offset +// 24: pointer_to_relocations: u32, // relocation table COFF file offset +// 28: pointer_to_line_number: u32, // 0 +// 32: number_of_relocations: u16, // number of entries in relocation tables +// 34: number_of_line_numbers: u16, // 0 +// 36: characteristics: u32, // union of IMAGE_SCN_* flags +// section in sections: +// section.pointer_to_raw_data: raw_data: [u8; section.size_of_raw_data] +// section.pointer_to_relocations: +// 0: virtual_address: u32, // rva of relocation +// 4: 
symbol_table_index: u32, // index into COFF symbol table +// 8: type: u16, // IMAGE_REL_* value +// pointer_to_symbol_table: +// i in 0..number_of_symbols: +// 0: name: [u8; 8] | { 0u32; offset_in_string_table: u32 }; +// 8: value: u32, +// 12: section_number: u16, // IMAGE_SYM_* or 1-based section index +// 14: base_type: u8, // IMAGE_SYM_TYPE_*, always NULL +// 15: complex_type: u8, // IMAGE_SYM_DTYPE_*, always NULL for our output +// 16: storage_class: u8, // IMAGE_SYM_CLASS_* +// 17: number_of_aux_symbols: u8, // 0 for our output +// pointer_to_symbol_table + number_of_symbols * 18: string_table: +// 0: string_table_size: u32, // including this size +// 4: sequence of null-terminated strings + +use object::pe::*; +use object::{LittleEndian as LE, U16Bytes, U32Bytes, U16, U32}; +use std::ops::{Deref, DerefMut}; + +use super::data::DataWriter; +use super::string_table::StringTable; + +pub(crate) const NULL_IMPORT_DESCRIPTOR_SYMBOL: &str = "__NULL_IMPORT_DESCRIPTOR"; + +fn u16_aligned(value: u16) -> U16 { + U16::new(LE, value) +} + +fn u32_aligned(value: u32) -> U32 { + U32::new(LE, value) +} + +fn u16_unaligned(value: u16) -> U16Bytes { + U16Bytes::new(LE, value) +} + +fn u32_unaligned(value: u32) -> U32Bytes { + U32Bytes::new(LE, value) +} + +pub(crate) fn write_short_import( + data: &mut DataWriter, + dll_name: &str, + name: &&str, + ordinal_or_hint: Option, +) { + data.write_pod(&ImportObjectHeader { + sig1: u16_aligned(IMAGE_FILE_MACHINE_UNKNOWN), + sig2: u16_aligned(IMPORT_OBJECT_HDR_SIG2), + version: u16_aligned(0), + machine: u16_aligned(IMAGE_FILE_MACHINE_AMD64), + time_date_stamp: u32_aligned(0), + size_of_data: u32_aligned((name.len() + 1 + dll_name.len() + 1) as u32), + ordinal_or_hint: u16_aligned(ordinal_or_hint.unwrap_or_default()), + name_type: u16_aligned( + IMPORT_OBJECT_CODE << IMPORT_OBJECT_TYPE_SHIFT + | IMPORT_OBJECT_NAME << IMPORT_OBJECT_NAME_SHIFT, + ), + }); + data.write_c_str(name); + data.write_c_str(dll_name); +} + +pub(crate) fn 
write_import_descriptor( + data: &mut DataWriter, + dll_name: &str, + import_descriptor_symbol: &str, + null_thunk_data_symbol: &str, +) { + // This is a COFF object containing 2 sections: + // .idata$2: import directory entry: + // 20 bytes, all 0 on disk, an Import Directory Table entry + // filled out by the linker with relocations. + // .idata$6: DLL name: + // The null terminated file name of the dll + // The import directory entry has 3 relocations: + // 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 + // 12: [2] name rva => points to DLL name section .idata$6 + // 16: [4] import address table rva => points to UNDEF symbol .idata$5 + // The COFF symbol table contains 5 symbols: + // [0]: external __IMPORT_DESCRIPTOR_{dll_basename} => section 1 + // [1]: section .idata$2 => section 1 + // [2]: static .idata$6 => section 2 + // [3]: section .idata$4 => undef + // [4]: section .idata$5 => undef + // [5]: external __NULL_IMPORT_DESCRIPTOR => undef + // [6]: external __NULL_THUNK_DATA => undef + + // COFF File header: + let mut file = CoffFileWriter::new(data, IMAGE_FILE_MACHINE_AMD64); + + // Section table: + // [0] .idata$2: import directory entry + let import_directory_header = file.write_section_header( + ".idata$2", + IMAGE_SCN_ALIGN_4BYTES + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE, + ); + // [1] .idata$6: dll name + let dll_name_header = file.write_section_header( + ".idata$6", + IMAGE_SCN_ALIGN_2BYTES + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE, + ); + + // [0] section .idata$2 data + CoffSectionRawData::new(&mut file, import_directory_header).reserve_bytes(20); + + // [0] section .idata$2 relocations + let import_descriptor_pointer_to_relocations = file.data.len() - file.offset; + + let header = import_directory_header.get_mut(file.data); + header.number_of_relocations = u16_aligned(3); + + header.pointer_to_relocations = 
u32_aligned(import_descriptor_pointer_to_relocations as u32); + + // todo: CoffRelocWriter + + // relocation 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 + file.data.write_pod(&ImageRelocation { + virtual_address: u32_unaligned(0), + symbol_table_index: u32_unaligned(3), + typ: u16_unaligned(IMAGE_REL_AMD64_ADDR32NB), + }); + // relocation 1: [2] name rva => points to DLL name section .idata$6 + file.data.write_pod(&ImageRelocation { + virtual_address: u32_unaligned(12), + symbol_table_index: u32_unaligned(2), + typ: u16_unaligned(IMAGE_REL_AMD64_ADDR32NB), + }); + // relocation 2: [4] import address table rva => points to UNDEF symbol .idata$5 + file.data.write_pod(&ImageRelocation { + virtual_address: u32_unaligned(16), + symbol_table_index: u32_unaligned(4), + typ: u16_unaligned(IMAGE_REL_AMD64_ADDR32NB), + }); + + // [1] section .idata$6 data + CoffSectionRawData::new(&mut file, dll_name_header).write_c_str(dll_name); + + // COFF symbol table: + let mut symbol_table = file.start_symbol_table(); + symbol_table.add( + import_descriptor_symbol, + SymbolOptions { + section_number: 1, + storage_class: IMAGE_SYM_CLASS_EXTERNAL, + ..Default::default() + }, + ); + symbol_table.add( + ".idata$2", + SymbolOptions { + section_number: 1, + storage_class: IMAGE_SYM_CLASS_SECTION, + ..Default::default() + }, + ); + symbol_table.add( + ".idata$6", + SymbolOptions { + section_number: 2, + storage_class: IMAGE_SYM_CLASS_STATIC, + ..Default::default() + }, + ); + symbol_table.add( + ".idata$4", + SymbolOptions { storage_class: IMAGE_SYM_CLASS_SECTION, ..Default::default() }, + ); + symbol_table.add( + ".idata$5", + SymbolOptions { storage_class: IMAGE_SYM_CLASS_SECTION, ..Default::default() }, + ); + symbol_table.add( + NULL_IMPORT_DESCRIPTOR_SYMBOL, + SymbolOptions { storage_class: IMAGE_SYM_CLASS_EXTERNAL, ..Default::default() }, + ); + symbol_table.add( + null_thunk_data_symbol, + SymbolOptions { storage_class: IMAGE_SYM_CLASS_EXTERNAL, 
..Default::default() }, + ); +} + +pub(crate) fn write_null_thunk_data(data: &mut DataWriter, symbol: &str) { + // This is a COFF file with a two sections with 8 bytes of null data + let mut file = CoffFileWriter::new(data, IMAGE_FILE_MACHINE_AMD64); + + let import_address_section = file.write_section_header( + ".idata$5", + IMAGE_SCN_ALIGN_8BYTES + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE, + ); + let import_lookup_section = file.write_section_header( + ".idata$4", + IMAGE_SCN_ALIGN_8BYTES + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE, + ); + + CoffSectionRawData::new(&mut file, import_address_section).reserve_bytes(8); + + CoffSectionRawData::new(&mut file, import_lookup_section).reserve_bytes(8); + + file.start_symbol_table().add( + symbol, + SymbolOptions { + section_number: 1, + storage_class: IMAGE_SYM_CLASS_EXTERNAL, + ..Default::default() + }, + ); +} + +pub(crate) fn write_null_import_descriptor(data: &mut DataWriter) { + // This is a COFF file with a section with 20 bytes of null data + let mut file = CoffFileWriter::new(data, IMAGE_FILE_MACHINE_AMD64); + let header = file.write_section_header( + ".idata$3", + IMAGE_SCN_ALIGN_4BYTES + | IMAGE_SCN_CNT_INITIALIZED_DATA + | IMAGE_SCN_MEM_READ + | IMAGE_SCN_MEM_WRITE, + ); + CoffSectionRawData::new(&mut file, header).reserve_bytes(20); + file.start_symbol_table().add( + NULL_IMPORT_DESCRIPTOR_SYMBOL, + SymbolOptions { + section_number: 1, + storage_class: IMAGE_SYM_CLASS_EXTERNAL, + ..Default::default() + }, + ); +} + +struct CoffFileWriter<'data> { + data: &'data mut DataWriter, + offset: usize, + number_of_sections: u16, + string_table: CoffStringTable, +} + +impl<'data> CoffFileWriter<'data> { + fn new(data: &'data mut DataWriter, machine: u16) -> Self { + let file_offset = data.len(); + data.write_pod(&ImageFileHeader { + machine: u16_aligned(machine), + number_of_sections: u16_aligned(0), + time_date_stamp: u32_aligned(0), + 
pointer_to_symbol_table: u32_aligned(0), + number_of_symbols: u32_aligned(0), + size_of_optional_header: u16_aligned(0), + characteristics: u16_aligned(0), + }); + let string_table = CoffStringTable::new(); + Self { data, offset: file_offset, number_of_sections: 0, string_table } + } + + fn file_header_mut(&mut self) -> &mut ImageFileHeader { + self.data.get_pod_mut(self.offset) + } + + fn write_section_header(&mut self, name: &str, characteristics: u32) -> CoffSectionHeader { + self.number_of_sections += 1; + let offset = self.data.write_pod(&ImageSectionHeader { + name: self.string_table.get_raw_name(name), + virtual_size: u32_aligned(0), + virtual_address: u32_aligned(0), + size_of_raw_data: u32_aligned(0), // filled out later + pointer_to_raw_data: u32_aligned(0), // ditto. + pointer_to_relocations: u32_aligned(0), // (possibly) ditto. + pointer_to_linenumbers: u32_aligned(0), + number_of_relocations: u16_aligned(0), + number_of_linenumbers: u16_aligned(0), + characteristics: u32_aligned(characteristics), + }); + CoffSectionHeader { offset } + } + + fn start_symbol_table(&mut self) -> CoffSymbolTableWriter<'_, 'data> { + let offset = self.len(); + self.file_header_mut().pointer_to_symbol_table = u32_aligned((offset - self.offset) as u32); + CoffSymbolTableWriter { file: self, offset, number_of_symbols: 0 } + } +} + +impl Deref for CoffFileWriter<'_> { + type Target = DataWriter; + + fn deref(&self) -> &Self::Target { + self.data + } +} + +impl DerefMut for CoffFileWriter<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.data + } +} + +impl Drop for CoffFileWriter<'_> { + fn drop(&mut self) { + let number_of_sections = self.number_of_sections; + let header = self.file_header_mut(); + header.number_of_sections = u16_aligned(number_of_sections); + self.string_table.write(self.data); + } +} + +struct CoffStringTable(StringTable); + +impl CoffStringTable { + fn new() -> Self { + Self(StringTable::new()) + } + + fn write(&self, writer: &mut DataWriter) { 
+ let data = self.0.data(); + writer.write_u32_le(data.len() as u32 + 4); + writer.write(data); + } + + pub fn get_raw_name(&mut self, value: &str) -> [u8; 8] { + let mut result = [0u8; 8]; + if value.len() > 8 { + // add 4 for the string table length + let offset = 4 + self.0.find_or_insert(value); + result[4..].copy_from_slice(&u32::to_le_bytes(offset as u32)); + } else { + result.copy_from_slice(value.as_bytes()) + } + result + } +} + +#[derive(Copy, Clone)] +struct CoffSectionHeader { + offset: usize, +} + +impl CoffSectionHeader { + fn get_mut(self, data: &mut DataWriter) -> &mut ImageSectionHeader { + data.get_pod_mut(self.offset) + } +} + +struct CoffSectionRawData<'a, 'data> { + file: &'a mut CoffFileWriter<'data>, + header: CoffSectionHeader, + offset: usize, +} + +impl<'a, 'data> CoffSectionRawData<'a, 'data> { + fn new(file: &'a mut CoffFileWriter<'data>, header: CoffSectionHeader) -> Self { + let offset = file.data.len(); + Self { file, header, offset } + } +} + +impl Deref for CoffSectionRawData<'_, '_> { + type Target = DataWriter; + + fn deref(&self) -> &Self::Target { + self.file.data + } +} + +impl DerefMut for CoffSectionRawData<'_, '_> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.file.data + } +} + +impl Drop for CoffSectionRawData<'_, '_> { + fn drop(&mut self) { + // Included in size_of_raw_data - not sure if this is correct? + // Seems to be what MSVC does for the dll_name section. 
+ self.file.data.align(2, 0u8); + let end_offset = self.file.len(); + let header = self.header.get_mut(self.file.data); + let size_of_raw_data = end_offset - self.offset; + let pointer_to_raw_data = self.offset - self.file.offset; + header.size_of_raw_data = u32_aligned(size_of_raw_data as u32); + header.pointer_to_raw_data = u32_aligned(pointer_to_raw_data as u32); + } +} + +#[derive(Default)] +struct SymbolOptions { + value: u32, + section_number: i16, + // IMAGE_SYM_TYPE_* + base_type: u16, + // IMAGE_SYM_DTYPE_* + complex_type: u16, + storage_class: u8, + number_of_aux_symbols: u8, +} + +struct CoffSymbolTableWriter<'a, 'data> { + file: &'a mut CoffFileWriter<'data>, + offset: usize, + number_of_symbols: u32, +} + +impl CoffSymbolTableWriter<'_, '_> { + fn add(&mut self, name: &str, options: SymbolOptions) { + let name = self.file.string_table.get_raw_name(name); + self.file.write_pod(&ImageSymbol { + name, + value: u32_unaligned(options.value), + section_number: u16_unaligned(options.section_number as u16), + typ: u16_unaligned(options.base_type | options.complex_type << 8), + storage_class: options.storage_class, + number_of_aux_symbols: options.number_of_aux_symbols, + }); + self.number_of_symbols += 1; + } +} + +impl Drop for CoffSymbolTableWriter<'_, '_> { + fn drop(&mut self) { + let pointer_to_symbol_table = self.offset - self.file.offset; + let header = self.file.file_header_mut(); + header.pointer_to_symbol_table = u32_aligned(pointer_to_symbol_table as u32); + header.number_of_symbols = u32_aligned(self.number_of_symbols); + } +} diff --git a/src/dll_import_lib/data.rs b/src/dll_import_lib/data.rs new file mode 100644 index 000000000..19820f07c --- /dev/null +++ b/src/dll_import_lib/data.rs @@ -0,0 +1,73 @@ +pub(crate) struct DataWriter { + data: Vec, +} + +impl DataWriter { + pub(crate) fn new() -> Self { + Self { data: vec![] } + } + + pub(crate) fn len(&self) -> usize { + self.data.len() + } + + pub(crate) fn get_pod_mut(&mut self, offset: usize) 
-> &mut T + where + T: object::pod::Pod, + { + object::from_bytes_mut(&mut self.data[offset..]).expect("invalid POD offset").0 + } + + pub(crate) fn write(&mut self, data: &[u8]) { + self.data.extend_from_slice(data); + } + + pub(crate) fn write_pod(&mut self, value: &T) -> usize + where + T: object::pod::Pod, + { + let offset = self.data.len(); + self.data.extend_from_slice(object::bytes_of(value)); + offset + } + + pub(crate) fn write_c_str(&mut self, data: &str) { + self.data.extend_from_slice(data.as_bytes()); + self.data.push(0); + } + + pub(crate) fn write_u16_le(&mut self, data: u16) { + self.data.extend_from_slice(&data.to_le_bytes()); + } + + pub(crate) fn write_u32_be(&mut self, data: u32) { + self.data.extend_from_slice(&data.to_be_bytes()); + } + + pub(crate) fn write_u32_le(&mut self, data: u32) { + self.data.extend_from_slice(&data.to_le_bytes()); + } + + pub(crate) fn reserve_bytes(&mut self, count: usize) -> usize { + let offset = self.data.len(); + self.data.resize(offset + count, 0); + offset + } + + pub(crate) fn set_u32_be(&mut self, offset: usize, data: u32) { + self.data[offset..][..4].copy_from_slice(&data.to_be_bytes()); + } + + pub(crate) fn set_u32_le(&mut self, offset: usize, data: u32) { + self.data[offset..][..4].copy_from_slice(&data.to_le_bytes()); + } + + pub(crate) fn align(&mut self, alignment: usize, pad: u8) { + let offset = self.data.len(); + self.data.resize(offset.next_multiple_of(alignment), pad); + } + + pub(crate) fn into_data(self) -> Vec { + self.data + } +} diff --git a/src/dll_import_lib/mod.rs b/src/dll_import_lib/mod.rs new file mode 100644 index 000000000..ea58e4f8b --- /dev/null +++ b/src/dll_import_lib/mod.rs @@ -0,0 +1,194 @@ +// todo: pull out to a proper location. Really should be in `object` crate! +// todo: support ordinals +// todo: support name types (e.g. verbatim+) +// todo: support windows-gnu flavor? 
+// todo: provide machine +// todo: remove any panics, nice errors + +use data::DataWriter; + +mod ar; +mod data; +mod string_table; + +mod coff; + +pub(crate) fn generate(dll_name: &str, import_names: &[&str]) -> Vec { + // member count: one for each import_name in argument order, followed by the import + // descriptor. + let member_count = 3 + import_names.len(); + + assert!(member_count <= 0xFFFF, "too many import names"); + + // foo.dll => foo so we can construct the import descriptor symbol. + // At least for the Windows system dlls, don't seem to need any further + // escaping, e.g. "api-ms-win-appmodel-runtime-l1-1-1.dll" => + // "__IMPORT_DESCRIPTOR_api-ms-win-appmodel-runtime-l1-1-1" + // Not using std::path to avoid having to handle non-unicode paths. + let mut dll_basename = String::from(dll_name); + if let Some(index) = dll_basename.rfind('.') { + dll_basename.truncate(index); + } + + // Identify the target of a symbol + #[derive(Copy, Clone, Eq, PartialEq)] + enum SymbolValue { + // the __IMPORT_DESCRIPTOR_{dll_basename} used to build the final .idata section. + Descriptor, + // __NULL_IMPORT_DESCRIPTOR + NullDescriptor, + // \x7f{dll_basename}_NULL_THUNK_DATA + ThunkData, + // a short import object, specifically for import_names[.0] + Import(usize), + } + impl SymbolValue { + /// Location in member tables, not *necessarily* the order of the member in the archive. + fn member_index(self) -> usize { + match self { + Self::Descriptor => 0, + Self::NullDescriptor => 1, + Self::ThunkData => 2, + Self::Import(index) => 3 + index, + } + } + } + + // Note we are using the behavior of BTee* that it keeps its keys in sorted order: this + // is required by the MSVC symbol table so the linker can use binary search. 
+ let mut symbols = std::collections::BTreeMap::, SymbolValue>::new(); + + let import_descriptor_symbol = format!("__IMPORT_DESCRIPTOR_{dll_basename}"); + symbols.insert(import_descriptor_symbol.as_str().into(), SymbolValue::Descriptor); + symbols.insert(coff::NULL_IMPORT_DESCRIPTOR_SYMBOL.into(), SymbolValue::NullDescriptor); + let null_thunk_data_symbol = format!("\x7f{dll_basename}_NULL_THUNK_DATA"); + symbols.insert(null_thunk_data_symbol.as_str().into(), SymbolValue::ThunkData); + + for (index, &name) in import_names.iter().enumerate() { + symbols.insert(name.into(), SymbolValue::Import(index)); + symbols.insert(format!("__imp_{name}").into(), SymbolValue::Import(index)); + } + + let symbol_count = symbols.len(); + + let mut writer = ar::Writer::new(); + + // member names are all the dll_name with the MSVC tools. + let member_name = writer.member_name(dll_name); + + // Standard System-V / GNU symbol table member + let mut gnu_symbols = writer.start_member(ar::MemberName::SYMBOL_TABLE); + // member table: one entry per symbol (duplicates allowed for aliasing) + gnu_symbols.write_u32_be(symbol_count as u32); + // reserve space for member offsets. + let gnu_member_table_offset = gnu_symbols.reserve_bytes(symbol_count * 4); + // symbol string table + for name in symbols.keys() { + gnu_symbols.write_c_str(name); + } + // done with GNU symbol directory + drop(gnu_symbols); + + // MSVC tools symbol table member + let mut ms_symbols = writer.start_member(ar::MemberName::SYMBOL_TABLE); + // member offset table + ms_symbols.write_u32_le(member_count as u32); + let ms_member_table_offset = ms_symbols.reserve_bytes(member_count * 4); + // symbol table + ms_symbols.write_u32_le(symbol_count as u32); + // member index we assume symbols are in the same order as the member table. + for &value in symbols.values() { + let member_index = value.member_index(); + // Yep, it's a 1-based index. Who knows why. + // cast to u16 should be safe due to assert!() on member_count above. 
+ ms_symbols.write_u16_le(1 + member_index as u16); + } + // string table again (could just copy from legacy string table above?) + for name in symbols.keys() { + ms_symbols.write_c_str(name); + } + // done with MSVC symbol directory + drop(ms_symbols); + + writer.write_long_names(); + // can't use writer.member_name() from here + + { + // import descriptor member + let mut member = writer.start_member(member_name); + + let symbol_value = SymbolValue::Descriptor; + // update member offsets + let member_offset = member.header_offset as u32; + // Updating GNU symbol table is a bit messy with the aliases + for (index, value) in symbols.values().enumerate() { + if symbol_value == *value { + member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); + } + } + member.set_u32_le(ms_member_table_offset + symbol_value.member_index() * 4, member_offset); + + coff::write_import_descriptor( + &mut member, + dll_name, + &import_descriptor_symbol, + &null_thunk_data_symbol, + ); + } + + { + // null thunk data member + let mut member = writer.start_member(member_name); + + let symbol_value = SymbolValue::ThunkData; + // update member offsets + let member_offset = member.header_offset as u32; + // Updating GNU symbol table is a bit messy with the aliases + for (index, value) in symbols.values().enumerate() { + if symbol_value == *value { + member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); + } + } + member.set_u32_le(ms_member_table_offset + symbol_value.member_index() * 4, member_offset); + + coff::write_null_thunk_data(&mut member, &null_thunk_data_symbol); + } + + { + // null import descriptor member + let mut member = writer.start_member(member_name); + + let symbol_value = SymbolValue::NullDescriptor; + // update member offsets + let member_offset = member.header_offset as u32; + // Updating GNU symbol table is a bit messy with the aliases + for (index, value) in symbols.values().enumerate() { + if symbol_value == *value { + 
member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); + } + } + member.set_u32_le(ms_member_table_offset + symbol_value.member_index() * 4, member_offset); + + coff::write_null_import_descriptor(&mut member); + } + + // short import object members + for (index, name) in import_names.iter().enumerate() { + let mut member = writer.start_member(member_name); + + let symbol_value = SymbolValue::Import(index); + // update member offsets + let member_offset = member.header_offset as u32; + // Updating GNU symbol table is a bit messy with the aliases + for (index, value) in symbols.values().enumerate() { + if symbol_value == *value { + member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); + } + } + member.set_u32_le(ms_member_table_offset + symbol_value.member_index() * 4, member_offset); + + coff::write_short_import(&mut member, dll_name, name, None); + } + + writer.into_data() +} diff --git a/src/dll_import_lib/string_table.rs b/src/dll_import_lib/string_table.rs new file mode 100644 index 000000000..3d3cbf2d4 --- /dev/null +++ b/src/dll_import_lib/string_table.rs @@ -0,0 +1,28 @@ +pub struct StringTable(Vec); + +impl StringTable { + pub fn new() -> Self { + Self(vec![]) + } + + pub fn data(&self) -> &[u8] { + self.0.as_slice() + } + + pub fn find_or_insert(&mut self, value: &str) -> usize { + // Find the name *including the null terminator* in the existing buffer. + // Note, this could find "bar\0" in "foobar\0", but that should be fine? + // It still counts as a null terminated "bar" string. 
+ self.0 + .windows(value.len() + 1) + .position(|window| { + &window[..value.len()] == value.as_bytes() && window[value.len()] == b'\0' + }) + .unwrap_or_else(|| { + let offset = self.0.len(); + self.0.extend_from_slice(value.as_bytes()); + self.0.push(b'\0'); + offset + }) + } +} diff --git a/src/lib.rs b/src/lib.rs index 148193b5a..610c9ce52 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,6 +61,7 @@ mod config; mod constant; mod debuginfo; mod discriminant; +mod dll_import_lib; mod driver; mod global_asm; mod inline_asm; From a38d47146c0536f7526ded3b138afc945f160891 Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Wed, 8 Nov 2023 23:55:38 +1300 Subject: [PATCH 07/11] Avoid requiring aligned access to COFF fields. And fix lints. --- src/dll_import_lib/ar.rs | 3 +- src/dll_import_lib/coff.rs | 189 +++++++++++++++++++---------- src/dll_import_lib/string_table.rs | 8 +- 3 files changed, 131 insertions(+), 69 deletions(-) diff --git a/src/dll_import_lib/ar.rs b/src/dll_import_lib/ar.rs index 3211c6a44..100c1e448 100644 --- a/src/dll_import_lib/ar.rs +++ b/src/dll_import_lib/ar.rs @@ -79,7 +79,7 @@ use super::string_table::StringTable; use super::DataWriter; #[derive(Copy, Clone)] -pub(crate) struct MemberName(pub [u8; 16]); +pub(crate) struct MemberName(pub(crate) [u8; 16]); impl MemberName { pub(crate) const SYMBOL_TABLE: Self = MemberName(*b"/ "); @@ -153,6 +153,7 @@ pub(crate) struct Member<'data> { pub(crate) header_offset: usize, } +#[allow(dead_code)] // TODO: remove if this isn't pulled out into a crate? 
impl<'data> Member<'data> { const HEADER_SIZE: usize = std::mem::size_of::(); diff --git a/src/dll_import_lib/coff.rs b/src/dll_import_lib/coff.rs index 46decf7a6..24f4300f4 100644 --- a/src/dll_import_lib/coff.rs +++ b/src/dll_import_lib/coff.rs @@ -43,7 +43,7 @@ // 4: sequence of null-terminated strings use object::pe::*; -use object::{LittleEndian as LE, U16Bytes, U32Bytes, U16, U32}; +use object::{LittleEndian as LE, U16Bytes, U32Bytes}; use std::ops::{Deref, DerefMut}; use super::data::DataWriter; @@ -51,40 +51,60 @@ use super::string_table::StringTable; pub(crate) const NULL_IMPORT_DESCRIPTOR_SYMBOL: &str = "__NULL_IMPORT_DESCRIPTOR"; -fn u16_aligned(value: u16) -> U16 { - U16::new(LE, value) +fn u16(value: u16) -> U16Bytes { + U16Bytes::new(LE, value) } -fn u32_aligned(value: u32) -> U32 { - U32::new(LE, value) +fn u32(value: u32) -> U32Bytes { + U32Bytes::new(LE, value) } -fn u16_unaligned(value: u16) -> U16Bytes { - U16Bytes::new(LE, value) +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub(crate) struct ImportObjectHeaderUnaligned { + /// Must be IMAGE_FILE_MACHINE_UNKNOWN + pub(crate) sig1: U16Bytes, + /// Must be IMPORT_OBJECT_HDR_SIG2. 
+ pub(crate) sig2: U16Bytes, + pub(crate) version: U16Bytes, + pub(crate) machine: U16Bytes, + /// Time/date stamp + pub(crate) time_date_stamp: U32Bytes, + /// particularly useful for incremental links + pub(crate) size_of_data: U32Bytes, + + /// if grf & IMPORT_OBJECT_ORDINAL + pub(crate) ordinal_or_hint: U16Bytes, + + // WORD Type : 2; + // WORD NameType : 3; + // WORD Reserved : 11; + pub(crate) name_type: U16Bytes, } -fn u32_unaligned(value: u32) -> U32Bytes { - U32Bytes::new(LE, value) -} +/// # Safety +/// A type that is `Pod` must: +/// - be `#[repr(C)]` or `#[repr(transparent)]` +/// - have no invalid byte values +/// - have no padding +unsafe impl object::pod::Pod for ImportObjectHeaderUnaligned {} pub(crate) fn write_short_import( data: &mut DataWriter, dll_name: &str, name: &&str, - ordinal_or_hint: Option, + ordinal_or_hint: Option, ) { - data.write_pod(&ImportObjectHeader { - sig1: u16_aligned(IMAGE_FILE_MACHINE_UNKNOWN), - sig2: u16_aligned(IMPORT_OBJECT_HDR_SIG2), - version: u16_aligned(0), - machine: u16_aligned(IMAGE_FILE_MACHINE_AMD64), - time_date_stamp: u32_aligned(0), - size_of_data: u32_aligned((name.len() + 1 + dll_name.len() + 1) as u32), - ordinal_or_hint: u16_aligned(ordinal_or_hint.unwrap_or_default()), - name_type: u16_aligned( - IMPORT_OBJECT_CODE << IMPORT_OBJECT_TYPE_SHIFT - | IMPORT_OBJECT_NAME << IMPORT_OBJECT_NAME_SHIFT, - ), + data.write_pod(&ImportObjectHeaderUnaligned { + sig1: u16(IMAGE_FILE_MACHINE_UNKNOWN), + sig2: u16(IMPORT_OBJECT_HDR_SIG2), + version: u16(0), + machine: u16(IMAGE_FILE_MACHINE_AMD64), + time_date_stamp: u32(0), + size_of_data: u32((name.len() + 1 + dll_name.len() + 1) as u32), + ordinal_or_hint: u16(ordinal_or_hint.unwrap_or_default()), + name_type: u16(IMPORT_OBJECT_CODE << IMPORT_OBJECT_TYPE_SHIFT + | IMPORT_OBJECT_NAME << IMPORT_OBJECT_NAME_SHIFT), }); data.write_c_str(name); data.write_c_str(dll_name); @@ -143,29 +163,29 @@ pub(crate) fn write_import_descriptor( let 
import_descriptor_pointer_to_relocations = file.data.len() - file.offset; let header = import_directory_header.get_mut(file.data); - header.number_of_relocations = u16_aligned(3); + header.number_of_relocations = u16(3); - header.pointer_to_relocations = u32_aligned(import_descriptor_pointer_to_relocations as u32); + header.pointer_to_relocations = u32(import_descriptor_pointer_to_relocations as u32); // todo: CoffRelocWriter // relocation 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 file.data.write_pod(&ImageRelocation { - virtual_address: u32_unaligned(0), - symbol_table_index: u32_unaligned(3), - typ: u16_unaligned(IMAGE_REL_AMD64_ADDR32NB), + virtual_address: u32(0), + symbol_table_index: u32(3), + typ: u16(IMAGE_REL_AMD64_ADDR32NB), }); // relocation 1: [2] name rva => points to DLL name section .idata$6 file.data.write_pod(&ImageRelocation { - virtual_address: u32_unaligned(12), - symbol_table_index: u32_unaligned(2), - typ: u16_unaligned(IMAGE_REL_AMD64_ADDR32NB), + virtual_address: u32(12), + symbol_table_index: u32(2), + typ: u16(IMAGE_REL_AMD64_ADDR32NB), }); // relocation 2: [4] import address table rva => points to UNDEF symbol .idata$5 file.data.write_pod(&ImageRelocation { - virtual_address: u32_unaligned(16), - symbol_table_index: u32_unaligned(4), - typ: u16_unaligned(IMAGE_REL_AMD64_ADDR32NB), + virtual_address: u32(16), + symbol_table_index: u32(4), + typ: u16(IMAGE_REL_AMD64_ADDR32NB), }); // [1] section .idata$6 data @@ -269,6 +289,47 @@ pub(crate) fn write_null_import_descriptor(data: &mut DataWriter) { ); } +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub(crate) struct ImageFileHeaderUnaligned { + pub(crate) machine: U16Bytes, + pub(crate) number_of_sections: U16Bytes, + pub(crate) time_date_stamp: U32Bytes, + pub(crate) pointer_to_symbol_table: U32Bytes, + pub(crate) number_of_symbols: U32Bytes, + pub(crate) size_of_optional_header: U16Bytes, + pub(crate) characteristics: U16Bytes, +} + +/// # Safety +/// A type that is 
`Pod` must: +/// - be `#[repr(C)]` or `#[repr(transparent)]` +/// - have no invalid byte values +/// - have no padding +unsafe impl object::pod::Pod for ImageFileHeaderUnaligned {} + +#[derive(Debug, Default, Clone, Copy)] +#[repr(C)] +pub(crate) struct ImageSectionHeaderUnaligned { + pub(crate) name: [u8; IMAGE_SIZEOF_SHORT_NAME], + pub(crate) virtual_size: U32Bytes, + pub(crate) virtual_address: U32Bytes, + pub(crate) size_of_raw_data: U32Bytes, + pub(crate) pointer_to_raw_data: U32Bytes, + pub(crate) pointer_to_relocations: U32Bytes, + pub(crate) pointer_to_linenumbers: U32Bytes, + pub(crate) number_of_relocations: U16Bytes, + pub(crate) number_of_linenumbers: U16Bytes, + pub(crate) characteristics: U32Bytes, +} + +/// # Safety +/// A type that is `Pod` must: +/// - be `#[repr(C)]` or `#[repr(transparent)]` +/// - have no invalid byte values +/// - have no padding +unsafe impl object::pod::Pod for ImageSectionHeaderUnaligned {} + struct CoffFileWriter<'data> { data: &'data mut DataWriter, offset: usize, @@ -279,43 +340,43 @@ struct CoffFileWriter<'data> { impl<'data> CoffFileWriter<'data> { fn new(data: &'data mut DataWriter, machine: u16) -> Self { let file_offset = data.len(); - data.write_pod(&ImageFileHeader { - machine: u16_aligned(machine), - number_of_sections: u16_aligned(0), - time_date_stamp: u32_aligned(0), - pointer_to_symbol_table: u32_aligned(0), - number_of_symbols: u32_aligned(0), - size_of_optional_header: u16_aligned(0), - characteristics: u16_aligned(0), + data.write_pod(&ImageFileHeaderUnaligned { + machine: u16(machine), + number_of_sections: u16(0), + time_date_stamp: u32(0), + pointer_to_symbol_table: u32(0), + number_of_symbols: u32(0), + size_of_optional_header: u16(0), + characteristics: u16(0), }); let string_table = CoffStringTable::new(); Self { data, offset: file_offset, number_of_sections: 0, string_table } } - fn file_header_mut(&mut self) -> &mut ImageFileHeader { + fn file_header_mut(&mut self) -> &mut ImageFileHeaderUnaligned { 
self.data.get_pod_mut(self.offset) } fn write_section_header(&mut self, name: &str, characteristics: u32) -> CoffSectionHeader { self.number_of_sections += 1; - let offset = self.data.write_pod(&ImageSectionHeader { + let offset = self.data.write_pod(&ImageSectionHeaderUnaligned { name: self.string_table.get_raw_name(name), - virtual_size: u32_aligned(0), - virtual_address: u32_aligned(0), - size_of_raw_data: u32_aligned(0), // filled out later - pointer_to_raw_data: u32_aligned(0), // ditto. - pointer_to_relocations: u32_aligned(0), // (possibly) ditto. - pointer_to_linenumbers: u32_aligned(0), - number_of_relocations: u16_aligned(0), - number_of_linenumbers: u16_aligned(0), - characteristics: u32_aligned(characteristics), + virtual_size: u32(0), + virtual_address: u32(0), + size_of_raw_data: u32(0), // filled out later + pointer_to_raw_data: u32(0), // ditto. + pointer_to_relocations: u32(0), // (possibly) ditto. + pointer_to_linenumbers: u32(0), + number_of_relocations: u16(0), + number_of_linenumbers: u16(0), + characteristics: u32(characteristics), }); CoffSectionHeader { offset } } fn start_symbol_table(&mut self) -> CoffSymbolTableWriter<'_, 'data> { let offset = self.len(); - self.file_header_mut().pointer_to_symbol_table = u32_aligned((offset - self.offset) as u32); + self.file_header_mut().pointer_to_symbol_table = u32((offset - self.offset) as u32); CoffSymbolTableWriter { file: self, offset, number_of_symbols: 0 } } } @@ -338,7 +399,7 @@ impl Drop for CoffFileWriter<'_> { fn drop(&mut self) { let number_of_sections = self.number_of_sections; let header = self.file_header_mut(); - header.number_of_sections = u16_aligned(number_of_sections); + header.number_of_sections = u16(number_of_sections); self.string_table.write(self.data); } } @@ -356,7 +417,7 @@ impl CoffStringTable { writer.write(data); } - pub fn get_raw_name(&mut self, value: &str) -> [u8; 8] { + pub(crate) fn get_raw_name(&mut self, value: &str) -> [u8; 8] { let mut result = [0u8; 8]; if 
value.len() > 8 { // add 4 for the string table length @@ -375,7 +436,7 @@ struct CoffSectionHeader { } impl CoffSectionHeader { - fn get_mut(self, data: &mut DataWriter) -> &mut ImageSectionHeader { + fn get_mut(self, data: &mut DataWriter) -> &mut ImageSectionHeaderUnaligned { data.get_pod_mut(self.offset) } } @@ -416,8 +477,8 @@ impl Drop for CoffSectionRawData<'_, '_> { let header = self.header.get_mut(self.file.data); let size_of_raw_data = end_offset - self.offset; let pointer_to_raw_data = self.offset - self.file.offset; - header.size_of_raw_data = u32_aligned(size_of_raw_data as u32); - header.pointer_to_raw_data = u32_aligned(pointer_to_raw_data as u32); + header.size_of_raw_data = u32(size_of_raw_data as u32); + header.pointer_to_raw_data = u32(pointer_to_raw_data as u32); } } @@ -444,9 +505,9 @@ impl CoffSymbolTableWriter<'_, '_> { let name = self.file.string_table.get_raw_name(name); self.file.write_pod(&ImageSymbol { name, - value: u32_unaligned(options.value), - section_number: u16_unaligned(options.section_number as u16), - typ: u16_unaligned(options.base_type | options.complex_type << 8), + value: u32(options.value), + section_number: u16(options.section_number as u16), + typ: u16(options.base_type | options.complex_type << 8), storage_class: options.storage_class, number_of_aux_symbols: options.number_of_aux_symbols, }); @@ -458,7 +519,7 @@ impl Drop for CoffSymbolTableWriter<'_, '_> { fn drop(&mut self) { let pointer_to_symbol_table = self.offset - self.file.offset; let header = self.file.file_header_mut(); - header.pointer_to_symbol_table = u32_aligned(pointer_to_symbol_table as u32); - header.number_of_symbols = u32_aligned(self.number_of_symbols); + header.pointer_to_symbol_table = u32(pointer_to_symbol_table as u32); + header.number_of_symbols = u32(self.number_of_symbols); } } diff --git a/src/dll_import_lib/string_table.rs b/src/dll_import_lib/string_table.rs index 3d3cbf2d4..f1150d1bf 100644 --- a/src/dll_import_lib/string_table.rs +++ 
b/src/dll_import_lib/string_table.rs @@ -1,15 +1,15 @@ -pub struct StringTable(Vec); +pub(crate) struct StringTable(Vec); impl StringTable { - pub fn new() -> Self { + pub(crate) fn new() -> Self { Self(vec![]) } - pub fn data(&self) -> &[u8] { + pub(crate) fn data(&self) -> &[u8] { self.0.as_slice() } - pub fn find_or_insert(&mut self, value: &str) -> usize { + pub(crate) fn find_or_insert(&mut self, value: &str) -> usize { // Find the name *including the null terminator* in the existing buffer. // Note, this could find "bar\0" in "foobar\0", but that should be fine? // It still counts as a null terminated "bar" string. From 50cbdc825ca5d32aa65c48714f764856c61a74f8 Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Thu, 9 Nov 2023 23:38:15 +1300 Subject: [PATCH 08/11] Replace the use of my `ar` code with `ar_archive_writer` --- Cargo.lock | 10 ++ Cargo.toml | 1 + src/dll_import_lib/ar.rs | 235 ------------------------------------- src/dll_import_lib/data.rs | 16 --- src/dll_import_lib/mod.rs | 226 +++++++++++++---------------------- 5 files changed, 95 insertions(+), 393 deletions(-) delete mode 100644 src/dll_import_lib/ar.rs diff --git a/Cargo.lock b/Cargo.lock index dcb6cc575..fdfd4740e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,6 +19,15 @@ version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +[[package]] +name = "ar_archive_writer" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9792d37ca5173d7e7f4fe453739a0671d0557915a030a383d6b866476bbc3e71" +dependencies = [ + "object", +] + [[package]] name = "arbitrary" version = "1.3.0" @@ -328,6 +337,7 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" name = "rustc_codegen_cranelift" version = "0.1.0" dependencies = [ + "ar_archive_writer", "cranelift-codegen", "cranelift-frontend", "cranelift-jit", diff --git a/Cargo.toml 
b/Cargo.toml index 30db10f74..ebdaa4f32 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ target-lexicon = "0.12.0" gimli = { version = "0.28", default-features = false, features = ["write"]} object = { version = "0.32", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] } +ar_archive_writer = "0.1.5" indexmap = "2.0.0" libloading = { version = "0.7.3", optional = true } smallvec = "1.8.1" diff --git a/src/dll_import_lib/ar.rs b/src/dll_import_lib/ar.rs deleted file mode 100644 index 100c1e448..000000000 --- a/src/dll_import_lib/ar.rs +++ /dev/null @@ -1,235 +0,0 @@ -//! https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format -//! -//! Windows .lib files are System-V (aka. GNU) flavored ar files with an additional MSVC-specific -//! symbol lookup member after the standard one. -//! -//! An ar archive is the 8 bytes `b"!\n"` followed by a sequence of 60 byte member headers: -//! -//! ```plaintext -//! 0: name: [u8; 16], // member name, terminated with "/". If it is longer than 15, then -//! // use "/n" where "n" is a decimal for the offset in bytes into -//! // the longnames ("//") member contents. -//! 16: date: [u8; 12], // ASCII decimal seconds since UNIX epoch - always -1 for MSVC -//! 28: uid: [u8; 6], // ASCII decimal user id. Always blank for MSVC -//! 34: gid: [u8; 6], // ditto for group id. -//! 40: mode: [u8; 8], // ASCII octal UNIX mode. 0 for MSVC -//! 48: size: [u8; 10], // ASCII decimal data size. -//! 58: end: b"`\n", -//! ``` -//! -//! then `size` bytes of member payload data. If payload is odd sized, it must be padded to an even -//! offset with `\n`. -//! -//! Standard archives have an initial member with the raw name `/` containing a table with the -//! offsets of the members containing exported symbols, with big-endian encoding: -//! -//! ```plaintext -//! count: u32_be, // number of indexed symbols -//! 
offsets: [u32_be, count], // file offsets to the header of the member that contains -//! // that symbol. -//! names: * // sequence of null terminated symbol names. -//! ``` -//! -//! MSVC lib archives then have an additional table member that also has the name `/`, and stores -//! the same information. This uses little-endian encoding, separates the member offset table from -//! the symbol table, and requires symbols to be sorted to allow binary search lookups. -//! -//! ```plaintext -//! member_count: u32, // number of members -//! member_offsets: [u32; member_count], // file offsets to each member header -//! symbol_count: u32, // number of symbols -//! symbol_member: [u16; symbol_count], // *1-based* index of the member that contains -//! // each symbol -//! symbol_names: * // sequence of null terminated symbol names in the same -//! // order as symbol_member. -//! ``` -//! -//! Then the standard long names member (`//`), which stores just a sequence of null terminated -//! strings indexed by members using the long name format `/n` as described above. This is not -//! required for MSVC if there are no long names. -//! -//! Then content members follow. -//! -//! The member name doesn't seem to matter, including duplicates, for import libraries MSVC uses -//! the dll name for every member. -//! -//! The short import object has the form: -//! -//! ```plaintext -//! 0: header: -//! 0: sig1: 0u16 -//! 2: sig2: 0xFFFFu16 -//! 4: version: u16, // normally 0 -//! 6: machine: u16, // IMAGE_MACHINE_* value, e.g. 0x8664 for AMD64 -//! 8: time_date_stamp: u32, // normally 0 -//! 12: size_of_data: u32, // size following the header -//! 16: ordinal_or_hint: u16, // depending on flag -//! 18: object_type: u2, // IMPORT_OBJECT_{CODE,DATA,CONST} = 0, 1, 2 -//! name_type: u3, // IMPORT_OBJECT_{ORDINAL,NAME,NAME_NO_PREFIX,NAME_UNDECORATE,NAME_EXPORTAS} = 0, 1, 2, 3, 4 -//! reserved: u11, -//! 20: data: // size_of_data bytes -//! name: * // import name; null terminated string -//! 
dll_name: * // dll name; null terminated string -//! ``` - -use std::io::Write; -use std::ops::{Deref, DerefMut}; - -use super::string_table::StringTable; -use super::DataWriter; - -#[derive(Copy, Clone)] -pub(crate) struct MemberName(pub(crate) [u8; 16]); - -impl MemberName { - pub(crate) const SYMBOL_TABLE: Self = MemberName(*b"/ "); - pub(crate) const LONG_NAMES: Self = MemberName(*b"// "); -} - -pub(crate) struct Writer { - data: DataWriter, - long_names: Option, -} - -impl Writer { - #[allow(clippy::new_without_default)] // Default should probably not write a signature? - pub(crate) fn new() -> Self { - let long_names = Some(StringTable::new()); - let mut data = DataWriter::new(); - data.write(b"!\n"); - Self { data, long_names } - } - - pub(crate) fn member_name(&mut self, name: &str) -> MemberName { - let Some(ref mut long_buf) = self.long_names else { - panic!("already wrote long names member"); - }; - - if name.len() < 16 { - let mut buf = [b' '; 16]; - buf[..name.len()].copy_from_slice(name.as_bytes()); - buf[name.len()] = b'/'; - MemberName(buf) - } else { - let offset = long_buf.find_or_insert(name); - let mut buf = [b' '; 16]; - write!(&mut buf[..], "/{offset}").expect("writing long name should not fail"); - MemberName(buf) - } - } - - pub(crate) fn start_member(&mut self, name: MemberName) -> Member<'_> { - Member::new(&mut self.data, name) - } - - pub(crate) fn write_long_names(&mut self) { - let string_table = self.long_names.take().expect("already wrote long names member"); - let mut member = self.start_member(MemberName::LONG_NAMES); - member.write(string_table.data()); - drop(member); - } - - pub(crate) fn into_data(self) -> Vec { - self.data.into_data() - } -} - -impl Deref for Writer { - type Target = DataWriter; - - fn deref(&self) -> &Self::Target { - &self.data - } -} - -impl DerefMut for Writer { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.data - } -} - -pub(crate) struct Member<'data> { - pub(crate) data: &'data mut 
DataWriter, - pub(crate) header_offset: usize, -} - -#[allow(dead_code)] // TODO: remove if this isn't pulled out into a crate? -impl<'data> Member<'data> { - const HEADER_SIZE: usize = std::mem::size_of::(); - - fn new(data: &'data mut DataWriter, name: MemberName) -> Self { - // fill the header MSVC defaults. - let header_offset = data.write_pod(&object::archive::Header { - name: name.0, - date: *b"-1 ", - uid: [b' '; 6], - gid: [b' '; 6], - mode: *b"0 ", - size: [b' '; 10], // filled out in Drop - terminator: object::archive::TERMINATOR, - }); - - Self { data, header_offset } - } - - pub(crate) fn header_mut(&mut self) -> &mut object::archive::Header { - self.data.get_pod_mut(self.header_offset) - } - - pub(crate) fn set_name(&mut self, name: MemberName) { - self.header_mut().name = name.0; - } - - pub(crate) fn set_time_date_stamp(&mut self, value: i32) -> std::io::Result<()> { - let header = self.header_mut(); - write!(&mut header.date[..], "{value:<12}") - } - - pub(crate) fn set_uid(&mut self, value: Option) -> std::io::Result<()> { - let header = self.header_mut(); - if let Some(value) = value { - write!(&mut header.uid[..], "{value:<6}") - } else { - header.uid.fill(b' '); - Ok(()) - } - } - - pub(crate) fn set_gid(&mut self, value: Option) -> std::io::Result<()> { - let header = self.header_mut(); - if let Some(value) = value { - write!(&mut header.gid[..], "{value:<6}") - } else { - header.gid.fill(b' '); - Ok(()) - } - } - - pub(crate) fn set_mode(&mut self, value: u16) -> std::io::Result<()> { - write!(&mut self.header_mut().mode[..], "{value:o<8}") - } -} - -impl Deref for Member<'_> { - type Target = DataWriter; - - fn deref(&self) -> &Self::Target { - self.data - } -} - -impl DerefMut for Member<'_> { - fn deref_mut(&mut self) -> &mut Self::Target { - self.data - } -} - -impl<'a> Drop for Member<'a> { - fn drop(&mut self) { - let data_start = self.header_offset + Self::HEADER_SIZE; - let data_size = self.data.len() - data_start; - write!(&mut 
self.header_mut().size[..], "{data_size}") - .expect("data size should always fit in 10 bytes"); - self.data.align(2, b'\n'); - } -} diff --git a/src/dll_import_lib/data.rs b/src/dll_import_lib/data.rs index 19820f07c..e4beb9d1b 100644 --- a/src/dll_import_lib/data.rs +++ b/src/dll_import_lib/data.rs @@ -36,14 +36,6 @@ impl DataWriter { self.data.push(0); } - pub(crate) fn write_u16_le(&mut self, data: u16) { - self.data.extend_from_slice(&data.to_le_bytes()); - } - - pub(crate) fn write_u32_be(&mut self, data: u32) { - self.data.extend_from_slice(&data.to_be_bytes()); - } - pub(crate) fn write_u32_le(&mut self, data: u32) { self.data.extend_from_slice(&data.to_le_bytes()); } @@ -54,14 +46,6 @@ impl DataWriter { offset } - pub(crate) fn set_u32_be(&mut self, offset: usize, data: u32) { - self.data[offset..][..4].copy_from_slice(&data.to_be_bytes()); - } - - pub(crate) fn set_u32_le(&mut self, offset: usize, data: u32) { - self.data[offset..][..4].copy_from_slice(&data.to_le_bytes()); - } - pub(crate) fn align(&mut self, alignment: usize, pad: u8) { let offset = self.data.len(); self.data.resize(offset.next_multiple_of(alignment), pad); diff --git a/src/dll_import_lib/mod.rs b/src/dll_import_lib/mod.rs index ea58e4f8b..b72af3129 100644 --- a/src/dll_import_lib/mod.rs +++ b/src/dll_import_lib/mod.rs @@ -5,20 +5,19 @@ // todo: provide machine // todo: remove any panics, nice errors +use std::ffi::CStr; + +use object::{Object, ObjectSymbol}; + use data::DataWriter; -mod ar; mod data; mod string_table; mod coff; pub(crate) fn generate(dll_name: &str, import_names: &[&str]) -> Vec { - // member count: one for each import_name in argument order, followed by the import - // descriptor. - let member_count = 3 + import_names.len(); - - assert!(member_count <= 0xFFFF, "too many import names"); + let mut members = Vec::new(); // foo.dll => foo so we can construct the import descriptor symbol. 
// At least for the Windows system dlls, don't seem to need any further @@ -30,165 +29,108 @@ pub(crate) fn generate(dll_name: &str, import_names: &[&str]) -> Vec { dll_basename.truncate(index); } - // Identify the target of a symbol - #[derive(Copy, Clone, Eq, PartialEq)] - enum SymbolValue { - // the __IMPORT_DESCRIPTOR_{dll_basename} used to build the final .idata section. - Descriptor, - // __NULL_IMPORT_DESCRIPTOR - NullDescriptor, - // \x7f{dll_basename}_NULL_THUNK_DATA - ThunkData, - // a short import object, specifically for import_names[.0] - Import(usize), - } - impl SymbolValue { - /// Location in member tables, not *necessarily* the order of the member in the archive. - fn member_index(self) -> usize { - match self { - Self::Descriptor => 0, - Self::NullDescriptor => 1, - Self::ThunkData => 2, - Self::Import(index) => 3 + index, - } - } - } - - // Note we are using the behavior of BTee* that it keeps its keys in sorted order: this - // is required by the MSVC symbol table so the linker can use binary search. - let mut symbols = std::collections::BTreeMap::, SymbolValue>::new(); - let import_descriptor_symbol = format!("__IMPORT_DESCRIPTOR_{dll_basename}"); - symbols.insert(import_descriptor_symbol.as_str().into(), SymbolValue::Descriptor); - symbols.insert(coff::NULL_IMPORT_DESCRIPTOR_SYMBOL.into(), SymbolValue::NullDescriptor); let null_thunk_data_symbol = format!("\x7f{dll_basename}_NULL_THUNK_DATA"); - symbols.insert(null_thunk_data_symbol.as_str().into(), SymbolValue::ThunkData); - for (index, &name) in import_names.iter().enumerate() { - symbols.insert(name.into(), SymbolValue::Import(index)); - symbols.insert(format!("__imp_{name}").into(), SymbolValue::Import(index)); - } - - let symbol_count = symbols.len(); - - let mut writer = ar::Writer::new(); - - // member names are all the dll_name with the MSVC tools. 
- let member_name = writer.member_name(dll_name); - - // Standard System-V / GNU symbol table member - let mut gnu_symbols = writer.start_member(ar::MemberName::SYMBOL_TABLE); - // member table: one entry per symbol (duplicates allowed for aliasing) - gnu_symbols.write_u32_be(symbol_count as u32); - // reserve space for member offsets. - let gnu_member_table_offset = gnu_symbols.reserve_bytes(symbol_count * 4); - // symbol string table - for name in symbols.keys() { - gnu_symbols.write_c_str(name); - } - // done with GNU symbol directory - drop(gnu_symbols); - - // MSVC tools symbol table member - let mut ms_symbols = writer.start_member(ar::MemberName::SYMBOL_TABLE); - // member offset table - ms_symbols.write_u32_le(member_count as u32); - let ms_member_table_offset = ms_symbols.reserve_bytes(member_count * 4); - // symbol table - ms_symbols.write_u32_le(symbol_count as u32); - // member index we assume symbols are in the same order as the member table. - for &value in symbols.values() { - let member_index = value.member_index(); - // Yep, it's a 1-based index. Who knows why. - // cast to u16 should be safe due to assert!() on member_count above. - ms_symbols.write_u16_le(1 + member_index as u16); - } - // string table again (could just copy from legacy string table above?) 
- for name in symbols.keys() { - ms_symbols.write_c_str(name); + fn coff_get_symbols( + buf: &[u8], + f: &mut dyn FnMut(&[u8]) -> std::io::Result<()>, + ) -> std::io::Result { + type NtCoffFile<'data> = + object::read::coff::CoffFile<'data, &'data [u8], object::pe::ImageFileHeader>; + let file = NtCoffFile::parse(buf).unwrap(); + for symbol in file.symbols() { + if symbol.is_definition() { + f(symbol.name_bytes().unwrap())?; + } + } + Ok(true) } - // done with MSVC symbol directory - drop(ms_symbols); - - writer.write_long_names(); - // can't use writer.member_name() from here { // import descriptor member - let mut member = writer.start_member(member_name); - - let symbol_value = SymbolValue::Descriptor; - // update member offsets - let member_offset = member.header_offset as u32; - // Updating GNU symbol table is a bit messy with the aliases - for (index, value) in symbols.values().enumerate() { - if symbol_value == *value { - member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); - } - } - member.set_u32_le(ms_member_table_offset + symbol_value.member_index() * 4, member_offset); - + let mut buf = DataWriter::new(); coff::write_import_descriptor( - &mut member, + &mut buf, dll_name, &import_descriptor_symbol, &null_thunk_data_symbol, ); + members.push(ar_archive_writer::NewArchiveMember { + member_name: dll_name.to_string(), + buf: Box::new(buf.into_data()), + get_symbols: coff_get_symbols, + mtime: 0, + uid: 0, + gid: 0, + perms: 0, + }); } { // null thunk data member - let mut member = writer.start_member(member_name); - - let symbol_value = SymbolValue::ThunkData; - // update member offsets - let member_offset = member.header_offset as u32; - // Updating GNU symbol table is a bit messy with the aliases - for (index, value) in symbols.values().enumerate() { - if symbol_value == *value { - member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); - } - } - member.set_u32_le(ms_member_table_offset + symbol_value.member_index() * 4, 
member_offset); - - coff::write_null_thunk_data(&mut member, &null_thunk_data_symbol); + let mut buf = DataWriter::new(); + coff::write_null_thunk_data(&mut buf, &null_thunk_data_symbol); + members.push(ar_archive_writer::NewArchiveMember { + member_name: dll_name.to_string(), + buf: Box::new(buf.into_data()), + get_symbols: coff_get_symbols, + mtime: 0, + uid: 0, + gid: 0, + perms: 0, + }); } { // null import descriptor member - let mut member = writer.start_member(member_name); - - let symbol_value = SymbolValue::NullDescriptor; - // update member offsets - let member_offset = member.header_offset as u32; - // Updating GNU symbol table is a bit messy with the aliases - for (index, value) in symbols.values().enumerate() { - if symbol_value == *value { - member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); - } - } - member.set_u32_le(ms_member_table_offset + symbol_value.member_index() * 4, member_offset); - - coff::write_null_import_descriptor(&mut member); + let mut buf = DataWriter::new(); + coff::write_null_import_descriptor(&mut buf); + members.push(ar_archive_writer::NewArchiveMember { + member_name: dll_name.to_string(), + buf: Box::new(buf.into_data()), + get_symbols: coff_get_symbols, + mtime: 0, + uid: 0, + gid: 0, + perms: 0, + }); } // short import object members - for (index, name) in import_names.iter().enumerate() { - let mut member = writer.start_member(member_name); - - let symbol_value = SymbolValue::Import(index); - // update member offsets - let member_offset = member.header_offset as u32; - // Updating GNU symbol table is a bit messy with the aliases - for (index, value) in symbols.values().enumerate() { - if symbol_value == *value { - member.set_u32_be(gnu_member_table_offset + index * 4, member_offset); - } - } - member.set_u32_le(ms_member_table_offset + symbol_value.member_index() * 4, member_offset); - - coff::write_short_import(&mut member, dll_name, name, None); + for name in import_names.iter() { + let mut buf = 
DataWriter::new(); + coff::write_short_import(&mut buf, dll_name, name, None); + members.push(ar_archive_writer::NewArchiveMember { + member_name: dll_name.to_string(), + buf: Box::new(buf.into_data()), + get_symbols: |buf, f| { + const NAME_OFFSET: usize = std::mem::size_of::(); + let name = CStr::from_bytes_until_nul(&buf[NAME_OFFSET..]).unwrap(); + f(name.to_bytes())?; + f(format!("__imp_{}", name.to_str().unwrap()).as_bytes())?; + Ok(true) + }, + mtime: 0, + uid: 0, + gid: 0, + perms: 0, + }); } - writer.into_data() + let mut result = Vec::new(); + let write_symtab = true; + let deterministic = true; + let thin = false; + ar_archive_writer::write_archive_to_stream( + &mut std::io::Cursor::new(&mut result), + &members, + write_symtab, + ar_archive_writer::ArchiveKind::Gnu, + deterministic, + thin, + ) + .expect("write ar failed"); + + result } From 58325ad0a94f9f0450a34884dafdfa32f75efb68 Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Sat, 11 Nov 2023 14:49:23 +1300 Subject: [PATCH 09/11] Implement ordinal / name-types --- src/archive.rs | 34 ++++++++++++++--- src/dll_import_lib/coff.rs | 78 ++++++++++++++++++++++++++++++++------ src/dll_import_lib/mod.rs | 9 +++-- 3 files changed, 100 insertions(+), 21 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 630d21765..b96070f2a 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -21,14 +21,36 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { tmpdir: &Path, _is_direct_dependency: bool, ) -> PathBuf { - let mut import_names = Vec::new(); - for dll_import in dll_imports { - import_names.push(dll_import.name.as_str()); - } let lib_path = tmpdir.join(format!("{lib_name}_import.lib")); + + // todo: use the same DllImport type? 
+ let import_lib_imports = dll_imports + .into_iter() + .map(|import| crate::dll_import_lib::Import { + symbol_name: import.name.to_string(), + ordinal_or_hint: import.ordinal(), + name_type: match import.import_name_type { + Some(rustc_session::cstore::PeImportNameType::Ordinal(_)) => { + crate::dll_import_lib::ImportNameType::Ordinal + } + None | Some(rustc_session::cstore::PeImportNameType::Decorated) => { + crate::dll_import_lib::ImportNameType::Name + } + Some(rustc_session::cstore::PeImportNameType::NoPrefix) => { + crate::dll_import_lib::ImportNameType::NameNoPrefix + } + Some(rustc_session::cstore::PeImportNameType::Undecorated) => { + crate::dll_import_lib::ImportNameType::NameUndecorate + } + }, + import_type: crate::dll_import_lib::ImportType::Code, + }) + .collect::>(); + + let import_lib = crate::dll_import_lib::generate(lib_name, &import_lib_imports); + // todo: emit session error instead of expects - fs::write(&lib_path, crate::dll_import_lib::generate(lib_name, &import_names)) - .expect("failed to write import library"); + fs::write(&lib_path, import_lib).expect("failed to write import library"); lib_path } diff --git a/src/dll_import_lib/coff.rs b/src/dll_import_lib/coff.rs index 24f4300f4..1d7548bfc 100644 --- a/src/dll_import_lib/coff.rs +++ b/src/dll_import_lib/coff.rs @@ -89,25 +89,81 @@ pub(crate) struct ImportObjectHeaderUnaligned { /// - have no padding unsafe impl object::pod::Pod for ImportObjectHeaderUnaligned {} -pub(crate) fn write_short_import( - data: &mut DataWriter, - dll_name: &str, - name: &&str, - ordinal_or_hint: Option, -) { +/// The IMPORT_OBJECT_* constants used to find the exported value in the DLL. +pub(crate) enum ImportNameType { + /// No import name, import by ordinal only. + Ordinal, + /// Import name == public symbol name. + Name, + /// Import name == public symbol name skipping leading ?, @, or optionally _. 
+ NameNoPrefix, + /// Import name == public symbol name skipping leading ?, @, or optionally _ and truncating at first @. + NameUndecorate, + /// Import name == a name is explicitly provided after the DLL name. + NameExportAs { export_name: String }, +} + +impl ImportNameType { + fn as_u16(&self) -> u16 { + match self { + ImportNameType::Ordinal => IMPORT_OBJECT_ORDINAL, + ImportNameType::Name => IMPORT_OBJECT_NAME, + ImportNameType::NameNoPrefix => IMPORT_OBJECT_NAME_NO_PREFIX, + ImportNameType::NameUndecorate => IMPORT_OBJECT_NAME_UNDECORATE, + ImportNameType::NameExportAs { .. } => IMPORT_OBJECT_NAME_EXPORTAS, + } + } +} + +/// The IMPORT_OBJECT_* constants that defines how the import is linked, or in the words of the PE +/// documentation: +/// > These values are used to determine which section contributions must be generated by the tool +/// > that uses the library if it must access that data. +pub(crate) enum ImportType { + Code, + Data, + Const, +} + +impl ImportType { + fn as_u16(&self) -> u16 { + match self { + ImportType::Code => IMPORT_OBJECT_CODE, + ImportType::Data => IMPORT_OBJECT_DATA, + ImportType::Const => IMPORT_OBJECT_CONST, + } + } +} + +pub(crate) struct Import { + pub(crate) symbol_name: String, + pub(crate) name_type: ImportNameType, + pub(crate) import_type: ImportType, + pub(crate) ordinal_or_hint: Option, +} + +pub(crate) fn write_short_import(data: &mut DataWriter, dll_name: &str, import: &Import) { + let mut size_of_data = import.symbol_name.len() + 1 + dll_name.len() + 1; + if let ImportNameType::NameExportAs { export_name } = &import.name_type { + size_of_data += export_name.len() + 1; + } + data.write_pod(&ImportObjectHeaderUnaligned { sig1: u16(IMAGE_FILE_MACHINE_UNKNOWN), sig2: u16(IMPORT_OBJECT_HDR_SIG2), version: u16(0), machine: u16(IMAGE_FILE_MACHINE_AMD64), time_date_stamp: u32(0), - size_of_data: u32((name.len() + 1 + dll_name.len() + 1) as u32), - ordinal_or_hint: u16(ordinal_or_hint.unwrap_or_default()), - name_type: 
u16(IMPORT_OBJECT_CODE << IMPORT_OBJECT_TYPE_SHIFT - | IMPORT_OBJECT_NAME << IMPORT_OBJECT_NAME_SHIFT), + size_of_data: u32(size_of_data as u32), + ordinal_or_hint: u16(import.ordinal_or_hint.unwrap_or_default()), + name_type: u16(import.import_type.as_u16() << IMPORT_OBJECT_TYPE_SHIFT + | import.name_type.as_u16() << IMPORT_OBJECT_NAME_SHIFT), }); - data.write_c_str(name); + data.write_c_str(&import.symbol_name); data.write_c_str(dll_name); + if let ImportNameType::NameExportAs { export_name } = &import.name_type { + data.write_c_str(&export_name); + } } pub(crate) fn write_import_descriptor( diff --git a/src/dll_import_lib/mod.rs b/src/dll_import_lib/mod.rs index b72af3129..0bf97f2d6 100644 --- a/src/dll_import_lib/mod.rs +++ b/src/dll_import_lib/mod.rs @@ -11,12 +11,13 @@ use object::{Object, ObjectSymbol}; use data::DataWriter; +mod coff; mod data; mod string_table; -mod coff; +pub(crate) use coff::{Import, ImportNameType, ImportType}; -pub(crate) fn generate(dll_name: &str, import_names: &[&str]) -> Vec { +pub(crate) fn generate(dll_name: &str, imports: &[Import]) -> Vec { let mut members = Vec::new(); // foo.dll => foo so we can construct the import descriptor symbol. @@ -98,9 +99,9 @@ pub(crate) fn generate(dll_name: &str, import_names: &[&str]) -> Vec { } // short import object members - for name in import_names.iter() { + for import in imports { let mut buf = DataWriter::new(); - coff::write_short_import(&mut buf, dll_name, name, None); + coff::write_short_import(&mut buf, dll_name, &import); members.push(ar_archive_writer::NewArchiveMember { member_name: dll_name.to_string(), buf: Box::new(buf.into_data()), From 326067e967f631cd58c989caf5587f55d1e0e6b7 Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Tue, 21 Nov 2023 01:18:35 +1300 Subject: [PATCH 10/11] Some restructuring to lift the important bits up to archive.rs. 
- Adds explicit checks for suported target - Pass in architecture/machine - Use a builder rather than a single function API for the library. - Use session errors rather than panicking at the top level. --- src/archive.rs | 51 ++++++++-- src/dll_import_lib/coff.rs | 203 ++++++++++++++++++++++++------------- src/dll_import_lib/mod.rs | 190 +++++++++++++++------------------- 3 files changed, 255 insertions(+), 189 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index b96070f2a..be0615d28 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -6,6 +6,8 @@ use rustc_codegen_ssa::back::archive::{ }; use rustc_session::Session; +struct UnsupportedTargetForRawDyLib; + pub(crate) struct ArArchiveBuilderBuilder; impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { @@ -15,18 +17,26 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { fn create_dll_import_lib( &self, - _sess: &Session, + sess: &Session, lib_name: &str, dll_imports: &[rustc_session::cstore::DllImport], tmpdir: &Path, _is_direct_dependency: bool, ) -> PathBuf { - let lib_path = tmpdir.join(format!("{lib_name}_import.lib")); + if sess.target.arch != "x86_64" || !sess.target.is_like_msvc { + sess.span_fatal( + dll_imports.iter().map(|import| import.span).collect::>(), + "cranelift codegen currently only supports raw_dylib on x86_64 msvc targets.", + ) + } + + let mut import_lib = crate::dll_import_lib::ImportLibraryBuilder::new( + lib_name, + crate::dll_import_lib::Machine::X86_64, + ); - // todo: use the same DllImport type? 
- let import_lib_imports = dll_imports - .into_iter() - .map(|import| crate::dll_import_lib::Import { + for import in dll_imports { + import_lib.add_import(crate::dll_import_lib::Import { symbol_name: import.name.to_string(), ordinal_or_hint: import.ordinal(), name_type: match import.import_name_type { @@ -44,13 +54,32 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { } }, import_type: crate::dll_import_lib::ImportType::Code, - }) - .collect::>(); + }); + } + + let lib_path = tmpdir.join(format!( + "{prefix}{lib_name}_import{suffix}", + prefix = sess.target.staticlib_prefix, + suffix = sess.target.staticlib_suffix, + )); - let import_lib = crate::dll_import_lib::generate(lib_name, &import_lib_imports); + let mut file = match fs::OpenOptions::new().write(true).create_new(true).open(&lib_path) { + Ok(file) => file, + Err(error) => { + sess.fatal(format!( + "failed to create import library file `{path}`: {error}", + path = lib_path.display(), + )); + } + }; - // todo: emit session error instead of expects - fs::write(&lib_path, import_lib).expect("failed to write import library"); + // import_lib.write() internally uses BufWriter, so we don't need anything here. + if let Err(error) = import_lib.write(&mut file) { + sess.fatal(format!( + "failed to write import library `{path}`: {error}", + path = lib_path.display(), + )); + } lib_path } diff --git a/src/dll_import_lib/coff.rs b/src/dll_import_lib/coff.rs index 1d7548bfc..b0547c7c2 100644 --- a/src/dll_import_lib/coff.rs +++ b/src/dll_import_lib/coff.rs @@ -51,11 +51,25 @@ use super::string_table::StringTable; pub(crate) const NULL_IMPORT_DESCRIPTOR_SYMBOL: &str = "__NULL_IMPORT_DESCRIPTOR"; -fn u16(value: u16) -> U16Bytes { +/// Supported COFF machine types. 
+#[derive(Debug, Clone, Copy)] +pub(crate) enum Machine { + X86_64, +} + +impl Machine { + fn as_raw(self) -> u16 { + match self { + Self::X86_64 => IMAGE_FILE_MACHINE_AMD64, + } + } +} + +fn make_u16(value: u16) -> U16Bytes { U16Bytes::new(LE, value) } -fn u32(value: u32) -> U32Bytes { +fn make_u32(value: u32) -> U32Bytes { U32Bytes::new(LE, value) } @@ -135,43 +149,76 @@ impl ImportType { } } +// A more directly COFF short import descriptor equivalent of rustc_session::cstore::DllImport. pub(crate) struct Import { pub(crate) symbol_name: String, pub(crate) name_type: ImportNameType, pub(crate) import_type: ImportType, - pub(crate) ordinal_or_hint: Option, + pub(crate) ordinal_or_hint: Option, } -pub(crate) fn write_short_import(data: &mut DataWriter, dll_name: &str, import: &Import) { +pub(crate) fn write_short_import(dll_name: &str, machine: Machine, import: &Import) -> Vec { + let mut vec = Vec::new(); + let mut size_of_data = import.symbol_name.len() + 1 + dll_name.len() + 1; if let ImportNameType::NameExportAs { export_name } = &import.name_type { size_of_data += export_name.len() + 1; } - data.write_pod(&ImportObjectHeaderUnaligned { - sig1: u16(IMAGE_FILE_MACHINE_UNKNOWN), - sig2: u16(IMPORT_OBJECT_HDR_SIG2), - version: u16(0), - machine: u16(IMAGE_FILE_MACHINE_AMD64), - time_date_stamp: u32(0), - size_of_data: u32(size_of_data as u32), - ordinal_or_hint: u16(import.ordinal_or_hint.unwrap_or_default()), - name_type: u16(import.import_type.as_u16() << IMPORT_OBJECT_TYPE_SHIFT - | import.name_type.as_u16() << IMPORT_OBJECT_NAME_SHIFT), - }); - data.write_c_str(&import.symbol_name); - data.write_c_str(dll_name); + vec.extend_from_slice(object::bytes_of(&ImportObjectHeaderUnaligned { + sig1: make_u16(IMAGE_FILE_MACHINE_UNKNOWN), + sig2: make_u16(IMPORT_OBJECT_HDR_SIG2), + version: make_u16(0), + machine: make_u16(machine.as_raw()), + time_date_stamp: make_u32(0), + size_of_data: make_u32(size_of_data as u32), + ordinal_or_hint: 
make_u16(import.ordinal_or_hint.unwrap_or_default()), + name_type: make_u16( + import.import_type.as_u16() << IMPORT_OBJECT_TYPE_SHIFT + | import.name_type.as_u16() << IMPORT_OBJECT_NAME_SHIFT, + ), + })); + vec.extend_from_slice(import.symbol_name.as_bytes()); + vec.push(0); + vec.extend_from_slice(dll_name.as_bytes()); + vec.push(0); if let ImportNameType::NameExportAs { export_name } = &import.name_type { - data.write_c_str(&export_name); + vec.extend_from_slice(export_name.as_bytes()); + vec.push(0); + } + + vec +} + +pub(crate) struct ImportDescriptorValues { + pub(crate) dll_name: String, + pub(crate) machine: Machine, + pub(crate) import_descriptor_symbol: String, + pub(crate) null_thunk_data_symbol: String, +} + +impl ImportDescriptorValues { + pub(crate) fn new(dll_name: String, machine: Machine) -> Self { + // foo.dll => foo so we can construct the import descriptor symbol. + // At least for the Windows system dlls, don't seem to need any further + // escaping, e.g. "api-ms-win-appmodel-runtime-l1-1-1.dll" => + // "__IMPORT_DESCRIPTOR_api-ms-win-appmodel-runtime-l1-1-1" + // Not using std::path to avoid having to handle non-unicode paths. + let mut dll_basename = dll_name.clone(); + if let Some(index) = dll_basename.rfind('.') { + dll_basename.truncate(index); + } + + let import_descriptor_symbol = format!("__IMPORT_DESCRIPTOR_{dll_basename}"); + let null_thunk_data_symbol = format!("\x7f{dll_basename}_NULL_THUNK_DATA"); + + Self { dll_name, machine, import_descriptor_symbol, null_thunk_data_symbol } } } -pub(crate) fn write_import_descriptor( - data: &mut DataWriter, - dll_name: &str, - import_descriptor_symbol: &str, - null_thunk_data_symbol: &str, -) { +/// Return a COFF object file containing the import descriptor table entry for the +/// given DLL name. 
+pub(crate) fn generate_import_descriptor(values: &ImportDescriptorValues) -> Vec { // This is a COFF object containing 2 sections: // .idata$2: import directory entry: // 20 bytes, all 0 on disk, an Import Directory Table entry @@ -192,7 +239,9 @@ pub(crate) fn write_import_descriptor( // [6]: external __NULL_THUNK_DATA => undef // COFF File header: - let mut file = CoffFileWriter::new(data, IMAGE_FILE_MACHINE_AMD64); + let mut writer = DataWriter::new(); + + let mut file = CoffFileWriter::new(&mut writer, values.machine); // Section table: // [0] .idata$2: import directory entry @@ -219,38 +268,38 @@ pub(crate) fn write_import_descriptor( let import_descriptor_pointer_to_relocations = file.data.len() - file.offset; let header = import_directory_header.get_mut(file.data); - header.number_of_relocations = u16(3); + header.number_of_relocations = make_u16(3); - header.pointer_to_relocations = u32(import_descriptor_pointer_to_relocations as u32); + header.pointer_to_relocations = make_u32(import_descriptor_pointer_to_relocations as u32); // todo: CoffRelocWriter // relocation 0: [3] import lookup table rva => points to UNDEF symbol .idata$4 file.data.write_pod(&ImageRelocation { - virtual_address: u32(0), - symbol_table_index: u32(3), - typ: u16(IMAGE_REL_AMD64_ADDR32NB), + virtual_address: make_u32(0), + symbol_table_index: make_u32(3), + typ: make_u16(IMAGE_REL_AMD64_ADDR32NB), }); // relocation 1: [2] name rva => points to DLL name section .idata$6 file.data.write_pod(&ImageRelocation { - virtual_address: u32(12), - symbol_table_index: u32(2), - typ: u16(IMAGE_REL_AMD64_ADDR32NB), + virtual_address: make_u32(12), + symbol_table_index: make_u32(2), + typ: make_u16(IMAGE_REL_AMD64_ADDR32NB), }); // relocation 2: [4] import address table rva => points to UNDEF symbol .idata$5 file.data.write_pod(&ImageRelocation { - virtual_address: u32(16), - symbol_table_index: u32(4), - typ: u16(IMAGE_REL_AMD64_ADDR32NB), + virtual_address: make_u32(16), + symbol_table_index: 
make_u32(4), + typ: make_u16(IMAGE_REL_AMD64_ADDR32NB), }); // [1] section .idata$6 data - CoffSectionRawData::new(&mut file, dll_name_header).write_c_str(dll_name); + CoffSectionRawData::new(&mut file, dll_name_header).write_c_str(&values.dll_name); // COFF symbol table: let mut symbol_table = file.start_symbol_table(); symbol_table.add( - import_descriptor_symbol, + &values.import_descriptor_symbol, SymbolOptions { section_number: 1, storage_class: IMAGE_SYM_CLASS_EXTERNAL, @@ -286,14 +335,19 @@ pub(crate) fn write_import_descriptor( SymbolOptions { storage_class: IMAGE_SYM_CLASS_EXTERNAL, ..Default::default() }, ); symbol_table.add( - null_thunk_data_symbol, + &values.null_thunk_data_symbol, SymbolOptions { storage_class: IMAGE_SYM_CLASS_EXTERNAL, ..Default::default() }, ); + drop(symbol_table); + drop(file); + + writer.into_data() } -pub(crate) fn write_null_thunk_data(data: &mut DataWriter, symbol: &str) { +pub(crate) fn generate_null_thunk_data(machine: Machine, symbol: &str) -> Vec { + let mut writer = DataWriter::new(); // This is a COFF file with a two sections with 8 bytes of null data - let mut file = CoffFileWriter::new(data, IMAGE_FILE_MACHINE_AMD64); + let mut file = CoffFileWriter::new(&mut writer, machine); let import_address_section = file.write_section_header( ".idata$5", @@ -322,11 +376,19 @@ pub(crate) fn write_null_thunk_data(data: &mut DataWriter, symbol: &str) { ..Default::default() }, ); + drop(file); + + writer.into_data() } -pub(crate) fn write_null_import_descriptor(data: &mut DataWriter) { +/// Return the COFF object file containing the "Null Import Descriptor" symbols, +/// used by the linker to terminate the import descriptor table in .idata. +/// At least one of these must exist across the linker input files. 
+pub(crate) fn generate_null_import_descriptor(machine: Machine) -> Vec { + let mut writer = DataWriter::new(); + // This is a COFF file with a section with 20 bytes of null data - let mut file = CoffFileWriter::new(data, IMAGE_FILE_MACHINE_AMD64); + let mut file = CoffFileWriter::new(&mut writer, machine); let header = file.write_section_header( ".idata$3", IMAGE_SCN_ALIGN_4BYTES @@ -343,6 +405,9 @@ pub(crate) fn write_null_import_descriptor(data: &mut DataWriter) { ..Default::default() }, ); + drop(file); + + writer.into_data() } #[derive(Debug, Clone, Copy)] @@ -394,16 +459,16 @@ struct CoffFileWriter<'data> { } impl<'data> CoffFileWriter<'data> { - fn new(data: &'data mut DataWriter, machine: u16) -> Self { + fn new(data: &'data mut DataWriter, machine: Machine) -> Self { let file_offset = data.len(); data.write_pod(&ImageFileHeaderUnaligned { - machine: u16(machine), - number_of_sections: u16(0), - time_date_stamp: u32(0), - pointer_to_symbol_table: u32(0), - number_of_symbols: u32(0), - size_of_optional_header: u16(0), - characteristics: u16(0), + machine: make_u16(machine.as_raw()), + number_of_sections: make_u16(0), + time_date_stamp: make_u32(0), + pointer_to_symbol_table: make_u32(0), + number_of_symbols: make_u32(0), + size_of_optional_header: make_u16(0), + characteristics: make_u16(0), }); let string_table = CoffStringTable::new(); Self { data, offset: file_offset, number_of_sections: 0, string_table } @@ -417,22 +482,22 @@ impl<'data> CoffFileWriter<'data> { self.number_of_sections += 1; let offset = self.data.write_pod(&ImageSectionHeaderUnaligned { name: self.string_table.get_raw_name(name), - virtual_size: u32(0), - virtual_address: u32(0), - size_of_raw_data: u32(0), // filled out later - pointer_to_raw_data: u32(0), // ditto. - pointer_to_relocations: u32(0), // (possibly) ditto. 
- pointer_to_linenumbers: u32(0), - number_of_relocations: u16(0), - number_of_linenumbers: u16(0), - characteristics: u32(characteristics), + virtual_size: make_u32(0), + virtual_address: make_u32(0), + size_of_raw_data: make_u32(0), // filled out later + pointer_to_raw_data: make_u32(0), // ditto. + pointer_to_relocations: make_u32(0), // (possibly) ditto. + pointer_to_linenumbers: make_u32(0), + number_of_relocations: make_u16(0), + number_of_linenumbers: make_u16(0), + characteristics: make_u32(characteristics), }); CoffSectionHeader { offset } } fn start_symbol_table(&mut self) -> CoffSymbolTableWriter<'_, 'data> { let offset = self.len(); - self.file_header_mut().pointer_to_symbol_table = u32((offset - self.offset) as u32); + self.file_header_mut().pointer_to_symbol_table = make_u32((offset - self.offset) as u32); CoffSymbolTableWriter { file: self, offset, number_of_symbols: 0 } } } @@ -455,7 +520,7 @@ impl Drop for CoffFileWriter<'_> { fn drop(&mut self) { let number_of_sections = self.number_of_sections; let header = self.file_header_mut(); - header.number_of_sections = u16(number_of_sections); + header.number_of_sections = make_u16(number_of_sections); self.string_table.write(self.data); } } @@ -533,8 +598,8 @@ impl Drop for CoffSectionRawData<'_, '_> { let header = self.header.get_mut(self.file.data); let size_of_raw_data = end_offset - self.offset; let pointer_to_raw_data = self.offset - self.file.offset; - header.size_of_raw_data = u32(size_of_raw_data as u32); - header.pointer_to_raw_data = u32(pointer_to_raw_data as u32); + header.size_of_raw_data = make_u32(size_of_raw_data as u32); + header.pointer_to_raw_data = make_u32(pointer_to_raw_data as u32); } } @@ -561,9 +626,9 @@ impl CoffSymbolTableWriter<'_, '_> { let name = self.file.string_table.get_raw_name(name); self.file.write_pod(&ImageSymbol { name, - value: u32(options.value), - section_number: u16(options.section_number as u16), - typ: u16(options.base_type | options.complex_type << 8), + 
value: make_u32(options.value), + section_number: make_u16(options.section_number as u16), + typ: make_u16(options.base_type | options.complex_type << 8), storage_class: options.storage_class, number_of_aux_symbols: options.number_of_aux_symbols, }); @@ -575,7 +640,7 @@ impl Drop for CoffSymbolTableWriter<'_, '_> { fn drop(&mut self) { let pointer_to_symbol_table = self.offset - self.file.offset; let header = self.file.file_header_mut(); - header.pointer_to_symbol_table = u32(pointer_to_symbol_table as u32); - header.number_of_symbols = u32(self.number_of_symbols); + header.pointer_to_symbol_table = make_u32(pointer_to_symbol_table as u32); + header.number_of_symbols = make_u32(self.number_of_symbols); } } diff --git a/src/dll_import_lib/mod.rs b/src/dll_import_lib/mod.rs index 0bf97f2d6..e45d9a013 100644 --- a/src/dll_import_lib/mod.rs +++ b/src/dll_import_lib/mod.rs @@ -1,6 +1,4 @@ // todo: pull out to a proper location. Really should be in `object` crate! -// todo: support ordinals -// todo: support name types (e.g. verbatim+) // todo: support windows-gnu flavor? // todo: provide machine // todo: remove any panics, nice errors @@ -9,129 +7,103 @@ use std::ffi::CStr; use object::{Object, ObjectSymbol}; -use data::DataWriter; - mod coff; mod data; mod string_table; -pub(crate) use coff::{Import, ImportNameType, ImportType}; +use crate::dll_import_lib::coff::ImportDescriptorValues; +pub(crate) use coff::{Import, ImportNameType, ImportType, Machine}; -pub(crate) fn generate(dll_name: &str, imports: &[Import]) -> Vec { - let mut members = Vec::new(); +pub(crate) struct ImportLibraryBuilder { + dll_name: String, + machine: Machine, + members: Vec>, +} - // foo.dll => foo so we can construct the import descriptor symbol. - // At least for the Windows system dlls, don't seem to need any further - // escaping, e.g. 
"api-ms-win-appmodel-runtime-l1-1-1.dll" => - // "__IMPORT_DESCRIPTOR_api-ms-win-appmodel-runtime-l1-1-1" - // Not using std::path to avoid having to handle non-unicode paths. - let mut dll_basename = String::from(dll_name); - if let Some(index) = dll_basename.rfind('.') { - dll_basename.truncate(index); +impl ImportLibraryBuilder { + pub(crate) fn new(dll_name: &str, machine: Machine) -> Self { + let values = ImportDescriptorValues::new(dll_name.to_string(), machine); + let mut members = Vec::new(); + members.push(coff_member(dll_name, coff::generate_import_descriptor(&values))); + members.push(coff_member( + dll_name, + coff::generate_null_thunk_data(machine, &values.null_thunk_data_symbol), + )); + members.push(coff_member(dll_name, coff::generate_null_import_descriptor(machine))); + Self { dll_name: values.dll_name, machine, members } } - let import_descriptor_symbol = format!("__IMPORT_DESCRIPTOR_{dll_basename}"); - let null_thunk_data_symbol = format!("\x7f{dll_basename}_NULL_THUNK_DATA"); - - fn coff_get_symbols( - buf: &[u8], - f: &mut dyn FnMut(&[u8]) -> std::io::Result<()>, - ) -> std::io::Result { - type NtCoffFile<'data> = - object::read::coff::CoffFile<'data, &'data [u8], object::pe::ImageFileHeader>; - let file = NtCoffFile::parse(buf).unwrap(); - for symbol in file.symbols() { - if symbol.is_definition() { - f(symbol.name_bytes().unwrap())?; - } - } - Ok(true) + pub(crate) fn add_import(&mut self, import: Import) { + self.members.push(import_member(&self.dll_name, self.machine, &import)); } + pub(crate) fn write(&self, w: &mut W) -> std::io::Result<()> + where + W: ?Sized + std::io::Write + std::io::Seek, { - // import descriptor member - let mut buf = DataWriter::new(); - coff::write_import_descriptor( - &mut buf, - dll_name, - &import_descriptor_symbol, - &null_thunk_data_symbol, - ); - members.push(ar_archive_writer::NewArchiveMember { - member_name: dll_name.to_string(), - buf: Box::new(buf.into_data()), - get_symbols: coff_get_symbols, - mtime: 
0, - uid: 0, - gid: 0, - perms: 0, - }); + let mut w = std::io::BufWriter::new(w); + let write_symtab = true; + let deterministic = true; + let thin = false; + ar_archive_writer::write_archive_to_stream( + &mut w, + &self.members, + write_symtab, + ar_archive_writer::ArchiveKind::Gnu, + deterministic, + thin, + )?; + // must flush before drop to ensure any final IO errors are reported. + std::io::Write::flush(&mut w)?; + Ok(()) } +} - { - // null thunk data member - let mut buf = DataWriter::new(); - coff::write_null_thunk_data(&mut buf, &null_thunk_data_symbol); - members.push(ar_archive_writer::NewArchiveMember { - member_name: dll_name.to_string(), - buf: Box::new(buf.into_data()), - get_symbols: coff_get_symbols, - mtime: 0, - uid: 0, - gid: 0, - perms: 0, - }); +fn coff_member(dll_name: &str, buf: Vec) -> ar_archive_writer::NewArchiveMember<'static> { + ar_archive_writer::NewArchiveMember { + member_name: dll_name.to_string(), + buf: Box::new(buf), + get_symbols: coff_get_symbols, + mtime: 0, + uid: 0, + gid: 0, + perms: 0, } +} - { - // null import descriptor member - let mut buf = DataWriter::new(); - coff::write_null_import_descriptor(&mut buf); - members.push(ar_archive_writer::NewArchiveMember { - member_name: dll_name.to_string(), - buf: Box::new(buf.into_data()), - get_symbols: coff_get_symbols, - mtime: 0, - uid: 0, - gid: 0, - perms: 0, - }); +fn import_member( + dll_name: &str, + machine: Machine, + import: &Import, +) -> ar_archive_writer::NewArchiveMember<'static> { + ar_archive_writer::NewArchiveMember { + member_name: dll_name.to_string(), + buf: Box::new(coff::write_short_import(dll_name, machine, import)), + get_symbols: |buf, f| { + const NAME_OFFSET: usize = std::mem::size_of::(); + let name = CStr::from_bytes_until_nul(&buf[NAME_OFFSET..]).unwrap(); + f(name.to_bytes())?; + f(format!("__imp_{}", name.to_str().unwrap()).as_bytes())?; + Ok(true) + }, + mtime: 0, + uid: 0, + gid: 0, + perms: 0, } +} - // short import object members - for import 
in imports { - let mut buf = DataWriter::new(); - coff::write_short_import(&mut buf, dll_name, &import); - members.push(ar_archive_writer::NewArchiveMember { - member_name: dll_name.to_string(), - buf: Box::new(buf.into_data()), - get_symbols: |buf, f| { - const NAME_OFFSET: usize = std::mem::size_of::(); - let name = CStr::from_bytes_until_nul(&buf[NAME_OFFSET..]).unwrap(); - f(name.to_bytes())?; - f(format!("__imp_{}", name.to_str().unwrap()).as_bytes())?; - Ok(true) - }, - mtime: 0, - uid: 0, - gid: 0, - perms: 0, - }); +fn coff_get_symbols( + buf: &[u8], + f: &mut dyn FnMut(&[u8]) -> std::io::Result<()>, +) -> std::io::Result { + type NtCoffFile<'data> = + object::read::coff::CoffFile<'data, &'data [u8], object::pe::ImageFileHeader>; + let file = NtCoffFile::parse(buf).unwrap(); + for symbol in file.symbols() { + if symbol.is_definition() { + f(symbol.name_bytes().unwrap())?; + } } - - let mut result = Vec::new(); - let write_symtab = true; - let deterministic = true; - let thin = false; - ar_archive_writer::write_archive_to_stream( - &mut std::io::Cursor::new(&mut result), - &members, - write_symtab, - ar_archive_writer::ArchiveKind::Gnu, - deterministic, - thin, - ) - .expect("write ar failed"); - - result + Ok(true) } From cc26ee19d2eb28f82e161eafbc8e10b2daf7146b Mon Sep 17 00:00:00 2001 From: Simon Buchan Date: Fri, 24 Nov 2023 00:55:36 +1300 Subject: [PATCH 11/11] Add basic raw_dylib test --- build_system/tests.rs | 1 + config.txt | 1 + example/raw-dylib.rs | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 example/raw-dylib.rs diff --git a/build_system/tests.rs b/build_system/tests.rs index 1e24d1b11..0f3ed737d 100644 --- a/build_system/tests.rs +++ b/build_system/tests.rs @@ -97,6 +97,7 @@ const BASE_SYSROOT_SUITE: &[TestCase] = &[ ), TestCase::build_bin_and_run("aot.float-minmax-pass", "example/float-minmax-pass.rs", &[]), TestCase::build_bin_and_run("aot.mod_bench", "example/mod_bench.rs", &[]), + 
TestCase::build_bin_and_run("aot.raw_dylib", "example/raw-dylib.rs", &[]), TestCase::build_bin_and_run("aot.issue-72793", "example/issue-72793.rs", &[]), TestCase::build_bin("aot.issue-59326", "example/issue-59326.rs"), ]; diff --git a/config.txt b/config.txt index 7ff805e58..7ac93d1b9 100644 --- a/config.txt +++ b/config.txt @@ -40,6 +40,7 @@ aot.subslice-patterns-const-eval aot.track-caller-attribute aot.float-minmax-pass aot.mod_bench +aot.raw_dylib aot.issue-72793 aot.issue-59326 diff --git a/example/raw-dylib.rs b/example/raw-dylib.rs new file mode 100644 index 000000000..2496f9bf4 --- /dev/null +++ b/example/raw-dylib.rs @@ -0,0 +1,42 @@ +fn main() { + #[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "msvc"))] + x86_64_pc_windows_msvc::test(); +} + +#[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "msvc"))] +mod x86_64_pc_windows_msvc { + #![allow(clippy::upper_case_acronyms)] + + // Expanded windows_sys, with --cfg windows_raw_dylib, on not(target_arch = "x86"). + // + // With target_arch = "x86", #[link] needs import_name_type = "undecorated" for + // windows APIs - and the extern abi depends on the specific API.
+ + // use windows_sys::core::PWSTR; + // use windows_sys::{Win32::Foundation::*, Win32::UI::WindowsAndMessaging::*}; + type PWSTR = *mut u16; + type BOOL = i32; + type HWND = isize; + type LPARAM = isize; + type WNDENUMPROC = Option<unsafe extern "system" fn(HWND, LPARAM) -> BOOL>; + + #[link(name = "user32.dll", kind = "raw-dylib", modifiers = "+verbatim")] + extern "system" { + fn EnumWindows(lpenumfunc: WNDENUMPROC, lparam: LPARAM) -> BOOL; + + fn GetWindowTextW(hwnd: HWND, buf: PWSTR, buflen: i32) -> i32; + } + + pub fn test() { + unsafe { EnumWindows(Some(enum_window), 0) }; + } + + extern "system" fn enum_window(window: HWND, _: LPARAM) -> BOOL { + let mut text: [u16; 512] = [0; 512]; + + let len = unsafe { GetWindowTextW(window, text.as_mut_ptr(), text.len() as i32) }; + let text = String::from_utf16_lossy(&text[..len as usize]); + + 1 // TRUE + } +}