Skip to content

Commit 7065279

Browse files
Expose Comment in ZipArchive and ZipArchiveEntry (#59442)
Co-authored-by: Adam Sitnik <[email protected]>
1 parent 8b94165 commit 7065279

File tree

12 files changed

+370
-87
lines changed

12 files changed

+370
-87
lines changed

src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,5 +383,69 @@ internal static void AddEntry(ZipArchive archive, string name, string contents,
383383
w.WriteLine(contents);
384384
}
385385
}
386+
387+
protected const string Utf8SmileyEmoji = "\ud83d\ude04";
388+
protected const string Utf8LowerCaseOUmlautChar = "\u00F6";
389+
protected const string Utf8CopyrightChar = "\u00A9";
390+
protected const string AsciiFileName = "file.txt";
391+
// The o with umlaut is a character that exists in both latin1 and utf8
392+
protected const string Utf8AndLatin1FileName = $"{Utf8LowerCaseOUmlautChar}.txt";
393+
// emojis only make sense in utf8
394+
protected const string Utf8FileName = $"{Utf8SmileyEmoji}.txt";
395+
protected static readonly string ALettersUShortMaxValueMinusOne = new string('a', ushort.MaxValue - 1);
396+
protected static readonly string ALettersUShortMaxValue = ALettersUShortMaxValueMinusOne + 'a';
397+
protected static readonly string ALettersUShortMaxValueMinusOneAndCopyRightChar = ALettersUShortMaxValueMinusOne + Utf8CopyrightChar;
398+
protected static readonly string ALettersUShortMaxValueMinusOneAndTwoCopyRightChars = ALettersUShortMaxValueMinusOneAndCopyRightChar + Utf8CopyrightChar;
399+
400+
// Returns pairs whose expected comment is the same whether the archive uses Utf8 or Latin1
401+
// Returns: originalComment, expectedComment
402+
private static IEnumerable<object[]> SharedComment_Data()
403+
{
404+
yield return new object[] { null, string.Empty };
405+
yield return new object[] { string.Empty, string.Empty };
406+
yield return new object[] { "a", "a" };
407+
yield return new object[] { Utf8LowerCaseOUmlautChar, Utf8LowerCaseOUmlautChar };
408+
}
409+
410+
// Returns pairs as expected by Utf8
411+
// Returns: originalComment, expectedComment
412+
public static IEnumerable<object[]> Utf8Comment_Data()
413+
{
414+
string asciiOriginalOverMaxLength = ALettersUShortMaxValue + "aaa";
415+
416+
// A smiley emoji code point consists of two characters,
417+
// so when it does not fit within the limit, the whole emoji should be truncated (never split in half)
418+
string utf8OriginalALettersAndOneEmojiDoesNotFit = ALettersUShortMaxValueMinusOne + Utf8SmileyEmoji;
419+
420+
// A smiley emoji code point consists of two characters,
421+
// so it should not be truncated if it's the last character and the total length is not over the limit.
422+
string utf8OriginalALettersAndOneEmojiFits = "aaaaa" + Utf8SmileyEmoji;
423+
424+
yield return new object[] { asciiOriginalOverMaxLength, ALettersUShortMaxValue };
425+
yield return new object[] { utf8OriginalALettersAndOneEmojiDoesNotFit, ALettersUShortMaxValueMinusOne };
426+
yield return new object[] { utf8OriginalALettersAndOneEmojiFits, utf8OriginalALettersAndOneEmojiFits };
427+
428+
foreach (object[] e in SharedComment_Data())
429+
{
430+
yield return e;
431+
}
432+
}
433+
434+
// Returns pairs as expected by Latin1
435+
// Returns: originalComment, expectedComment
436+
public static IEnumerable<object[]> Latin1Comment_Data()
437+
{
438+
// In Latin1, all characters are exactly 1 byte
439+
440+
string latin1ExpectedALettersAndOneOUmlaut = ALettersUShortMaxValueMinusOne + Utf8LowerCaseOUmlautChar;
441+
string latin1OriginalALettersAndTwoOUmlauts = latin1ExpectedALettersAndOneOUmlaut + Utf8LowerCaseOUmlautChar;
442+
443+
yield return new object[] { latin1OriginalALettersAndTwoOUmlauts, latin1ExpectedALettersAndOneOUmlaut };
444+
445+
foreach (object[] e in SharedComment_Data())
446+
{
447+
yield return e;
448+
}
449+
}
386450
}
387451
}

src/libraries/System.IO.Compression/ref/System.IO.Compression.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ public ZipArchive(System.IO.Stream stream) { }
9494
public ZipArchive(System.IO.Stream stream, System.IO.Compression.ZipArchiveMode mode) { }
9595
public ZipArchive(System.IO.Stream stream, System.IO.Compression.ZipArchiveMode mode, bool leaveOpen) { }
9696
public ZipArchive(System.IO.Stream stream, System.IO.Compression.ZipArchiveMode mode, bool leaveOpen, System.Text.Encoding? entryNameEncoding) { }
97+
[System.Diagnostics.CodeAnalysis.AllowNull]
98+
public string Comment { get { throw null; } set { } }
9799
public System.Collections.ObjectModel.ReadOnlyCollection<System.IO.Compression.ZipArchiveEntry> Entries { get { throw null; } }
98100
public System.IO.Compression.ZipArchiveMode Mode { get { throw null; } }
99101
public System.IO.Compression.ZipArchiveEntry CreateEntry(string entryName) { throw null; }
@@ -106,6 +108,8 @@ public partial class ZipArchiveEntry
106108
{
107109
internal ZipArchiveEntry() { }
108110
public System.IO.Compression.ZipArchive Archive { get { throw null; } }
111+
[System.Diagnostics.CodeAnalysis.AllowNull]
112+
public string Comment { get { throw null; } set { } }
109113
public long CompressedLength { get { throw null; } }
110114
[System.CLSCompliantAttribute(false)]
111115
public uint Crc32 { get { throw null; } }

src/libraries/System.IO.Compression/src/Resources/Strings.resx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,8 @@
212212
<data name="EntriesInCreateMode" xml:space="preserve">
213213
<value>Cannot access entries in Create mode.</value>
214214
</data>
215-
<data name="EntryNameEncodingNotSupported" xml:space="preserve">
216-
<value>The specified entry name encoding is not supported.</value>
215+
<data name="EntryNameAndCommentEncodingNotSupported" xml:space="preserve">
216+
<value>The specified encoding is not supported for entry names and comments.</value>
217217
</data>
218218
<data name="EntryNamesTooLong" xml:space="preserve">
219219
<value>Entry names cannot require more than 2^16 bits.</value>

src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System.Collections.Generic;
88
using System.Collections.ObjectModel;
99
using System.Diagnostics;
10+
using System.Diagnostics.CodeAnalysis;
1011
using System.Text;
1112

1213
namespace System.IO.Compression
@@ -27,8 +28,8 @@ public class ZipArchive : IDisposable
2728
private uint _numberOfThisDisk; //only valid after ReadCentralDirectory
2829
private long _expectedNumberOfEntries;
2930
private Stream? _backingStream;
30-
private byte[]? _archiveComment;
31-
private Encoding? _entryNameEncoding;
31+
private byte[] _archiveComment;
32+
private Encoding? _entryNameAndCommentEncoding;
3233

3334
#if DEBUG_FORCE_ZIP64
3435
public bool _forceZip64;
@@ -121,7 +122,7 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding?
121122
if (stream == null)
122123
throw new ArgumentNullException(nameof(stream));
123124

124-
EntryNameEncoding = entryNameEncoding;
125+
EntryNameAndCommentEncoding = entryNameEncoding;
125126
Stream? extraTempStream = null;
126127

127128
try
@@ -173,7 +174,7 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding?
173174
_centralDirectoryStart = 0; // invalid until ReadCentralDirectory
174175
_isDisposed = false;
175176
_numberOfThisDisk = 0; // invalid until ReadCentralDirectory
176-
_archiveComment = null;
177+
_archiveComment = Array.Empty<byte>();
177178

178179
switch (mode)
179180
{
@@ -211,6 +212,20 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding?
211212
}
212213
}
213214

215+
/// <summary>
216+
/// Gets or sets the optional archive comment.
217+
/// </summary>
218+
/// <remarks>
219+
/// The comment encoding is determined by the <c>entryNameEncoding</c> parameter of the <see cref="ZipArchive(Stream,ZipArchiveMode,bool,Encoding?)"/> constructor.
220+
/// If the encoded comment is longer than <see cref="ushort.MaxValue"/> bytes, it is truncated to fit when the property is set.
221+
/// </remarks>
222+
[AllowNull]
223+
public string Comment
224+
{
225+
get => (EntryNameAndCommentEncoding ?? Encoding.UTF8).GetString(_archiveComment);
226+
set => _archiveComment = ZipHelper.GetEncodedTruncatedBytesFromString(value, EntryNameAndCommentEncoding, ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength, out _);
227+
}
228+
214229
/// <summary>
215230
/// The collection of entries that are currently in the ZipArchive. This may not accurately represent the actual entries that are present in the underlying file or stream.
216231
/// </summary>
@@ -345,9 +360,9 @@ public void Dispose()
345360

346361
internal uint NumberOfThisDisk => _numberOfThisDisk;
347362

348-
internal Encoding? EntryNameEncoding
363+
internal Encoding? EntryNameAndCommentEncoding
349364
{
350-
get { return _entryNameEncoding; }
365+
get => _entryNameAndCommentEncoding;
351366

352367
private set
353368
{
@@ -370,10 +385,10 @@ private set
370385
(value.Equals(Encoding.BigEndianUnicode)
371386
|| value.Equals(Encoding.Unicode)))
372387
{
373-
throw new ArgumentException(SR.EntryNameEncodingNotSupported, nameof(EntryNameEncoding));
388+
throw new ArgumentException(SR.EntryNameAndCommentEncodingNotSupported, nameof(EntryNameAndCommentEncoding));
374389
}
375390

376-
_entryNameEncoding = value;
391+
_entryNameAndCommentEncoding = value;
377392
}
378393
}
379394

@@ -547,9 +562,7 @@ private void ReadEndOfCentralDirectory()
547562

548563
_expectedNumberOfEntries = eocd.NumberOfEntriesInTheCentralDirectory;
549564

550-
// only bother saving the comment if we are in update mode
551-
if (_mode == ZipArchiveMode.Update)
552-
_archiveComment = eocd.ArchiveComment;
565+
_archiveComment = eocd.ArchiveComment;
553566

554567
TryReadZip64EndOfCentralDirectory(eocd, eocdStart);
555568

0 commit comments

Comments
 (0)