Skip to content

Commit

Permalink
Adds supplier/license info to pip components. (#938)
Browse files Browse the repository at this point in the history
* Adds supplier/license info to pip components.

* Rename GetReleasesAsync to GetProjectAsync

* Address feedback

---------

Co-authored-by: Sebastian Gomez <[email protected]>
  • Loading branch information
sebasgomez238 and sebasgomez238 authored Dec 28, 2023
1 parent 710273b commit 809ef0b
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 44 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
namespace Microsoft.ComponentDetection.Contracts.TypedComponent;

using System.Diagnostics.CodeAnalysis;
using Newtonsoft.Json;
using PackageUrl;

public class PipComponent : TypedComponent
Expand All @@ -10,16 +11,26 @@ private PipComponent()
/* Reserved for deserialization */
}

public PipComponent(string name, string version)
public PipComponent(string name, string version, string author = null, string license = null)
{
this.Name = this.ValidateRequiredInput(name, nameof(this.Name), nameof(ComponentType.Pip));
this.Version = this.ValidateRequiredInput(version, nameof(this.Version), nameof(ComponentType.Pip));
this.Author = author;
this.License = license;
}

public string Name { get; set; }

public string Version { get; set; }

#nullable enable
[JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
public string? Author { get; set; }

[JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
public string? License { get; set; }
#nullable disable

public override ComponentType Type => ComponentType.Pip;

[SuppressMessage("Usage", "CA1308:Normalize String to Uppercase", Justification = "Casing cannot be overwritten.")]
Expand Down
16 changes: 10 additions & 6 deletions src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public interface IPyPiClient
{
Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsync(string name, string version, PythonProjectRelease release);

Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetReleasesAsync(PipDependencySpecification spec);
Task<PythonProject> GetProjectAsync(PipDependencySpecification spec);
}

public sealed class PyPiClient : IPyPiClient, IDisposable
Expand Down Expand Up @@ -134,7 +134,7 @@ public async Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsy
return dependencies;
}

public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetReleasesAsync(PipDependencySpecification spec)
public async Task<PythonProject> GetProjectAsync(PipDependencySpecification spec)
{
var requestUri = new Uri($"https://pypi.org/pypi/{spec.Name}/json");

Expand Down Expand Up @@ -183,7 +183,7 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele

this.logger.LogWarning($"Call to pypi.org failed, but no more retries allowed!");

return new SortedDictionary<string, IList<PythonProjectRelease>>();
return new PythonProject();
}

if (!request.IsSuccessStatusCode)
Expand All @@ -192,12 +192,16 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele

this.logger.LogWarning("Received {StatusCode} {ReasonPhrase} from {RequestUri}", request.StatusCode, request.ReasonPhrase, requestUri);

return new SortedDictionary<string, IList<PythonProjectRelease>>();
return new PythonProject();
}

var response = await request.Content.ReadAsStringAsync();
var project = JsonConvert.DeserializeObject<PythonProject>(response);
var versions = new SortedDictionary<string, IList<PythonProjectRelease>>(new PythonVersionComparer());
var versions = new PythonProject
{
Info = project.Info,
Releases = new SortedDictionary<string, IList<PythonProjectRelease>>(new PythonVersionComparer()),
};

foreach (var release in project.Releases)
{
Expand All @@ -208,7 +212,7 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele
parsedVersion.Valid && parsedVersion.IsReleasedPackage &&
PythonVersionUtilities.VersionValidForSpec(release.Key, spec.DependencySpecifiers))
{
versions.Add(release.Key, release.Value);
versions.Releases.Add(release.Key, release.Value);
}
}
catch (ArgumentException ae)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
/// </summary>
public class PythonProject
{
public Dictionary<string, IList<PythonProjectRelease>> Releases { get; set; }
public SortedDictionary<string, IList<PythonProjectRelease>> Releases { get; set; }

#nullable enable
public PythonProjectInfo? Info { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
namespace Microsoft.ComponentDetection.Detectors.Pip;

using System.Collections.Generic;
using Newtonsoft.Json;

/// <summary>
/// Subset of the "info" object of a Python project's JSON metadata, exposed through <c>PythonProject.Info</c>.
/// Only the fields the resolver reads to derive supplier and license information are mapped.
/// </summary>
public class PythonProjectInfo
{
/// <summary>Gets or sets the project author.</summary>
public string Author { get; set; }

/// <summary>Gets or sets the author's e-mail; bound to the <c>author_email</c> JSON property.</summary>
[JsonProperty("author_email")]
public string AuthorEmail { get; set; }

/// <summary>
/// Gets or sets the project's classifier strings. The resolver looks for entries that start with
/// "License" and uses the text after the last " :: " separator as a license name.
/// </summary>
public List<string> Classifiers { get; set; }

/// <summary>
/// Gets or sets the license field. The resolver notes that this sometimes holds the full license
/// text rather than a short name, which is why it applies a length limit before using it.
/// </summary>
public string License { get; set; }

/// <summary>Gets or sets the project maintainer.</summary>
public string Maintainer { get; set; }

/// <summary>Gets or sets the maintainer's e-mail; bound to the <c>maintainer_email</c> JSON property.</summary>
[JsonProperty("maintainer_email")]
public string MaintainerEmail { get; set; }

// Add other properties from the "info" object as needed
}
69 changes: 62 additions & 7 deletions src/Microsoft.ComponentDetection.Detectors/pip/PythonResolver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ public class PythonResolver : IPythonResolver
private readonly IPyPiClient pypiClient;
private readonly ILogger<PythonResolver> logger;

private readonly int maxLicenseFieldLength = 100;
private readonly string classifierFieldSeparator = " :: ";
private readonly string classifierFieldLicensePrefix = "License";

public PythonResolver(IPyPiClient pypiClient, ILogger<PythonResolver> logger)
{
this.pypiClient = pypiClient;
Expand All @@ -35,7 +39,9 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec
// If we have it, we probably just want to skip at this phase as this indicates duplicates
if (!state.ValidVersionMap.TryGetValue(rootPackage.Name, out _))
{
var result = await this.pypiClient.GetReleasesAsync(rootPackage);
var project = await this.pypiClient.GetProjectAsync(rootPackage);

var result = project.Releases;

if (result.Keys.Any())
{
Expand All @@ -45,7 +51,7 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec
var candidateVersion = state.ValidVersionMap[rootPackage.Name].Keys.Any()
? state.ValidVersionMap[rootPackage.Name].Keys.Last() : null;

var node = new PipGraphNode(new PipComponent(rootPackage.Name, candidateVersion));
var node = new PipGraphNode(new PipComponent(rootPackage.Name, candidateVersion, license: this.GetLicenseFromProject(project), author: this.GetSupplierFromProject(project)));

state.NodeReferences[rootPackage.Name] = node;

Expand Down Expand Up @@ -103,15 +109,17 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
else
{
// We haven't encountered this package before, so let's fetch it and find a candidate
var result = await this.pypiClient.GetReleasesAsync(dependencyNode);
var project = await this.pypiClient.GetProjectAsync(dependencyNode);

var result = project.Releases;

if (result.Keys.Any())
{
state.ValidVersionMap[dependencyNode.Name] = result;
var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any()
? state.ValidVersionMap[dependencyNode.Name].Keys.Last() : null;

this.AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion);
this.AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion, license: this.GetLicenseFromProject(project), author: this.GetSupplierFromProject(project));

state.ProcessingQueue.Enqueue((root, dependencyNode));
}
Expand Down Expand Up @@ -155,7 +163,7 @@ private async Task<bool> InvalidateAndReprocessAsync(

var candidateVersion = state.ValidVersionMap[pipComponent.Name].Keys.Any() ? state.ValidVersionMap[pipComponent.Name].Keys.Last() : null;

node.Value = new PipComponent(pipComponent.Name, candidateVersion);
node.Value = new PipComponent(pipComponent.Name, candidateVersion, license: pipComponent.License, author: pipComponent.Author);

var dependencies = (await this.FetchPackageDependenciesAsync(state, newSpec)).ToDictionary(x => x.Name, x => x);

Expand Down Expand Up @@ -201,7 +209,7 @@ private async Task<IList<PipDependencySpecification>> FetchPackageDependenciesAs
return await this.pypiClient.FetchPackageDependenciesAsync(spec.Name, candidateVersion, packageToFetch);
}

private void AddGraphNode(PythonResolverState state, PipGraphNode parent, string name, string version)
private void AddGraphNode(PythonResolverState state, PipGraphNode parent, string name, string version, string license = null, string author = null)
{
if (state.NodeReferences.TryGetValue(name, out var value))
{
Expand All @@ -210,10 +218,57 @@ private void AddGraphNode(PythonResolverState state, PipGraphNode parent, string
}
else
{
var node = new PipGraphNode(new PipComponent(name, version));
var node = new PipGraphNode(new PipComponent(name, version, license: license, author: author));
state.NodeReferences[name] = node;
parent.Children.Add(node);
node.Parents.Add(parent);
}
}

/// <summary>
/// Picks the supplier for a project from its metadata.
/// </summary>
/// <param name="project">The project whose <c>Info</c> metadata is inspected.</param>
/// <returns>
/// The first non-blank value among maintainer, maintainer e-mail, author and author e-mail
/// (in that order), or <c>null</c> when the project has no metadata or all four are blank.
/// </returns>
private string GetSupplierFromProject(PythonProject project)
{
    var info = project.Info;
    if (info is null)
    {
        return null;
    }

    // Ordered by preference: maintainer details win over author details.
    var candidates = new[]
    {
        info.Maintainer,
        info.MaintainerEmail,
        info.Author,
        info.AuthorEmail,
    };

    foreach (var candidate in candidates)
    {
        if (!string.IsNullOrWhiteSpace(candidate))
        {
            return candidate;
        }
    }

    // If none of the fields are populated, return null.
    return null;
}

/// <summary>
/// Derives a short license string for a project from its metadata.
/// </summary>
/// <param name="project">The project whose <c>Info</c> metadata is inspected.</param>
/// <returns>
/// The <c>License</c> field when it is non-blank and shorter than <c>maxLicenseFieldLength</c>;
/// otherwise the last " :: "-separated segment of every classifier that starts with "License",
/// joined with ", "; otherwise <c>null</c> when no license information is available.
/// </returns>
private string GetLicenseFromProject(PythonProject project)
{
    var info = project.Info;
    if (info is null)
    {
        return null;
    }

    // There are cases where the actual license text is found in the license field, so only values
    // shorter than maxLicenseFieldLength are accepted. Blank values are skipped as well so that an
    // empty license field does not hide the license classifiers.
    var license = info.License;
    if (!string.IsNullOrWhiteSpace(license) && license.Length < this.maxLicenseFieldLength)
    {
        return license;
    }

    if (info.Classifiers is null)
    {
        return null;
    }

    // Keep only the license classifiers, then split each one by " :: " and take the last part.
    // The prefix is a fixed token, so it is compared ordinally rather than with culture rules.
    var licenseNames = info.Classifiers
        .Where(x => !string.IsNullOrWhiteSpace(x) && x.StartsWith(this.classifierFieldLicensePrefix, System.StringComparison.Ordinal))
        .Select(x => x.Split(this.classifierFieldSeparator).Last())
        .ToList();

    // Report "no license" as null (not an empty string) so null-ignoring serialization omits it.
    return licenseNames.Count > 0 ? string.Join(", ", licenseNames) : null;
}
}
Loading

0 comments on commit 809ef0b

Please sign in to comment.