Skip to content

Commit 000b7b3

Browse files
committed
Feature: Add the generate_pkgid_to_licenses() function for retrieving the licenses for artifacts
1 parent 958eb1a commit 000b7b3

File tree

14 files changed

+2723
-2
lines changed

14 files changed

+2723
-2
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
version: ${{ matrix.julia-version }}
3636
arch: ${{ matrix.julia-arch }}
3737
- name: Cache artifacts
38-
uses: actions/cache@v2
38+
uses: actions/cache@v4
3939
env:
4040
cache-name: cache-artifacts
4141
with:

.github/workflows/docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
with:
2828
version: "1.6"
2929
- name: Cache artifacts
30-
uses: actions/cache@v2
30+
uses: actions/cache@v4
3131
env:
3232
cache-name: cache-artifacts
3333
with:

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ version = "3.1.0"
55

66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
8+
Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
89
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
910
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
1011
Git = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2"

src/PackageAnalyzer.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ module PackageAnalyzer
22

33
# Standard libraries
44
using Pkg, TOML, UUIDs, Printf
5+
import Artifacts
56
# Third-party packages
67
using LicenseCheck # for `find_license` and `is_osi_approved`
78
using JSON3 # for interfacing with `tokei` to count lines of code
@@ -380,5 +381,6 @@ include("count_loc.jl")
380381

381382
include("deprecated_schemas.jl")
382383

384+
include("artifact_licenses.jl")
383385

384386
end # module

src/artifact_licenses.jl

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
# Structurally, ArtifactLicenseInfo is the same as the "original license info". But I made it
2+
# a different type because I want to do "nominal typing", that is, I want to semantically
3+
# distinguish between artifact licenses and Julia package licenses.
4+
#
5+
# That is, the public interface for this functionality is a function named
6+
# PackageAnalyzer.artifact_license_map(). This function
7+
# returns a dict, where the keys of the dict are packages (specifically, the keys are Base.PkgIds,
8+
# which simply contain the package name and the package UUID), and the value is the artifact
9+
# license information. The artifact license information is not the same as the license for the
10+
# Julia package source code itself, so I don't want to confuse the user. I want to make it clear
11+
# that the user is only getting (from this function) the licenses for the artifacts. So that's
12+
# why I chose to return ArtifactLicenseInfos.
13+
#
14+
# In contrast, if I just returned the named tuple of
15+
# (; license_filename::String, licenses_found::Vector{String}, license_file_percent_covered::Float64),
16+
# then it would not be clear whether the user was working with the licenses from artifacts or
17+
# the Julia package source code itself. So, using a nominal ArtifactLicenseInfo type makes it
18+
# more clear, and also prevents the user from accidentally combining the two.
19+
#
20+
# One license file detected inside an artifact.
# - license_filename: name of the license file, as reported by the license finder.
# - licenses_found: identifiers of the licenses detected in that file.
# - license_file_percent_covered: coverage figure reported by the license finder
#   (presumably a percentage of the file's text -- confirm against LicenseCheck).
Base.@kwdef struct ArtifactLicenseInfo
    license_filename::String
    licenses_found::Vector{String}
    license_file_percent_covered::Float64
end

# Compare by field contents. Without this method the default `==` falls back to `===`,
# which compares the `licenses_found` vectors by identity, so two records holding the
# same data would never be considered equal (and `unique!` could never merge them).
# `isequal` is used for the float field so that `==` stays consistent with `hash`.
function Base.:(==)(a::ArtifactLicenseInfo, b::ArtifactLicenseInfo)
    return a.license_filename == b.license_filename &&
           a.licenses_found == b.licenses_found &&
           isequal(a.license_file_percent_covered, b.license_file_percent_covered)
end

# Hash by field contents so that records that compare equal also hash equally, as
# required by `Set`, `Dict` keys and `unique!`.
function Base.hash(info::ArtifactLicenseInfo, h::UInt)
    h = hash(info.license_file_percent_covered, h)
    h = hash(info.licenses_found, h)
    h = hash(info.license_filename, h)
    return hash(:ArtifactLicenseInfo, h)
end
25+
26+
# UUID of a package source, as a Base.UUID. Only `Release` and `Added` sources are
# supported; both expose the value through their `uuid` field.
_get_pkg_uuid_u(pkg::Union{Release,Added}) = Base.UUID(pkg.uuid)
28+
29+
# Name of a package source. Only `Release` and `Added` sources are supported; both
# expose the value through their `name` field.
_get_pkg_name(pkg::Union{Release,Added}) = pkg.name
31+
32+
# Build the Base.PkgId (UUID + name) that identifies the package behind `pkg`.
function _construct_pkgid(pkg::PkgSource)
    # The name is looked up before the UUID, in the same order as before.
    pkg_name = _get_pkg_name(pkg)
    return Base.PkgId(_get_pkg_uuid_u(pkg), pkg_name)
end
38+
39+
# Recursively search `local_dir` and return the full paths of every file whose name is
# one of `Artifacts.artifact_names` (Artifacts.toml / JuliaArtifacts.toml).
# The result is empty when no such file exists.
function find_artifacts_toml_from_local_dir(local_dir::String)
    toml_paths = String[]
    for (parent, _, filenames) in walkdir(local_dir)
        # Keep only the recognised artifact file names, then anchor them at `parent`.
        recognised = filter(in(Artifacts.artifact_names), filenames)
        append!(toml_paths, joinpath.(parent, recognised))
    end
    return toml_paths
end
54+
55+
# Read the "git-tree-sha1" entry of an artifact description and return it as a
# Base.SHA1. A single `key => value` pair is first turned into a one-entry Dict.
function _get_git_tree_sha1(x::Pair)
    return _get_git_tree_sha1(Dict(x))
end
function _get_git_tree_sha1(x::Dict)
    return Base.SHA1(x["git-tree-sha1"])
end

# Take the "info" stored under one artifact name in an Artifacts.toml file and return
# every artifact hash (git-tree-sha1) it declares.
# Note: all platforms are covered, not only the platform Julia is running on.
#
# A Dict describes a single artifact, so it yields exactly one hash.
function _get_possible_artifact_hashes_from_info(info::Dict)
    return [_get_git_tree_sha1(info)]
end
# A Vector holds one description per entry, so it yields one hash per element.
function _get_possible_artifact_hashes_from_info(info::Vector)
    return Base.SHA1[_get_git_tree_sha1(entry) for entry in info]
end
70+
71+
# Parse the Artifacts.toml file at path `artifacts_toml` and return the deduplicated
# artifact hashes (git-tree-sha1) of every entry it declares.
# Note: all platforms are covered, not only the platform Julia is running on.
function get_possible_artifact_hashes_from_artifacts_toml(artifacts_toml::String)
    collected = Base.SHA1[]
    # Only the entry payloads matter here; the artifact names (the keys) are not used.
    for entry in values(TOML.parsefile(artifacts_toml))
        append!(collected, _get_possible_artifact_hashes_from_info(entry))
    end
    return unique!(collected)
end
84+
85+
# Collect the deduplicated artifact hashes (git-tree-sha1) declared by every
# Artifacts.toml / JuliaArtifacts.toml file found under `local_dir`, the directory in
# which a package's code lives.
# Note: all platforms are covered, not only the platform Julia is running on.
#
# `pkg` is used only to label the error message. An error is thrown when `local_dir`
# contains no artifacts file at all.
function get_possible_artifact_hashes_from_local_dir(local_dir::String; pkg)
    toml_paths = find_artifacts_toml_from_local_dir(local_dir)
    isempty(toml_paths) &&
        error("Did not find any {,Julia}Artifacts.toml files for package: $(pkg)")
    collected = Base.SHA1[]
    for toml_path in toml_paths
        append!(collected, get_possible_artifact_hashes_from_artifacts_toml(toml_path))
    end
    return unique!(collected)
end
102+
103+
# Take in an artifact hash (git-tree-sha1).
# Return the licenses found anywhere inside the installed artifact: the artifact's root
# directory and every directory below it are each passed to `LicenseCheck.find_license`.
#
# Records with identical contents are reported once. When nothing is found an error is
# logged (not thrown) and an empty list is returned.
function get_licenses_from_artifact_hash(hash::Base.SHA1)
    artifact_root_path = Artifacts.artifact_path(hash)

    # `walkdir` only lists the *sub*-directories of each directory it visits, so the
    # artifact root has to be added explicitly; otherwise a license file placed directly
    # in the root would never be looked at.
    candidate_dirs = String[artifact_root_path]
    for (root, dirs, _) in walkdir(artifact_root_path)
        for dir in dirs
            push!(candidate_dirs, joinpath(root, dir))
        end
    end

    licenses = ArtifactLicenseInfo[]
    for dir in candidate_dirs
        found = LicenseCheck.find_license(dir)
        isnothing(found) && continue
        new_info = ArtifactLicenseInfo(;
            license_filename = found.license_filename,
            licenses_found = found.licenses_found,
            license_file_percent_covered = found.license_file_percent_covered,
        )
        push!(licenses, new_info)
    end

    # Deduplicate on the field contents. A plain `unique!(licenses)` relies on `==` for
    # the struct, which by default compares the `licenses_found` vectors by identity and
    # therefore never merges two records that merely hold the same data.
    unique!(licenses) do info
        (info.license_filename, info.licenses_found, info.license_file_percent_covered)
    end

    if isempty(licenses)
        msg = "No licenses found for artifact $(hash)"
        @error msg
    end
    return licenses
end
130+
131+
# Fill `artifact_hash_to_licenses` from a list of artifact hashes.
#
# Arguments:
# 1. artifact_hash_to_licenses: dict mapping an artifact hash (git-tree-sha1) to the
#    list of licenses found for it. It is mutated in place.
# 2. available_hashes: the artifact hashes (git-tree-sha1) to process.
#
# For every hash in available_hashes the licenses are looked up and stored as
#     artifact_hash_to_licenses[$hash] = $listoflicenses
# replacing any entry already stored under that hash. Returns nothing.
function generate_artifact_hash_to_licenses!(
    artifact_hash_to_licenses::Dict{Base.SHA1,Vector{ArtifactLicenseInfo}},
    available_hashes::Vector{Base.SHA1},
)
    foreach(available_hashes) do tree_hash
        artifact_hash_to_licenses[tree_hash] = get_licenses_from_artifact_hash(tree_hash)
    end
    return nothing
end
150+
151+
# Fill `artifact_hash_to_licenses` from a list of packages.
#
# Arguments:
# 1. artifact_hash_to_licenses: dict mapping an artifact hash (git-tree-sha1) to the
#    list of licenses found for it. It is mutated in place.
# 2. pkgs: a list of PkgSources.
#
# The available artifact hashes of every package in pkgs are gathered first; then, for
# each of those hashes, the licenses are looked up and stored as
#     artifact_hash_to_licenses[$hash] = $listoflicenses
# Returns nothing.
#
# Keyword arguments (forwarded to generate_available_artifact_hashes_from_pkg):
# 1. allow_no_artifacts::Vector{Base.PkgId}. If a package has no available artifacts,
#    an error is thrown unless the package is in this list, in which case only a debug
#    message is logged.
function generate_artifact_hash_to_licenses!(
    artifact_hash_to_licenses::Dict{Base.SHA1,Vector{ArtifactLicenseInfo}},
    pkgs::Vector{<:PkgSource};
    kwargs...,
)
    gathered = Base.SHA1[]
    foreach(pkgs) do pkg
        append!(gathered, generate_available_artifact_hashes_from_pkg(pkg; kwargs...))
    end
    # Hand over to the hash-list method, which performs the actual license lookup.
    generate_artifact_hash_to_licenses!(artifact_hash_to_licenses, gathered)
    return nothing
end
179+
180+
# Take in a PkgSource.
# Obtain the package's code and return every artifact hash (git-tree-sha1) declared in
# the artifacts files found there. Throws an error when the package is not reachable.
function generate_possible_artifact_hashes_from_pkg(pkg::PkgSource)
    # The PkgId itself is not used below; the call is retained so that any error it
    # raises for `pkg` still surfaces before the code is fetched.
    _construct_pkgid(pkg)
    code_dir, is_reachable, _, _ = PackageAnalyzer.obtain_code(pkg)
    is_reachable || error("Package is not reachable: $(pkg)")
    return get_possible_artifact_hashes_from_local_dir(code_dir::String; pkg = pkg)
end
192+
193+
# Take in a PkgSource.
# Return the *available* artifact hashes of the package, which is a subset of its
# *possible* artifact hashes:
# - Possible artifact hash = the hash is listed in an Artifacts.toml file, but it might
#   belong to a platform that is different from the current platform.
# - Available artifact hash = the hash actually exists locally, as reported by
#   `Artifacts.artifact_exists`.
#
# Keyword arguments:
# 1. allow_no_artifacts::Vector{Base.PkgId}. When no artifact hash is available, an error
#    is thrown unless the package's PkgId is in this list, in which case a debug message
#    is logged and the empty list is returned.
function generate_available_artifact_hashes_from_pkg(
    pkg::PkgSource;
    allow_no_artifacts::Vector{Base.PkgId} = Base.PkgId[],
)
    pkgid = _construct_pkgid(pkg)
    candidates = generate_possible_artifact_hashes_from_pkg(pkg)
    installed = unique!(filter(Artifacts.artifact_exists, candidates))
    isempty(installed) || return installed

    # Nothing is installed for this package: tolerate it only for allow-listed packages.
    msg = "No artifacts were found for package $(pkg) with PkgId $(pkgid)"
    pkgid in allow_no_artifacts || error(msg)
    @debug msg
    return installed
end
221+
222+
# Takes in a list of PkgSources.
# Returns a dict whose keys are Base.PkgIds and whose values are the lists of artifact
# licenses found for the corresponding package.
#
# All keyword arguments are forwarded unchanged to generate_artifact_hash_to_licenses!
# and to artifact_license_map.
function generate_pkgid_to_licenses(pkgs::Vector{<:PkgSource}; kwargs...)
    # Step 1: licenses per artifact hash. Step 2: regroup those licenses per package.
    hash_to_licenses = Dict{Base.SHA1,Vector{ArtifactLicenseInfo}}()
    generate_artifact_hash_to_licenses!(hash_to_licenses, pkgs; kwargs...)
    return artifact_license_map(pkgs, hash_to_licenses; kwargs...)
end
232+
233+
# Group artifact licenses by package.
#
# Arguments:
# 1. pkgs: list of PkgSources.
# 2. artifact_hash_to_licenses: the Dict{Base.SHA1,Vector{ArtifactLicenseInfo}} obtained
#    by running the following:
#    - artifact_hash_to_licenses = Dict{Base.SHA1,Vector{ArtifactLicenseInfo}}()
#    - generate_artifact_hash_to_licenses!(artifact_hash_to_licenses, pkgs; kwargs...)
#
# Returns a dict from Base.PkgId to the concatenated licenses of all available artifact
# hashes of that package. A hash missing from artifact_hash_to_licenses raises a
# KeyError.
#
# Keyword arguments (forwarded to generate_available_artifact_hashes_from_pkg):
# 1. allow_no_artifacts::Vector{Base.PkgId}. If a package has no available artifacts,
#    an error is thrown unless the package is in this list, in which case only a debug
#    message is logged.
function artifact_license_map(
    pkgs::Vector{<:PkgSource},
    artifact_hash_to_licenses::Dict{Base.SHA1,Vector{ArtifactLicenseInfo}};
    kwargs...,
)
    licenses_by_pkgid = Dict{Base.PkgId,Vector{ArtifactLicenseInfo}}()
    for pkg in pkgs
        tree_hashes = generate_available_artifact_hashes_from_pkg(pkg; kwargs...)
        gathered = ArtifactLicenseInfo[]
        for tree_hash in tree_hashes
            append!(gathered, artifact_hash_to_licenses[tree_hash])
        end
        licenses_by_pkgid[_construct_pkgid(pkg)] = gathered
    end
    return licenses_by_pkgid
end

test/artifact_licenses/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
!Manifest-*.toml

0 commit comments

Comments
 (0)