|
| 1 | +# Structurally, ArtifactLicenseInfo is the same as the "original license info". But I made it |
| 2 | +# a different type because I want to do "nominal typing", that is, I want to semantically |
| 3 | +# distinguish between artifact licenses and Julia package licenses. |
| 4 | +# |
| 5 | +# That is, the public interface for this functionality is a function named |
| 6 | +# PackageAnalyzer.artifact_license_map(). This function |
| 7 | +# returns a dict, where the keys of the dict are packages (specifically, the keys are Base.PkgIds, |
| 8 | +# which simply contain the package name and the package UUID), and the value is the artifact |
| 9 | +# license information. The artifact license information is not the same as the license for the |
| 10 | +# Julia package source code itself, so I don't want to confuse the user. I want to make it clear |
| 11 | +# that the user is only getting (from this function) the licenses for the artifacts. So that's |
| 12 | +# why I chose to return ArtifactLicenseInfos. |
| 13 | +# |
| 14 | +# In contrast, if I just returned the named tuple of |
| 15 | +# (; license_filename::String, licenses_found::Vector{String}, license_file_percent_covered::Float64), |
| 16 | +# then it would not be clear whether the user was working with the licenses from artifacts or |
| 17 | +# the Julia package source code itself. So, using a nominal ArtifactLicenseInfo type makes it |
| 18 | +# more clear, and also prevents the user from accidentally combining the two. |
| 19 | +# |
# License information for a single license file found inside an artifact.
#
# The three fields have the same names and types as the named tuple described above; the
# keyword constructor generated by `Base.@kwdef` accepts them by name.
Base.@kwdef struct ArtifactLicenseInfo
    license_filename::String
    licenses_found::Vector{String}
    license_file_percent_covered::Float64
end

# Equality and hashing are defined on field *contents*.
#
# Without these methods, `==` on an immutable struct falls back to `===`, which compares the
# mutable `licenses_found` vector by object identity. Two records describing exactly the same
# license would then be considered different, and `unique!`/`Set`/`Dict` could never
# deduplicate them.
function Base.:(==)(a::ArtifactLicenseInfo, b::ArtifactLicenseInfo)
    return a.license_filename == b.license_filename &&
           a.licenses_found == b.licenses_found &&
           a.license_file_percent_covered == b.license_file_percent_covered
end

# `isequal` is what hashed collections use; keep it field-wise so it stays consistent with
# `hash` below (e.g. for `NaN` and signed zeros in the Float64 field).
function Base.isequal(a::ArtifactLicenseInfo, b::ArtifactLicenseInfo)
    return isequal(a.license_filename, b.license_filename) &&
           isequal(a.licenses_found, b.licenses_found) &&
           isequal(a.license_file_percent_covered, b.license_file_percent_covered)
end

function Base.hash(info::ArtifactLicenseInfo, h::UInt)
    # Mix in a type tag so a record does not hash like a bare tuple of its fields.
    h = hash(:ArtifactLicenseInfo, h)
    h = hash(info.license_filename, h)
    h = hash(info.licenses_found, h)
    return hash(info.license_file_percent_covered, h)
end
| 25 | + |
# Return the `uuid` field of a `Release` or `Added` package source, converted to a `Base.UUID`.
_get_pkg_uuid_u(pkg::Union{Release,Added}) = Base.UUID(pkg.uuid)
| 28 | + |
# Return the `name` field of a `Release` or `Added` package source.
_get_pkg_name(pkg::Union{Release,Added}) = pkg.name
| 31 | + |
# Build the `Base.PkgId` (UUID + name) that identifies the package behind `pkg`.
function _construct_pkgid(pkg::PkgSource)
    pkg_name = _get_pkg_name(pkg)
    return Base.PkgId(_get_pkg_uuid_u(pkg), pkg_name)
end
| 38 | + |
# Recursively search the directory `local_dir`.
# Return the full paths of every file whose name is one of `Artifacts.artifact_names`
# (i.e. the Artifacts.toml / JuliaArtifacts.toml files), in `walkdir` traversal order.
function find_artifacts_toml_from_local_dir(local_dir::String)
    matches = String[]
    for (parent, _, filenames) in walkdir(local_dir)
        hits = filter(in(Artifacts.artifact_names), filenames)
        append!(matches, joinpath.(parent, hits))
    end
    return matches
end
| 54 | + |
# Read the "git-tree-sha1" entry of an artifact description and return it as a `Base.SHA1`.
function _get_git_tree_sha1(entry::Dict)
    return Base.SHA1(entry["git-tree-sha1"])
end

# A `Pair` is first wrapped in a `Dict` and then handled by the method above.
function _get_git_tree_sha1(entry::Pair)
    return _get_git_tree_sha1(Dict(entry))
end
| 57 | + |
# Take in the "info" value stored under one artifact name in an Artifacts.toml file.
# Return every artifact hash (git-tree-sha1) it mentions, as a `Vector{Base.SHA1}`.
# Note: This covers the hashes for all platforms (not just the user's current platform).
#
# A `Dict` describes a single artifact and yields a one-element vector; a `Vector` holds
# several descriptions and yields one hash per element, in order.
function _get_possible_artifact_hashes_from_info(info::Dict)
    return [_get_git_tree_sha1(info)]
end

function _get_possible_artifact_hashes_from_info(info::Vector)
    return Base.SHA1[_get_git_tree_sha1(entry) for entry in info]
end
| 70 | + |
# Take in the path of an Artifacts.toml file.
# Parse it and return the distinct artifact hashes (git-tree-sha1) of all of its entries.
# Note: This covers the hashes for all platforms (not just the user's current platform).
function get_possible_artifact_hashes_from_artifacts_toml(artifacts_toml::String)
    parsed = TOML.parsefile(artifacts_toml)
    collected = Base.SHA1[]
    # Only the values are needed; the artifact names (the keys) are not used.
    for entry in values(parsed)
        append!(collected, _get_possible_artifact_hashes_from_info(entry))
    end
    return unique!(collected)
end
| 84 | + |
# Take in the directory local_dir where a package lives.
# Return the distinct artifact hashes (git-tree-sha1) listed in all of the
# {,Julia}Artifacts.toml files found under that directory.
# Note: This covers the hashes for all platforms (not just the user's current platform).
#
# Keyword arguments:
# 1. pkg (required): only interpolated into the error message.
#
# Throws an error if no {,Julia}Artifacts.toml file is found under local_dir.
function get_possible_artifact_hashes_from_local_dir(local_dir::String; pkg)
    toml_paths = find_artifacts_toml_from_local_dir(local_dir)
    if isempty(toml_paths)
        error("Did not find any {,Julia}Artifacts.toml files for package: $(pkg)")
    end
    all_hashes = Base.SHA1[]
    foreach(toml_paths) do toml_path
        append!(all_hashes, get_possible_artifact_hashes_from_artifacts_toml(toml_path))
    end
    return unique!(all_hashes)
end
| 102 | + |
# Take in an artifact hash (git-tree-sha1).
# Return the deduplicated list of licenses found in that artifact's directory tree.
#
# The artifact's root directory and every directory beneath it are each passed to
# `LicenseCheck.find_license`; each non-`nothing` result becomes one `ArtifactLicenseInfo`.
# If nothing is found at all, an error-level log message is emitted (no exception is thrown)
# and an empty vector is returned.
function get_licenses_from_artifact_hash(hash::Base.SHA1)
    artifact_root_path = Artifacts.artifact_path(hash)
    # Start with the artifact root itself: iterating only the `dirs` entries produced by
    # `walkdir` visits every *sub*directory but never the top-level directory, so a license
    # file placed directly in the artifact root would be missed.
    candidate_dirs = String[artifact_root_path]
    for (root, dirs, _) in walkdir(artifact_root_path)
        append!(candidate_dirs, joinpath.(root, dirs))
    end
    licenses = ArtifactLicenseInfo[]
    for dir in candidate_dirs
        found = LicenseCheck.find_license(dir)
        isnothing(found) && continue
        new_info = ArtifactLicenseInfo(;
            found.license_filename,
            found.licenses_found,
            found.license_file_percent_covered,
        )
        push!(licenses, new_info)
    end
    # Deduplicate by field contents rather than by `==` on the struct, so that records with
    # equal contents collapse even though each one holds its own `licenses_found` vector.
    unique!(licenses) do info
        (info.license_filename, info.licenses_found, info.license_file_percent_covered)
    end
    if isempty(licenses)
        # Deliberately best-effort: log and carry on instead of throwing.
        msg = "No licenses found for artifact $(hash)"
        @error msg
    end
    return licenses
end
| 130 | + |
# Takes in two arguments:
# 1. artifact_hash_to_licenses: a dict where the keys are artifact hashes (git-tree-sha1)
#    and the values are lists of licenses. This dict is mutated.
# 2. available_hashes: a list of artifact hashes (git-tree-sha1)
#
# For every hash in available_hashes, look up the licenses of that artifact and store them
# in the dict under that hash (overwriting any existing entry). Returns nothing.
function generate_artifact_hash_to_licenses!(
    artifact_hash_to_licenses::Dict{Base.SHA1,Vector{ArtifactLicenseInfo}},
    available_hashes::Vector{Base.SHA1},
)
    foreach(available_hashes) do sha
        found_licenses = get_licenses_from_artifact_hash(sha)
        artifact_hash_to_licenses[sha] = found_licenses
    end
    return nothing
end
| 150 | + |
# Takes in two arguments:
# 1. artifact_hash_to_licenses: a dict where the keys are artifact hashes (git-tree-sha1)
#    and the values are lists of licenses. This dict is mutated.
# 2. pkgs: a list of PkgSources.
#
# Collect the available artifact hashes of every package in pkgs (in order), then hand the
# combined list to the hash-based method of this function, which fills in
#   artifact_hash_to_licenses[$hash] = $listoflicenses
# Returns nothing.
#
# Keyword arguments (forwarded unchanged to generate_available_artifact_hashes_from_pkg):
# 1. allow_no_artifacts::Vector{Base.PkgId}. If a package has no artifacts, then we throw an
#    error if the package is not in the allow_no_artifacts list, but we print a debug message
#    (and don't throw an error) if the package is in the allow_no_artifacts list.
function generate_artifact_hash_to_licenses!(
    artifact_hash_to_licenses::Dict{Base.SHA1,Vector{ArtifactLicenseInfo}},
    pkgs::Vector{<:PkgSource};
    kwargs...,
)
    available_hashes = Base.SHA1[
        sha for pkg in pkgs for
        sha in generate_available_artifact_hashes_from_pkg(pkg; kwargs...)
    ]
    generate_artifact_hash_to_licenses!(artifact_hash_to_licenses, available_hashes)
    return nothing
end
| 179 | + |
# Take in a PkgSource.
# Obtain the package's code via PackageAnalyzer.obtain_code and return all possible artifact
# hashes listed in the {,Julia}Artifacts.toml files of the resulting directory.
#
# Throws an error if the package is reported as not reachable.
function generate_possible_artifact_hashes_from_pkg(pkg::PkgSource)
    # The result is unused here; the call is retained so that any error it raises still
    # surfaces before the package code is obtained.
    _construct_pkgid(pkg)
    local_dir, reachable, _, _ = PackageAnalyzer.obtain_code(pkg)
    reachable || error("Package is not reachable: $(pkg)")
    return get_possible_artifact_hashes_from_local_dir(local_dir::String; pkg)
end
| 192 | + |
# Take in a PkgSource.
# Return the distinct *available* artifact hashes for this package, i.e. those of its
# possible hashes for which Artifacts.artifact_exists returns true.
# - Possible artifact hash = the hash is in the Artifacts.toml file, but it might be for a
#   platform that is different from the current platform.
# - Available artifact hash = the hash actually exists locally.
#
# Keyword arguments:
# 1. allow_no_artifacts::Vector{Base.PkgId}. When no available hash is found: if the
#    package's PkgId is in this list, a debug message is logged and the empty list is
#    returned; otherwise an error is thrown.
function generate_available_artifact_hashes_from_pkg(
    pkg::PkgSource;
    allow_no_artifacts::Vector{Base.PkgId} = Base.PkgId[],
)
    pkgid = _construct_pkgid(pkg)
    candidates = generate_possible_artifact_hashes_from_pkg(pkg)
    present = unique!(filter(Artifacts.artifact_exists, candidates))
    # Common case: at least one artifact is installed locally.
    isempty(present) || return present
    msg = "No artifacts were found for package $(pkg) with PkgId $(pkgid)"
    pkgid in allow_no_artifacts || error(msg)
    @debug msg
    return present
end
| 221 | + |
# Takes in a list of PkgSource.
# Returns a dict pkgid_to_licenses.
# The keys of pkgid_to_licenses are Base.PkgId.
# The values of pkgid_to_licenses are the lists of artifact licenses for each package.
#
# All keyword arguments are forwarded to both generate_artifact_hash_to_licenses! and
# artifact_license_map.
function generate_pkgid_to_licenses(pkgs::Vector{<:PkgSource}; kwargs...)
    # Step 1: hash => licenses, for every artifact available to any of the packages.
    hash_to_licenses = Dict{Base.SHA1,Vector{ArtifactLicenseInfo}}()
    generate_artifact_hash_to_licenses!(hash_to_licenses, pkgs; kwargs...)
    # Step 2: regroup those licenses per package.
    return artifact_license_map(pkgs, hash_to_licenses; kwargs...)
end
| 232 | + |
# Takes in two arguments:
# 1. pkgs: list of PkgSources.
# 2. artifact_hash_to_licenses: this is the Dict{Base.SHA1,Vector{ArtifactLicenseInfo}}
#    that we get after running the following:
#    - artifact_hash_to_licenses = Dict{Base.SHA1,Vector{ArtifactLicenseInfo}}()
#    - generate_artifact_hash_to_licenses!(artifact_hash_to_licenses, pkgs; kwargs...)
#
# Returns a Dict{Base.PkgId,Vector{ArtifactLicenseInfo}} that maps each package to the
# concatenation of the license lists of its available artifact hashes. Every available hash
# is looked up in artifact_hash_to_licenses by indexing, so a hash that is absent from that
# dict raises a KeyError.
#
# Keyword arguments (forwarded to generate_available_artifact_hashes_from_pkg):
# 1. allow_no_artifacts::Vector{Base.PkgId}. Packages in this list may have no available
#    artifacts without an error being thrown.
function artifact_license_map(
    pkgs::Vector{<:PkgSource},
    artifact_hash_to_licenses::Dict{Base.SHA1,Vector{ArtifactLicenseInfo}};
    kwargs...,
)
    result = Dict{Base.PkgId,Vector{ArtifactLicenseInfo}}()
    for pkg in pkgs
        pkg_hashes = generate_available_artifact_hashes_from_pkg(pkg; kwargs...)
        pkg_licenses = ArtifactLicenseInfo[
            license for sha in pkg_hashes for license in artifact_hash_to_licenses[sha]
        ]
        result[_construct_pkgid(pkg)] = pkg_licenses
    end
    return result
end
0 commit comments