Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2497,7 +2497,10 @@ def macho_binaries(self):
)

def executable_binaries(self):
    """
    Return the union of all executable binary resources: Windows
    executables, macOS Mach-O binaries, and Linux/Unix ELF binaries.

    Each component queryset is stripped of its ordering with
    ``order_by()`` before combining: databases restrict ORDER BY inside
    the sub-selects of a UNION, so unioning ordered querysets can fail
    at the SQL level.
    """
    return self.win_exes().order_by().union(
        self.macho_binaries().order_by(),
        self.elfs().order_by(),
    )

def with_has_children(self):
"""
Expand Down
27 changes: 20 additions & 7 deletions scanpipe/pipes/d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,22 +164,35 @@ def map_checksum(project, checksum_field, logger=None):
def _map_jvm_to_class_resource(
to_resource, from_resources, from_classes_index, jvm_lang: jvm.JvmLanguage
):
"""
Map the ``to_resource`` .class file Resource with a Resource in
``from_resources`` source files, using the ``from_classes_index`` index of
from/ fully qualified binary files.
"""
for extension in jvm_lang.source_extensions:
normalized_path = jvm_lang.get_normalized_path(
path=to_resource.path, extension=extension
)

match = pathmap.find_paths(path=normalized_path, index=from_classes_index)

if not match and jvm_lang.name == "scala":
package_path = str(Path(to_resource.path).parent)
potential_sources = from_resources.filter(
path__startswith=package_path.replace("to/", "from/"),
extension__in=jvm_lang.source_extensions,
)
for from_resource in potential_sources:
from_source_root_parts = from_resource.path.strip("/").split("/")
from_source_root = "/".join(from_source_root_parts[:-1])
pipes.make_relation(
from_resource=from_resource,
to_resource=to_resource,
map_type=jvm_lang.binary_map_type,
extra_data={"from_source_root": f"{from_source_root}/"},
)
continue

if not match:
return
continue

for resource_id in match.resource_ids:
from_resource = from_resources.get(id=resource_id)
# compute the root of the packages on the source side
from_source_root_parts = from_resource.path.strip("/").split("/")
from_source_root = "/".join(
from_source_root_parts[: -match.matched_path_length]
Expand Down
16 changes: 16 additions & 0 deletions scanpipe/pipes/jvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,22 @@ class ScalaLanguage(JvmLanguage):
package_regex = re.compile(r"^\s*package\s+([\w\.]+)\s*;?")
binary_map_type = "scala_to_class"

@classmethod
def get_normalized_path(cls, path, extension):
    """
    Return ``path`` with its binary file name replaced by the matching
    source file name carrying ``extension``.

    The base name is taken as everything before the first "$" when one
    is present (compiler-generated Scala artifacts such as
    ``Foo$Bar.class`` map back to ``Foo``), otherwise everything before
    the first ".".

    Raise ValueError when ``path`` does not end with one of the
    supported binary extensions.
    """
    if not path.endswith(cls.binary_extensions):
        raise ValueError(
            f"Only path ending with {cls.binary_extensions} are supported."
        )

    binary_path = Path(path.strip("/"))
    file_name = binary_path.name
    separator = "$" if "$" in file_name else "."
    base_name = file_name.partition(separator)[0]

    return str(binary_path.parent / f"{base_name}{extension}")


class KotlinLanguage(JvmLanguage):
name = "kotlin"
Expand Down
94 changes: 77 additions & 17 deletions scanpipe/pipes/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,22 +97,64 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model
)
return []

manifests_by_type = {}
for resource in manifest_resources:
packages = resolve_manifest_resources(resource, package_registry)
if packages:
resolved_packages.extend(packages)
if headers := get_manifest_headers(resource):
sboms_headers[resource.name] = headers
else:
project.add_error(
description="No packages could be resolved",
model=model,
object_instance=resource,
)
package_type = get_default_package_type(resource.location)
if package_type:
if package_type not in manifests_by_type:
manifests_by_type[package_type] = []
manifests_by_type[package_type].append(resource)

if "pypi" in manifests_by_type:
pypi_resources = manifests_by_type["pypi"]
pypi_locations = [resource.location for resource in pypi_resources]

resolver = package_registry.get("pypi")
if resolver:
try:
packages = resolver(input_locations=pypi_locations)
if packages:
for package_data in packages:
package_data["codebase_resources"] = pypi_resources
resolved_packages.extend(packages)

for resource in pypi_resources:
if headers := get_manifest_headers(resource):
sboms_headers[resource.name] = headers
else:
for resource in pypi_resources:
project.add_error(
description="No packages could be resolved",
model=model,
object_instance=resource,
)
except Exception as e:
for resource in pypi_resources:
project.add_error(
description=f"Error resolving packages: {e}",
model=model,
object_instance=resource,
)

del manifests_by_type["pypi"]

for package_type, resources in manifests_by_type.items():
for resource in resources:
packages = resolve_manifest_resources(resource, package_registry)
if packages:
resolved_packages.extend(packages)
if headers := get_manifest_headers(resource):
sboms_headers[resource.name] = headers
else:
project.add_error(
description="No packages could be resolved",
model=model,
object_instance=resource,
)

dependencies = get_dependencies_from_manifest(resource)
if dependencies:
resolved_dependencies.extend(dependencies)
dependencies = get_dependencies_from_manifest(resource)
if dependencies:
resolved_dependencies.extend(dependencies)

if sboms_headers:
project.update_extra_data({"sboms_headers": sboms_headers})
Expand Down Expand Up @@ -222,13 +264,31 @@ def get_manifest_resources(project):
return project.codebaseresources.filter(status=flag.APPLICATION_PACKAGE)


def resolve_pypi_packages(input_location):
"""Resolve the PyPI packages from the ``input_location`` requirements file."""
def resolve_pypi_packages(input_location=None, input_locations=None):
"""
Resolve the PyPI packages from requirement file(s).

Args:
input_location: Single requirement file path (for backward compatibility)
input_locations: List of requirement file paths (for batch processing)

Returns:
List of resolved package data dictionaries

"""
# Handle both single file and multiple files
if input_locations:
requirement_files = input_locations
elif input_location:
requirement_files = [input_location]
else:
raise ValueError("Either input_location or input_locations must be provided")

python_version = f"{sys.version_info.major}{sys.version_info.minor}"
operating_system = "linux"

resolution_output = python_inspector.resolve_dependencies(
requirement_files=[input_location],
requirement_files=requirement_files,
python_version=python_version,
operating_system=operating_system,
# Prefer source distributions over binary distributions,
Expand Down
37 changes: 37 additions & 0 deletions scanpipe/tests/pipes/test_d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,43 @@ def test_scanpipe_pipes_d2d_map_jar_to_scala_source(self):
self.assertEqual(from2, relation.from_resource)
self.assertEqual(to_jar, relation.to_resource)

def test_scanpipe_pipes_d2d_map_scala_case_classes_to_source(self):
    """
    Several .class files produced from a single Scala source file (case
    classes and companions) should all map back to that one .scala file.
    """
    package_path = (
        "pekko-cluster-sharding-typed/org/apache/pekko/cluster/sharding/typed/"
    )
    scala_source = make_resource_file(
        self.project1,
        path=f"from/{package_path}ClusterShardingQuery.scala",
        extra_data={"scala_package": "org.apache.pekko.cluster.sharding.typed"},
    )
    class_file_names = [
        "GetClusterShardingStats.class",
        "GetShardRegionState.class",
        "ClusterShardingQuery.class",
    ]
    class_resources = [
        make_resource_file(self.project1, path=f"to/{package_path}{file_name}")
        for file_name in class_file_names
    ]

    log_buffer = io.StringIO()
    d2d.map_jvm_to_class(
        self.project1, logger=log_buffer.write, jvm_lang=jvm.ScalaLanguage
    )

    self.assertIn(
        "Mapping 3 .class resources to 1 ('.scala',)", log_buffer.getvalue()
    )
    self.assertEqual(3, self.project1.codebaserelations.count())

    for class_resource in class_resources:
        relation = self.project1.codebaserelations.get(to_resource=class_resource)
        self.assertEqual(scala_source, relation.from_resource)
        self.assertEqual("scala_to_class", relation.map_type)

def test_scanpipe_pipes_d2d_map_jar_to_kotlin_source(self):
from1 = make_resource_file(
self.project1,
Expand Down
50 changes: 49 additions & 1 deletion scanpipe/tests/pipes/test_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def test_scanpipe_pipes_resolve_resolve_pypi_packages(self, mock_resolve):

mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"])

packages = resolve.resolve_pypi_packages("")
packages = resolve.resolve_pypi_packages("requirements.txt")
self.assertEqual(2, len(packages))
package_data = packages[0]
self.assertEqual("pip", package_data["name"])
Expand Down Expand Up @@ -373,3 +373,51 @@ def test_scanpipe_resolve_get_manifest_headers(self):
]
headers = resolve.get_manifest_headers(resource)
self.assertEqual(expected, list(headers.keys()))

@mock.patch("scanpipe.pipes.resolve.python_inspector.resolve_dependencies")
def test_scanpipe_pipes_resolve_pypi_packages_multiple_files(self, mock_resolve):
    """Test that resolve_pypi_packages can handle multiple requirement files."""
    # Fixture generated with:
    # $ python-inspector --python-version 3.12 --operating-system linux \
    #   --specifier pip==25.0.1 --json -
    fixture_path = (
        self.data / "resolve" / "python_inspector_resolve_dependencies.json"
    )
    with open(fixture_path) as json_file:
        fixture = json.load(json_file)

    mock_resolve.return_value = mock.Mock(packages=fixture["packages"])

    requirement_files = ["requirements1.txt", "requirements2.txt"]
    resolved = resolve.resolve_pypi_packages(input_locations=requirement_files)

    mock_resolve.assert_called_once()
    self.assertEqual(
        requirement_files, mock_resolve.call_args.kwargs["requirement_files"]
    )
    self.assertEqual(2, len(resolved))
    self.assertEqual("pip", resolved[0]["name"])

@mock.patch("scanpipe.pipes.resolve.python_inspector.resolve_dependencies")
def test_scanpipe_pipes_resolve_pypi_packages_backward_compatibility(
    self, mock_resolve
):
    """
    Test that resolve_pypi_packages still works with single file
    (backward compatibility).
    """
    fixture_path = (
        self.data / "resolve" / "python_inspector_resolve_dependencies.json"
    )
    with open(fixture_path) as json_file:
        fixture = json.load(json_file)

    mock_resolve.return_value = mock.Mock(packages=fixture["packages"])

    resolved = resolve.resolve_pypi_packages(input_location="requirements.txt")

    mock_resolve.assert_called_once()
    self.assertEqual(
        ["requirements.txt"], mock_resolve.call_args.kwargs["requirement_files"]
    )
    self.assertEqual(2, len(resolved))
Loading