Skip to content
11 changes: 11 additions & 0 deletions .pyroscope.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
source_code:
mappings:
- path:
- prefix: $GOROOT/src
language: go
source:
github:
owner: golang
repo: go
ref: go1.24.8
path: src
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
source_code:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This config file is great. Wondering if there's also an opportunity to auto-detect Java files w/o a config file too and support automatic 3rd party path resolution like we do with go? I imagine at Uber scale it would be a large lift to make a config file that captures everything they'd like.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the only opportunity we have is setting it on the client side (as a parameter to GetFile).

Then we could try to match a path in the repo with the package path. It is still fairly tricky to detect the source code prefix (like src/main or <package>/src/main).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noting here that we discussed offline a tool for programmatically generating this config that I'm working on. Has the added benefit of moving the expensive logic of translating function names to git repos outside of the GetFile call path.

mappings:
- function_name:
- prefix: org/example/rideshare
language: java
source:
local:
path: src/main/java
- function_name:
- prefix: java
language: java
source:
github:
owner: openjdk
repo: jdk
ref: jdk-17+0
path: src/java.base/share/classes
- function_name:
- prefix: org/springframework/http
- prefix: org/springframework/web
language: java
source:
github:
owner: spring-projects
repo: spring-framework
ref: v5.3.20
path: spring-web/src/main/java
- function_name:
- prefix: org/springframework/web/servlet
language: java
source:
github:
owner: spring-projects
repo: spring-framework
ref: v5.3.20
path: spring-webmvc/src/main/java
32 changes: 28 additions & 4 deletions pkg/frontend/vcs/client/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,27 @@ func (gh *githubClient) GetCommit(ctx context.Context, owner, repo, ref string)
var githubErr *github.ErrorResponse
if errors.As(err, &githubErr) {
code := connectgrpc.HTTPToCode(int32(githubErr.Response.StatusCode))
sp.SetTag("error", true)
sp.SetTag("error.message", err.Error())
sp.SetTag("http.status_code", githubErr.Response.StatusCode)
return nil, connect.NewError(code, err)
}
sp.SetTag("error", true)
sp.SetTag("error.message", err.Error())
return nil, err
}
// error if message is nil
if commit.Commit == nil || commit.Commit.Message == nil {
return nil, connect.NewError(connect.CodeInternal, errors.New("commit contains no message"))
err := connect.NewError(connect.CodeInternal, errors.New("commit contains no message"))
sp.SetTag("error", true)
sp.SetTag("error.message", err.Error())
return nil, err
}
if commit.Commit == nil || commit.Commit.Author == nil || commit.Commit.Author.Date == nil {
return nil, connect.NewError(connect.CodeInternal, errors.New("commit contains no date"))
err := connect.NewError(connect.CodeInternal, errors.New("commit contains no date"))
sp.SetTag("error", true)
sp.SetTag("error.message", err.Error())
return nil, err
}

commitInfo := &vcsv1.CommitInfo{
Expand Down Expand Up @@ -89,22 +100,35 @@ func (gh *githubClient) GetFile(ctx context.Context, req FileRequest) (File, err
if err != nil {
var githubErr *github.ErrorResponse
if errors.As(err, &githubErr) && githubErr.Response.StatusCode == http.StatusNotFound {
return File{}, fmt.Errorf("%w: %s", ErrNotFound, err)
err := fmt.Errorf("%w: %s", ErrNotFound, err)
sp.SetTag("error", true)
sp.SetTag("error.message", err.Error())
sp.SetTag("http.status_code", http.StatusNotFound)
return File{}, err
}
sp.SetTag("error", true)
sp.SetTag("error.message", err.Error())
return File{}, err
}

if file == nil {
sp.SetTag("error", true)
sp.SetTag("error.message", ErrNotFound.Error())
return File{}, ErrNotFound
}

// We only support files retrieval.
if file.Type != nil && *file.Type != "file" {
return File{}, connect.NewError(connect.CodeInvalidArgument, errors.New("path is not a file"))
err := connect.NewError(connect.CodeInvalidArgument, errors.New("path is not a file"))
sp.SetTag("error", true)
sp.SetTag("error.message", err.Error())
return File{}, err
}

content, err := file.GetContent()
if err != nil {
sp.SetTag("error", true)
sp.SetTag("error.message", err.Error())
return File{}, err
}

Expand Down
190 changes: 190 additions & 0 deletions pkg/frontend/vcs/config/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
package config

import (
"errors"
"fmt"
"slices"
"strings"

"gopkg.in/yaml.v3"
)

// Language names the programming language of a source code mapping, using the
// spelling found in the configuration file.
type Language string

const (
	// PyroscopeConfigPath is the file name of the Pyroscope configuration file.
	PyroscopeConfigPath = ".pyroscope.yaml"

	// Known Language values. LanguageUnknown is the zero value (the empty
	// string) and is not listed in validLanguages.
	LanguageUnknown = Language("")
	LanguageGo      = Language("go")
	LanguageJava    = Language("java")
)

// validLanguages lists the languages that MappingConfig.Validate accepts in a
// mapping's language field.
var validLanguages = []Language{
	LanguageGo,
	LanguageJava,
}

// PyroscopeConfig is the top-level structure of the .pyroscope.yaml
// configuration file.
type PyroscopeConfig struct {
	// SourceCode is decoded from the "source_code" key and holds the source
	// code mappings.
	SourceCode SourceCodeConfig `yaml:"source_code"`
}
Comment on lines +28 to +30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we also want a version tag here so we can continue to support older versions easily.


// SourceCodeConfig contains the source code mapping configuration.
type SourceCodeConfig struct {
	// Mappings is the list of mappings decoded from the "mappings" key.
	// PyroscopeConfig.FindMapping searches this list.
	Mappings []MappingConfig `yaml:"mappings"`
}

// MappingConfig represents a single source code mapping: a set of match rules
// plus the source that matching files are resolved against.
//
// Validate requires at least one Path or FunctionName rule, a supported
// Language, and a valid Source.
type MappingConfig struct {
	// Path holds prefix rules that are compared against FileSpec.Path.
	Path []Match `yaml:"path"`

	// FunctionName holds prefix rules that are compared against
	// FileSpec.FunctionName.
	FunctionName []Match `yaml:"function_name"`

	// Language is the language of the mapped sources; it must be one of
	// validLanguages to pass validation.
	Language string `yaml:"language"`

	// Source describes where the mapped source code is retrieved from.
	Source Source `yaml:"source"`
}

// Match represents a single match rule of a mapping.
type Match struct {
	// Prefix is the string that the inspected path or function name must
	// start with for the rule to match.
	Prefix string `yaml:"prefix"`
}

// Source represents how a mapping retrieves its source code. Exactly one of
// the fields must be set; Validate reports an error when none or more than
// one is supplied.
type Source struct {
	// Local is set when the "local" key is present.
	Local *LocalMappingConfig `yaml:"local,omitempty"`

	// GitHub is set when the "github" key is present.
	GitHub *GitHubMappingConfig `yaml:"github,omitempty"`
}

// LocalMappingConfig contains configuration for local path mappings.
type LocalMappingConfig struct {
	// Path is the configured local path.
	// NOTE(review): presumably relative to the root of the repository that
	// contains the config file; confirm against the code that consumes it.
	Path string `yaml:"path"`
}

// GitHubMappingConfig contains configuration for GitHub repository mappings.
type GitHubMappingConfig struct {
	// Owner is the owner (user or organisation) of the GitHub repository.
	Owner string `yaml:"owner"`

	// Repo is the name of the GitHub repository.
	Repo string `yaml:"repo"`

	// Ref is the git ref to read the sources from.
	// NOTE(review): presumably a tag, branch or commit; confirm with the
	// code that fetches the file.
	Ref string `yaml:"ref"`

	// Path is the configured path inside the repository.
	// NOTE(review): presumably the directory that the matched prefix is
	// rewritten to; confirm against the consumer.
	Path string `yaml:"path"`
}

// ParsePyroscopeConfig decodes data as YAML into a PyroscopeConfig and then
// validates the result. On a decoding or validation failure it returns a nil
// config together with an error wrapping the underlying cause.
func ParsePyroscopeConfig(data []byte) (*PyroscopeConfig, error) {
	cfg := new(PyroscopeConfig)

	err := yaml.Unmarshal(data, cfg)
	if err != nil {
		return nil, fmt.Errorf("failed to parse pyroscope config: %w", err)
	}

	// A document can decode cleanly and still be semantically invalid.
	err = cfg.Validate()
	if err != nil {
		return nil, fmt.Errorf("invalid pyroscope config: %w", err)
	}

	return cfg, nil
}

// Validate checks every source code mapping and returns all problems found,
// joined into a single error. Each problem is prefixed with the index of the
// mapping it belongs to. It returns nil when every mapping is valid.
func (c *PyroscopeConfig) Validate() error {
	mappings := c.SourceCode.Mappings

	var problems []error
	for idx := range mappings {
		err := mappings[idx].Validate()
		if err == nil {
			continue
		}
		problems = append(problems, fmt.Errorf("mapping[%d]: %w", idx, err))
	}

	return errors.Join(problems...)
}

// Validate checks a single mapping and returns every problem found, joined
// into one error, or nil if the mapping is valid. A mapping needs at least one
// path or function_name rule, a language listed in validLanguages, and a
// source that passes its own validation.
func (m *MappingConfig) Validate() error {
	var problems []error

	hasRule := len(m.Path) > 0 || len(m.FunctionName) > 0
	if !hasRule {
		problems = append(problems, errors.New("at least one path or a function_name match is required"))
	}

	if lang := Language(m.Language); !slices.Contains(validLanguages, lang) {
		problems = append(problems, fmt.Errorf("language '%s' unsupported, valid languages are %v", m.Language, validLanguages))
	}

	if sourceErr := m.Source.Validate(); sourceErr != nil {
		problems = append(problems, sourceErr)
	}

	return errors.Join(problems...)
}

// Validate checks if a source configuration is valid
func (m *Source) Validate() error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be helpful to surface an error message if the supplied source is not supported (i.e. not github or local).

var (
instances int
errs []error
)

if m.GitHub != nil {
instances++
if err := m.GitHub.Validate(); err != nil {
errs = append(errs, err)
}
}
if m.Local != nil {
instances++
if err := m.Local.Validate(); err != nil {
errs = append(errs, err)
}
}

if instances == 0 {
errs = append(errs, errors.New("no source type supplied, you need to supply exactly one source type"))
} else if instances != 1 {
errs = append(errs, errors.New("more than one source type supplied, you need to supply exactly one source type"))
}

return errors.Join(errs...)
}

// Validate checks the GitHub mapping configuration. No field-level checks are
// implemented, so it always returns nil.
func (g *GitHubMappingConfig) Validate() error {
	return nil
}

// Validate checks the local mapping configuration. No field-level checks are
// implemented, so it always returns nil.
func (l *LocalMappingConfig) Validate() error {
	return nil
}

// FileSpec describes the file that a mapping is looked up for. Its fields are
// compared against the prefix rules of each mapping.
type FileSpec struct {
	// Path is compared against the mapping's path prefixes.
	Path string

	// FunctionName is compared against the mapping's function_name prefixes.
	FunctionName string
}

// FindMapping returns the mapping that matches file most specifically, i.e.
// the one for which MappingConfig.Match reports the longest matching prefix.
// When several mappings tie, the one listed first wins. It returns nil if no
// mapping matches.
//
// The returned pointer refers to the entry stored in c.SourceCode.Mappings,
// not to a copy.
func (c *PyroscopeConfig) FindMapping(file FileSpec) *MappingConfig {
	var (
		best    *MappingConfig
		bestLen = -1 // Match reports -1 for "no match", so any real match beats it.
	)

	mappings := c.SourceCode.Mappings
	for i := range mappings {
		// Take the address of the slice element rather than of a range
		// variable: before Go 1.22 the range variable is shared by all
		// iterations, so a pointer to it would end up referring to the last
		// mapping visited instead of the best one. Indexing also avoids
		// copying every mapping.
		candidate := &mappings[i]
		if n := candidate.Match(file); n > bestLen {
			best, bestLen = candidate, n
		}
	}

	return best
}
Comment on lines +159 to +170
Copy link
Contributor

@bryanhuhta bryanhuhta Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'll be interesting to see how expensive this linear search is time-wise for very large projects. I suspect there could easily be many thousands of these mappings.

It's out of scope of this PR, but later we may need to improve this search heuristic.


// Match scores how specifically this mapping matches file. It returns the
// length of the longest configured prefix that file.FunctionName (for
// function_name rules) or file.Path (for path rules) starts with, or -1 if
// none of the rules match.
func (m *MappingConfig) Match(file FileSpec) int {
	longest := -1

	// consider raises longest to the length of the longest rule in rules
	// whose prefix subject starts with.
	consider := func(rules []Match, subject string) {
		for i := range rules {
			prefix := rules[i].Prefix
			if n := len(prefix); n > longest && strings.HasPrefix(subject, prefix) {
				longest = n
			}
		}
	}

	consider(m.FunctionName, file.FunctionName)
	consider(m.Path, file.Path)

	return longest
}
Loading