From 7fb15a6744c5bd8a0c9ebc7a290b8f346f423f9d Mon Sep 17 00:00:00 2001 From: Alexis-Maurer Fortin Date: Mon, 29 Jun 2026 11:20:58 -0400 Subject: [PATCH 1/3] wip making parser more lenient so we don';t lose data when parsing --- models/github_actions.go | 76 ++++++++++---- models/github_actions_test.go | 169 +++++++++++++++++++++++++----- scanner/inventory_scanner_test.go | 31 ++++++ 3 files changed, 226 insertions(+), 50 deletions(-) diff --git a/models/github_actions.go b/models/github_actions.go index 4ca1ebf..0c4e227 100644 --- a/models/github_actions.go +++ b/models/github_actions.go @@ -216,7 +216,7 @@ func (o GithubActionsMetadata) IsValid() bool { func (o *GithubActionsJobs) UnmarshalYAML(node *yaml.Node) error { if node.Kind != yaml.MappingNode { - return fmt.Errorf("invalid yaml node type for jobs") + return nil } *o = make(GithubActionsJobs, 0, len(node.Content)/2) @@ -233,7 +233,8 @@ func (o *GithubActionsJobs) UnmarshalYAML(node *yaml.Node) error { } err := value.Decode(&job) if err != nil { - return err + // Skip the offending job, keep the rest of the workflow. + continue } for j := 0; j < len(value.Content); j += 2 { @@ -261,7 +262,7 @@ func (o *GithubActionsJobSecrets) UnmarshalYAML(node *yaml.Node) error { } if node.Kind != yaml.MappingNode { - return fmt.Errorf("invalid yaml node type for secrets") + return nil } for i := 0; i < len(node.Content); i += 2 { @@ -280,13 +281,15 @@ func (o *StringList) UnmarshalYAML(node *yaml.Node) error { } if node.Kind != yaml.SequenceNode { - return fmt.Errorf("invalid yaml node type %v for string list", node.Kind) + // Lenient: unexpected shapes leave an empty list rather than failing + // the entire workflow parse. + return nil } - var l []string = make([]string, len(node.Content)) + l := make([]string, len(node.Content)) err := node.Decode(&l) if err != nil { - return err + return nil } *o = l @@ -319,12 +322,14 @@ func (o *GithubActionsEvents) UnmarshalYAML(node *yaml.Node) error { err := value.Decode(&crons) if err != nil { - return err + // Skip a malformed schedule block, keep other events. + continue } for _, c := range crons { if c.Cron == "" { - return fmt.Errorf("invalid cron object") + // Skip empty/invalid cron entries individually. + continue } event.Cron = append(event.Cron, c.Cron) @@ -332,7 +337,8 @@ func (o *GithubActionsEvents) UnmarshalYAML(node *yaml.Node) error { } else { err := value.Decode(&event) if err != nil { - return err + // Skip the offending event, keep the rest. + continue } } @@ -345,7 +351,7 @@ func (o *GithubActionsEvents) UnmarshalYAML(node *yaml.Node) error { func (o *GithubActionsOutputs) UnmarshalYAML(node *yaml.Node) error { if node.Kind != yaml.MappingNode { - return fmt.Errorf("invalid yaml node type for outputs") + return nil } for i := 0; i < len(node.Content); i += 2 { @@ -360,7 +366,8 @@ func (o *GithubActionsOutputs) UnmarshalYAML(node *yaml.Node) error { output = GithubActionsOutput{Name: name} err := value.Decode(&output) if err != nil { - return err + // Skip the offending output, keep the rest. + continue } *o = append(*o, output) } @@ -372,7 +379,7 @@ func (o *GithubActionsOutputs) UnmarshalYAML(node *yaml.Node) error { func (o *GithubActionsInputs) UnmarshalYAML(node *yaml.Node) error { if node.Kind != yaml.MappingNode { - return fmt.Errorf("invalid yaml node type for inputs") + return nil } for i := 0; i < len(node.Content); i += 2 { @@ -382,7 +389,8 @@ func (o *GithubActionsInputs) UnmarshalYAML(node *yaml.Node) error { err := value.Decode(&input) if err != nil { - return err + // Skip the offending input, keep the rest. + continue } *o = append(*o, input) @@ -400,7 +408,7 @@ func (o *GithubActionsEnvs) UnmarshalYAML(node *yaml.Node) error { } if node.Kind != yaml.MappingNode { - return fmt.Errorf("invalid yaml node type for env") + return nil } for i := 0; i < len(node.Content); i += 2 { @@ -412,6 +420,24 @@ func (o *GithubActionsEnvs) UnmarshalYAML(node *yaml.Node) error { return nil } +func (o *GithubActionsSteps) UnmarshalYAML(node *yaml.Node) error { + // Be lenient: a single malformed step should only drop that step, not the + // whole job (and therefore not the whole workflow file). + if node.Kind != yaml.SequenceNode { + return nil + } + + for _, item := range node.Content { + var step GithubActionsStep + if err := item.Decode(&step); err != nil { + continue + } + *o = append(*o, step) + } + + return nil +} + func (o *GithubActionsStep) UnmarshalYAML(node *yaml.Node) error { type Alias GithubActionsStep t := Alias{ @@ -467,7 +493,9 @@ func (o *GithubActionsPermissions) UnmarshalYAML(node *yaml.Node) error { case "read-all": permission = PermissionRead default: - return fmt.Errorf("invalid permission %s", node.Value) + // Unknown scalar (e.g. a typo): leave permissions empty rather + // than failing the whole workflow parse. + return nil } *o = make(GithubActionsPermissions, 0, len(AllScopes)) @@ -478,7 +506,7 @@ func (o *GithubActionsPermissions) UnmarshalYAML(node *yaml.Node) error { } if node.Kind != yaml.MappingNode { - return fmt.Errorf("invalid yaml node type for permissions") + return nil } *o = make(GithubActionsPermissions, 0, len(node.Content)/2) @@ -497,7 +525,7 @@ func (o *GithubActionsJobRunsOn) UnmarshalYAML(node *yaml.Node) error { var runsOn StringList err := node.Decode(&runsOn) if err != nil { - return err + return nil } *o = GithubActionsJobRunsOn(runsOn) } @@ -510,18 +538,19 @@ func (o *GithubActionsJobRunsOn) UnmarshalYAML(node *yaml.Node) error { var runsOn RunsOn err := node.Decode(&runsOn) if err != nil { - return err + return nil } for _, group := range runsOn.Group { if group == "" { - return fmt.Errorf("unexpected empty group") + // Skip empty entries individually instead of failing. + continue } *o = append(*o, fmt.Sprintf("group:%s", group)) } for _, label := range runsOn.Labels { if label == "" { - return fmt.Errorf("unexpected empty label") + continue } *o = append(*o, fmt.Sprintf("label:%s", label)) } @@ -540,7 +569,8 @@ func (o *GithubActionsJobContainer) UnmarshalYAML(node *yaml.Node) error { var c container err := node.Decode(&c) if err != nil { - return err + // Lenient: leave the container empty rather than failing the parse. + return nil } *o = GithubActionsJobContainer(c) return nil @@ -553,13 +583,13 @@ func (o *GithubActionsJobEnvironments) UnmarshalYAML(node *yaml.Node) error { } if node.Kind != yaml.MappingNode { - return fmt.Errorf("invalid yaml node type for environment") + return nil } var env GithubActionsJobEnvironment err := node.Decode(&env) if err != nil { - return err + return nil } *o = GithubActionsJobEnvironments{env} diff --git a/models/github_actions_test.go b/models/github_actions_test.go index e203621..010cf2a 100644 --- a/models/github_actions_test.go +++ b/models/github_actions_test.go @@ -12,12 +12,16 @@ func TestGithubActionsWorkflowJobs(t *testing.T) { Name string Input string Expected GithubActionsJob - Error bool + // Error is only for genuinely malformed YAML (lexer/syntax errors). + Error bool + // Dropped means the parser leniently skips the whole job (or all jobs), + // yielding zero jobs without returning an error. + Dropped bool }{ { - Name: "empty", - Input: `[]`, - Error: true, + Name: "empty", + Input: `[]`, + Dropped: true, }, { Name: "empty job", @@ -66,56 +70,86 @@ func TestGithubActionsWorkflowJobs(t *testing.T) { }, }, { + // Lenient: a sequence containing a non-string entry can't populate + // labels, so runs-on ends up empty but the job is still parsed. Name: "runs-on with empty labels", Input: `build: {runs-on: { labels: [ {} ] }}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + Lines: map[string]int{"start": 1, "runs_on": 1}, + }, }, { Name: "runs-on with empty string labels", Input: `build: {runs-on: { labels: [ "" ] }}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + Lines: map[string]int{"start": 1, "runs_on": 1}, + }, }, { Name: "runs-on with empty string group", Input: `build: {runs-on: { group: [ "" ] }}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + Lines: map[string]int{"start": 1, "runs_on": 1}, + }, }, { Name: "runs-on with empty object", Input: `build: {runs-on: [ {}]}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + Lines: map[string]int{"start": 1, "runs_on": 1}, + }, }, { - Name: "empty build", - Input: `build: []`, - Error: true, + // A job whose body is a sequence can't decode into a job struct, so + // it is skipped entirely rather than aborting the whole workflow. + Name: "empty build", + Input: `build: []`, + Dropped: true, }, { + // Unknown permission scalar leaves permissions empty; job survives. Name: "invalid permissions", Input: `build: {permissions: foobar}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + }, }, { Name: "invalid permissions list", Input: `build: {permissions: [foobar]}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + }, }, { Name: "invalid env", Input: `build: {env: foobar}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + }, }, { + // The malformed step is dropped; the job is still parsed. Name: "invalid steps", Input: `build: {steps: [foobar]}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + }, }, { Name: "invalid secrets", Input: `build: {secrets: []}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + }, }, { + // `[]]` is a YAML syntax error, surfaced by the lexer regardless of + // our lenient node handling. Name: "invalid outputs", Input: `build: {outputs: []]}`, Error: true, @@ -141,12 +175,15 @@ func TestGithubActionsWorkflowJobs(t *testing.T) { }, }, { + // Container that can't decode is left empty; job survives. Name: "invalid container empty list", Input: `build: {container: []}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + }, }, { - Name: "invalid container empty list", + Name: "valid permissions object", Input: `build: {permissions: {contents: read}}`, Expected: GithubActionsJob{ ID: "build", @@ -184,9 +221,12 @@ func TestGithubActionsWorkflowJobs(t *testing.T) { }, }, { + // Environment that can't decode is left empty; job survives. Name: "invalid empty environment", Input: `build: {environment: []}`, - Error: true, + Expected: GithubActionsJob{ + ID: "build", + }, }, { Name: "single dimension matrix", @@ -290,6 +330,10 @@ func TestGithubActionsWorkflowJobs(t *testing.T) { return } require.NoError(t, err) + if tt.Dropped { + require.Empty(t, jobs) + return + } require.Len(t, jobs, 1) got := jobs[0] @@ -331,8 +375,11 @@ func TestGithubActionsWorkflowEvents(t *testing.T) { }, }, { + // branches as a mapping can't populate the list; left empty. Input: `push: {branches: {}}`, - Error: true, + Expected: GithubActionsEvents{ + {Name: "push"}, + }, }, { Input: `push: {branches: [main]}`, @@ -353,12 +400,17 @@ func TestGithubActionsWorkflowEvents(t *testing.T) { }, }, { + // cron entries without a `cron` key are skipped individually. Input: `schedule: [error: "s1"]`, - Error: true, + Expected: GithubActionsEvents{ + {Name: "schedule"}, + }, }, { - Input: `schedule: "* * * *"`, - Error: true, + // A scalar schedule can't decode into cron objects; the event is + // skipped, leaving no events (but no error). + Input: `schedule: "* * * *"`, + Expected: GithubActionsEvents{}, }, { Input: `workflow_run: {workflows: ["w1"], types: [requested]}`, @@ -372,15 +424,22 @@ func TestGithubActionsWorkflowEvents(t *testing.T) { }, { Input: `workflow_call: { inputs: [], }`, - Error: true, + Expected: GithubActionsEvents{ + {Name: "workflow_call"}, + }, }, { + // The malformed input is skipped; the event survives. Input: `workflow_call: { inputs: {name: []}, }`, - Error: true, + Expected: GithubActionsEvents{ + {Name: "workflow_call"}, + }, }, { Input: `workflow_call: { outputs: [], }`, - Error: true, + Expected: GithubActionsEvents{ + {Name: "workflow_call"}, + }, }, { Input: `workflow_call: { outputs: { name: asdf }, }`, @@ -397,8 +456,11 @@ func TestGithubActionsWorkflowEvents(t *testing.T) { }, }, { + // The malformed output is skipped; the event survives. Input: `workflow_call: { outputs: { name: { name: {} } }, }`, - Error: true, + Expected: GithubActionsEvents{ + {Name: "workflow_call"}, + }, }, { Input: `workflow_call: { @@ -596,6 +658,59 @@ jobs: assert.Equal(t, "alpine:latest", workflow.Jobs[2].Container.Image) } +// TestGithubActionsWorkflowLenientResilience verifies that locally-malformed +// sub-nodes are skipped without aborting the whole-file parse. Previously any +// one of these constructs caused yaml.Unmarshal to error, which made the +// scanner silently drop the entire workflow (a false-negative / scan-evasion +// risk). The workflow must still be valid and retain all of its well-formed +// content. +func TestGithubActionsWorkflowLenientResilience(t *testing.T) { + subject := ` +name: CI +on: + push: + branches: {} # wrong shape, ignored + schedule: + - cron: "" # empty cron, skipped individually + - cron: "0 0 * * 0" # kept +jobs: + good: + runs-on: ubuntu-latest + permissions: not-a-real-value # invalid scalar -> empty perms, job kept + steps: + - uses: actions/checkout@v4 + - foobar # malformed step -> dropped + - run: make build + broken: [] # whole job can't decode -> dropped +` + var wf GithubActionsWorkflow + err := yaml.Unmarshal([]byte(subject), &wf) + + // The file as a whole must still parse and be considered valid for scanning. + require.NoError(t, err) + assert.True(t, wf.IsValid(), "workflow should remain valid despite malformed sub-nodes") + + // Events: push survives (with empty branches) and schedule keeps only the + // well-formed cron entry. + require.Len(t, wf.Events, 2) + assert.Equal(t, "push", wf.Events[0].Name) + assert.Empty(t, wf.Events[0].Branches) + assert.Equal(t, "schedule", wf.Events[1].Name) + assert.Equal(t, []string{"0 0 * * 0"}, []string(wf.Events[1].Cron)) + + // The undecodable `broken` job is dropped; the good job is retained. + require.Len(t, wf.Jobs, 1) + good := wf.Jobs[0] + assert.Equal(t, "good", good.ID) + assert.Equal(t, GithubActionsJobRunsOn{"ubuntu-latest"}, good.RunsOn) + assert.Empty(t, good.Permissions, "invalid permission scalar should yield empty permissions, not drop the job") + + // The malformed step is dropped but the surrounding steps survive. + require.Len(t, good.Steps, 2) + assert.Equal(t, "actions/checkout@v4", good.Steps[0].Uses) + assert.Equal(t, "make build", good.Steps[1].Run) +} + func TestGithubActionMetadata(t *testing.T) { var actionMetadata GithubActionsMetadata subject := `name: "My GitHub Action" diff --git a/scanner/inventory_scanner_test.go b/scanner/inventory_scanner_test.go index af3566c..f63c3e1 100644 --- a/scanner/inventory_scanner_test.go +++ b/scanner/inventory_scanner_test.go @@ -49,6 +49,37 @@ func TestGithubWorkflowsNotFound(t *testing.T) { assert.Equal(t, 0, len(workflows)) } +// TestGithubWorkflowMalformedNotDropped verifies that a workflow containing a +// locally-malformed sub-node is still retained by the parser instead of being +// silently dropped from the scan. +func TestGithubWorkflowMalformedNotDropped(t *testing.T) { + data := []byte(` +name: CI +on: + push: + branches: {} +permissions: not-a-real-value +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: make build +`) + p := NewGithubActionWorkflowParser() + pkgInsights := &models.PackageInsights{} + + err := p.ParseFromMemory(data, ".github/workflows/ci.yml", pkgInsights) + require.NoError(t, err) + + require.Len(t, pkgInsights.GithubActionsWorkflows, 1) + wf := pkgInsights.GithubActionsWorkflows[0] + assert.Equal(t, ".github/workflows/ci.yml", wf.Path) + require.Len(t, wf.Jobs, 1) + assert.Equal(t, "build", wf.Jobs[0].ID) + require.Len(t, wf.Jobs[0].Steps, 2) +} + func TestGithubActionsMetadata(t *testing.T) { s := NewInventoryScanner("testdata") pkgInsights := &models.PackageInsights{} From 0be6a8173f2c5edbea0d9ffb4045a29f7ba4f201 Mon Sep 17 00:00:00 2001 From: Alexis-Maurer Fortin Date: Mon, 29 Jun 2026 15:09:49 -0400 Subject: [PATCH 2/3] flattening parallel steps so we can run our existing rego rules without change --- models/github_actions.go | 21 +++++++++++++++ models/github_actions_test.go | 48 +++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/models/github_actions.go b/models/github_actions.go index 0c4e227..6adf826 100644 --- a/models/github_actions.go +++ b/models/github_actions.go @@ -428,6 +428,14 @@ func (o *GithubActionsSteps) UnmarshalYAML(node *yaml.Node) error { } for _, item := range node.Content { + // flatten parallel steps + if p := mappingValue(item, "parallel"); p != nil { + var nested GithubActionsSteps + _ = p.Decode(&nested) // recurses; lenient, handles nested parallel + *o = append(*o, nested...) + continue + } + var step GithubActionsStep if err := item.Decode(&step); err != nil { continue @@ -438,6 +446,19 @@ func (o *GithubActionsSteps) UnmarshalYAML(node *yaml.Node) error { return nil } +// mappingValue returns the value node for key in a mapping node, or nil. +func mappingValue(node *yaml.Node, key string) *yaml.Node { + if node.Kind != yaml.MappingNode { + return nil + } + for i := 0; i+1 < len(node.Content); i += 2 { + if node.Content[i].Value == key { + return node.Content[i+1] + } + } + return nil +} + func (o *GithubActionsStep) UnmarshalYAML(node *yaml.Node) error { type Alias GithubActionsStep t := Alias{ diff --git a/models/github_actions_test.go b/models/github_actions_test.go index 010cf2a..1405841 100644 --- a/models/github_actions_test.go +++ b/models/github_actions_test.go @@ -711,6 +711,54 @@ jobs: assert.Equal(t, "make build", good.Steps[1].Run) } +// TestGithubActionsParallelStepsFlattened verifies that `parallel:` step blocks +// are flattened inline so their nested run/uses sinks remain visible to rules. +func TestGithubActionsParallelStepsFlattened(t *testing.T) { + input := `build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - parallel: + - name: Build frontend + run: npm run build:frontend + - name: Build backend + run: npm run build:backend + - name: Run tests + run: npm test +` + var jobs GithubActionsJobs + require.NoError(t, yaml.Unmarshal([]byte(input), &jobs)) + require.Len(t, jobs, 1) + + steps := jobs[0].Steps + require.Len(t, steps, 4) + assert.Equal(t, "actions/checkout@v6", steps[0].Uses) + assert.Equal(t, "npm run build:frontend", steps[1].Run) + assert.Equal(t, "npm run build:backend", steps[2].Run) + assert.Equal(t, "npm test", steps[3].Run) + // Each flattened child keeps its own line number. + assert.Equal(t, 6, steps[1].Line) + assert.Equal(t, 8, steps[2].Line) + + t.Run("nested parallel", func(t *testing.T) { + nested := `build: + steps: + - parallel: + - parallel: + - run: a + - run: b + - run: c +` + var jobs GithubActionsJobs + require.NoError(t, yaml.Unmarshal([]byte(nested), &jobs)) + runs := make([]string, 0, len(jobs[0].Steps)) + for _, s := range jobs[0].Steps { + runs = append(runs, s.Run) + } + assert.Equal(t, []string{"a", "b", "c"}, runs) + }) +} + func TestGithubActionMetadata(t *testing.T) { var actionMetadata GithubActionsMetadata subject := `name: "My GitHub Action" From 7c22ea1c167fca2332b2a7adee4a0886b600ca7a Mon Sep 17 00:00:00 2001 From: Alexis-Maurer Fortin Date: Tue, 30 Jun 2026 15:28:39 -0400 Subject: [PATCH 3/3] simple parallel bool --- models/github_actions.go | 9 ++++++++- models/github_actions_test.go | 31 +++++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/models/github_actions.go b/models/github_actions.go index 6adf826..17bee75 100644 --- a/models/github_actions.go +++ b/models/github_actions.go @@ -114,6 +114,10 @@ type GithubActionsStep struct { Line int `json:"line" yaml:"-"` Action string `json:"action,omitempty" yaml:"-"` + // Set when the step comes from a flattened `parallel:` block (steps that + // run concurrently). No order index: parallel steps have no defined order. + Parallel bool `json:"parallel,omitempty" yaml:"-"` + Lines map[string]int `json:"lines" yaml:"-"` } @@ -431,7 +435,10 @@ func (o *GithubActionsSteps) UnmarshalYAML(node *yaml.Node) error { // flatten parallel steps if p := mappingValue(item, "parallel"); p != nil { var nested GithubActionsSteps - _ = p.Decode(&nested) // recurses; lenient, handles nested parallel + _ = p.Decode(&nested) // recurses; handles nested parallel + for k := range nested { + nested[k].Parallel = true + } *o = append(*o, nested...) continue } diff --git a/models/github_actions_test.go b/models/github_actions_test.go index 1405841..0f6499a 100644 --- a/models/github_actions_test.go +++ b/models/github_actions_test.go @@ -1,10 +1,12 @@ package models import ( + "encoding/json" + "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/yaml.v3" - "testing" ) func TestGithubActionsWorkflowJobs(t *testing.T) { @@ -740,7 +742,23 @@ func TestGithubActionsParallelStepsFlattened(t *testing.T) { assert.Equal(t, 6, steps[1].Line) assert.Equal(t, 8, steps[2].Line) - t.Run("nested parallel", func(t *testing.T) { + // Steps outside the parallel block are not tagged. + assert.False(t, steps[0].Parallel) + assert.False(t, steps[3].Parallel) + // Parallel children carry the flag. + assert.True(t, steps[1].Parallel) + assert.True(t, steps[2].Parallel) + + // The flag is present in the JSON handed to rego. + blob, err := json.Marshal(steps[1]) + require.NoError(t, err) + assert.Contains(t, string(blob), `"parallel":true`) + // Non-parallel steps omit it entirely. + blob, err = json.Marshal(steps[0]) + require.NoError(t, err) + assert.NotContains(t, string(blob), "parallel") + + t.Run("nested parallel all tagged", func(t *testing.T) { nested := `build: steps: - parallel: @@ -751,11 +769,12 @@ func TestGithubActionsParallelStepsFlattened(t *testing.T) { ` var jobs GithubActionsJobs require.NoError(t, yaml.Unmarshal([]byte(nested), &jobs)) - runs := make([]string, 0, len(jobs[0].Steps)) - for _, s := range jobs[0].Steps { - runs = append(runs, s.Run) + steps := jobs[0].Steps + require.Len(t, steps, 3) + assert.Equal(t, []string{"a", "b", "c"}, []string{steps[0].Run, steps[1].Run, steps[2].Run}) + for _, s := range steps { + assert.True(t, s.Parallel) } - assert.Equal(t, []string{"a", "b", "c"}, runs) }) }