Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chrome extensions support #534

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions binary/proto/proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"github.com/google/osv-scalibr/extractor/filesystem/language/python/setup"
"github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg"
"github.com/google/osv-scalibr/extractor/filesystem/os/apk"
chromeextensions "github.com/google/osv-scalibr/extractor/filesystem/os/chrome/extensions"
"github.com/google/osv-scalibr/extractor/filesystem/os/cos"
"github.com/google/osv-scalibr/extractor/filesystem/os/dpkg"
"github.com/google/osv-scalibr/extractor/filesystem/os/flatpak"
Expand Down Expand Up @@ -506,6 +507,19 @@ func setProtoMetadata(meta any, i *spb.Inventory) {
FullVersion: m.FullVersion,
},
}
case *chromeextensions.Metadata:
i.Metadata = &spb.Inventory_ChromeExtensionsMetadata{
ChromeExtensionsMetadata: &spb.ChromeExtensionsMetadata{
Name: m.Name,
Description: m.Description,
AuthorEmail: m.AuthorEmail,
HostPermissions: m.HostPermissions,
ManifestVersion: int32(m.ManifestVersion),
MinimumChromeVersion: m.MinimumChromeVersion,
Permissions: m.Permissions,
UpdateUrl: m.UpdateURL,
},
}
}
}

Expand Down
13 changes: 13 additions & 0 deletions binary/proto/scan_result.proto
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ message Inventory {
CDXPackageMetadata cdx_metadata = 30;
WindowsOSVersion windows_os_version_metadata = 33;
HomebrewPackageMetadata homebrew_metadata = 42;
ChromeExtensionsMetadata chrome_extensions_metadata = 45;
}

repeated AnnotationEnum annotations = 28;
Expand Down Expand Up @@ -470,3 +471,15 @@ message WindowsOSVersion {

// The additional data found in Homebrew packages.
message HomebrewPackageMetadata {}

// The additional data found in Chrome extensions.
// Every field mirrors the same-named field of the Chrome extensions
// extractor's Metadata, which is filled from the extension's manifest.json.
message ChromeExtensionsMetadata {
  // Extension name from the manifest (the localized message when the manifest
  // used a __MSG_<name>__ placeholder that could be resolved).
  string name = 1;
  // Extension description, resolved the same way as the name.
  string description = 2;
  // Value of the manifest's "author.email" key.
  string author_email = 3;
  // Entries of the manifest's "host_permissions" list.
  repeated string host_permissions = 4;
  // Value of the manifest's "manifest_version" key.
  int32 manifest_version = 5;
  // Value of the manifest's "minimum_chrome_version" key.
  string minimum_chrome_version = 6;
  // Entries of the manifest's "permissions" list.
  repeated string permissions = 7;
  // Value of the manifest's "update_url" key.
  string update_url = 8;
}
1,039 changes: 592 additions & 447 deletions binary/proto/scan_result_go_proto/scan_result.pb.go

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/supported_inventory_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ SCALIBR supports extracting software package information from a variety of OS an

* Alpine
* APK
* Chrome extensions
* COS
* cos-package-info.json
* DPKG (used by e.g. Debian, Ubuntu)
Expand Down
28 changes: 15 additions & 13 deletions extractor/filesystem/list/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ import (
"github.com/google/osv-scalibr/extractor/filesystem/language/swift/podfilelock"
"github.com/google/osv-scalibr/extractor/filesystem/language/wordpress/plugins"
"github.com/google/osv-scalibr/extractor/filesystem/os/apk"
chromeextensions "github.com/google/osv-scalibr/extractor/filesystem/os/chrome/extensions"
"github.com/google/osv-scalibr/extractor/filesystem/os/cos"
"github.com/google/osv-scalibr/extractor/filesystem/os/dpkg"
"github.com/google/osv-scalibr/extractor/filesystem/os/flatpak"
Expand Down Expand Up @@ -176,19 +177,20 @@ var (

// OS extractors.
OS = InitMap{
dpkg.Name: {dpkg.NewDefault},
apk.Name: {apk.NewDefault},
rpm.Name: {rpm.NewDefault},
cos.Name: {cos.NewDefault},
snap.Name: {snap.NewDefault},
nix.Name: {nix.New},
module.Name: {module.NewDefault},
vmlinuz.Name: {vmlinuz.NewDefault},
pacman.Name: {pacman.NewDefault},
portage.Name: {portage.NewDefault},
flatpak.Name: {flatpak.NewDefault},
homebrew.Name: {homebrew.New},
macapps.Name: {macapps.NewDefault},
dpkg.Name: {dpkg.NewDefault},
apk.Name: {apk.NewDefault},
rpm.Name: {rpm.NewDefault},
cos.Name: {cos.NewDefault},
snap.Name: {snap.NewDefault},
nix.Name: {nix.New},
module.Name: {module.NewDefault},
vmlinuz.Name: {vmlinuz.NewDefault},
pacman.Name: {pacman.NewDefault},
portage.Name: {portage.NewDefault},
flatpak.Name: {flatpak.NewDefault},
homebrew.Name: {homebrew.New},
macapps.Name: {macapps.NewDefault},
chromeextensions.Name: {chromeextensions.New},
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move into a separate Misc list

}

// Collections of extractors.
Expand Down
241 changes: 241 additions & 0 deletions extractor/filesystem/os/chrome/extensions/extensions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package extensions extracts chrome extensions.
package extensions
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move into filesystem/misc/


import (
"context"
"encoding/json"
"errors"
"fmt"
"path/filepath"
"regexp"
"runtime"
"strings"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem"
"github.com/google/osv-scalibr/plugin"
"github.com/google/osv-scalibr/purl"
)

// Name is the name of the Chrome extensions extractor. It is returned by
// Extractor.Name and used as the key under which the extractor is registered.
const Name = "chrome/extensions"

// Path patterns used by FileRequired to recognise extension manifests.
//
// All patterns are applied to slash-separated paths and share the same tail:
//
//	/Default/Extensions/<32 letters in a-p>/<one path segment>/manifest.json
//
// so only the profile directory named "Default" is matched. What differs per
// pattern is the browser directory that must precede that tail. The (?m) flag
// lets `$` match at the end of a line as well as at the end of the text.
var (
	// Windows: .../Google/Chrome[ Beta| SxS| for Testing]/User Data/...
	windowsChromeExtensionsPattern = regexp.MustCompile(`(?m)\/Google\/Chrome(?: Beta| SxS| for Testing|)\/User Data\/Default\/Extensions\/[a-p]{32}\/[^\/]+\/manifest\.json$`)
	// Windows: .../Chromium/User Data/...
	windowsChromiumExtensionsPattern = regexp.MustCompile(`(?m)\/Chromium\/User Data\/Default\/Extensions\/[a-p]{32}\/[^\/]+\/manifest\.json$`)

	// macOS: .../Google/Chrome[ Beta| SxS| for Testing| Canary]/...
	macosChromeExtensionsPattern = regexp.MustCompile(`(?m)\/Google\/Chrome(?: Beta| SxS| for Testing| Canary|)\/Default\/Extensions\/[a-p]{32}\/[^\/]+\/manifest\.json$`)
	// macOS: .../Chromium/...
	macosChromiumExtensionsPattern = regexp.MustCompile(`(?m)\/Chromium\/Default\/Extensions\/[a-p]{32}\/[^\/]+\/manifest\.json$`)

	// Linux: .../google-chrome[-beta|-unstable|-for-testing]/...
	linuxChromeExtensionsPattern = regexp.MustCompile(`(?m)\/google-chrome(?:-beta|-unstable|-for-testing|)\/Default\/Extensions\/[a-p]{32}\/[^\/]+\/manifest\.json$`)
	// Linux: .../chromium/...
	linuxChromiumExtensionsPattern = regexp.MustCompile(`(?m)\/chromium\/Default\/Extensions\/[a-p]{32}\/[^\/]+\/manifest\.json$`)
)

// manifest holds the subset of an extension's manifest.json that the extractor
// reads. Name and Description may contain a __MSG_<name>__ placeholder that is
// resolved against the default locale's messages when DefaultLocale is set.
type manifest struct {
	// Author is the "author" object; only its "email" key is decoded.
	Author struct {
		Email string `json:"email"`
	} `json:"author"`
	// DefaultLocale names the sub-directory of _locales holding the messages
	// used to resolve placeholders.
	DefaultLocale        string   `json:"default_locale"`
	Description          string   `json:"description"`
	HostPermissions      []string `json:"host_permissions"`
	ManifestVersion      int      `json:"manifest_version"`
	MinimumChromeVersion string   `json:"minimum_chrome_version"`
	Name                 string   `json:"name"`
	Permissions          []string `json:"permissions"`
	UpdateURL            string   `json:"update_url"`
	Version              string   `json:"version"`
}

// validate checks that the fields the extractor cannot work without are
// present. It returns an error naming the first missing field (Name is checked
// before Version) and nil when both are set.
func (m *manifest) validate() error {
	switch {
	case m.Name == "":
		return fmt.Errorf("field 'Name' must be specified")
	case m.Version == "":
		return fmt.Errorf("field 'Version' must be specified")
	default:
		return nil
	}
}

// message is the value stored under one message name in a locale's messages
// file: the text itself (Message) and its accompanying description.
type message struct {
	Description string `json:"description"`
	Message     string `json:"message"`
}

// Extractor extracts Chrome extensions from their manifest.json files.
// It carries no configuration or state.
type Extractor struct{}

// New returns a Chrome extensions extractor.
func New() filesystem.Extractor {
	return new(Extractor)
}

// Name returns the extractor's name, i.e. the package-level Name constant.
func (Extractor) Name() string {
	return Name
}

// Version returns the version of the extractor, currently 0.
func (Extractor) Version() int {
	return 0
}

// Requirements returns the capabilities the extractor needs. The returned
// value is a zero Capabilities struct, so no specific requirement is declared.
func (Extractor) Requirements() *plugin.Capabilities {
	return new(plugin.Capabilities)
}

// FileRequired returns true if the file is chrome manifest extension
func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is called on every single file of the filesystem and hence can be a bottleneck for performances. I would recommend performing a pre-check to return early before using regexp.

For example, just returning early if the filename is not manifest.json before doing the regexp matching. This should significantly improve performances.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 HasPrefix with manifest.json

path := api.Path()
path = filepath.ToSlash(path)

switch runtime.GOOS {
case "windows":
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ScanConfig receives a Capabilities struct that contains which OS SCALIBR is running on.

One thing you could do to have OS-specific behavior is to have 3 separate extractors: One for Linux, Win, and Mac. Each would set Capabilities{OS: plugin.OSLinux}, etc. in their Requirements() function.

When a user enables extractors through the SCALIBR library, they either call list.FromCapabilities or list.FilterByCapabilities to only enable the one for the OS they're running SCALIBR on. Similarly, when running SCALIBR through the CLI, cli.go already calls FilterByCapabilities so the extractors not relevant to the OS will be disabled automatically.

You can avoid code duplication by moving the common parts of Extract, ToPURL, etc. into a helper library that they all call.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either this, or set the Requirement to RunningSystem. Else runtime.GOOS can be wrong.

return windowsChromeExtensionsPattern.MatchString(path) || windowsChromiumExtensionsPattern.MatchString(path)
case "linux":
return linuxChromeExtensionsPattern.MatchString(path) || linuxChromiumExtensionsPattern.MatchString(path)
case "darwin":
return macosChromeExtensionsPattern.MatchString(path) || macosChromiumExtensionsPattern.MatchString(path)
default:
return false
}
}

// Extract extracts chrome extensions
func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) {
var m manifest
if err := json.NewDecoder(input.Reader).Decode(&m); err != nil {
return nil, fmt.Errorf("could not extract manifest from %s: %w", input.Path, err)
}
if err := m.validate(); err != nil {
return nil, fmt.Errorf("bad format in manifest %s: %w", input.Path, err)
}

// extract the extensions ID from the path
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for this comment, the function name already says the same thing

id, err := extractExtensionsIDFromPath(input)
if err != nil {
return nil, fmt.Errorf("could not extract extension id from %s: %w", input.Path, err)
}

// if default locale is specified some fields of the manifest may be
// written inside the ./_locales/LOCALE_CODE/messages.json file
if m.DefaultLocale != "" {
if err := extractLocaleInfo(&m, input); err != nil {
return nil, fmt.Errorf("could not extract locale info from %s: %w", input.Path, err)
}
}

ivs := []*extractor.Inventory{
{
Name: id,
Version: m.Version,
Metadata: &Metadata{
AuthorEmail: m.Author.Email,
Description: m.Description,
HostPermissions: m.HostPermissions,
ManifestVersion: m.ManifestVersion,
MinimumChromeVersion: m.MinimumChromeVersion,
Name: m.Name,
Permissions: m.Permissions,
UpdateURL: m.UpdateURL,
},
},
}

return ivs, nil
}

// extractExtensionsIDFromPath extracts the extensions id from the path
//
// expected path is:
//
// /extensionID/version/manifest.json
func extractExtensionsIDFromPath(input *filesystem.ScanInput) (string, error) {
path, err := filepath.Abs(input.Path)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

file paths should already be absolute when receiving them through the ScanInput so no need for this.

if err != nil {
return "", fmt.Errorf("could not extract full path: %w", err)
}
parts := strings.Split(filepath.ToSlash(path), "/")
if len(parts) < 3 {
return "", errors.New("cold not find id expected path format '/extensionID/version/manifest.json'")
}
id := parts[len(parts)-3]
// no more validation on the id is required since the path has been checked during FileRequired
return id, nil
}

// extractLocaleInfo extract locale information from the _locales/LOCALE_CODE/messages.json
// following manifest.json v3 specification
func extractLocaleInfo(m *manifest, input *filesystem.ScanInput) error {
messagePath := filepath.Join(filepath.Dir(input.Path), "/_locales/", m.DefaultLocale, "message.json")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for the leading and ending slashes in "/_locales/" if you use filepath.Join


f, err := input.FS.Open(messagePath)
if err != nil {
return err
}

// using a map to decode since the keys are determined by the values
// of the manifest.json fields
//
// ex:
//
// manifest.json:
// "name" : "__MSG_43ry328yr932__"
// en/message.json
// "43ry328yr932" : "Extension name"
var messages map[string]message
if err := json.NewDecoder(f).Decode(&messages); err != nil {
return err
}

lowerCase := map[string]message{}
for k, v := range messages {
lowerCase[strings.ToLower(k)] = v
}

if v, ok := cutPrefixSuffix(m.Name, "__MSG_", "__"); ok {
if msg, ok := lowerCase[strings.ToLower(v)]; ok {
m.Name = msg.Message
}
}

if v, ok := cutPrefixSuffix(m.Description, "__MSG_", "__"); ok {
if msg, ok := lowerCase[strings.ToLower(v)]; ok {
m.Description = msg.Message
}
}

return nil
}

// cutPrefixSuffix cuts the specified prefix and suffix
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"...if they exist, returns false otherwise"

// cutPrefixSuffix returns s without the leading prefix and the trailing suffix
// and reports true. If s does not start with prefix, does not end with suffix,
// or is too short to contain both without overlap, it returns "" and false.
func cutPrefixSuffix(s string, prefix string, suffix string) (string, bool) {
	// Guard against prefix and suffix sharing characters of s (for instance
	// "__MSG__" with prefix "__MSG_" and suffix "__"): both checks below would
	// succeed but the slice bounds would cross and panic.
	if len(s) < len(prefix)+len(suffix) {
		return "", false
	}
	if !strings.HasPrefix(s, prefix) || !strings.HasSuffix(s, suffix) {
		return "", false
	}
	return s[len(prefix) : len(s)-len(suffix)], true
}

// ToPURL builds the package URL of an inventory produced by this extractor:
// a generic-type PURL carrying the inventory's name and version.
func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL {
	p := new(purl.PackageURL)
	p.Type = purl.TypeGeneric
	p.Name = i.Name
	p.Version = i.Version
	return p
}

// Ecosystem always returns the empty string: no ecosystem is defined for
// inventories produced by this extractor.
func (Extractor) Ecosystem(i *extractor.Inventory) string {
	return ""
}
Loading