gitpod/components/scrubber/sanitisation.go
Pudong 6986a4e459
[scrubber] Add more allowed keywords in context url (#19783)
* [scrubber] Add more allowed keywords in context url

* add test cases and comment

---------

Co-authored-by: Huiwen <mhqnwt@gmail.com>
2024-05-28 15:18:09 +08:00

144 lines
3.1 KiB
Go

// Copyright (c) 2023 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License.AGPL.txt in the project root for license information.
package scrubber
import (
"crypto/md5"
"fmt"
"net/url"
"strconv"
"strings"
)
// SanitiserOption provides additional options to a sanitiser.
// Each option is a function that is applied to a *sanitiserOptions value;
// SanitiseWithKeyName is one such option.
type SanitiserOption func(*sanitiserOptions)
// SanitiseWithKeyName returns an option that stores keyName in the sanitiser
// options, so that a sanitiser can add it as metadata to the sanitised value.
func SanitiseWithKeyName(keyName string) SanitiserOption {
	setKeyName := func(target *sanitiserOptions) {
		target.keyName = keyName
	}
	return setKeyName
}
// sanitiserOptions holds the settings that SanitiserOption functions write to.
type sanitiserOptions struct {
// keyName is set by SanitiseWithKeyName; sanitisers in this file append it
// to their output when it is non-empty.
keyName string
}
// Sanitisatiser turns a potentially sensitive value into a non-sensitive value.
// It takes the value to sanitise plus any number of SanitiserOption values
// and returns the sanitised string.
type Sanitisatiser func(value string, opts ...SanitiserOption) string
// SanitiseRedact sanitises a single value by replacing it with a fixed
// placeholder; the value itself never appears in the result.
//
// Without a key name the placeholder is "[redacted]". When a key name was
// supplied (see SanitiseWithKeyName) it is "[redacted:<keyName>]".
func SanitiseRedact(value string, opts ...SanitiserOption) string {
	name := mergeSanitiserOpts(opts).keyName
	if name == "" {
		return "[redacted]"
	}
	return "[redacted:" + name + "]"
}
// SanitiseHash sanitises a single value by replacing it with the lower-case
// hex MD5 digest of its bytes.
//
// The result has the form "[redacted:md5:<hex>]", or
// "[redacted:md5:<hex>:<keyName>]" when a key name was supplied
// (see SanitiseWithKeyName).
func SanitiseHash(value string, opts ...SanitiserOption) string {
	digest := md5.Sum([]byte(value))

	// The key name, if any, is appended after the digest, separated by a colon.
	var suffix string
	if name := mergeSanitiserOpts(opts).keyName; name != "" {
		suffix = ":" + name
	}
	return fmt.Sprintf("[redacted:md5:%x%s]", digest, suffix)
}
// SanitiseHashURLPathSegments sanitises a URL while keeping its rough shape:
// the part before the path, every path segment and the query string are each
// passed through SanitiseHash separately.
//
// Path segments are handled as follows:
//   - empty segments are skipped;
//   - segments that parse as an integer are kept verbatim;
//   - otherwise a leading "~" and a trailing ".git" are stripped, and the
//     remainder is kept verbatim if it is a well-known keyword (e.g. "tree",
//     "pull", "commits") and hashed if it is not.
//
// The fragment is discarded. If value cannot be parsed as a URL, the whole
// string is hashed with SanitiseHash instead. When a key name was supplied
// (see SanitiseWithKeyName) it is appended to the result as " [<keyName>]".
func SanitiseHashURLPathSegments(value string, opts ...SanitiserOption) string {
	settings := mergeSanitiserOpts(opts)

	parsed, err := url.Parse(value)
	if err != nil {
		// Not a URL: fall back to hashing the string as a whole.
		return SanitiseHash(value, opts...)
	}

	// Detach path, query and fragment so that parsed.String() below only
	// renders what comes before the path.
	rawPath, rawQuery := parsed.Path, parsed.RawQuery
	parsed.Path, parsed.RawQuery, parsed.Fragment = "", "", ""

	// isAllowed reports whether a path segment is a known keyword that may
	// appear in the output without being hashed.
	isAllowed := func(segment string) bool {
		switch segment {
		case "-",
			"blob", "blobs",
			"commit", "commits",
			"issue", "issues",
			"merge_request", "merge_requests",
			"pull-request", "pull-requests",
			"pull",
			"release", "releases",
			"src",
			"tag", "tags",
			"tree":
			return true
		case "users", "projects", "scm", "repos", "browse", "branches":
			// Bitbucket ENT-126
			return true
		}
		return false
	}

	var kept []string
	for _, segment := range strings.Split(rawPath, "/") {
		if segment == "" {
			continue
		}
		// Numbers are kept as they are; everything else is normalised and
		// then either allowed through or hashed.
		if _, convErr := strconv.Atoi(segment); convErr != nil {
			segment = strings.TrimSuffix(strings.TrimPrefix(segment, "~"), ".git")
			if !isAllowed(segment) {
				segment = SanitiseHash(segment)
			}
		}
		kept = append(kept, segment)
	}

	result := fmt.Sprintf("%s/%s", SanitiseHash(parsed.String()), strings.Join(kept, "/"))
	if rawQuery != "" {
		result += "?" + SanitiseHash(rawQuery)
	}
	if settings.keyName != "" {
		result += " [" + settings.keyName + "]"
	}
	return result
}
// mergeSanitiserOpts applies every option in opts, in order, to a zero-valued
// sanitiserOptions and returns the result. Because the options are applied
// in order, a later option can overwrite what an earlier one set.
func mergeSanitiserOpts(opts []SanitiserOption) sanitiserOptions {
	merged := sanitiserOptions{}
	for i := range opts {
		opts[i](&merged)
	}
	return merged
}