mirror of
https://github.com/gitpod-io/gitpod.git
synced 2025-12-08 17:36:30 +00:00
590 lines
16 KiB
Go
590 lines
16 KiB
Go
// Copyright (c) 2023 Gitpod GmbH. All rights reserved.
|
|
// Licensed under the GNU Affero General Public License (AGPL).
|
|
// See License.AGPL.txt in the project root for license information.
|
|
|
|
package scrubber
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"reflect"
|
|
"regexp"
|
|
"strings"
|
|
"unsafe"
|
|
|
|
lru "github.com/hashicorp/golang-lru"
|
|
"github.com/mitchellh/reflectwalk"
|
|
)
|
|
|
|
// TrustedValue marks a value as trusted, i.e. as something the scrubber must
// leave alone.
//
// Whenever the scrubbing process encounters a value implementing TrustedValue
// it skips over that value instead of sanitising it. This lets callers exempt
// data they have already sanitised themselves.
//
// Example:
//
//	type Example struct {
//		Username string
//		Email    string
//		Password string
//	}
//
//	type TrustedExample struct {
//		Example
//	}
//
//	func (TrustedExample) IsTrustedValue() {}
//
//	func scrubExample(e *Example) *TrustedExample {
//		return &TrustedExample{
//			Example: Example{
//				Username: e.Username,
//				Email:    "trusted:" + Default.Value(e.Email),
//				Password: "trusted:" + Default.KeyValue("password", e.Password),
//			},
//		}
//	}
type TrustedValue interface {
	// IsTrustedValue is a marker method without behaviour; implementing it
	// is what opts a type out of scrubbing.
	IsTrustedValue()
}
|
|
|
|
// Scrubber is the interface of a scrubber, which sanitises various kinds of
// data by removing or replacing sensitive content so that it is not exposed.
//
// Scrubbing respects TrustedValue: a value implementing it is skipped rather
// than scrubbed, which allows specific values to be exempted.
type Scrubber interface {
	// Value scrubs a single value by trying to detect what kind of data it
	// contains. This is purely heuristic and the least reliable method, so
	// prefer the other methods where possible. No assumption is made about
	// the structure of the value, e.g. that it is a JSON string.
	Value(value string) string

	// KeyValue scrubs the value of a key-value pair. The key is never
	// changed, on the assumption that it is a hardcoded, well-chosen
	// identifier; it serves as a hint for how to sanitise the value.
	KeyValue(key, value string) (sanitisedValue string)

	// JSON scrubs a JSON document using a combination of KeyValue() and
	// Value(). An error is returned if msg is not valid JSON.
	JSON(msg json.RawMessage) (json.RawMessage, error)

	// Struct scrubs a struct in place. val must be a pointer so that its
	// fields can be set; otherwise an error is returned.
	//
	// String fields are scrubbed by default.
	// NOTE(review): earlier documentation also listed json.RawMessage fields;
	// confirm against the walker implementation.
	//
	// The `scrub` struct tag influences the scrubber and accepts:
	//   - `ignore`: leave the field untouched
	//   - `hash`: hash the field value
	//   - `redact`: redact the field value
	//
	// Example:
	//
	//	type Example struct {
	//		Username      string `scrub:"ignore"`
	//		Password      string
	//		Inconspicuous string `scrub:"redact"`
	//	}
	Struct(val any) error

	// DeepCopyStruct scrubs a struct without modifying it: unlike Struct it
	// returns a scrubbed deep copy. val may be a pointer or a struct value.
	DeepCopyStruct(val any) any
}
|
|
|
|
// ScrubberImplConfig is the configuration a scrubber is built from.
type ScrubberImplConfig struct {
	// HashedFieldNames lists key fragments; a key containing one of them
	// (compared case-insensitively) has its value hashed.
	HashedFieldNames []string
	// HashedURLPathsFieldNames lists key fragments whose values are hashed
	// per URL path segment. It takes precedence over HashedFieldNames.
	HashedURLPathsFieldNames []string
	// RedactedFieldNames lists key fragments whose values are redacted. It
	// takes precedence over both hashing lists.
	RedactedFieldNames []string
	// HashedValues maps a key name to a pattern; every match of the pattern
	// inside a free-form value is hashed.
	HashedValues map[string]*regexp.Regexp
	// RedactedValues maps a key name to a pattern; every match of the
	// pattern inside a free-form value is redacted.
	RedactedValues map[string]*regexp.Regexp
}
|
|
|
|
// CreateCustomScrubber creates a new scrubber with the given configuration
|
|
// !!! Only use this if you know what you're doing. For all logging purposes, use the "Default" impl !!!
|
|
func CreateCustomScrubber(cfg *ScrubberImplConfig) Scrubber {
|
|
return createScrubberImpl(cfg)
|
|
}
|
|
|
|
// Default is the default scrubber consumers of this package should use
|
|
var Default Scrubber = newScrubberImpl()
|
|
|
|
func newScrubberImpl() *scrubberImpl {
|
|
defaultCfg := ScrubberImplConfig{
|
|
HashedFieldNames: HashedFieldNames,
|
|
HashedURLPathsFieldNames: HashedURLPathsFieldNames,
|
|
RedactedFieldNames: RedactedFieldNames,
|
|
HashedValues: HashedValues,
|
|
RedactedValues: RedactedValues,
|
|
}
|
|
return createScrubberImpl(&defaultCfg)
|
|
}
|
|
|
|
func createScrubberImpl(cfg *ScrubberImplConfig) *scrubberImpl {
|
|
var (
|
|
lowerSanitiseHash []string
|
|
lowerSanitiseHashURLPaths []string
|
|
lowerSanitiseRedact []string
|
|
)
|
|
for _, v := range cfg.HashedFieldNames {
|
|
lowerSanitiseHash = append(lowerSanitiseHash, strings.ToLower(v))
|
|
}
|
|
for _, v := range cfg.HashedURLPathsFieldNames {
|
|
lowerSanitiseHashURLPaths = append(lowerSanitiseHashURLPaths, strings.ToLower(v))
|
|
}
|
|
for _, v := range cfg.RedactedFieldNames {
|
|
lowerSanitiseRedact = append(lowerSanitiseRedact, strings.ToLower(v))
|
|
}
|
|
|
|
cache, err := lru.New(1000)
|
|
if err != nil {
|
|
panic(fmt.Errorf("cannot create cache: %w", err))
|
|
}
|
|
|
|
res := &scrubberImpl{
|
|
LowerSanitiseHash: lowerSanitiseHash,
|
|
LowerSanitiseHashURLPaths: lowerSanitiseHashURLPaths,
|
|
LowerSanitiseRedact: lowerSanitiseRedact,
|
|
HashedValues: cfg.HashedValues,
|
|
RedactedValues: cfg.RedactedValues,
|
|
KeySanitiserCache: cache,
|
|
}
|
|
res.Walker = &structScrubber{Parent: res}
|
|
|
|
return res
|
|
}
|
|
|
|
type scrubberImpl struct {
|
|
Walker *structScrubber
|
|
LowerSanitiseHash []string
|
|
LowerSanitiseHashURLPaths []string
|
|
LowerSanitiseRedact []string
|
|
HashedValues map[string]*regexp.Regexp
|
|
RedactedValues map[string]*regexp.Regexp
|
|
KeySanitiserCache *lru.Cache
|
|
}
|
|
|
|
// JSON implements Scrubber
|
|
func (s *scrubberImpl) JSON(msg json.RawMessage) (json.RawMessage, error) {
|
|
var content any
|
|
err := json.Unmarshal(msg, &content)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot scrub JSON: %w", err)
|
|
}
|
|
err = s.scrubJsonValue(&content)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot scrub JSON: %w", err)
|
|
}
|
|
res, err := json.Marshal(content)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot scrub JSON: %w", err)
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
// KeyValue implements Scrubber
|
|
func (s *scrubberImpl) KeyValue(key string, value string) (sanitisedValue string) {
|
|
sanitisatiser := s.getSanitisatiser(key)
|
|
if sanitisatiser == nil {
|
|
return value
|
|
}
|
|
return sanitisatiser(value)
|
|
}
|
|
|
|
type keySanitiser struct {
|
|
s Sanitisatiser
|
|
}
|
|
|
|
var (
|
|
sanitiseIgnore keySanitiser = keySanitiser{s: nil}
|
|
sanitiseHash keySanitiser = keySanitiser{s: SanitiseHash}
|
|
sanitiseHashURLPathSegments keySanitiser = keySanitiser{s: SanitiseHashURLPathSegments}
|
|
sanitiseRedact keySanitiser = keySanitiser{s: SanitiseRedact}
|
|
)
|
|
|
|
// getSanitisatiser implements
|
|
func (s *scrubberImpl) getSanitisatiser(key string) Sanitisatiser {
|
|
lower := strings.ToLower(key)
|
|
san, ok := s.KeySanitiserCache.Get(lower)
|
|
if ok {
|
|
w := san.(keySanitiser)
|
|
return w.s
|
|
}
|
|
|
|
for _, f := range s.LowerSanitiseRedact {
|
|
if strings.Contains(lower, f) {
|
|
s.KeySanitiserCache.Add(lower, sanitiseRedact)
|
|
return SanitiseRedact
|
|
}
|
|
}
|
|
// Give sanitiseHashURLPathSegments precedence over sanitiseHash
|
|
for _, f := range s.LowerSanitiseHashURLPaths {
|
|
if strings.Contains(lower, f) {
|
|
s.KeySanitiserCache.Add(lower, sanitiseHashURLPathSegments)
|
|
return SanitiseHashURLPathSegments
|
|
}
|
|
}
|
|
for _, f := range s.LowerSanitiseHash {
|
|
if strings.Contains(lower, f) {
|
|
s.KeySanitiserCache.Add(lower, sanitiseHash)
|
|
return SanitiseHash
|
|
}
|
|
}
|
|
|
|
s.KeySanitiserCache.Add(lower, sanitiseIgnore)
|
|
return nil
|
|
}
|
|
|
|
func (s *scrubberImpl) scrubJsonValue(val *any) error {
|
|
if val == nil {
|
|
return nil
|
|
}
|
|
if v, ok := (*val).(string); ok {
|
|
*val = s.Value(v)
|
|
return nil
|
|
}
|
|
return s.Struct(*val)
|
|
}
|
|
|
|
// Struct implements Scrubber
|
|
func (s *scrubberImpl) Struct(val any) error {
|
|
if val == nil {
|
|
return nil
|
|
}
|
|
switch v := val.(type) {
|
|
case map[string]interface{}:
|
|
err := s.scrubJsonObject(v)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case []interface{}:
|
|
err := s.scrubJsonSlice(v)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
default:
|
|
return reflectwalk.Walk(val, s.Walker)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *scrubberImpl) deepCopyStruct(fieldName string, src reflect.Value, scrubTag string, skipScrub bool) reflect.Value {
|
|
if src.Kind() == reflect.Ptr && src.IsNil() {
|
|
return reflect.New(src.Type()).Elem()
|
|
}
|
|
|
|
if src.CanInterface() {
|
|
value := src.Interface()
|
|
if _, ok := value.(TrustedValue); ok {
|
|
skipScrub = true
|
|
}
|
|
}
|
|
|
|
if src.Kind() == reflect.String && !skipScrub {
|
|
dst := reflect.New(src.Type())
|
|
var (
|
|
setExplicitValue bool
|
|
explicitValue string
|
|
)
|
|
switch scrubTag {
|
|
case "ignore":
|
|
dst.Elem().SetString(src.String())
|
|
if !dst.CanInterface() {
|
|
return dst
|
|
}
|
|
return dst.Elem()
|
|
case "hash":
|
|
setExplicitValue = true
|
|
explicitValue = SanitiseHash(src.String())
|
|
case "redact":
|
|
setExplicitValue = true
|
|
explicitValue = SanitiseRedact(src.String())
|
|
}
|
|
|
|
if setExplicitValue {
|
|
dst.Elem().SetString(explicitValue)
|
|
} else {
|
|
sanitisatiser := s.getSanitisatiser(fieldName)
|
|
if sanitisatiser != nil {
|
|
dst.Elem().SetString(sanitisatiser(src.String()))
|
|
} else {
|
|
dst.Elem().SetString(s.Value(src.String()))
|
|
}
|
|
}
|
|
if !dst.CanInterface() {
|
|
return dst
|
|
}
|
|
return dst.Elem()
|
|
}
|
|
|
|
switch src.Kind() {
|
|
case reflect.Struct:
|
|
dst := reflect.New(src.Type())
|
|
t := src.Type()
|
|
|
|
for i := 0; i < t.NumField(); i++ {
|
|
f := t.Field(i)
|
|
srcValue := src.Field(i)
|
|
dstValue := dst.Elem().Field(i)
|
|
|
|
if !srcValue.CanInterface() {
|
|
dstValue = reflect.NewAt(dstValue.Type(), unsafe.Pointer(dstValue.UnsafeAddr())).Elem()
|
|
|
|
if !srcValue.CanAddr() {
|
|
switch {
|
|
case srcValue.CanInt():
|
|
dstValue.SetInt(srcValue.Int())
|
|
case srcValue.CanUint():
|
|
dstValue.SetUint(srcValue.Uint())
|
|
case srcValue.CanFloat():
|
|
dstValue.SetFloat(srcValue.Float())
|
|
case srcValue.CanComplex():
|
|
dstValue.SetComplex(srcValue.Complex())
|
|
case srcValue.Kind() == reflect.Bool:
|
|
dstValue.SetBool(srcValue.Bool())
|
|
}
|
|
|
|
continue
|
|
}
|
|
|
|
srcValue = reflect.NewAt(srcValue.Type(), unsafe.Pointer(srcValue.UnsafeAddr())).Elem()
|
|
}
|
|
|
|
tagValue := f.Tag.Get("scrub")
|
|
copied := s.deepCopyStruct(f.Name, srcValue, tagValue, skipScrub)
|
|
dstValue.Set(copied)
|
|
}
|
|
return dst.Elem()
|
|
|
|
case reflect.Map:
|
|
dst := reflect.MakeMap(src.Type())
|
|
keys := src.MapKeys()
|
|
for i := 0; i < src.Len(); i++ {
|
|
mValue := src.MapIndex(keys[i])
|
|
dst.SetMapIndex(keys[i], s.deepCopyStruct(keys[i].String(), mValue, "", skipScrub))
|
|
}
|
|
return dst
|
|
|
|
case reflect.Slice:
|
|
dst := reflect.MakeSlice(src.Type(), src.Len(), src.Cap())
|
|
for i := 0; i < src.Len(); i++ {
|
|
dst.Index(i).Set(s.deepCopyStruct(fieldName, src.Index(i), "", skipScrub))
|
|
}
|
|
return dst
|
|
|
|
case reflect.Array:
|
|
if src.Len() == 0 {
|
|
return src
|
|
}
|
|
|
|
dst := reflect.New(src.Type()).Elem()
|
|
for i := 0; i < src.Len(); i++ {
|
|
dst.Index(i).Set(s.deepCopyStruct(fieldName, src.Index(i), "", skipScrub))
|
|
}
|
|
return dst
|
|
|
|
case reflect.Interface:
|
|
if src.IsNil() {
|
|
return src
|
|
}
|
|
dst := reflect.New(src.Elem().Type())
|
|
copied := s.deepCopyStruct(fieldName, src.Elem(), scrubTag, skipScrub)
|
|
dst.Elem().Set(copied)
|
|
return dst.Elem()
|
|
|
|
case reflect.Ptr:
|
|
dst := reflect.New(src.Elem().Type())
|
|
copied := s.deepCopyStruct(fieldName, src.Elem(), scrubTag, skipScrub)
|
|
dst.Elem().Set(copied)
|
|
return dst
|
|
|
|
default:
|
|
dst := reflect.New(src.Type())
|
|
dst.Elem().Set(src)
|
|
return dst.Elem()
|
|
}
|
|
}
|
|
|
|
// Struct implements Scrubber
|
|
func (s *scrubberImpl) DeepCopyStruct(val any) any {
|
|
return s.deepCopyStruct("", reflect.ValueOf(val), "", false).Interface()
|
|
}
|
|
|
|
func (s *scrubberImpl) scrubJsonObject(val map[string]interface{}) error {
|
|
// fix https://github.com/gitpod-io/security/issues/64
|
|
name, _ := val["name"].(string)
|
|
value, _ := val["value"].(string)
|
|
if name != "" && value != "" {
|
|
val["value"] = s.KeyValue(name, value)
|
|
}
|
|
|
|
for k, v := range val {
|
|
if str, ok := v.(string); ok {
|
|
val[k] = s.KeyValue(k, str)
|
|
} else {
|
|
err := s.scrubJsonValue(&v)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *scrubberImpl) scrubJsonSlice(val []interface{}) error {
|
|
for i := range val {
|
|
err := s.scrubJsonValue(&(val[i]))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Value implements Scrubber
|
|
func (s *scrubberImpl) Value(value string) string {
|
|
for key, expr := range s.HashedValues {
|
|
value = expr.ReplaceAllStringFunc(value, func(s string) string {
|
|
return SanitiseHash(s, SanitiseWithKeyName(key))
|
|
})
|
|
}
|
|
for key, expr := range s.RedactedValues {
|
|
value = expr.ReplaceAllStringFunc(value, func(s string) string {
|
|
return SanitiseRedact(s, SanitiseWithKeyName(key))
|
|
})
|
|
}
|
|
|
|
return value
|
|
}
|
|
|
|
type structScrubber struct {
|
|
Parent *scrubberImpl
|
|
}
|
|
|
|
var (
|
|
_ reflectwalk.MapWalker = &structScrubber{}
|
|
_ reflectwalk.StructWalker = &structScrubber{}
|
|
_ reflectwalk.PrimitiveWalker = &structScrubber{}
|
|
_ reflectwalk.PointerValueWalker = &structScrubber{}
|
|
)
|
|
|
|
// Pointer implements reflectwalk.PointerValueWalker
|
|
func (s *structScrubber) Pointer(val reflect.Value) error {
|
|
if !val.CanInterface() {
|
|
return nil
|
|
}
|
|
value := val.Interface()
|
|
if _, ok := value.(TrustedValue); ok {
|
|
return reflectwalk.SkipEntry
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Primitive implements reflectwalk.PrimitiveWalker
|
|
func (s *structScrubber) Primitive(val reflect.Value) error {
|
|
if val.Kind() == reflect.String && val.CanSet() {
|
|
val.SetString(s.Parent.Value(val.String()))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Struct implements reflectwalk.StructWalker
|
|
func (s *structScrubber) Struct(val reflect.Value) error {
|
|
return nil
|
|
}
|
|
|
|
// StructField implements reflectwalk.StructWalker
|
|
func (s *structScrubber) StructField(field reflect.StructField, val reflect.Value) error {
|
|
if val.Kind() == reflect.String {
|
|
var (
|
|
setExplicitValue bool
|
|
explicitValue string
|
|
)
|
|
tag := field.Tag.Get("scrub")
|
|
switch tag {
|
|
case "ignore":
|
|
return reflectwalk.SkipEntry
|
|
case "hash":
|
|
setExplicitValue = true
|
|
explicitValue = SanitiseHash(val.String())
|
|
case "redact":
|
|
setExplicitValue = true
|
|
explicitValue = SanitiseRedact(val.String())
|
|
}
|
|
|
|
if setExplicitValue {
|
|
if !val.CanSet() {
|
|
return fmt.Errorf("cannot set %s", field.PkgPath)
|
|
}
|
|
val.SetString(explicitValue)
|
|
} else {
|
|
sanitisatiser := s.Parent.getSanitisatiser(field.Name)
|
|
if sanitisatiser != nil {
|
|
if !val.CanSet() {
|
|
return fmt.Errorf("cannot set %s", field.PkgPath)
|
|
}
|
|
val.SetString(sanitisatiser(val.String()))
|
|
}
|
|
}
|
|
return reflectwalk.SkipEntry
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Map implements reflectwalk.MapWalker
|
|
func (s *structScrubber) Map(m reflect.Value) error {
|
|
// fix https://github.com/gitpod-io/security/issues/64
|
|
var (
|
|
nameV reflect.Value
|
|
valueK reflect.Value
|
|
valueV reflect.Value
|
|
)
|
|
for _, k := range m.MapKeys() {
|
|
kv := m.MapIndex(k)
|
|
if k.String() == "name" {
|
|
nameV = kv
|
|
} else if k.String() == "value" {
|
|
valueK = k
|
|
valueV = kv
|
|
}
|
|
}
|
|
if nameV.Kind() == reflect.Interface {
|
|
nameV = nameV.Elem()
|
|
}
|
|
if valueV.Kind() == reflect.Interface {
|
|
valueV = valueV.Elem()
|
|
}
|
|
|
|
if nameV.Kind() == reflect.String && valueV.Kind() == reflect.String {
|
|
sanitisatiser := s.Parent.getSanitisatiser(nameV.String())
|
|
if sanitisatiser != nil {
|
|
value := sanitisatiser(valueV.String())
|
|
m.SetMapIndex(valueK, reflect.ValueOf(value))
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MapElem implements reflectwalk.MapWalker
|
|
func (s *structScrubber) MapElem(m reflect.Value, k reflect.Value, v reflect.Value) error {
|
|
kind := v.Kind()
|
|
if kind == reflect.Interface {
|
|
v = v.Elem()
|
|
kind = v.Kind()
|
|
}
|
|
if k.Kind() == reflect.Interface {
|
|
k = k.Elem()
|
|
}
|
|
if kind == reflect.String {
|
|
m.SetMapIndex(k, reflect.ValueOf(s.Parent.KeyValue(k.String(), v.String())))
|
|
}
|
|
|
|
return nil
|
|
}
|