690 lines
18 KiB
Go

// Copyright (c) 2021 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License.AGPL.txt in the project root for license information.
package main
import (
"fmt"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"unsafe"
cli "github.com/urfave/cli/v2"
"golang.org/x/sys/unix"
"golang.org/x/xerrors"
"github.com/gitpod-io/gitpod/common-go/log"
_ "github.com/gitpod-io/gitpod/common-go/nsenter"
"github.com/google/nftables"
"github.com/google/nftables/binaryutil"
"github.com/google/nftables/expr"
"github.com/vishvananda/netlink"
)
// main builds the nsinsider command-line application and runs it with the
// process arguments. Each sub-command performs one privileged setup step
// (mount handling, /dev preparation, veth/NAT setup or connection rate
// limiting). If the selected command returns an error, it is logged together
// with the GITPOD_INSTANCE_ID environment variable and the arguments, and the
// process terminates via log.Fatal.
func main() {
	app := &cli.App{
		Commands: []*cli.Command{
			{
				Name:  "move-mount",
				Usage: "calls move_mount with the pipe-fd to target",
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:     "target",
						Required: true,
					},
					&cli.IntFlag{
						Name:     "pipe-fd",
						Required: true,
					},
				},
				Action: func(c *cli.Context) error {
					// The from-path is empty: the mount is identified by the fd alone,
					// which is why the empty-path flag is passed.
					return syscallMoveMount(c.Int("pipe-fd"), "", unix.AT_FDCWD, c.String("target"), flagMoveMountFEmptyPath)
				},
			},
			{
				Name:  "open-tree",
				Usage: "opens a mount tree and writes the resulting mountfd to the Unix pipe on the pipe-fd",
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:     "target",
						Required: true,
					},
					&cli.IntFlag{
						Name:     "pipe-fd",
						Required: true,
					},
				},
				Action: func(c *cli.Context) error {
					fd, err := syscallOpenTree(unix.AT_FDCWD, c.String("target"), flagOpenTreeClone|flagAtRecursive)
					if err != nil {
						return err
					}
					// Hand the mount fd to the peer as SCM_RIGHTS ancillary data.
					return unix.Sendmsg(c.Int("pipe-fd"), nil, unix.UnixRights(int(fd)), nil, 0)
				},
			},
			{
				Name:  "make-shared",
				Usage: "makes a mount point shared",
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:     "target",
						Required: true,
					},
				},
				Action: func(c *cli.Context) error {
					return unix.Mount("none", c.String("target"), "", unix.MS_SHARED, "")
				},
			},
			{
				Name:  "mount-fusefs-mark",
				Usage: "mounts a fusefs mark",
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:     "source",
						Required: true,
					},
					&cli.StringFlag{
						Name:     "merged",
						Required: true,
					},
					&cli.StringFlag{
						Name:     "upper",
						Required: true,
					},
					&cli.StringFlag{
						Name:     "work",
						Required: true,
					},
					&cli.StringFlag{
						Name:     "uidmapping",
						Required: false,
					},
					&cli.StringFlag{
						Name:     "gidmapping",
						Required: false,
					},
				},
				Action: func(c *cli.Context) error {
					target := filepath.Clean(c.String("merged"))
					upper := filepath.Clean(c.String("upper"))
					work := filepath.Clean(c.String("work"))
					source := filepath.Clean(c.String("source"))

					// Mount options passed to fuse-overlayfs as a single comma-separated -o value.
					args := []string{
						fmt.Sprintf("lowerdir=%s,upperdir=%v,workdir=%v", source, upper, work),
					}
					if len(c.String("uidmapping")) > 0 {
						args = append(args, fmt.Sprintf("uidmapping=%v", c.String("uidmapping")))
					}
					if len(c.String("gidmapping")) > 0 {
						args = append(args, fmt.Sprintf("gidmapping=%v", c.String("gidmapping")))
					}

					// The fuse-overlayfs binary is taken from the .supervisor directory below source.
					cmd := exec.Command(
						fmt.Sprintf("%v/.supervisor/fuse-overlayfs", source),
						"-o",
						strings.Join(args, ","),
						"none",
						target,
					)
					cmd.Dir = source

					out, err := cmd.CombinedOutput()
					if err != nil {
						return xerrors.Errorf("fuse-overlayfs (%v) failed: %q\n%v",
							cmd.Args,
							string(out),
							err,
						)
					}
					return nil
				},
			},
			{
				Name:  "mount-shiftfs-mark",
				Usage: "mounts a shiftfs mark",
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:     "source",
						Required: true,
					},
					&cli.StringFlag{
						Name:     "target",
						Required: true,
					},
				},
				Action: func(c *cli.Context) error {
					return unix.Mount(c.String("source"), c.String("target"), "shiftfs", 0, "mark")
				},
			},
			{
				Name:  "mount-proc",
				Usage: "mounts proc",
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:     "target",
						Required: true,
					},
				},
				Action: func(c *cli.Context) error {
					return unix.Mount("proc", c.String("target"), "proc", 0, "")
				},
			},
			{
				Name:  "mount-sysfs",
				Usage: "mounts sysfs",
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:     "target",
						Required: true,
					},
				},
				Action: func(c *cli.Context) error {
					return unix.Mount("sysfs", c.String("target"), "sysfs", 0, "")
				},
			},
			{
				Name:  "unmount",
				Usage: "unmounts a mountpoint",
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:     "target",
						Required: true,
					},
				},
				Action: func(c *cli.Context) error {
					return unix.Unmount(c.String("target"), 0)
				},
			},
			{
				Name:  "prepare-dev",
				Usage: "prepares a workspaces /dev directory",
				Flags: []cli.Flag{
					&cli.IntFlag{
						Name:     "uid",
						Required: true,
					},
					&cli.IntFlag{
						Name:     "gid",
						Required: true,
					},
				},
				Action: func(c *cli.Context) error {
					// Writes an empty /dev/kmsg (created if missing, truncated otherwise).
					err := ioutil.WriteFile("/dev/kmsg", nil, 0644)
					if err != nil {
						return err
					}

					// Best effort: if the directory cannot be created, the mknod of
					// /dev/net/tun below fails and reports the problem.
					_ = os.MkdirAll("/dev/net", 0755)

					// Character devices (major 10) to create, in order, and hand over
					// to the given uid/gid.
					devices := []struct {
						path  string
						minor uint32
					}{
						{path: "/dev/net/tun", minor: 200},
						{path: "/dev/fuse", minor: 229},
					}
					for _, dev := range devices {
						if err := unix.Mknod(dev.path, 0666|unix.S_IFCHR, int(unix.Mkdev(10, dev.minor))); err != nil {
							return err
						}
						// Explicit chmod after mknod, as the mknod mode is subject to the umask.
						if err := os.Chmod(dev.path, os.FileMode(0666)); err != nil {
							return err
						}
						if err := os.Chown(dev.path, c.Int("uid"), c.Int("gid")); err != nil {
							return err
						}
					}
					return nil
				},
			},
			{
				Name:  "setup-pair-veths",
				Usage: "set up a pair of veths",
				Flags: []cli.Flag{
					&cli.IntFlag{
						Name:     "target-pid",
						Required: true,
					},
				},
				Action: func(c *cli.Context) error {
					containerIf, vethIf, cethIf := "eth0", "veth0", "eth0"
					mask := net.IPv4Mask(255, 255, 255, 0)
					vethIP := net.IPNet{
						IP:   net.IPv4(10, 0, 5, 1),
						Mask: mask,
					}
					cethIP := net.IPNet{
						IP:   net.IPv4(10, 0, 5, 2),
						Mask: mask,
					}
					// Network address of the veth subnet (10.0.5.0/24), used to match
					// traffic that has to be masqueraded.
					masqueradeAddr := net.IPNet{
						IP:   vethIP.IP.Mask(mask),
						Mask: mask,
					}
					targetPid := c.Int("target-pid")

					eth0, err := netlink.LinkByName(containerIf)
					if err != nil {
						return xerrors.Errorf("cannot get container network device %s: %w", containerIf, err)
					}

					// Create the veth pair; the peer end is placed in the network
					// namespace of the target process and inherits eth0's MTU.
					veth := &netlink.Veth{
						LinkAttrs: netlink.LinkAttrs{
							Name:  vethIf,
							Flags: net.FlagUp,
							MTU:   eth0.Attrs().MTU,
						},
						PeerName:      cethIf,
						PeerNamespace: netlink.NsPid(targetPid),
					}
					if err := netlink.LinkAdd(veth); err != nil {
						return xerrors.Errorf("link %q-%q netns failed: %w", vethIf, cethIf, err)
					}

					vethLink, err := netlink.LinkByName(vethIf)
					if err != nil {
						return xerrors.Errorf("cannot find %q: %w", vethIf, err)
					}
					if err := netlink.AddrAdd(vethLink, &netlink.Addr{IPNet: &vethIP}); err != nil {
						return xerrors.Errorf("failed to add IP address to %q: %w", vethIf, err)
					}
					if err := netlink.LinkSetUp(vethLink); err != nil {
						return xerrors.Errorf("failed to enable %q: %w", vethIf, err)
					}

					nc := &nftables.Conn{}
					nat := nc.AddTable(&nftables.Table{
						Family: nftables.TableFamilyIPv4,
						Name:   "nat",
					})

					postrouting := nc.AddChain(&nftables.Chain{
						Name:     "postrouting",
						Hooknum:  nftables.ChainHookPostrouting,
						Priority: nftables.ChainPriorityNATSource,
						Table:    nat,
						Type:     nftables.ChainTypeNAT,
					})

					// ip saddr 10.0.5.0/24 oifname "eth0" masquerade
					nc.AddRule(&nftables.Rule{
						Table: nat,
						Chain: postrouting,
						Exprs: []expr.Any{
							// load the IPv4 source address (offset 12 of the network header)
							&expr.Payload{
								DestRegister: 1,
								Base:         expr.PayloadBaseNetworkHeader,
								Offset:       12,
								Len:          net.IPv4len,
							},
							// reduce it to its network part
							&expr.Bitwise{
								SourceRegister: 1,
								DestRegister:   1,
								Len:            net.IPv4len,
								Mask:           masqueradeAddr.Mask,
								Xor:            net.IPv4Mask(0, 0, 0, 0),
							},
							&expr.Cmp{
								Op:       expr.CmpOpEq,
								Register: 1,
								Data:     masqueradeAddr.IP.To4(),
							},
							// match the outgoing interface name (NUL-terminated)
							&expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
							&expr.Cmp{
								Op:       expr.CmpOpEq,
								Register: 1,
								Data:     []byte(containerIf + "\x00"),
							},
							&expr.Masq{},
						},
					})

					prerouting := nc.AddChain(&nftables.Chain{
						Name:     "prerouting",
						Hooknum:  nftables.ChainHookPrerouting,
						Priority: nftables.ChainPriorityNATDest,
						Table:    nat,
						Type:     nftables.ChainTypeNAT,
					})

					// iif $containerIf tcp dport 1-65535 dnat to $cethIP:tcp dport
					nc.AddRule(&nftables.Rule{
						Table: nat,
						Chain: prerouting,
						Exprs: []expr.Any{
							// match the incoming interface name (NUL-terminated)
							&expr.Meta{Key: expr.MetaKeyIIFNAME, Register: 1},
							&expr.Cmp{
								Op:       expr.CmpOpEq,
								Register: 1,
								Data:     []byte(containerIf + "\x00"),
							},
							&expr.Meta{Key: expr.MetaKeyL4PROTO, Register: 1},
							&expr.Cmp{
								Op:       expr.CmpOpEq,
								Register: 1,
								Data:     []byte{unix.IPPROTO_TCP},
							},
							// load the destination port (offset 2 of the transport header)
							// into register 1; it is reused below as the NAT target port
							&expr.Payload{
								DestRegister: 1,
								Base:         expr.PayloadBaseTransportHeader,
								Offset:       2,
								Len:          2,
							},
							&expr.Cmp{
								Op:       expr.CmpOpGte,
								Register: 1,
								Data:     []byte{0x00, 0x01},
							},
							&expr.Cmp{
								Op:       expr.CmpOpLte,
								Register: 1,
								Data:     []byte{0xff, 0xff},
							},
							// register 2 carries the NAT target address
							&expr.Immediate{
								Register: 2,
								Data:     cethIP.IP.To4(),
							},
							&expr.NAT{
								Type:        expr.NATTypeDestNAT,
								Family:      unix.NFPROTO_IPV4,
								RegAddrMin:  2,
								RegProtoMin: 1,
							},
						},
					})

					if err := nc.Flush(); err != nil {
						return xerrors.Errorf("failed to apply nftables: %w", err)
					}
					return nil
				},
			},
			{
				Name:  "setup-peer-veth",
				Usage: "set up a peer veth",
				Action: func(c *cli.Context) error {
					cethIf := "eth0"
					mask := net.IPv4Mask(255, 255, 255, 0)
					cethIP := net.IPNet{
						IP:   net.IPv4(10, 0, 5, 2),
						Mask: mask,
					}
					vethIP := net.IPNet{
						IP:   net.IPv4(10, 0, 5, 1),
						Mask: mask,
					}

					cethLink, err := netlink.LinkByName(cethIf)
					if err != nil {
						return xerrors.Errorf("cannot find %q: %w", cethIf, err)
					}
					if err := netlink.AddrAdd(cethLink, &netlink.Addr{IPNet: &cethIP}); err != nil {
						return xerrors.Errorf("failed to add IP address to %q: %w", cethIf, err)
					}
					if err := netlink.LinkSetUp(cethLink); err != nil {
						return xerrors.Errorf("failed to enable %q: %w", cethIf, err)
					}

					lo, err := netlink.LinkByName("lo")
					if err != nil {
						return xerrors.Errorf("cannot find lo: %w", err)
					}
					if err := netlink.LinkSetUp(lo); err != nil {
						return xerrors.Errorf("failed to enable lo: %w", err)
					}

					// Route without a destination: the default route via the veth address.
					defaultGw := netlink.Route{
						Scope: netlink.SCOPE_UNIVERSE,
						Gw:    vethIP.IP,
					}
					if err := netlink.RouteReplace(&defaultGw); err != nil {
						return xerrors.Errorf("failed to set up default gw: %w", err)
					}
					return nil
				},
			},
			{
				Name:  "enable-ip-forward",
				Usage: "enable IPv4 forwarding",
				Action: func(c *cli.Context) error {
					return os.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte("1"), 0644)
				},
			},
			{
				Name:  "setup-connection-limit",
				Usage: "set up network connection rate limiting",
				Flags: []cli.Flag{
					&cli.IntFlag{
						Name:     "limit",
						Required: true,
					},
					&cli.IntFlag{
						Name:     "bucketsize",
						Required: false,
					},
					&cli.BoolFlag{
						Name:     "enforce",
						Required: false,
					},
				},
				Action: func(c *cli.Context) error {
					const dropStats = "ws-connection-drop-stats"

					nftcon := nftables.Conn{}
					connLimit := c.Int("limit")
					// An unset (zero) bucket size falls back to 1000.
					bucketSize := c.Int("bucketsize")
					if bucketSize == 0 {
						bucketSize = 1000
					}
					enforce := c.Bool("enforce")

					// nft add table ip gitpod
					gitpodTable := nftcon.AddTable(&nftables.Table{
						Family: nftables.TableFamilyIPv4,
						Name:   "gitpod",
					})

					// nft add chain ip gitpod ratelimit { type filter hook postrouting priority 0 \; }
					ratelimit := nftcon.AddChain(&nftables.Chain{
						Table:    gitpodTable,
						Name:     "ratelimit",
						Type:     nftables.ChainTypeFilter,
						Hooknum:  nftables.ChainHookPostrouting,
						Priority: nftables.ChainPriorityFilter,
					})

					// nft add counter gitpod connection_drop_stats
					nftcon.AddObject(&nftables.CounterObj{
						Table: gitpodTable,
						Name:  dropStats,
					})

					// nft add set gitpod ws-connections { type ipv4_addr; flags timeout, dynamic; }
					set := &nftables.Set{
						Table:      gitpodTable,
						Name:       "ws-connections",
						KeyType:    nftables.TypeIPAddr,
						Dynamic:    true,
						HasTimeout: true,
					}
					if err := nftcon.AddSet(set, nil); err != nil {
						return err
					}

					// Without --enforce, packets over the limit are only counted and
					// still accepted; with it they are dropped.
					verdict := expr.VerdictAccept
					if enforce {
						verdict = expr.VerdictDrop
					}

					// nft add rule ip gitpod ratelimit ip protocol tcp ct state new meter ws-connections
					// '{ ip daddr & 0.0.0.0 timeout 1m limit rate over 3000/minute burst 1000 packets }' counter name ws-connection-drop-stats drop
					nftcon.AddRule(&nftables.Rule{
						// ip gitpod ratelimit
						Table: gitpodTable,
						Chain: ratelimit,
						Exprs: []expr.Any{
							// ip protocol tcp
							// get offset into network header and check if tcp
							&expr.Payload{
								DestRegister: 1,
								Base:         expr.PayloadBaseNetworkHeader,
								Offset:       uint32(9),
								Len:          uint32(1),
							},
							&expr.Cmp{
								Register: 1,
								Op:       expr.CmpOpEq,
								Data:     []byte{unix.IPPROTO_TCP},
							},
							// ct state new
							// get state from conntrack entry and check for 'new' (0x00000008)
							&expr.Ct{
								Key:            expr.CtKeySTATE,
								Register:       1,
								SourceRegister: false,
							},
							&expr.Bitwise{
								DestRegister:   1,
								SourceRegister: 1,
								Len:            4,
								Mask:           binaryutil.NativeEndian.PutUint32(expr.CtStateBitNEW),
								Xor:            binaryutil.NativeEndian.PutUint32(0),
							},
							&expr.Cmp{
								Register: 1,
								Op:       expr.CmpOpNeq,
								Data:     []byte{0, 0, 0, 0},
							},
							// ip daddr & 0.0.0.0
							// get the destination address and AND every address with zero
							// to ensure that every address is placed into the same bucket
							&expr.Payload{
								DestRegister: 1,
								Base:         expr.PayloadBaseNetworkHeader,
								Offset:       uint32(16),
								Len:          uint32(4),
							},
							// NOTE(review): Len/Mask/Xor cover 1 byte although a 4-byte
							// address was loaded above - confirm this is intentional.
							&expr.Bitwise{
								DestRegister:   1,
								SourceRegister: 1,
								Len:            1,
								Mask:           []byte{0x00},
								Xor:            []byte{0x00},
							},
							// timeout 1m limit rate over 3000/minute burst 1000 packets
							&expr.Dynset{
								SrcRegKey: 1,
								SetName:   set.Name,
								Operation: uint32(unix.NFT_DYNSET_OP_ADD),
								Timeout:   time.Minute,
								Exprs: []expr.Any{
									&expr.Limit{
										Type:  expr.LimitTypePkts,
										Rate:  uint64(connLimit),
										Unit:  expr.LimitTimeMinute,
										Burst: uint32(bucketSize),
										Over:  true,
									},
								},
							},
							// counter name "ws-connection-drop-stats"
							&expr.Objref{
								Type: 1,
								Name: dropStats,
							},
							// drop (or accept when not enforcing)
							&expr.Verdict{
								Kind: verdict,
							},
						},
					})

					if err := nftcon.Flush(); err != nil {
						return xerrors.Errorf("failed to apply connection limit: %w", err)
					}
					return nil
				},
			},
		},
	}

	log.Init("nsinsider", "", true, false)
	err := app.Run(os.Args)
	if err != nil {
		log.WithField("instanceId", os.Getenv("GITPOD_INSTANCE_ID")).WithField("args", os.Args).Fatal(err)
	}
}
// syscallMoveMount issues the raw move_mount system call (SYS_MOVE_MOUNT).
//
// fromDirFD/fromPath name the mount to move and toDirFD/toPath name the
// destination; flags is passed to the kernel unchanged. Both paths are
// converted to NUL-terminated byte strings first, and a conversion failure is
// returned before any system call is made. A non-zero errno from the kernel is
// returned as the error.
func syscallMoveMount(fromDirFD int, fromPath string, toDirFD int, toPath string, flags uintptr) error {
	src, err := unix.BytePtrFromString(fromPath)
	if err != nil {
		return err
	}
	dst, err := unix.BytePtrFromString(toPath)
	if err != nil {
		return err
	}

	// The pointer-to-uintptr conversions must stay inside the call expression.
	if _, _, errno := unix.Syscall6(
		unix.SYS_MOVE_MOUNT,
		uintptr(fromDirFD),
		uintptr(unsafe.Pointer(src)),
		uintptr(toDirFD),
		uintptr(unsafe.Pointer(dst)),
		flags,
		0,
	); errno != 0 {
		return errno
	}
	return nil
}
// flagMoveMountFEmptyPath is the move_mount flag that permits an empty from
// path: https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/mount.h#L70
const flagMoveMountFEmptyPath = 0x4
// syscallOpenTree issues the raw open_tree system call (SYS_OPEN_TREE) for
// path, resolved relative to the directory file descriptor dfd, passing flags
// to the kernel unchanged.
//
// It returns the first result value of the system call (the new file
// descriptor) on success. If path cannot be converted to a NUL-terminated byte
// string, or the kernel reports a non-zero errno, it returns 0 and that error.
func syscallOpenTree(dfd int, path string, flags uintptr) (fd uintptr, err error) {
	pathPtr, err := unix.BytePtrFromString(path)
	if err != nil {
		return 0, err
	}

	// The pointer-to-uintptr conversion must stay inside the call expression.
	res, _, errno := unix.Syscall(unix.SYS_OPEN_TREE, uintptr(dfd), uintptr(unsafe.Pointer(pathPtr)), flags)
	if errno == 0 {
		return res, nil
	}
	return 0, errno
}
// flagOpenTreeClone is the open_tree clone flag:
// https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/mount.h#L62
const flagOpenTreeClone = 0x1

// flagAtRecursive applies the operation to the entire subtree:
// https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/fcntl.h#L112
const flagAtRecursive = 0x8000