Use a more general (and faster) method to sanitize URLs with credentials (#19239)

Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs
tokarchuk/v1.17
wxiaoguang 3 years ago committed by GitHub
parent 84038f33f4
commit c83168104b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      models/migrations/v180.go
  2. 2
      models/task.go
  3. 2
      modules/git/command.go
  4. 4
      modules/git/repo.go
  5. 88
      modules/util/sanitize.go
  6. 139
      modules/util/sanitize_test.go
  7. 2
      routers/api/v1/repo/migrate.go
  8. 3
      routers/web/repo/migrate.go
  9. 45
      services/mirror/mirror_pull.go
  10. 6
      services/mirror/mirror_push.go
  11. 2
      services/task/migrate.go
  12. 2
      services/task/task.go

@ -112,7 +112,7 @@ func removeCredentials(payload string) (string, error) {
opts.AuthPassword = ""
opts.AuthToken = ""
opts.CloneAddr = util.NewStringURLSanitizer(opts.CloneAddr, true).Replace(opts.CloneAddr)
opts.CloneAddr = util.SanitizeCredentialURLs(opts.CloneAddr)
confBytes, err := json.Marshal(opts)
if err != nil {

@ -245,7 +245,7 @@ func FinishMigrateTask(task *Task) error {
}
conf.AuthPassword = ""
conf.AuthToken = ""
conf.CloneAddr = util.NewStringURLSanitizer(conf.CloneAddr, true).Replace(conf.CloneAddr)
conf.CloneAddr = util.SanitizeCredentialURLs(conf.CloneAddr)
conf.AuthPasswordEncrypted = ""
conf.AuthTokenEncrypted = ""
conf.CloneAddrEncrypted = ""

@ -154,7 +154,7 @@ func (c *Command) RunWithContext(rc *RunContext) error {
args = make([]string, len(c.args))
copy(args, c.args)
for _, urlArgIndex := range argSensitiveURLIndexes {
args[urlArgIndex] = util.NewStringURLSanitizer(args[urlArgIndex], true).Replace(args[urlArgIndex])
args[urlArgIndex] = util.SanitizeCredentialURLs(args[urlArgIndex])
}
}
desc = fmt.Sprintf("%s %s [repo_path: %s]", c.name, strings.Join(args, " "), rc.Dir)

@ -156,7 +156,7 @@ func CloneWithArgs(ctx context.Context, from, to string, args []string, opts Clo
cmd.AddArguments("--", from, to)
if strings.Contains(from, "://") && strings.Contains(from, "@") {
cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, util.NewStringURLSanitizer(from, true).Replace(from), to, opts.Shared, opts.Mirror, opts.Depth))
cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, util.SanitizeCredentialURLs(from), to, opts.Shared, opts.Mirror, opts.Depth))
} else {
cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, from, to, opts.Shared, opts.Mirror, opts.Depth))
}
@ -209,7 +209,7 @@ func Push(ctx context.Context, repoPath string, opts PushOptions) error {
cmd.AddArguments(opts.Branch)
}
if strings.Contains(opts.Remote, "://") && strings.Contains(opts.Remote, "@") {
cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, util.NewStringURLSanitizer(opts.Remote, true).Replace(opts.Remote), opts.Force, opts.Mirror))
cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, util.SanitizeCredentialURLs(opts.Remote), opts.Force, opts.Mirror))
} else {
cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, opts.Remote, opts.Force, opts.Mirror))
}

@ -5,59 +5,71 @@
package util
import (
"net/url"
"strings"
)
"bytes"
"unicode"
const (
userPlaceholder = "sanitized-credential"
unparsableURL = "(unparsable url)"
"github.com/yuin/goldmark/util"
)
type sanitizedError struct {
err error
replacer *strings.Replacer
err error
}
func (err sanitizedError) Error() string {
return err.replacer.Replace(err.err.Error())
return SanitizeCredentialURLs(err.err.Error())
}
// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text.
func NewSanitizedError(err error, oldnew ...string) error {
return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)}
func (err sanitizedError) Unwrap() error {
return err.err
}
// NewURLSanitizedError wraps an error and replaces the url credential or removes them.
func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error {
return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)}
// SanitizeErrorCredentialURLs wraps the error and makes sure the returned error message doesn't contain sensitive credentials in URLs.
func SanitizeErrorCredentialURLs(err error) error {
return sanitizedError{err: err}
}
// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them.
// If the url can't get parsed it gets replaced with a placeholder string.
func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error {
return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)}
}
const userPlaceholder = "sanitized-credential"
// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed.
func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer {
old := u.String()
var schemeSep = []byte("://")
if u.User != nil && usePlaceholder {
u.User = url.User(userPlaceholder)
} else {
u.User = nil
// SanitizeCredentialURLs removes all credentials from URLs (starting with "scheme://") in the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
func SanitizeCredentialURLs(s string) string {
bs := util.StringToReadOnlyBytes(s)
schemeSepPos := bytes.Index(bs, schemeSep)
if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
return s // fast return if there is no URL scheme or no userinfo
}
return strings.NewReplacer(old, u.String())
}
// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed.
// If the url can't get parsed it gets replaced with a placeholder string
func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer {
u, err := url.Parse(unsanitizedURL)
if err != nil {
// don't log the error, since it might contain unsanitized URL.
return strings.NewReplacer(unsanitizedURL, unparsableURL)
out := make([]byte, 0, len(bs)+len(userPlaceholder))
for schemeSepPos != -1 {
schemeSepPos += 3 // skip the "://"
sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
sepLoop:
for ; sepEndPos < len(bs); sepEndPos++ {
c := bs[sepEndPos]
if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
continue
}
switch c {
case '@':
sepAtPos = sepEndPos
case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
default:
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
}
}
// if there is '@', and the string is like "s://u@h", then hide the "u" part
if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
out = append(out, bs[:schemeSepPos]...)
out = append(out, userPlaceholder...)
out = append(out, bs[sepAtPos:sepEndPos]...)
} else {
out = append(out, bs[:sepEndPos]...)
}
bs = bs[sepEndPos:]
schemeSepPos = bytes.Index(bs, schemeSep)
}
return NewURLSanitizer(u, usePlaceholder)
out = append(out, bs...)
return util.BytesToReadOnlyString(out)
}

@ -11,154 +11,65 @@ import (
"github.com/stretchr/testify/assert"
)
func TestNewSanitizedError(t *testing.T) {
err := errors.New("error while secret on test")
err2 := NewSanitizedError(err)
assert.Equal(t, err.Error(), err2.Error())
cases := []struct {
input error
oldnew []string
expected string
}{
// case 0
{
errors.New("error while secret on test"),
[]string{"secret", "replaced"},
"error while replaced on test",
},
// case 1
{
errors.New("error while sec-ret on test"),
[]string{"secret", "replaced"},
"error while sec-ret on test",
},
}
for n, c := range cases {
err := NewSanitizedError(c.input, c.oldnew...)
assert.Equal(t, c.expected, err.Error(), "case %d: error should match", n)
}
func TestSanitizeErrorCredentialURLs(t *testing.T) {
err := errors.New("error with https://a@b.com")
se := SanitizeErrorCredentialURLs(err)
assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
}
func TestNewStringURLSanitizer(t *testing.T) {
func TestSanitizeCredentialURLs(t *testing.T) {
cases := []struct {
input string
placeholder bool
expected string
input string
expected string
}{
// case 0
{
"https://github.com/go-gitea/test_repo.git",
true,
"https://github.com/go-gitea/test_repo.git",
},
// case 1
{
"https://github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
},
// case 2
{
"https://mytoken@github.com/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
},
// case 3
{
"https://mytoken@github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
},
// case 4
{
"https://user:password@github.com/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
},
// case 5
{
"https://user:password@github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"ftp://x@",
"ftp://" + userPlaceholder + "@",
},
// case 6
{
"https://gi\nthub.com/go-gitea/test_repo.git",
false,
unparsableURL,
"ftp://x/@",
"ftp://x/@",
},
}
for n, c := range cases {
// uses NewURLSanitizer internally
result := NewStringURLSanitizer(c.input, c.placeholder).Replace(c.input)
assert.Equal(t, c.expected, result, "case %d: error should match", n)
}
}
func TestNewStringURLSanitizedError(t *testing.T) {
cases := []struct {
input string
placeholder bool
expected string
}{
// case 0
{
"https://github.com/go-gitea/test_repo.git",
true,
"https://github.com/go-gitea/test_repo.git",
},
// case 1
{
"https://github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"ftp://u@x/@", // test multiple @ chars
"ftp://" + userPlaceholder + "@x/@",
},
// case 2
{
"https://mytoken@github.com/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
"😊ftp://u@x😊", // test unicode
"😊ftp://" + userPlaceholder + "@x😊",
},
// case 3
{
"https://mytoken@github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"://@",
"://@",
},
// case 4
{
"https://user:password@github.com/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
"//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string
"//u:p@h",
},
// case 5
{
"https://user:password@github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"s://u@h", // the minimal pattern to be sanitized
"s://" + userPlaceholder + "@h",
},
// case 6
{
"https://gi\nthub.com/go-gitea/test_repo.git",
false,
unparsableURL,
"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com",
"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com",
},
}
encloseText := func(input string) string {
return "test " + input + " test"
}
for n, c := range cases {
err := errors.New(encloseText(c.input))
result := NewStringURLSanitizedError(err, c.input, c.placeholder)
assert.Equal(t, encloseText(c.expected), result.Error(), "case %d: error should match", n)
result := SanitizeCredentialURLs(c.input)
assert.Equal(t, c.expected, result, "case %d: error should match", n)
}
}

@ -236,7 +236,7 @@ func handleMigrateError(ctx *context.APIContext, repoOwner *user_model.User, rem
case base.IsErrNotSupported(err):
ctx.Error(http.StatusUnprocessableEntity, "", err)
default:
err = util.NewStringURLSanitizedError(err, remoteAddr, true)
err = util.SanitizeErrorCredentialURLs(err)
if strings.Contains(err.Error(), "Authentication failed") ||
strings.Contains(err.Error(), "Bad credentials") ||
strings.Contains(err.Error(), "could not read Username") {

@ -106,8 +106,7 @@ func handleMigrateError(ctx *context.Context, owner *user_model.User, err error,
ctx.Data["Err_RepoName"] = true
ctx.RenderWithErr(ctx.Tr("repo.form.name_pattern_not_allowed", err.(db.ErrNamePatternNotAllowed).Pattern), tpl, form)
default:
remoteAddr, _ := forms.ParseRemoteAddr(form.CloneAddr, form.AuthUsername, form.AuthPassword)
err = util.NewStringURLSanitizedError(err, remoteAddr, true)
err = util.SanitizeErrorCredentialURLs(err)
if strings.Contains(err.Error(), "Authentication failed") ||
strings.Contains(err.Error(), "Bad credentials") ||
strings.Contains(err.Error(), "could not read Username") {

@ -40,7 +40,7 @@ func UpdateAddress(ctx context.Context, m *repo_model.Mirror, addr string) error
cmd := git.NewCommand(ctx, "remote", "add", remoteName, "--mirror=fetch", addr)
if strings.Contains(addr, "://") && strings.Contains(addr, "@") {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=fetch %s [repo_path: %s]", remoteName, util.NewStringURLSanitizer(addr, true).Replace(addr), repoPath))
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=fetch %s [repo_path: %s]", remoteName, util.SanitizeCredentialURLs(addr), repoPath))
} else {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=fetch %s [repo_path: %s]", remoteName, addr, repoPath))
}
@ -60,7 +60,7 @@ func UpdateAddress(ctx context.Context, m *repo_model.Mirror, addr string) error
cmd = git.NewCommand(ctx, "remote", "add", remoteName, "--mirror=fetch", wikiRemotePath)
if strings.Contains(wikiRemotePath, "://") && strings.Contains(wikiRemotePath, "@") {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=fetch %s [repo_path: %s]", remoteName, util.NewStringURLSanitizer(wikiRemotePath, true).Replace(wikiRemotePath), wikiPath))
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=fetch %s [repo_path: %s]", remoteName, util.SanitizeCredentialURLs(wikiRemotePath), wikiPath))
} else {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=fetch %s [repo_path: %s]", remoteName, wikiRemotePath, wikiPath))
}
@ -160,7 +160,6 @@ func pruneBrokenReferences(ctx context.Context,
repoPath string,
timeout time.Duration,
stdoutBuilder, stderrBuilder *strings.Builder,
sanitizer *strings.Replacer,
isWiki bool,
) error {
wiki := ""
@ -184,8 +183,8 @@ func pruneBrokenReferences(ctx context.Context,
// sanitize the output, since it may contain the remote address, which may
// contain a password
stderrMessage := sanitizer.Replace(stderr)
stdoutMessage := sanitizer.Replace(stdout)
stderrMessage := util.SanitizeCredentialURLs(stderr)
stdoutMessage := util.SanitizeCredentialURLs(stdout)
log.Error("Failed to prune mirror repository %s%-v references:\nStdout: %s\nStderr: %s\nErr: %v", wiki, m.Repo, stdoutMessage, stderrMessage, pruneErr)
desc := fmt.Sprintf("Failed to prune mirror repository %s'%s' references: %s", wiki, repoPath, stderrMessage)
@ -229,11 +228,9 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
stdout := stdoutBuilder.String()
stderr := stderrBuilder.String()
// sanitize the output, since it may contain the remote address, which may
// contain a password
sanitizer := util.NewURLSanitizer(remoteAddr, true)
stderrMessage := sanitizer.Replace(stderr)
stdoutMessage := sanitizer.Replace(stdout)
// sanitize the output, since it may contain the remote address, which may contain a password
stderrMessage := util.SanitizeCredentialURLs(stderr)
stdoutMessage := util.SanitizeCredentialURLs(stdout)
// Now check if the error is a resolve reference due to broken reference
if strings.Contains(stderr, "unable to resolve reference") && strings.Contains(stderr, "reference broken") {
@ -241,7 +238,7 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
err = nil
// Attempt prune
pruneErr := pruneBrokenReferences(ctx, m, repoPath, timeout, &stdoutBuilder, &stderrBuilder, sanitizer, false)
pruneErr := pruneBrokenReferences(ctx, m, repoPath, timeout, &stdoutBuilder, &stderrBuilder, false)
if pruneErr == nil {
// Successful prune - reattempt mirror
stderrBuilder.Reset()
@ -259,8 +256,8 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
// sanitize the output, since it may contain the remote address, which may
// contain a password
stderrMessage = sanitizer.Replace(stderr)
stdoutMessage = sanitizer.Replace(stdout)
stderrMessage = util.SanitizeCredentialURLs(stderr)
stdoutMessage = util.SanitizeCredentialURLs(stdout)
}
}
}
@ -322,19 +319,9 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
stdout := stdoutBuilder.String()
stderr := stderrBuilder.String()
// sanitize the output, since it may contain the remote address, which may
// contain a password
remoteAddr, remoteErr := git.GetRemoteAddress(ctx, wikiPath, m.GetRemoteName())
if remoteErr != nil {
log.Error("SyncMirrors [repo: %-v Wiki]: unable to get GetRemoteAddress Error %v", m.Repo, remoteErr)
}
// sanitize the output, since it may contain the remote address, which may
// contain a password
sanitizer := util.NewURLSanitizer(remoteAddr, true)
stderrMessage := sanitizer.Replace(stderr)
stdoutMessage := sanitizer.Replace(stdout)
// sanitize the output, since it may contain the remote address, which may contain a password
stderrMessage := util.SanitizeCredentialURLs(stderr)
stdoutMessage := util.SanitizeCredentialURLs(stdout)
// Now check if the error is a resolve reference due to broken reference
if strings.Contains(stderrMessage, "unable to resolve reference") && strings.Contains(stderrMessage, "reference broken") {
@ -342,7 +329,7 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
err = nil
// Attempt prune
pruneErr := pruneBrokenReferences(ctx, m, repoPath, timeout, &stdoutBuilder, &stderrBuilder, sanitizer, true)
pruneErr := pruneBrokenReferences(ctx, m, repoPath, timeout, &stdoutBuilder, &stderrBuilder, true)
if pruneErr == nil {
// Successful prune - reattempt mirror
stderrBuilder.Reset()
@ -358,8 +345,8 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
}); err != nil {
stdout := stdoutBuilder.String()
stderr := stderrBuilder.String()
stderrMessage = sanitizer.Replace(stderr)
stdoutMessage = sanitizer.Replace(stdout)
stderrMessage = util.SanitizeCredentialURLs(stderr)
stdoutMessage = util.SanitizeCredentialURLs(stdout)
}
}
}

@ -31,7 +31,7 @@ func AddPushMirrorRemote(ctx context.Context, m *repo_model.PushMirror, addr str
addRemoteAndConfig := func(addr, path string) error {
cmd := git.NewCommand(ctx, "remote", "add", "--mirror=push", m.RemoteName, addr)
if strings.Contains(addr, "://") && strings.Contains(addr, "@") {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=push %s [repo_path: %s]", m.RemoteName, util.NewStringURLSanitizer(addr, true).Replace(addr), path))
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=push %s [repo_path: %s]", m.RemoteName, util.SanitizeCredentialURLs(addr), path))
} else {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=push %s [repo_path: %s]", m.RemoteName, addr, path))
}
@ -147,7 +147,7 @@ func runPushSync(ctx context.Context, m *repo_model.PushMirror) error {
endpoint := lfs.DetermineEndpoint(remoteAddr.String(), "")
lfsClient := lfs.NewClient(endpoint, nil)
if err := pushAllLFSObjects(ctx, gitRepo, lfsClient); err != nil {
return util.NewURLSanitizedError(err, remoteAddr, true)
return util.SanitizeErrorCredentialURLs(err)
}
}
@ -161,7 +161,7 @@ func runPushSync(ctx context.Context, m *repo_model.PushMirror) error {
}); err != nil {
log.Error("Error pushing %s mirror[%d] remote %s: %v", path, m.ID, m.RemoteName, err)
return util.NewURLSanitizedError(err, remoteAddr, true)
return util.SanitizeErrorCredentialURLs(err)
}
return nil

@ -129,7 +129,7 @@ func runMigrateTask(t *models.Task) (err error) {
}
// remoteAddr may contain credentials, so we sanitize it
err = util.NewStringURLSanitizedError(err, opts.CloneAddr, true)
err = util.SanitizeErrorCredentialURLs(err)
if strings.Contains(err.Error(), "Authentication failed") ||
strings.Contains(err.Error(), "could not read Username") {
return fmt.Errorf("Authentication failed: %v", err.Error())

@ -77,7 +77,7 @@ func CreateMigrateTask(doer, u *user_model.User, opts base.MigrateOptions) (*mod
if err != nil {
return nil, err
}
opts.CloneAddr = util.NewStringURLSanitizer(opts.CloneAddr, true).Replace(opts.CloneAddr)
opts.CloneAddr = util.SanitizeCredentialURLs(opts.CloneAddr)
opts.AuthPasswordEncrypted, err = secret.EncryptSecret(setting.SecretKey, opts.AuthPassword)
if err != nil {
return nil, err

Loading…
Cancel
Save