Merge pull request #5998 from thaJeztah/lazy_regexp

use lazyregexp to compile regexes on first use
This commit is contained in:
Sebastiaan van Stijn 2025-04-11 12:29:53 +02:00 committed by GitHub
commit e578f156c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 184 additions and 34 deletions

View File

@ -7,6 +7,7 @@ linters:
- dupword # Detects duplicate words.
- durationcheck
- errchkjson
- forbidigo
- gocritic # Metalinter; detects bugs, performance, and styling issues.
- gocyclo
- gofumpt # Detects whether code was gofumpt-ed.
@ -66,6 +67,11 @@ linters-settings:
desc: Use github.com/google/uuid instead.
- pkg: "io/ioutil"
desc: The io/ioutil package has been deprecated, see https://go.dev/doc/go1.16#ioutil
forbidigo:
forbid:
- pkg: ^regexp$
p: ^regexp\.MustCompile
msg: Use internal/lazyregexp.New instead.
gocyclo:
min-complexity: 16
gosec:

View File

@ -8,14 +8,14 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"github.com/docker/cli/cli-plugins/metadata"
"github.com/docker/cli/internal/lazyregexp"
"github.com/spf13/cobra"
)
var pluginNameRe = regexp.MustCompile("^[a-z][a-z0-9]*$")
var pluginNameRe = lazyregexp.New("^[a-z][a-z0-9]*$")
// Plugin represents a potential plugin with all it's metadata.
type Plugin struct {

View File

@ -8,12 +8,12 @@ import (
"path"
"path/filepath"
"reflect"
"regexp"
"strconv"
"strings"
"time"
"github.com/docker/cli/cli/compose/loader"
"github.com/docker/cli/internal/lazyregexp"
"github.com/docker/cli/opts"
"github.com/docker/docker/api/types/container"
mounttypes "github.com/docker/docker/api/types/mount"
@ -40,7 +40,7 @@ const (
seccompProfileUnconfined = "unconfined"
)
var deviceCgroupRuleRegexp = regexp.MustCompile(`^[acb] ([0-9]+|\*):([0-9]+|\*) [rwm]{1,3}$`)
var deviceCgroupRuleRegexp = lazyregexp.New(`^[acb] ([0-9]+|\*):([0-9]+|\*) [rwm]{1,3}$`)
// containerOptions is a data object with all the options for creating a container
type containerOptions struct {

View File

@ -10,7 +10,6 @@ import (
"io"
"os"
"path/filepath"
"regexp"
"runtime"
"strings"
@ -23,6 +22,7 @@ import (
"github.com/docker/cli/cli/internal/jsonstream"
"github.com/docker/cli/cli/streams"
"github.com/docker/cli/cli/trust"
"github.com/docker/cli/internal/lazyregexp"
"github.com/docker/cli/opts"
"github.com/docker/docker/api"
"github.com/docker/docker/api/types"
@ -432,7 +432,7 @@ func validateTag(rawRepo string) (string, error) {
return rawRepo, nil
}
var dockerfileFromLinePattern = regexp.MustCompile(`(?i)^[\s]*FROM[ \f\r\t\v]+(?P<image>[^ \f\r\t\v\n#]+)`)
var dockerfileFromLinePattern = lazyregexp.New(`(?i)^[\s]*FROM[ \f\r\t\v]+(?P<image>[^ \f\r\t\v\n#]+)`)
// resolvedTag records the repository, tag, and resolved digest reference
// from a Dockerfile rewrite.

View File

@ -8,7 +8,6 @@ import (
"errors"
"fmt"
"io"
"regexp"
"sort"
"strings"
@ -19,6 +18,7 @@ import (
"github.com/docker/cli/cli/command/formatter"
"github.com/docker/cli/cli/debug"
flagsHelper "github.com/docker/cli/cli/flags"
"github.com/docker/cli/internal/lazyregexp"
"github.com/docker/cli/templates"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/api/types/system"
@ -142,7 +142,7 @@ func addServerInfo(ctx context.Context, dockerCli command.Cli, format string, in
// placeHolders does a rudimentary match for possible placeholders in a
// template, matching a '.', followed by an letter (a-z/A-Z).
var placeHolders = regexp.MustCompile(`\.[a-zA-Z]`)
var placeHolders = lazyregexp.New(`\.[a-zA-Z]`)
// needsServerInfo detects if the given template uses any server information.
// If only client-side information is used in the template, we can skip

View File

@ -5,12 +5,12 @@ import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
"github.com/docker/cli/cli/trust"
"github.com/docker/cli/internal/lazyregexp"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"github.com/theupdateframework/notary"
@ -41,7 +41,7 @@ func newKeyGenerateCommand(dockerCli command.Streams) *cobra.Command {
}
// key names can use lowercase alphanumeric + _ + - characters
var validKeyName = regexp.MustCompile(`^[a-z0-9][a-z0-9\_\-]*$`).MatchString
var validKeyName = lazyregexp.New(`^[a-z0-9][a-z0-9\_\-]*$`).MatchString
// validate that all of the key names are unique and are alphanumeric + _ + -
// and that we do not already have public key files in the target dir on disk

View File

@ -6,13 +6,13 @@ import (
"io"
"os"
"path"
"regexp"
"strings"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
"github.com/docker/cli/cli/command/image"
"github.com/docker/cli/cli/trust"
"github.com/docker/cli/internal/lazyregexp"
"github.com/docker/cli/opts"
"github.com/pkg/errors"
"github.com/spf13/cobra"
@ -45,7 +45,7 @@ func newSignerAddCommand(dockerCLI command.Cli) *cobra.Command {
return cmd
}
var validSignerName = regexp.MustCompile(`^[a-z0-9][a-z0-9\_\-]*$`).MatchString
var validSignerName = lazyregexp.New(`^[a-z0-9][a-z0-9\_\-]*$`).MatchString
func addSigner(ctx context.Context, dockerCLI command.Cli, options signerAddOptions) error {
signerName := options.signer

View File

@ -7,6 +7,8 @@ import (
"fmt"
"regexp"
"strings"
"github.com/docker/cli/internal/lazyregexp"
)
const (
@ -14,11 +16,21 @@ const (
subst = "[_a-z][_a-z0-9]*(?::?[-?][^}]*)?"
)
var defaultPattern = regexp.MustCompile(fmt.Sprintf(
var defaultPattern = lazyregexp.New(fmt.Sprintf(
"%s(?i:(?P<escaped>%s)|(?P<named>%s)|{(?P<braced>%s)}|(?P<invalid>))",
delimiter, delimiter, subst, subst,
))
// regexper is an internal interface to allow passing a [lazyregexp.Regexp]
// in places where a custom ("regular") [regexp.Regexp] is accepted. It defines
// only the methods we currently use.
type regexper interface {
FindAllStringSubmatch(s string, n int) [][]string
FindStringSubmatch(s string) []string
ReplaceAllStringFunc(src string, repl func(string) string) string
SubexpNames() []string
}
// DefaultSubstituteFuncs contains the default SubstituteFunc used by the docker cli
var DefaultSubstituteFuncs = []SubstituteFunc{
softDefault,
@ -51,10 +63,16 @@ type SubstituteFunc func(string, Mapping) (string, bool, error)
// SubstituteWith substitutes variables in the string with their values.
// It accepts additional substitute function.
func SubstituteWith(template string, mapping Mapping, pattern *regexp.Regexp, subsFuncs ...SubstituteFunc) (string, error) {
return substituteWith(template, mapping, pattern, subsFuncs...)
}
// SubstituteWith substitutes variables in the string with their values.
// It accepts additional substitute function.
func substituteWith(template string, mapping Mapping, pattern regexper, subsFuncs ...SubstituteFunc) (string, error) {
var err error
result := pattern.ReplaceAllStringFunc(template, func(substring string) string {
matches := pattern.FindStringSubmatch(substring)
groups := matchGroups(matches, pattern)
groups := matchGroups(matches, defaultPattern)
if escaped := groups["escaped"]; escaped != "" {
return escaped
}
@ -93,38 +111,42 @@ func SubstituteWith(template string, mapping Mapping, pattern *regexp.Regexp, su
// Substitute variables in the string with their values
func Substitute(template string, mapping Mapping) (string, error) {
return SubstituteWith(template, mapping, defaultPattern, DefaultSubstituteFuncs...)
return substituteWith(template, mapping, defaultPattern, DefaultSubstituteFuncs...)
}
// ExtractVariables returns a map of all the variables defined in the specified
// composefile (dict representation) and their default value if any.
func ExtractVariables(configDict map[string]any, pattern *regexp.Regexp) map[string]string {
return extractVariables(configDict, pattern)
}
func extractVariables(configDict map[string]any, pattern regexper) map[string]string {
if pattern == nil {
pattern = defaultPattern
}
return recurseExtract(configDict, pattern)
}
func recurseExtract(value any, pattern *regexp.Regexp) map[string]string {
func recurseExtract(value any, pattern regexper) map[string]string {
m := map[string]string{}
switch value := value.(type) {
switch val := value.(type) {
case string:
if values, is := extractVariable(value, pattern); is {
if values, is := extractVariable(val, pattern); is {
for _, v := range values {
m[v.name] = v.value
}
}
case map[string]any:
for _, elem := range value {
for _, elem := range val {
submap := recurseExtract(elem, pattern)
for key, value := range submap {
m[key] = value
for k, v := range submap {
m[k] = v
}
}
case []any:
for _, elem := range value {
for _, elem := range val {
if values, is := extractVariable(elem, pattern); is {
for _, v := range values {
m[v.name] = v.value
@ -141,7 +163,7 @@ type extractedValue struct {
value string
}
func extractVariable(value any, pattern *regexp.Regexp) ([]extractedValue, bool) {
func extractVariable(value any, pattern regexper) ([]extractedValue, bool) {
sValue, ok := value.(string)
if !ok {
return []extractedValue{}, false
@ -227,7 +249,7 @@ func withRequired(substitution string, mapping Mapping, sep string, valid func(s
return value, true, nil
}
func matchGroups(matches []string, pattern *regexp.Regexp) map[string]string {
func matchGroups(matches []string, pattern regexper) map[string]string {
groups := make(map[string]string)
for i, name := range pattern.SubexpNames()[1:] {
groups[name] = matches[i+1]

View File

@ -169,15 +169,15 @@ func TestSubstituteWithCustomFunc(t *testing.T) {
return value, true, nil
}
result, err := SubstituteWith("ok ${FOO}", defaultMapping, defaultPattern, errIsMissing)
result, err := substituteWith("ok ${FOO}", defaultMapping, defaultPattern, errIsMissing)
assert.NilError(t, err)
assert.Check(t, is.Equal("ok first", result))
result, err = SubstituteWith("ok ${BAR}", defaultMapping, defaultPattern, errIsMissing)
result, err = substituteWith("ok ${BAR}", defaultMapping, defaultPattern, errIsMissing)
assert.NilError(t, err)
assert.Check(t, is.Equal("ok ", result))
_, err = SubstituteWith("ok ${NOTHERE}", defaultMapping, defaultPattern, errIsMissing)
_, err = substituteWith("ok ${NOTHERE}", defaultMapping, defaultPattern, errIsMissing)
assert.Check(t, is.ErrorContains(err, "required variable"))
}
@ -278,7 +278,7 @@ func TestExtractVariables(t *testing.T) {
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
actual := ExtractVariables(tc.dict, defaultPattern)
actual := extractVariables(tc.dict, defaultPattern)
assert.Check(t, is.DeepEqual(actual, tc.expected))
})
}

View File

@ -14,9 +14,9 @@ import (
"net/http"
"path"
"path/filepath"
"regexp"
"strings"
"github.com/docker/cli/internal/lazyregexp"
"github.com/docker/docker/errdefs"
"github.com/opencontainers/go-digest"
"github.com/pkg/errors"
@ -24,7 +24,7 @@ import (
const restrictedNamePattern = "^[a-zA-Z0-9][a-zA-Z0-9_.+-]+$"
var restrictedNameRegEx = regexp.MustCompile(restrictedNamePattern)
var restrictedNameRegEx = lazyregexp.New(restrictedNamePattern)
// Store provides a context store for easily remembering endpoints configuration
type Store interface {

View File

@ -42,7 +42,8 @@ func TestGlobalHelp(t *testing.T) {
`Invalid Plugins:`,
`\s+badmeta\s+invalid metadata: invalid character 'i' looking for beginning of object key string`,
} {
expected := regexp.MustCompile(`(?m)^` + s + `$`)
expected, err := regexp.Compile(`(?m)^` + s + `$`)
assert.NilError(t, err)
matches := expected.FindAllString(output, -1)
assert.Equal(t, len(matches), 1, "Did not find expected number of matches for %q in `docker help` output", expected)
}

View File

@ -0,0 +1,98 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code below was largely copied from golang.org/x/mod@v0.22;
// https://github.com/golang/mod/blob/v0.22.0/internal/lazyregexp/lazyre.go
// with some additional methods added.
// Package lazyregexp is a thin wrapper over regexp, allowing the use of global
// regexp variables without forcing them to be compiled at init.
package lazyregexp
import (
"os"
"regexp"
"strings"
"sync"
)
// Regexp is a wrapper around [regexp.Regexp], where the underlying regexp will be
// compiled the first time it is needed.
type Regexp struct {
str string
once sync.Once
rx *regexp.Regexp
}
func (r *Regexp) re() *regexp.Regexp {
r.once.Do(r.build)
return r.rx
}
func (r *Regexp) build() {
r.rx = regexp.MustCompile(r.str)
r.str = ""
}
func (r *Regexp) FindSubmatch(s []byte) [][]byte {
return r.re().FindSubmatch(s)
}
func (r *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
return r.re().FindAllStringSubmatch(s, n)
}
func (r *Regexp) FindStringSubmatch(s string) []string {
return r.re().FindStringSubmatch(s)
}
func (r *Regexp) FindStringSubmatchIndex(s string) []int {
return r.re().FindStringSubmatchIndex(s)
}
func (r *Regexp) ReplaceAllString(src, repl string) string {
return r.re().ReplaceAllString(src, repl)
}
func (r *Regexp) FindString(s string) string {
return r.re().FindString(s)
}
func (r *Regexp) FindAllString(s string, n int) []string {
return r.re().FindAllString(s, n)
}
func (r *Regexp) MatchString(s string) bool {
return r.re().MatchString(s)
}
func (r *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
return r.re().ReplaceAllStringFunc(src, repl)
}
func (r *Regexp) ReplaceAllLiteralString(src, repl string) string {
return r.re().ReplaceAllLiteralString(src, repl)
}
func (r *Regexp) String() string {
return r.re().String()
}
func (r *Regexp) SubexpNames() []string {
return r.re().SubexpNames()
}
var inTest = len(os.Args) > 0 && strings.HasSuffix(strings.TrimSuffix(os.Args[0], ".exe"), ".test")
// New creates a new lazy regexp, delaying the compiling work until it is first
// needed. If the code is being run as part of tests, the regexp compiling will
// happen immediately.
func New(str string) *Regexp {
lr := &Regexp{str: str}
if inTest {
// In tests, always compile the regexps early.
lr.re()
}
return lr
}

View File

@ -0,0 +1,23 @@
package lazyregexp
import (
"testing"
)
func TestCompileOnce(t *testing.T) {
t.Run("invalid regexp", func(t *testing.T) {
defer func() {
if r := recover(); r == nil {
t.Errorf("expected a panic")
}
}()
_ = New("[")
})
t.Run("valid regexp", func(t *testing.T) {
re := New("[a-z]")
ok := re.MatchString("hello")
if !ok {
t.Errorf("expected a match")
}
})
}

View File

@ -6,16 +6,16 @@ import (
"math/big"
"net"
"path"
"regexp"
"strings"
"github.com/docker/cli/internal/lazyregexp"
"github.com/docker/docker/api/types/filters"
"github.com/docker/go-units"
)
var (
alphaRegexp = regexp.MustCompile(`[a-zA-Z]`)
domainRegexp = regexp.MustCompile(`^(:?(:?[a-zA-Z0-9]|(:?[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9]))(:?\.(:?[a-zA-Z0-9]|(:?[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])))*)\.?\s*$`)
alphaRegexp = lazyregexp.New(`[a-zA-Z]`)
domainRegexp = lazyregexp.New(`^(:?(:?[a-zA-Z0-9]|(:?[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9]))(:?\.(:?[a-zA-Z0-9]|(:?[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])))*)\.?\s*$`)
)
// ListOpts holds a list of values and a validation function.