From 3234c4e8018be0fea97553c087f20c2c047d0a87 Mon Sep 17 00:00:00 2001 From: WeebDataHoarder Date: Sun, 27 Apr 2025 21:40:59 +0200 Subject: [PATCH] feature: Implement tag fetcher from backends with allow-listed entries to prevent unwanted keys to pass --- embed/templates/challenge-anubis.gohtml | 10 +- embed/templates/challenge-forgejo.gohtml | 24 ++--- examples/forgejo.yml | 8 ++ lib/challenge/data.go | 30 ++++++ lib/challenge/refresh/refresh.go | 7 +- lib/http.go | 112 ++++++++++++++++++++++- lib/state.go | 18 ++++ lib/template.go | 41 ++++++++- utils/decaymap.go | 6 +- utils/tagfetcher.go | 54 +++++++++++ 10 files changed, 280 insertions(+), 30 deletions(-) create mode 100644 utils/tagfetcher.go diff --git a/embed/templates/challenge-anubis.gohtml b/embed/templates/challenge-anubis.gohtml index c086e2a..ca03d94 100644 --- a/embed/templates/challenge-anubis.gohtml +++ b/embed/templates/challenge-anubis.gohtml @@ -5,15 +5,11 @@ - {{ range $key, $value := .Meta }} - {{ if eq $key "refresh"}} - - {{else}} - - {{end}} + {{ range .Meta }} + {{ end }} {{ range .HeaderTags }} - {{ . }} + {{ . }} {{ end }} diff --git a/embed/templates/challenge-forgejo.gohtml b/embed/templates/challenge-forgejo.gohtml index 0dafdbf..ffc6739 100644 --- a/embed/templates/challenge-forgejo.gohtml +++ b/embed/templates/challenge-forgejo.gohtml @@ -1,24 +1,16 @@ -{{$theme := "forgejo-auto"}} -{{ if .Theme }} - {{$theme = .Theme}} -{{ end }} +{{$theme := "forgejo-auto"}}{{ if .Theme }}{{$theme = .Theme}}{{ end }} {{ .Title }} - - {{ range $key, $value := .Meta }} - {{ if eq $key "refresh"}} - - {{else}} - - {{end}} + {{ range .Meta }} + {{ end }} {{ range .HeaderTags }} - {{ . }} + {{ . }} {{ end }} @@ -80,9 +72,11 @@ {{end}} - + {{if .EndTags }} + + {{end}}

{{ .Strings.Get "details_contact_admin_with_request_id" }}: {{ .Id }}

diff --git a/examples/forgejo.yml b/examples/forgejo.yml
index 5a9dec7..3e430c6 100644
--- a/examples/forgejo.yml
+++ b/examples/forgejo.yml
@@ -287,6 +287,14 @@ rules:
     conditions:
       - '!(method == "HEAD" || method == "GET")'
 
+  # Enable fetching OpenGraph and other tags from backend on these paths
+  - name: enable-meta-tags
+    action: context
+    settings:
+      context-set:
+        proxy-meta-tags: "true"
+
+
   - name: plaintext-browser
     action: challenge
     settings:
diff --git a/lib/challenge/data.go b/lib/challenge/data.go
index b4c1841..82749a8 100644
--- a/lib/challenge/data.go
+++ b/lib/challenge/data.go
@@ -144,6 +144,13 @@ func (d *RequestData) NetworkPrefix() netip.Addr {
 	}
 }
 
+// Names of the per-request options read through GetOpt and GetOptBool, e.g.
+// as set by a rule's context-set setting.
+const (
+	RequestOptBackendHost   = "backend-host"
+	RequestOptCacheMetaTags = "proxy-meta-tags"
+)
+
 func (d *RequestData) SetOpt(n, v string) {
 	d.opts[n] = v
 }
@@ -156,6 +163,38 @@ func (d *RequestData) GetOpt(n, def string) string {
 	return v
 }
 
+// GetOptBool returns the request option n interpreted as a boolean. It
+// returns def when the option is unset or when its value is not one of the
+// true/false spellings listed in the switch below.
+func (d *RequestData) GetOptBool(n string, def bool) bool {
+	v, ok := d.opts[n]
+	if !ok {
+		return def
+	}
+	switch v {
+	case "true", "t", "1", "yes", "yep", "y", "ok":
+		return true
+	case "false", "f", "0", "no", "nope", "n", "err":
+		return false
+	default:
+		return def
+	}
+}
+
+// BackendHost returns the backend handler serving this request together with
+// the host it was looked up by. The host is the request's Host unless the
+// backend-host option names a different, non-empty host, which then wins.
+// The handler comes straight from State.GetBackend; presumably nil if unknown.
+func (d *RequestData) BackendHost() (http.Handler, string) {
+	host := d.r.Host
+
+	if opt := d.GetOpt(RequestOptBackendHost, ""); opt != "" && opt != host {
+		host = opt
+	}
+
+	return d.State.GetBackend(host), host
+}
+
 func (d *RequestData) EvaluateChallenges(w http.ResponseWriter, r *http.Request) {
 	q := r.URL.Query()
 	var issuedChallenge string
diff --git a/lib/challenge/refresh/refresh.go b/lib/challenge/refresh/refresh.go
index 2feca9a..a8ec6a5 100644
--- a/lib/challenge/refresh/refresh.go
+++ b/lib/challenge/refresh/refresh.go
@@ -47,8 +47,11 @@ func FillRegistration(state challenge.StateInterface, reg *challenge.Registratio
 
 		if params.Mode == "meta" {
 			state.ChallengePage(w, r, state.Settings().ChallengeResponseCode, reg, map[string]any{
-				"Meta": map[string]string{
-					"refresh": "0; url=" + uri.String(),
+				"Meta": []map[string]string{
+					{
+						"http-equiv": "refresh",
+						"content":    "0; url=" + uri.String(),
+					},
 				},
 			})
 		} else {
diff --git a/lib/http.go b/lib/http.go
index eace837..9bfaff4 100644
--- a/lib/http.go
+++ b/lib/http.go
@@ -8,9 +8,12 @@ import (
 	"git.gammaspectra.live/git/go-away/lib/challenge"
 	"git.gammaspectra.live/git/go-away/lib/policy"
 	"git.gammaspectra.live/git/go-away/utils"
+	"golang.org/x/net/html"
 	"log/slog"
 	"net/http"
+	"slices"
 	"strings"
+	"time"
 )
 
 func GetLoggerForRequest(r *http.Request) *slog.Logger {
@@ -35,6 +38,113 @@ func GetLoggerForRequest(r *http.Request) *slog.Logger {
 	return slog.With(args...)
 }
 
+// fetchMetaTags returns the allow-listed <meta> elements that backend serves
+// for the URL of r, after stripping query arguments that start with
+// challenge.QueryArgPrefix. Results are cached in state.tagCache for six hours
+// under a key built from host and the cleaned URL.
+//
+// A tag is kept only when its name (or property) is on the allow-list below,
+// and only its name, property and content attributes are copied, so CSRF
+// tokens and other internal values from the backend are not passed through.
+// It returns nil when backend is nil or utils.FetchTags yields nothing.
+func (state *State) fetchMetaTags(host string, backend http.Handler, r *http.Request) []html.Node {
+	if backend == nil {
+		// no backend to query for this host
+		return nil
+	}
+
+	uri := *r.URL
+	q := uri.Query()
+	for k := range q {
+		if strings.HasPrefix(k, challenge.QueryArgPrefix) {
+			q.Del(k)
+		}
+	}
+	uri.RawQuery = q.Encode()
+
+	key := fmt.Sprintf("%s:%s", host, uri.String())
+
+	if v, ok := state.tagCache.Get(key); ok {
+		return v
+	}
+
+	result := utils.FetchTags(backend, &uri, "meta")
+	if result == nil {
+		return nil
+	}
+
+	entries := make([]html.Node, 0, len(result))
+
+	safeAttributes := []string{"name", "property", "content"}
+	for _, n := range result {
+		if n.Namespace != "" {
+			continue
+		}
+
+		var name string
+		for _, attr := range n.Attr {
+			if attr.Namespace != "" {
+				continue
+			}
+			if attr.Key == "name" {
+				name = attr.Val
+				break
+			}
+			if attr.Key == "property" && name == "" {
+				name = attr.Val
+			}
+		}
+
+		// prevent unwanted keys like CSRF and other internal entries to pass through as much as possible
+
+		var keep bool
+		if strings.HasPrefix(name, "og:") || strings.HasPrefix(name, "fb:") || strings.HasPrefix(name, "twitter:") || strings.HasPrefix(name, "profile:") {
+			// social / OpenGraph tags
+			keep = true
+		} else if name == "vcs" || strings.HasPrefix(name, "vcs:") {
+			// source tags
+			keep = true
+		} else if name == "forge" || strings.HasPrefix(name, "forge:") {
+			// forge tags
+			keep = true
+		} else {
+			switch name {
+			// standard content tags
+			case "application-name", "author", "description", "keywords", "robots", "thumbnail":
+				keep = true
+			case "go-import", "go-source":
+				// golang tags
+				keep = true
+			case "apple-itunes-app":
+				// listed explicitly but not passed through: keep stays false
+			}
+		}
+
+		// prevent other arbitrary arguments
+		if keep {
+			newNode := html.Node{
+				Type: html.ElementNode,
+				Data: n.Data,
+			}
+			for _, attr := range n.Attr {
+				if attr.Namespace != "" {
+					continue
+				}
+				if slices.Contains(safeAttributes, attr.Key) {
+					newNode.Attr = append(newNode.Attr, attr)
+				}
+			}
+			if len(newNode.Attr) == 0 {
+				continue
+			}
+			entries = append(entries, newNode)
+		}
+	}
+
+	state.tagCache.Set(key, entries, time.Hour*6)
+	return entries
+}
+
 func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
 	host := r.Host
 
@@ -46,6 +156,22 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
+	// getBackend picks the handler that finally serves the request: the backend
+	// registered for the backend-host option when that option is set and differs
+	// from the request host, and the request host's own backend otherwise.
+	getBackend := func() http.Handler {
+		if opt := data.GetOpt(challenge.RequestOptBackendHost, ""); opt != "" && opt != host {
+			b := state.GetBackend(opt)
+			if b == nil {
+				http.Error(w, http.StatusText(http.StatusServiceUnavailable), http.StatusServiceUnavailable)
+				// return empty backend
+				return http.HandlerFunc(func(http.ResponseWriter, *http.Request) {})
+			}
+			return b
+		}
+		return backend
+	}
+
 	lg := state.Logger(r)
 
 	cleanupRequest := func(r *http.Request, fromChallenge bool) {
@@ -81,7 +207,7 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
 	for _, rule := range state.rules {
 		next, err := rule.Evaluate(lg, w, r, func() http.Handler {
 			cleanupRequest(r, true)
-			return backend
+			return getBackend()
 		})
 		if err != nil {
 			state.ErrorPage(w, r, http.StatusInternalServerError, err, "")
@@ -103,7 +229,7 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
 		r.Header.Set("X-Away-Action", "PASS")
 
 		cleanupRequest(r, false)
-		return backend
+		return getBackend()
 	})
 }
 
diff --git
a/lib/state.go b/lib/state.go
index 49d6c06..d688125 100644
--- a/lib/state.go
+++ b/lib/state.go
@@ -12,6 +12,7 @@ import (
 	"git.gammaspectra.live/git/go-away/utils"
 	"github.com/google/cel-go/cel"
 	"github.com/yl2chen/cidranger"
+	"golang.org/x/net/html"
 	"log/slog"
 	"net"
 	"net/http"
@@ -43,6 +44,9 @@ type State struct {
 
 	close chan struct{}
 
+	// tagCache holds the filtered <meta> tags fetched from backends, keyed by host and URL.
+	tagCache *utils.DecayMap[string, []html.Node]
+
 	Mux *http.ServeMux
 }
 
@@ -231,5 +235,22 @@ func NewState(p policy.Policy, opt settings.Settings, settings policy.StateSetti
 		return nil, err
 	}
 
+	state.tagCache = utils.NewDecayMap[string, []html.Node]()
+
+	// Run the tag cache's Decay pass every 37 minutes (presumably evicting
+	// expired entries) until a receive on state.close succeeds.
+	go func() {
+		ticker := time.NewTicker(time.Minute * 37)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-ticker.C:
+				state.tagCache.Decay()
+			case <-state.close:
+				return
+			}
+		}
+	}()
+
 	return state, nil
 }
diff --git a/lib/template.go b/lib/template.go
index c21d708..93243ed 100644
--- a/lib/template.go
+++ b/lib/template.go
@@ -36,7 +36,16 @@ func init() {
 }
 
 func initTemplate(name, data string) error {
-	tpl := template.New(name)
+	// attr and safe mark a string as trusted so html/template emits it without
+	// escaping; templates must only apply them to vetted values.
+	tpl := template.New(name).Funcs(template.FuncMap{
+		"attr": func(s string) template.HTMLAttr {
+			return template.HTMLAttr(s)
+		},
+		"safe": func(s string) template.HTML {
+			return template.HTML(s)
+		},
+	})
 	_, err := tpl.Parse(data)
 	if err != nil {
 		return err
@@ -68,6 +77,25 @@ func (state *State) ChallengePage(w http.ResponseWriter, r *http.Request, status
 		input["Title"] = state.Options().Strings.Get("title_challenge")
 	}
 
+	// When enabled for this request, append the backend's allow-listed <meta>
+	// tags to whatever Meta entries the caller already supplied.
+	if data.GetOptBool(challenge.RequestOptCacheMetaTags, false) {
+		backend, host := data.BackendHost()
+		if tags := state.fetchMetaTags(host, backend, r); len(tags) > 0 {
+			// a missing Meta entry leaves tagMap nil, which append handles
+			tagMap, _ := input["Meta"].([]map[string]string)
+
+			for _, tag := range tags {
+				tagAttrs := make(map[string]string, len(tag.Attr))
+				for _, v := range tag.Attr {
+					tagAttrs[v.Key] = v.Val
+				}
+				tagMap = append(tagMap, tagAttrs)
+			}
+			input["Meta"] = tagMap
+		}
+	}
+
 	w.Header().Set("Content-Type", "text/html; charset=utf-8")
 
 	buf := bytes.NewBuffer(make([]byte, 0, 8192))
@@ -103,6 +131,25 @@ func (state *State) ErrorPage(w http.ResponseWriter, r *http.Request, status int
 		input[k] = v
 	}
 
+	// When enabled for this request, append the backend's allow-listed <meta>
+	// tags to whatever Meta entries the caller already supplied.
+	if data.GetOptBool(challenge.RequestOptCacheMetaTags, false) {
+		backend, host := data.BackendHost()
+		if tags := state.fetchMetaTags(host, backend, r); len(tags) > 0 {
+			// a missing Meta entry leaves tagMap nil, which append handles
+			tagMap, _ := input["Meta"].([]map[string]string)
+
+			for _, tag := range tags {
+				tagAttrs := make(map[string]string, len(tag.Attr))
+				for _, v := range tag.Attr {
+					tagAttrs[v.Key] = v.Val
+				}
+				tagMap = append(tagMap, tagAttrs)
+			}
+			input["Meta"] = tagMap
+		}
+	}
+
 	err2 := templates["challenge-"+state.Options().ChallengeTemplate+".gohtml"].Execute(buf, input)
 	if err2 != nil {
 		// nested errors!
diff --git a/utils/decaymap.go b/utils/decaymap.go
index f712bdb..4f9b11f 100644
--- a/utils/decaymap.go
+++ b/utils/decaymap.go
@@ -10,17 +10,17 @@ func zilch[T any]() T {
 	return zero
 }
 
-type DecayMap[K, V comparable] struct {
+type DecayMap[K comparable, V any] struct {
 	data map[K]DecayMapEntry[V]
 	lock sync.RWMutex
 }
 
-type DecayMapEntry[V comparable] struct {
+type DecayMapEntry[V any] struct {
 	Value  V
 	expiry time.Time
 }
 
-func NewDecayMap[K, V comparable]() *DecayMap[K, V] {
+func NewDecayMap[K comparable, V any]() *DecayMap[K, V] {
 	return &DecayMap[K, V]{
 		data: make(map[K]DecayMapEntry[V]),
 	}
diff --git a/utils/tagfetcher.go b/utils/tagfetcher.go
new file mode 100644
index 0000000..6ee90bd
--- /dev/null
+++ b/utils/tagfetcher.go
@@ -0,0 +1,66 @@
+package utils
+
+import (
+	"golang.org/x/net/html"
+	"mime"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+)
+
+// FetchTags issues a synthetic in-process GET for uri against backend and
+// returns a copy of every element named kind (for example "meta") found in
+// the returned document. It returns nil when backend is nil, the response
+// status is not 200, the content type is neither text/html nor
+// application/xhtml+xml, or the body cannot be parsed as HTML.
+//
+// The returned nodes carry only type, tag name, namespace and attributes;
+// they are not linked to the parsed tree.
+func FetchTags(backend http.Handler, uri *url.URL, kind string) (result []html.Node) {
+	if backend == nil {
+		return nil
+	}
+
+	writer := httptest.NewRecorder()
+	backend.ServeHTTP(writer, &http.Request{
+		Method: http.MethodGet,
+		URL:    uri,
+		Header: http.Header{
+			"User-Agent": []string{"Mozilla 5.0 (compatible; go-away/1.0 fetch-tags) TwitterBot/1.0"},
+			"Accept":     []string{"text/html,application/xhtml+xml"},
+		},
+		Close: true,
+	})
+	response := writer.Result()
+	if response == nil {
+		return nil
+	}
+	defer response.Body.Close()
+	if response.StatusCode != http.StatusOK {
+		return nil
+	}
+
+	if contentType, _, _ := mime.ParseMediaType(response.Header.Get("Content-Type")); contentType != "text/html" && contentType != "application/xhtml+xml" {
+		return nil
+	}
+
+	//TODO: handle non UTF-8 documents
+	node, err := html.ParseWithOptions(response.Body, html.ParseOptionEnableScripting(false))
+	if err != nil {
+		return nil
+	}
+
+	for n := range node.Descendants() {
+		if n.Type == html.ElementNode && n.Data == kind {
+			result = append(result, html.Node{
+				Type:      n.Type,
+				DataAtom:  n.DataAtom,
+				Data:      n.Data,
+				Namespace: n.Namespace,
+				Attr:      n.Attr,
+			})
+		}
+	}
+
+	return result
+}