feature: Implement <meta> tag fetcher from backends, with allow-listed entries to prevent unwanted keys from passing through

This commit is contained in:
WeebDataHoarder
2025-04-27 21:40:59 +02:00
parent 957303bbca
commit 3234c4e801
10 changed files with 280 additions and 30 deletions

View File

@@ -5,12 +5,8 @@
<link rel="stylesheet" href="{{ .Path }}/assets/static/anubis/style.css?cacheBust={{ .Random }}"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<meta name="referrer" content="origin"/>
{{ range $key, $value := .Meta }}
{{ if eq $key "refresh"}}
<meta http-equiv="{{ $key }}" content="{{ $value }}"/>
{{else}}
<meta name="{{ $key }}" content="{{ $value }}"/>
{{end}}
{{ range .Meta }}
<meta {{ range $key, $value := . }}{{ $key | attr }}="{{ $value }}" {{end}}/>
{{ end }}
{{ range .HeaderTags }}
{{ . }}

View File

@@ -1,21 +1,13 @@
<!DOCTYPE html>
{{$theme := "forgejo-auto"}}
{{ if .Theme }}
{{$theme = .Theme}}
{{ end }}
{{$theme := "forgejo-auto"}}{{ if .Theme }}{{$theme = .Theme}}{{ end }}
<html lang="en-US" data-theme="{{ $theme }}">
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{{ .Title }}</title>
<meta name="referrer" content="origin">
{{ range $key, $value := .Meta }}
{{ if eq $key "refresh"}}
<meta http-equiv="{{ $key }}" content="{{ $value }}"/>
{{else}}
<meta name="{{ $key }}" content="{{ $value }}"/>
{{end}}
{{ range .Meta }}
<meta {{ range $key, $value := . }}{{ $key | attr }}="{{ $value }}" {{end}}/>
{{ end }}
{{ range .HeaderTags }}
{{ . }}
@@ -80,9 +72,11 @@
</div>
{{end}}
{{if .EndTags }}
<noscript>
{{ .Strings.Get "noscript" }}
{{ .Strings.Get "noscript_warning" }}
</noscript>
{{end}}
<p><small>{{ .Strings.Get "details_contact_admin_with_request_id" }}: <em>{{ .Id }}</em></small></p>
</div>

View File

@@ -287,6 +287,14 @@ rules:
conditions:
- '!(method == "HEAD" || method == "GET")'
# Enable fetching OpenGraph and other tags from backend on these paths
- name: enable-meta-tags
action: context
settings:
context-set:
proxy-meta-tags: "true"
- name: plaintext-browser
action: challenge
settings:

View File

@@ -144,6 +144,11 @@ func (d *RequestData) NetworkPrefix() netip.Addr {
}
}
// Names of per-request options stored with SetOpt and read back with GetOpt / GetOptBool.
const (
// RequestOptBackendHost is the option consulted by BackendHost (and by the request
// handler) when deciding which host's backend should serve the request.
RequestOptBackendHost = "backend-host"
// RequestOptCacheMetaTags is the boolean option ("proxy-meta-tags") that, when true,
// makes challenge and error pages include <meta> tags fetched from the backend.
RequestOptCacheMetaTags = "proxy-meta-tags"
)
// SetOpt stores the string value v under option name n for this request,
// replacing any value previously stored under the same name.
func (d *RequestData) SetOpt(n, v string) {
d.opts[n] = v
}
@@ -156,6 +161,31 @@ func (d *RequestData) GetOpt(n, def string) string {
return v
}
// GetOptBool interprets option n as a boolean.
//
// The stored string is matched exactly (case-sensitive) against a fixed set of
// truthy and falsy spellings. def is returned when the option is not set or when
// its value matches neither set.
func (d *RequestData) GetOptBool(n string, def bool) bool {
	// def doubles as the result: it is only overwritten on a recognised spelling.
	if raw, found := d.opts[n]; found {
		switch raw {
		case "true", "t", "1", "yes", "yep", "y", "ok":
			def = true
		case "false", "f", "0", "no", "nope", "n", "err":
			def = false
		}
	}
	return def
}
// BackendHost returns the backend handler selected for this request together
// with the host name it was looked up under.
//
// The host defaults to the request's Host. When the RequestOptBackendHost
// option is set to a non-empty value, that value is used instead. The returned
// handler is whatever d.State.GetBackend yields for the host.
// NOTE(review): the request handler nil-checks GetBackend's result, so callers
// here should presumably expect a nil handler for unknown hosts as well.
func (d *RequestData) BackendHost() (http.Handler, string) {
	host := d.r.Host

	// Apply the override; previously the request host was re-assigned here,
	// which silently ignored the option.
	if opt := d.GetOpt(RequestOptBackendHost, ""); opt != "" {
		host = opt
	}

	return d.State.GetBackend(host), host
}
func (d *RequestData) EvaluateChallenges(w http.ResponseWriter, r *http.Request) {
q := r.URL.Query()
var issuedChallenge string

View File

@@ -47,8 +47,11 @@ func FillRegistration(state challenge.StateInterface, reg *challenge.Registratio
if params.Mode == "meta" {
state.ChallengePage(w, r, state.Settings().ChallengeResponseCode, reg, map[string]any{
"Meta": map[string]string{
"refresh": "0; url=" + uri.String(),
"Meta": []map[string]string{
{
"http-equiv": "refresh",
"content": "0; url=" + uri.String(),
},
},
})
} else {

View File

@@ -8,9 +8,12 @@ import (
"git.gammaspectra.live/git/go-away/lib/challenge"
"git.gammaspectra.live/git/go-away/lib/policy"
"git.gammaspectra.live/git/go-away/utils"
"golang.org/x/net/html"
"log/slog"
"net/http"
"slices"
"strings"
"time"
)
func GetLoggerForRequest(r *http.Request) *slog.Logger {
@@ -35,6 +38,98 @@ func GetLoggerForRequest(r *http.Request) *slog.Logger {
return slog.With(args...)
}
// fetchMetaTags returns the allow-listed <meta> elements that backend serves
// for the URL of r, so they can be mirrored on challenge and error pages.
//
// host is only used to namespace the cache key; backend is the handler that is
// queried. Query arguments whose name starts with challenge.QueryArgPrefix are
// stripped before the URL is used as cache key and sent to the backend.
//
// A completed fetch (even one that yields no allowed tags) is cached for six
// hours. A failed fetch, or a nil backend, returns nil and is not cached.
//
// Only non-namespaced elements whose name (or, lacking one, property) passes
// metaTagNameAllowed are kept, and only their name, property and content
// attributes are copied onto the returned nodes.
func (state *State) fetchMetaTags(host string, backend http.Handler, r *http.Request) []html.Node {
	uri := *r.URL
	q := uri.Query()
	for k := range q {
		if strings.HasPrefix(k, challenge.QueryArgPrefix) {
			q.Del(k)
		}
	}
	uri.RawQuery = q.Encode()

	key := fmt.Sprintf("%s:%s", host, uri.String())

	if v, ok := state.tagCache.Get(key); ok {
		return v
	}

	// Callers pass the backend lookup result unchecked; without a backend there
	// is nothing to fetch.
	if backend == nil {
		return nil
	}

	result := utils.FetchTags(backend, &uri, "meta")
	if result == nil {
		return nil
	}

	entries := make([]html.Node, 0, len(result))

	safeAttributes := []string{"name", "property", "content"}

	for _, n := range result {
		if n.Namespace != "" {
			continue
		}

		// "name" wins over "property"; "property" is only a fallback.
		var name string
		for _, attr := range n.Attr {
			if attr.Namespace != "" {
				continue
			}
			if attr.Key == "name" {
				name = attr.Val
				break
			}
			if attr.Key == "property" && name == "" {
				name = attr.Val
			}
		}

		// prevent unwanted keys like CSRF and other internal entries from passing through as much as possible
		if !metaTagNameAllowed(name) {
			continue
		}

		// prevent other arbitrary attributes
		newNode := html.Node{
			Type: html.ElementNode,
			Data: n.Data,
		}
		for _, attr := range n.Attr {
			if attr.Namespace != "" {
				continue
			}
			if slices.Contains(safeAttributes, attr.Key) {
				newNode.Attr = append(newNode.Attr, attr)
			}
		}
		if len(newNode.Attr) == 0 {
			continue
		}
		entries = append(entries, newNode)
	}

	state.tagCache.Set(key, entries, time.Hour*6)
	return entries
}

// metaTagNameAllowed reports whether a <meta> tag identified by name (its
// "name" or "property" attribute value) may be mirrored from the backend.
// An empty name is never allowed.
func metaTagNameAllowed(name string) bool {
	switch {
	case strings.HasPrefix(name, "og:"),
		strings.HasPrefix(name, "fb:"),
		strings.HasPrefix(name, "twitter:"),
		strings.HasPrefix(name, "profile:"):
		// social / OpenGraph tags
		return true
	case name == "vcs", strings.HasPrefix(name, "vcs:"):
		// source tags
		return true
	case name == "forge", strings.HasPrefix(name, "forge:"):
		// forge tags
		return true
	}

	switch name {
	case "application-name", "author", "description", "keywords", "robots", "thumbnail":
		// standard content tags
		return true
	case "go-import", "go-source":
		// golang tags
		return true
	case "apple-itunes-app":
		// NOTE(review): listed in the original switch with an empty body, so it
		// was never allowed. Behavior kept; confirm whether it should be.
		return false
	}
	return false
}
func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
host := r.Host
@@ -46,6 +141,19 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
return
}
// getBackend resolves the handler that finally serves the request. When the
// RequestOptBackendHost option names a host different from the request host,
// the backend registered for that host is used; otherwise the backend already
// selected for the request host is returned.
getBackend := func() http.Handler {
	if opt := data.GetOpt(challenge.RequestOptBackendHost, ""); opt != "" && opt != host {
		// Look up the overriding host; using the request host here would make
		// the option a no-op.
		b := state.GetBackend(opt)
		if b == nil {
			http.Error(w, http.StatusText(http.StatusServiceUnavailable), http.StatusServiceUnavailable)
			// return empty backend: the error response has already been written
			return http.HandlerFunc(func(http.ResponseWriter, *http.Request) {})
		}
		return b
	}
	return backend
}
lg := state.Logger(r)
cleanupRequest := func(r *http.Request, fromChallenge bool) {
@@ -81,7 +189,7 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
for _, rule := range state.rules {
next, err := rule.Evaluate(lg, w, r, func() http.Handler {
cleanupRequest(r, true)
return backend
return getBackend()
})
if err != nil {
state.ErrorPage(w, r, http.StatusInternalServerError, err, "")
@@ -103,7 +211,7 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
r.Header.Set("X-Away-Action", "PASS")
cleanupRequest(r, false)
return backend
return getBackend()
})
}

View File

@@ -12,6 +12,7 @@ import (
"git.gammaspectra.live/git/go-away/utils"
"github.com/google/cel-go/cel"
"github.com/yl2chen/cidranger"
"golang.org/x/net/html"
"log/slog"
"net"
"net/http"
@@ -43,6 +44,8 @@ type State struct {
close chan struct{}
tagCache *utils.DecayMap[string, []html.Node]
Mux *http.ServeMux
}
@@ -231,5 +234,20 @@ func NewState(p policy.Policy, opt settings.Settings, settings policy.StateSetti
return nil, err
}
state.tagCache = utils.NewDecayMap[string, []html.Node]()
go func() {
ticker := time.NewTicker(time.Minute * 37)
defer ticker.Stop()
for {
select {
case <-ticker.C:
state.tagCache.Decay()
case <-state.close:
return
}
}
}()
return state, nil
}

View File

@@ -36,7 +36,14 @@ func init() {
}
func initTemplate(name, data string) error {
tpl := template.New(name)
tpl := template.New(name).Funcs(template.FuncMap{
"attr": func(s string) template.HTMLAttr {
return template.HTMLAttr(s)
},
"safe": func(s string) template.HTML {
return template.HTML(s)
},
})
_, err := tpl.Parse(data)
if err != nil {
return err
@@ -68,6 +75,22 @@ func (state *State) ChallengePage(w http.ResponseWriter, r *http.Request, status
input["Title"] = state.Options().Strings.Get("title_challenge")
}
if data.GetOptBool(challenge.RequestOptCacheMetaTags, false) {
backend, host := data.BackendHost()
if tags := state.fetchMetaTags(host, backend, r); len(tags) > 0 {
tagMap, _ := input["Meta"].([]map[string]string)
for _, tag := range tags {
tagAttrs := make(map[string]string, len(tag.Attr))
for _, v := range tag.Attr {
tagAttrs[v.Key] = v.Val
}
tagMap = append(tagMap, tagAttrs)
}
input["Meta"] = tagMap
}
}
w.Header().Set("Content-Type", "text/html; charset=utf-8")
buf := bytes.NewBuffer(make([]byte, 0, 8192))
@@ -103,6 +126,22 @@ func (state *State) ErrorPage(w http.ResponseWriter, r *http.Request, status int
input[k] = v
}
if data.GetOptBool(challenge.RequestOptCacheMetaTags, false) {
backend, host := data.BackendHost()
if tags := state.fetchMetaTags(host, backend, r); len(tags) > 0 {
tagMap, _ := input["Meta"].([]map[string]string)
for _, tag := range tags {
tagAttrs := make(map[string]string, len(tag.Attr))
for _, v := range tag.Attr {
tagAttrs[v.Key] = v.Val
}
tagMap = append(tagMap, tagAttrs)
}
input["Meta"] = tagMap
}
}
err2 := templates["challenge-"+state.Options().ChallengeTemplate+".gohtml"].Execute(buf, input)
if err2 != nil {
// nested errors!

View File

@@ -10,17 +10,17 @@ func zilch[T any]() T {
return zero
}
type DecayMap[K, V comparable] struct {
type DecayMap[K comparable, V any] struct {
data map[K]DecayMapEntry[V]
lock sync.RWMutex
}
type DecayMapEntry[V comparable] struct {
type DecayMapEntry[V any] struct {
Value V
expiry time.Time
}
func NewDecayMap[K, V comparable]() *DecayMap[K, V] {
func NewDecayMap[K comparable, V any]() *DecayMap[K, V] {
return &DecayMap[K, V]{
data: make(map[K]DecayMapEntry[V]),
}

54
utils/tagfetcher.go Normal file
View File

@@ -0,0 +1,54 @@
package utils
import (
"golang.org/x/net/html"
"mime"
"net/http"
"net/http/httptest"
"net/url"
)
// FetchTags issues a synthetic GET for uri directly against backend (via
// ServeHTTP and a response recorder) and returns a shallow copy of every
// element named kind found in the HTML response.
//
// nil is returned when backend or uri is nil, when the response status is not
// 200 OK, when the Content-Type is neither text/html nor application/xhtml+xml,
// or when the body cannot be parsed.
//
// Returned nodes carry only Type, DataAtom, Data, Namespace and Attr; they are
// not linked to the parsed document tree.
func FetchTags(backend http.Handler, uri *url.URL, kind string) (result []html.Node) {
	// A missing backend or URL cannot be served; bail out instead of panicking.
	if backend == nil || uri == nil {
		return nil
	}

	recorder := httptest.NewRecorder()
	backend.ServeHTTP(recorder, &http.Request{
		Method: http.MethodGet,
		URL:    uri,
		Header: http.Header{
			"User-Agent": []string{"Mozilla 5.0 (compatible; go-away/1.0 fetch-tags) TwitterBot/1.0"},
			"Accept":     []string{"text/html,application/xhtml+xml"},
		},
		Close: true,
	})

	response := recorder.Result()
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return nil
	}

	// Parse errors are ignored on purpose: an unparsable header yields an empty
	// media type, which is rejected by the comparison below.
	contentType, _, _ := mime.ParseMediaType(response.Header.Get("Content-Type"))
	if contentType != "text/html" && contentType != "application/xhtml+xml" {
		return nil
	}

	//TODO: handle non UTF-8 documents
	node, err := html.ParseWithOptions(response.Body, html.ParseOptionEnableScripting(false))
	if err != nil {
		return nil
	}

	for n := range node.Descendants() {
		if n.Type != html.ElementNode || n.Data != kind {
			continue
		}
		// Copy only the descriptive fields so the result does not keep the
		// whole parsed tree reachable through parent/sibling pointers.
		result = append(result, html.Node{
			Type:      n.Type,
			DataAtom:  n.DataAtom,
			Data:      n.Data,
			Namespace: n.Namespace,
			Attr:      n.Attr,
		})
	}

	return result
}