feature: Implement <meta> tag fetcher from backends with allow-listed entries to prevent unwanted keys to pass

This commit is contained in:
WeebDataHoarder
2025-04-27 21:40:59 +02:00
parent 957303bbca
commit 3234c4e801
10 changed files with 280 additions and 30 deletions

View File

@@ -10,17 +10,17 @@ func zilch[T any]() T {
return zero
}
type DecayMap[K, V comparable] struct {
type DecayMap[K comparable, V any] struct {
data map[K]DecayMapEntry[V]
lock sync.RWMutex
}
type DecayMapEntry[V comparable] struct {
type DecayMapEntry[V any] struct {
Value V
expiry time.Time
}
func NewDecayMap[K, V comparable]() *DecayMap[K, V] {
func NewDecayMap[K comparable, V any]() *DecayMap[K, V] {
return &DecayMap[K, V]{
data: make(map[K]DecayMapEntry[V]),
}

54
utils/tagfetcher.go Normal file
View File

@@ -0,0 +1,54 @@
package utils
import (
"golang.org/x/net/html"
"mime"
"net/http"
"net/http/httptest"
"net/url"
)
func FetchTags(backend http.Handler, uri *url.URL, kind string) (result []html.Node) {
writer := httptest.NewRecorder()
backend.ServeHTTP(writer, &http.Request{
Method: http.MethodGet,
URL: uri,
Header: http.Header{
"User-Agent": []string{"Mozilla 5.0 (compatible; go-away/1.0 fetch-tags) TwitterBot/1.0"},
"Accept": []string{"text/html,application/xhtml+xml"},
},
Close: true,
})
response := writer.Result()
if response == nil {
return nil
}
defer response.Body.Close()
if response.StatusCode != http.StatusOK {
return nil
}
if contentType, _, _ := mime.ParseMediaType(response.Header.Get("Content-Type")); contentType != "text/html" && contentType != "application/xhtml+xml" {
return nil
}
//TODO: handle non UTF-8 documents
node, err := html.ParseWithOptions(response.Body, html.ParseOptionEnableScripting(false))
if err != nil {
return nil
}
for n := range node.Descendants() {
if n.Type == html.ElementNode && n.Data == kind {
result = append(result, html.Node{
Type: n.Type,
DataAtom: n.DataAtom,
Data: n.Data,
Namespace: n.Namespace,
Attr: n.Attr,
})
}
}
return result
}