Implement nested rules and check vs challenge, list policies

This commit is contained in:
WeebDataHoarder
2025-03-31 21:25:07 +02:00
parent 06bc5107d6
commit c0726c2ffb
4 changed files with 162 additions and 64 deletions

13
http.go
View File

@@ -84,6 +84,7 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
}
for _, rule := range state.Rules {
nextRule:
if out, _, err := rule.Program.Eval(env); err != nil {
//TODO error
panic(err)
@@ -93,10 +94,9 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
default:
panic(fmt.Errorf("unknown action %s", rule.Action))
case PolicyRuleActionPASS:
//fallback, proxy!
state.Backend.ServeHTTP(w, r)
return
case PolicyRuleActionCHALLENGE:
case PolicyRuleActionCHALLENGE, PolicyRuleActionCHECK:
expiry := time.Now().UTC().Add(DefaultValidity).Round(DefaultValidity)
for _, challengeName := range rule.Challenges {
@@ -107,6 +107,9 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
ClearCookie(CookiePrefix+challengeName, w)
}
} else {
if rule.Action == PolicyRuleActionCHECK {
goto nextRule
}
// we passed the challenge!
//TODO log?
state.Backend.ServeHTTP(w, r)
@@ -125,6 +128,9 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
case ChallengeResultContinue:
continue
case ChallengeResultPass:
if rule.Action == PolicyRuleActionCHECK {
goto nextRule
}
// we pass the challenge early!
state.Backend.ServeHTTP(w, r)
return
@@ -145,6 +151,9 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
}
}
}
state.Backend.ServeHTTP(w, r)
return
}
func (state *State) setupRoutes() error {

View File

@@ -40,8 +40,7 @@ func parseCIDROrIP(value string) (net.IPNet, error) {
}
type Policy struct {
// UserAgents map of a list of user-agent regex
UserAgents map[string][]string `yaml:"user-agents"`
// Networks map of networks and prefixes to be loaded
Networks map[string][]PolicyNetwork `yaml:"networks"`
@@ -59,6 +58,7 @@ const (
PolicyRuleActionDENY PolicyRuleAction = "DENY"
PolicyRuleActionBLOCK PolicyRuleAction = "BLOCK"
PolicyRuleActionCHALLENGE PolicyRuleAction = "CHALLENGE"
PolicyRuleActionCHECK PolicyRuleAction = "CHECK"
)
type PolicyRule struct {

View File

@@ -1,22 +1,3 @@
# Define groups of useragents to use later below for matching
user-agents:
default-browser:
- "^Mozilla/"
- "^Opera/"
bad-crawlers:
- "Amazonbot"
headless-browser:
- "HeadlessChrome"
- "HeadlessChromium"
- "^Lightpanda/"
- "^$"
rss:
- "FeedFetcher-Google"
git:
- "^git/"
- "^go-git/"
- "^JGit[/-]"
- "^GoModuleMirror/"
# Define networks to be used later below
networks:
@@ -70,23 +51,6 @@ networks:
regex: "(?P<prefix>[0-9a-f:]+::/[0-9]+)"
conditions:
# Checks to detect a headless chromium via headers only
is-headless-chromium:
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
- 'headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium")'
- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (headers["Accept-Language"] == "" || headers["Accept-Encoding"] == "")'
is-static-asset:
- 'path == "/robots.txt"'
- 'path == "/favicon.ico"'
- 'path == "/apple-touch-icon.png"'
- 'path == "/apple-touch-icon-precomposed.png"'
- 'path.startsWith("/assets/")'
- 'path.startsWith("/repo-avatars/")'
- 'path.startsWith("/avatars/")'
- 'path.startsWith("/avatar/")'
# todo: define interface
challenges:
js-pow-sha256:
@@ -95,7 +59,7 @@ challenges:
mode: js
asset: load.mjs
parameters:
difficulty: 4
difficulty: 5
runtime:
mode: wasm
# Verify must be under challenges/{name}/runtime/{asset}
@@ -131,15 +95,139 @@ challenges:
http-method: GET
http-code: 200
conditions:
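# Named conditions: the entries of each condition are OR-ed together, and the
# resulting expression is substituted into any rule condition that references it as ($condition-name)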
# Checks to detect a headless chromium via headers only
is-headless-chromium:
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
- '"Sec-Ch-Ua" in headers && (headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium"))'
- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (!("Accept-Language" in headers) || !("Accept-Encoding" in headers))'
is-static-asset:
- 'path == "/robots.txt"'
- 'path == "/favicon.ico"'
- 'path == "/apple-touch-icon.png"'
- 'path == "/apple-touch-icon-precomposed.png"'
- 'path.startsWith("/assets/")'
- 'path.startsWith("/repo-avatars/")'
- 'path.startsWith("/avatars/")'
- 'path.startsWith("/avatar/")'
is-git-ua:
- 'userAgent.startsWith("git/")'
- 'userAgent.startsWith("go-git")'
- 'userAgent.startsWith("JGit/") || userAgent.startsWith("JGit-")'
# Golang proxy and initial fetch
- 'userAgent.startsWith("GoModuleMirror/")'
- 'userAgent.startsWith("Go-http-client/") && "go-get" in query && query["go-get"] == "1" && (path.matches("^/[^/]+/[^/]+$") || path.matches("^/[^/]+/[^/]+/v[0-9]+$"))'
is-git-path:
- 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")'
rules:
- name: blocked-networks
- name: undesired-networks
conditions:
- 'inNetwork("huawei-cloud", remoteAddress) || inNetwork("alibaba-cloud", remoteAddress)'
action: deny
- name: golang-proxy
- name: undesired-crawlers
conditions:
- 'userAgent.startsWith("GoModuleMirror/") || (userAgent.startsWith("Go-http-client/") && query["go-get"] == "1")'
- '($is-headless-chromium)'
- 'userAgent == ""'
- 'userAgent.startsWith("Lightpanda/")'
# Botnet that sends a malformed (typo'd) Opera user agent
- 'userAgent.matches("^Opera/[0-9.]+\\.\\(")'
# AI crawlers: they fetch robots.txt but do not respect it
- 'userAgent.contains("Amazonbot") || userAgent.contains("Bytespider") || userAgent.contains("ClaudeBot") || userAgent.contains("meta-externalagent/")'
action: deny
- name: suspicious-crawlers
conditions:
- 'userAgent.contains("Presto/") || userAgent.contains("Trident/")'
# Old IE browsers
- 'userAgent.matches("MSIE ([4-9]|10|11)\\.")'
# Old Linux browsers
- 'userAgent.contains("Linux i686")'
# Old Windows browsers
- 'userAgent.matches("Windows (95|98)") || userAgent.matches("Windows NT [1-4]\\.")'
# Old mobile browsers
- 'userAgent.matches("Android [1-9]\\.") || userAgent.matches("(iPad|iPhone) OS [1-9]_")'
# Old Opera browsers
- 'userAgent.startsWith("Opera/")'
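# check: once the challenges listed below are passed, evaluation is meant to continue
# with the following rules instead of passing the request to the backend right away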
action: check
challenges: [js-pow-sha256, http-cookie-check]
- name: always-pow-challenge
conditions:
- 'path.startsWith("/user/sign_up") || path.startsWith("/user/login")'
# Match archive downloads from browsers and not tools
- 'path.matches("^/[^/]+/[^/]+/archive/.*\\.(bundle|zip|tar\\.gz)") && (userAgent.startsWith("Opera/") || userAgent.startsWith("Mozilla/"))'
action: challenge
challenges: [js-pow-sha256]
- name: allow-static-resources
conditions:
- '($is-static-asset)'
action: pass
- name: allow-git-operations
conditions:
- '($is-git-path)'
- 'path.matches("^/[^/]+/[^/]+\\.git")'
- 'path.matches("^/[^/]+/[^/]+/") && ($is-git-ua)'
action: pass
- name: sitemap
conditions:
- 'path == "/sitemap.xml" || path.matches("^/explore/(users|repos)/sitemap-[0-9]+\\.xml$")'
action: pass
# TODO: rss
- name: source-download
conditions:
- 'path.matches("^/[^/]+/[^/]+/raw/branch/")'
- 'path.matches("^/[^/]+/[^/]+/archive/")'
- 'path.matches("^/[^/]+/[^/]+/media/")'
action: pass
- name: api-call
conditions:
- 'path.startsWith("/.well-known")'
- 'path.startsWith("/api/v1/") || path.startsWith("/api/forgejo/v1/")'
- 'path.startsWith("/login/oauth/")'
- 'path.startsWith("/captcha/")'
- 'path.startsWith("/metrics/")'
# todo: post only
- 'path == "/-/markup"'
- 'path == "/user/events"'
- 'path == "/ssh_info"'
- 'path == "/api/healthz"'
# user pubkeys
- 'path.matches("^/[^/]+\\.keys$")'
- 'path.matches("^/[^/]+\\.gpg")'
action: pass
- name: preview-fetchers
conditions:
- 'path.endsWith("/-/summary-card")'
- 'userAgent.contains("facebookexternalhit/") || userAgent.contains("Twitterbot/")'
- '"X-Purpose" in headers && headers["X-Purpose"] == "preview"'
action: pass
- name: desired-crawlers
conditions:
- 'userAgent.contains("+https://kagi.com/bot") && inNetwork("kagibot", remoteAddress)'
- 'userAgent.contains("+http://www.google.com/bot.html") && inNetwork("googlebot", remoteAddress)'
- 'userAgent.contains("+http://www.bing.com/bingbot.htm") && inNetwork("bingbot", remoteAddress)'
- 'userAgent.contains("+http://duckduckgo.com/duckduckbot.html") && inNetwork("duckduckbot", remoteAddress)'
- 'userAgent.contains("+https://help.qwant.com/bot/") && inNetwork("qwantbot", remoteAddress)'
- 'userAgent.contains("+http://yandex.com/bots") && inNetwork("yandexbot", remoteAddress)'
action: pass
- name: homesite
conditions:
- 'path == "/"'
- 'path.matches("(?i)^/(WeebDataHoarder|P2Pool|mirror|git|S\\.O\\.N\\.G|FM10K|Sillycom|pwgen2155|kaitou|metonym)/[^/]+$")'
action: pass
- name: standard-browser

View File

@@ -23,7 +23,6 @@ import (
"net"
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"time"
@@ -38,15 +37,12 @@ type State struct {
Networks map[string]cidranger.Ranger
UserAgents map[string][]*regexp.Regexp
WasmRuntime wazero.Runtime
WasmContext context.Context
Challenges map[string]ChallengeState
RulesEnv *cel.Env
Conditions map[string]*cel.Ast
RulesEnv *cel.Env
Rules []RuleState
@@ -59,6 +55,7 @@ type RuleState struct {
Program cel.Program
Action PolicyRuleAction
Continue bool
Challenges []string
}
@@ -100,16 +97,6 @@ func NewState(policy Policy, packagePath string, backend http.Handler) (state *S
state.UrlPath = "/.well-known/." + state.PackagePath
state.Backend = backend
state.UserAgents = make(map[string][]*regexp.Regexp)
for k, v := range policy.UserAgents {
for _, str := range v {
expr, err := regexp.Compile(str)
if err != nil {
return nil, fmt.Errorf("user-agent %s: invalid regex expression %s: %v", k, str, err)
}
state.UserAgents[k] = append(state.UserAgents[k], expr)
}
}
state.Networks = make(map[string]cidranger.Ranger)
for k, network := range policy.Networks {
ranger := cidranger.NewPCTrieRanger()
@@ -459,14 +446,22 @@ func NewState(policy Policy, packagePath string, backend http.Handler) (state *S
return nil, err
}
state.Conditions = make(map[string]*cel.Ast)
var replacements []string
for k, entries := range policy.Conditions {
ast, err := ConditionFromStrings(state.RulesEnv, OperatorOr, entries...)
if err != nil {
return nil, fmt.Errorf("conditions %s: error compiling conditions: %v", k, err)
}
state.Conditions[k] = ast
cond, err := cel.AstToString(ast)
if err != nil {
return nil, fmt.Errorf("conditions %s: error printing condition: %v", k, err)
}
replacements = append(replacements, fmt.Sprintf("($%s)", k))
replacements = append(replacements, "("+cond+")")
}
conditionReplacer := strings.NewReplacer(replacements...)
for _, rule := range policy.Rules {
r := RuleState{
@@ -475,12 +470,18 @@ func NewState(policy Policy, packagePath string, backend http.Handler) (state *S
Challenges: rule.Challenges,
}
if r.Action == PolicyRuleActionCHALLENGE && len(r.Challenges) == 0 {
if (r.Action == PolicyRuleActionCHALLENGE || r.Action == PolicyRuleActionCHECK) && len(r.Challenges) == 0 {
return nil, fmt.Errorf("no challenges found in rule %s", rule.Name)
}
//TODO: nesting conditions via decorator!
ast, err := ConditionFromStrings(state.RulesEnv, OperatorOr, rule.Conditions...)
// allow nesting
var conditions []string
for _, cond := range rule.Conditions {
cond = conditionReplacer.Replace(cond)
conditions = append(conditions, cond)
}
ast, err := ConditionFromStrings(state.RulesEnv, OperatorOr, conditions...)
if err != nil {
return nil, fmt.Errorf("rules %s: error compiling conditions: %v", rule.Name, err)
}