Implement policy snippets

This commit is contained in:
WeebDataHoarder
2025-04-23 17:16:26 +02:00
parent d83fe3653a
commit 3b11792594
18 changed files with 343 additions and 349 deletions

View File

@@ -39,6 +39,7 @@ ENV GOAWAY_BIND=":8080"
ENV GOAWAY_BIND_NETWORK="tcp" ENV GOAWAY_BIND_NETWORK="tcp"
ENV GOAWAY_SOCKET_MODE="0770" ENV GOAWAY_SOCKET_MODE="0770"
ENV GOAWAY_POLICY="/policy.yml" ENV GOAWAY_POLICY="/policy.yml"
ENV GOAWAY_POLICY_SNIPPETS="/policy/snippets"
ENV GOAWAY_CHALLENGE_TEMPLATE="anubis" ENV GOAWAY_CHALLENGE_TEMPLATE="anubis"
ENV GOAWAY_CHALLENGE_TEMPLATE_THEME="" ENV GOAWAY_CHALLENGE_TEMPLATE_THEME=""
ENV GOAWAY_SLOG_LEVEL="WARN" ENV GOAWAY_SLOG_LEVEL="WARN"
@@ -56,7 +57,8 @@ EXPOSE 8080/udp
ENV JWT_PRIVATE_KEY_SEED="${GOAWAY_JWT_PRIVATE_KEY_SEED}" ENV JWT_PRIVATE_KEY_SEED="${GOAWAY_JWT_PRIVATE_KEY_SEED}"
ENTRYPOINT /bin/go-away --bind "${GOAWAY_BIND}" --bind-network "${GOAWAY_BIND_NETWORK}" --socket-mode "${GOAWAY_SOCKET_MODE}" \ ENTRYPOINT /bin/go-away --bind "${GOAWAY_BIND}" --bind-network "${GOAWAY_BIND_NETWORK}" --socket-mode "${GOAWAY_SOCKET_MODE}" \
--policy ${GOAWAY_POLICY} --client-ip-header "${GOAWAY_CLIENT_IP_HEADER}" --backend-ip-header "${GOAWAY_BACKEND_IP_HEADER}" \ --policy "${GOAWAY_POLICY}" --policy-snippets "${GOAWAY_POLICY_SNIPPETS}" \
--client-ip-header "${GOAWAY_CLIENT_IP_HEADER}" --backend-ip-header "${GOAWAY_BACKEND_IP_HEADER}" \
--cache "${GOAWAY_CACHE}" \ --cache "${GOAWAY_CACHE}" \
--dnsbl "${GOAWAY_DNSBL}" \ --dnsbl "${GOAWAY_DNSBL}" \
--challenge-template "${GOAWAY_CHALLENGE_TEMPLATE}" --challenge-template-theme "${GOAWAY_CHALLENGE_TEMPLATE_THEME}" \ --challenge-template "${GOAWAY_CHALLENGE_TEMPLATE}" --challenge-template-theme "${GOAWAY_CHALLENGE_TEMPLATE_THEME}" \

View File

@@ -357,6 +357,7 @@ services:
volumes: volumes:
- "goaway_cache:/cache" - "goaway_cache:/cache"
- "./examples/forgejo.yml:/policy.yml:ro" - "./examples/forgejo.yml:/policy.yml:ro"
- "./examples/snippets/:/policy/snippets/:ro"
environment: environment:
#GOAWAY_BIND: ":8080" #GOAWAY_BIND: ":8080"
# Supported tcp, unix, and proxy (for enabling PROXY module for request unwrapping) # Supported tcp, unix, and proxy (for enabling PROXY module for request unwrapping)
@@ -391,6 +392,8 @@ services:
#GOAWAY_BACKEND_IP_HEADER: "" #GOAWAY_BACKEND_IP_HEADER: ""
GOAWAY_POLICY: "/policy.yml" GOAWAY_POLICY: "/policy.yml"
GOAWAY_POLICY_SNIPPETS: "/policy/snippets"
# Template, and theme for the template to pick. defaults to an anubis-like one # Template, and theme for the template to pick. defaults to an anubis-like one
# An file path can be specified. See embed/templates for a few examples # An file path can be specified. See embed/templates for a few examples

View File

@@ -1,6 +1,7 @@
package main package main
import ( import (
"bytes"
"context" "context"
"crypto/ed25519" "crypto/ed25519"
"crypto/rand" "crypto/rand"
@@ -12,13 +13,11 @@ import (
"git.gammaspectra.live/git/go-away/lib" "git.gammaspectra.live/git/go-away/lib"
"git.gammaspectra.live/git/go-away/lib/policy" "git.gammaspectra.live/git/go-away/lib/policy"
"git.gammaspectra.live/git/go-away/utils" "git.gammaspectra.live/git/go-away/utils"
"github.com/goccy/go-yaml"
"github.com/pires/go-proxyproto" "github.com/pires/go-proxyproto"
"golang.org/x/crypto/acme" "golang.org/x/crypto/acme"
"golang.org/x/crypto/acme/autocert" "golang.org/x/crypto/acme/autocert"
"log" "log"
"log/slog" "log/slog"
"maps"
"net" "net"
"net/http" "net/http"
"os" "os"
@@ -137,6 +136,7 @@ func main() {
cachePath := flag.String("cache", path.Join(os.TempDir(), "go_away_cache"), "path to temporary cache directory") cachePath := flag.String("cache", path.Join(os.TempDir(), "go_away_cache"), "path to temporary cache directory")
policyFile := flag.String("policy", "", "path to policy YAML file") policyFile := flag.String("policy", "", "path to policy YAML file")
policySnippets := flag.String("policy-snippets", "", "path to YAML snippets folder")
challengeTemplate := flag.String("challenge-template", "anubis", "name or path of the challenge template to use (anubis, forgejo)") challengeTemplate := flag.String("challenge-template", "anubis", "name or path of the challenge template to use (anubis, forgejo)")
challengeTemplateTheme := flag.String("challenge-template-theme", "", "name of the challenge template theme to use (forgejo => [forgejo-auto, forgejo-dark, forgejo-light, gitea...])") challengeTemplateTheme := flag.String("challenge-template-theme", "", "name of the challenge template theme to use (forgejo => [forgejo-auto, forgejo-dark, forgejo-light, gitea...])")
@@ -203,17 +203,14 @@ func main() {
log.Fatal(fmt.Errorf("failed to read policy file: %w", err)) log.Fatal(fmt.Errorf("failed to read policy file: %w", err))
} }
var p policy.Policy p, err := policy.NewPolicy(bytes.NewReader(policyData), *policySnippets)
if err != nil {
if err = yaml.Unmarshal(policyData, &p); err != nil {
log.Fatal(fmt.Errorf("failed to parse policy file: %w", err)) log.Fatal(fmt.Errorf("failed to parse policy file: %w", err))
} }
createdBackends := make(map[string]http.Handler) createdBackends := make(map[string]http.Handler)
parsedBackends := make(map[string]string) parsedBackends := make(map[string]string)
//TODO: deprecate
maps.Copy(parsedBackends, p.Backends)
for _, backend := range backends { for _, backend := range backends {
parts := strings.Split(backend, "=") parts := strings.Split(backend, "=")
if len(parts) != 2 { if len(parts) != 2 {
@@ -232,6 +229,10 @@ func main() {
createdBackends[k] = backend createdBackends[k] = backend
} }
if len(createdBackends) == 0 {
log.Fatal(fmt.Errorf("no backends defined in policy file"))
}
if *cachePath != "" { if *cachePath != "" {
err = os.MkdirAll(*cachePath, 0755) err = os.MkdirAll(*cachePath, 0755)
if err != nil { if err != nil {
@@ -328,7 +329,7 @@ func main() {
ChallengeResponseCode: http.StatusTeapot, ChallengeResponseCode: http.StatusTeapot,
} }
state, err := lib.NewState(p, settings) state, err := lib.NewState(*p, settings)
if err != nil { if err != nil {
log.Fatal(fmt.Errorf("failed to create state: %w", err)) log.Fatal(fmt.Errorf("failed to create state: %w", err))

View File

@@ -1,12 +1,12 @@
# Example cmdline (forward requests from upstream to port :8080) # Example cmdline (forward requests from upstream to port :8080)
# $ go-away --bind :8080 --backend git.example.com=http://forgejo:3000 --policy examples/forgejo.yml --challenge-template forgejo --challenge-template-theme forgejo-auto # $ go-away --bind :8080 --backend git.example.com=http://forgejo:3000 --policy examples/forgejo.yml --policy-snippets example/snippets/ --challenge-template forgejo --challenge-template-theme forgejo-auto
# Define networks to be used later below # Define networks to be used later below
networks: networks:
# todo: support direct ASN lookups # Networks will get included from snippets
# todo: cache these values
huawei-cloud: huawei-cloud:
# AS136907 # AS136907
- url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/136907/aggregated.json - url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/136907/aggregated.json
@@ -19,123 +19,10 @@ networks:
# AS21859 # AS21859
- url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/21859/aggregated.json - url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/21859/aggregated.json
jq-path: '.subnets.ipv4[], .subnets.ipv6[]' jq-path: '.subnets.ipv4[], .subnets.ipv6[]'
# aws-cloud:
# - url: https://ip-ranges.amazonaws.com/ip-ranges.json
# jq-path: '(.prefixes[] | select(has("ip_prefix")) | .ip_prefix), (.prefixes[] | select(has("ipv6_prefix")) | .ipv6_prefix)'
# google-cloud:
# - url: https://www.gstatic.com/ipranges/cloud.json
# jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
# oracle-cloud:
# - url: https://docs.oracle.com/en-us/iaas/tools/public_ip_ranges.json
# jq-path: '.regions[] | .cidrs[] | .cidr'
# azure-cloud:
# # todo: https://www.microsoft.com/en-us/download/details.aspx?id=56519 does not provide direct JSON
# - url: https://raw.githubusercontent.com/femueller/cloud-ip-ranges/refs/heads/master/microsoft-azure-ip-ranges.json
# jq-path: '.values[] | .properties.addressPrefixes[]'
#
# digitalocean:
# - url: https://www.digitalocean.com/geo/google.csv
# regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
# linode:
# - url: https://geoip.linode.com/
# regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
# vultr:
# - url: "https://geofeed.constant.com/?json"
# jq-path: '.subnets[] | .ip_prefix'
# cloudflare:
# - url: https://www.cloudflare.com/ips-v4
# regex: "(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+/[0-9]+)"
# - url: https://www.cloudflare.com/ips-v6
# regex: "(?P<prefix>[0-9a-f:]+::/[0-9]+)"
#
# icloud-private-relay:
# - url: https://mask-api.icloud.com/egress-ip-ranges.csv
# regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
# tunnelbroker-relay:
# # HE Tunnelbroker
# - url: https://tunnelbroker.net/export/google
# regex: "(?P<prefix>([0-9a-f:]+::)/[0-9]+),"
googlebot:
- url: https://developers.google.com/static/search/apis/ipranges/googlebot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
bingbot:
- url: https://www.bing.com/toolbox/bingbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
qwantbot:
- url: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
duckduckbot:
- url: https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
regex: "<li><div>(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)</div></li>"
yandexbot:
# todo: detected as bot
# - url: https://yandex.com/ips
# regex: "<span>(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+)[ \\\\t]*</span><br/>"
- prefixes:
- "5.45.192.0/18"
- "5.255.192.0/18"
- "37.9.64.0/18"
- "37.140.128.0/18"
- "77.88.0.0/18"
- "84.252.160.0/19"
- "87.250.224.0/19"
- "90.156.176.0/22"
- "93.158.128.0/18"
- "95.108.128.0/17"
- "141.8.128.0/18"
- "178.154.128.0/18"
- "185.32.187.0/24"
- "2a02:6b8::/29"
kagibot:
- url: https://kagi.com/bot
regex: "\\n(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+) "
challenges: challenges:
js-pow-sha256: # Challenges will get included from snippets
runtime: js
parameters:
# specifies the folder path that assets are under
# can be either embedded or external path
# defaults to name of challenge
path: "js-pow-sha256"
# needs to be under static folder
js-loader: load.mjs
# needs to be under runtime folder
wasm-runtime: runtime.wasm
wasm-runtime-settings:
difficulty: 20
verify-probability: 0.02
# Challenges with a cookie, self redirect (non-JS, requires HTTP parsing)
self-cookie:
runtime: "cookie"
# Challenges with a redirect via Link header with rel=preload and early hints (non-JS, requires HTTP parsing, fetching and logic)
# Works on HTTP/2 and above!
self-preload-link:
condition: '"Sec-Fetch-Mode" in headers && headers["Sec-Fetch-Mode"] == "navigate"'
runtime: "preload-link"
parameters:
preload-early-hint-deadline: 3s
# Challenges with a redirect via Refresh header (non-JS, requires HTTP parsing and logic)
self-header-refresh:
runtime: "refresh"
parameters:
refresh-via: "header"
# Challenges with a redirect via Refresh meta (non-JS, requires HTML parsing and logic)
self-meta-refresh:
runtime: "refresh"
parameters:
refresh-via: "meta"
# Challenges with loading a random CSS or image document (non-JS, requires HTML parsing and logic)
self-resource-load:
runtime: "resource-load"
# Verifies the existence of a cookie and confirms it against some backend request, passing the entire client cookie contents # Verifies the existence of a cookie and confirms it against some backend request, passing the entire client cookie contents
http-cookie-check: http-cookie-check:
@@ -149,29 +36,12 @@ challenges:
http-code: 200 http-code: 200
verify-probability: 0.1 verify-probability: 0.1
dnsbl:
runtime: dnsbl
parameters:
dnsbl-decay: 1h
dnsbl-timeout: 1s
conditions: conditions:
# Conditions will get replaced on rules AST when found as ($condition-name) # Conditions will get replaced on rules AST when found as ($condition-name)
# Checks to detect a headless chromium via headers only
is-headless-chromium:
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
- '"Sec-Ch-Ua" in headers && (headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium"))'
#- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (!("Accept-Language" in headers) || !("Accept-Encoding" in headers))'
is-generic-browser: # Conditions will get included from snippets
- 'userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")'
is-well-known-asset:
- 'path == "/robots.txt"'
- 'path.startsWith("/.well-known")'
is-static-asset: is-static-asset:
- 'path == "/favicon.ico"'
- 'path == "/apple-touch-icon.png"' - 'path == "/apple-touch-icon.png"'
- 'path == "/apple-touch-icon-precomposed.png"' - 'path == "/apple-touch-icon-precomposed.png"'
- 'path.startsWith("/assets/")' - 'path.startsWith("/assets/")'
@@ -181,39 +51,9 @@ conditions:
- 'path.startsWith("/user/avatar/")' - 'path.startsWith("/user/avatar/")'
- 'path.startsWith("/attachments/")' - 'path.startsWith("/attachments/")'
is-git-ua:
- 'userAgent.startsWith("git/") || userAgent.contains("libgit")'
- 'userAgent.startsWith("go-git")'
- 'userAgent.startsWith("JGit/") || userAgent.startsWith("JGit-")'
# Golang proxy and initial fetch
- 'userAgent.startsWith("GoModuleMirror/")'
- 'userAgent.startsWith("Go-http-client/") && "go-get" in query && query["go-get"] == "1"'
- '"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"'
is-git-path: is-git-path:
- 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")' - 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")'
is-generic-robot-ua:
- 'userAgent.matches("compatible[;)]") && !userAgent.contains("Trident/")'
- 'userAgent.matches("\\+https?://")'
- 'userAgent.contains("@")'
- 'userAgent.matches("[bB]ot/[0-9]")'
is-tool-ua:
- 'userAgent.startsWith("python-requests/")'
- 'userAgent.startsWith("Python-urllib/")'
- 'userAgent.startsWith("python-httpx/")'
- 'userAgent.contains("aoihttp/")'
- 'userAgent.startsWith("http.rb/")'
- 'userAgent.startsWith("curl/")'
- 'userAgent.startsWith("Wget/")'
- 'userAgent.startsWith("libcurl/")'
- 'userAgent.startsWith("okhttp/")'
- 'userAgent.startsWith("Java/")'
- 'userAgent.startsWith("Apache-HttpClient//")'
- 'userAgent.startsWith("Go-http-client/")'
- 'userAgent.startsWith("node-fetch/")'
- 'userAgent.startsWith("reqwest/")'
is-suspicious-crawler: is-suspicious-crawler:
# TLS Fingerprint for specific agent without ALPN # TLS Fingerprint for specific agent without ALPN
- '(userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")) && ("ja4" in fp && fp.ja4.matches("^t[0-9a-z]+00_"))' - '(userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")) && ("ja4" in fp && fp.ja4.matches("^t[0-9a-z]+00_"))'
@@ -305,11 +145,11 @@ rules:
- name: 1 - name: 1
action: check action: check
settings: settings:
challenges: [self-preload-link, self-resource-load] challenges: [preload-link, resource-load]
- name: 2 - name: 2
action: check action: check
settings: settings:
challenges: [self-header-refresh] challenges: [header-refresh]
- name: always-pow-challenge - name: always-pow-challenge
conditions: conditions:
@@ -388,12 +228,12 @@ rules:
- name: desired-crawlers - name: desired-crawlers
conditions: conditions:
- 'userAgent.contains("+https://kagi.com/bot") && remoteAddress.network("kagibot")' - *is-bot-googlebot
- '(userAgent.contains("+http://www.google.com/bot.html") || userAgent.contains("Google-PageRenderer") || userAgent.contains("Google-InspectionTool") || userAgent.contains("Googlebot")) && remoteAddress.network("googlebot")' - *is-bot-bingbot
- 'userAgent.contains("+http://www.bing.com/bingbot.htm") && remoteAddress.network("bingbot")' - *is-bot-duckduckbot
- 'userAgent.contains("+http://duckduckgo.com/duckduckbot.html") && remoteAddress.network("duckduckbot")' - *is-bot-kagibot
- 'userAgent.contains("+https://help.qwant.com/bot/") && remoteAddress.network("qwantbot")' - *is-bot-qwantbot
- 'userAgent.contains("+http://yandex.com/bots") && remoteAddress.network("yandexbot")' - *is-bot-yandexbot
action: pass action: pass
# check a sequence of challenges # check a sequence of challenges
@@ -404,16 +244,16 @@ rules:
- name: 0 - name: 0
action: check action: check
settings: settings:
challenges: [self-preload-link, self-header-refresh, js-pow-sha256, http-cookie-check] challenges: [preload-link, header-refresh, js-pow-sha256, http-cookie-check]
- name: 1 - name: 1
action: check action: check
settings: settings:
challenges: [ self-resource-load, js-pow-sha256, http-cookie-check ] challenges: [ resource-load, js-pow-sha256, http-cookie-check ]
- name: standard-bots - name: standard-bots
action: check action: check
settings: settings:
challenges: [self-meta-refresh, self-resource-load] challenges: [meta-refresh, resource-load]
conditions: conditions:
- '($is-generic-robot-ua)' - '($is-generic-robot-ua)'
@@ -454,14 +294,14 @@ rules:
- name: plaintext-browser - name: plaintext-browser
action: challenge action: challenge
settings: settings:
challenges: [http-cookie-check, self-meta-refresh, self-cookie] challenges: [http-cookie-check, meta-refresh, cookie]
conditions: conditions:
- 'userAgent.startsWith("Lynx/")' - 'userAgent.startsWith("Lynx/")'
- name: standard-tools - name: standard-tools
action: challenge action: challenge
settings: settings:
challenges: [self-cookie] challenges: [cookie]
conditions: conditions:
- '($is-tool-ua)' - '($is-tool-ua)'
- '!($is-generic-browser)' - '!($is-generic-browser)'
@@ -469,6 +309,6 @@ rules:
- name: standard-browser - name: standard-browser
action: challenge action: challenge
settings: settings:
challenges: [http-cookie-check, self-preload-link, self-meta-refresh, self-resource-load, js-pow-sha256] challenges: [http-cookie-check, preload-link, meta-refresh, resource-load, js-pow-sha256]
conditions: conditions:
- '($is-generic-browser)' - '($is-generic-browser)'

View File

@@ -1,138 +1,27 @@
# Example cmdline (forward requests from upstream to port :8080) # Example cmdline (forward requests from upstream to port :8080)
# $ go-away --bind :8080 --backend site.example.com=http://site:3000 --policy examples/generic.yml --challenge-template anubis # $ go-away --bind :8080 --backend site.example.com=http://site:3000 --policy examples/generic.yml --policy-snippets example/snippets/ --challenge-template anubis
# Define networks to be used later below # Define networks to be used later below
networks: networks:
# Networks will get included from snippets
googlebot:
- url: https://developers.google.com/static/search/apis/ipranges/googlebot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
bingbot:
- url: https://www.bing.com/toolbox/bingbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
qwantbot:
- url: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
duckduckbot:
- url: https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
regex: "<li><div>(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)</div></li>"
yandexbot:
# todo: detected as bot
# - url: https://yandex.com/ips
# regex: "<span>(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+)[ \\\\t]*</span><br/>"
- prefixes:
- "5.45.192.0/18"
- "5.255.192.0/18"
- "37.9.64.0/18"
- "37.140.128.0/18"
- "77.88.0.0/18"
- "84.252.160.0/19"
- "87.250.224.0/19"
- "90.156.176.0/22"
- "93.158.128.0/18"
- "95.108.128.0/17"
- "141.8.128.0/18"
- "178.154.128.0/18"
- "185.32.187.0/24"
- "2a02:6b8::/29"
kagibot:
- url: https://kagi.com/bot
regex: "\\n(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+) "
challenges: challenges:
js-pow-sha256: # Challenges will get included from snippets
runtime: js
parameters:
# needs to be under static folder
js-loader: load.mjs
# needs to be under runtime folder
wasm-runtime: runtime.wasm
wasm-runtime-settings:
difficulty: 15
verify-probability: 0.02
# Challenges with a cookie, self redirect (non-JS, requires HTTP parsing)
self-cookie:
runtime: "cookie"
# Challenges with a redirect via Link header with rel=preload and early hints (non-JS, requires HTTP parsing, fetching and logic)
# Works on HTTP/2 and above!
self-preload-link:
condition: '"Sec-Fetch-Mode" in headers && headers["Sec-Fetch-Mode"] == "navigate"'
runtime: "preload-link"
parameters:
preload-early-hint-deadline: 3s
# Challenges with a redirect via Refresh header (non-JS, requires HTTP parsing and logic)
self-header-refresh:
runtime: "refresh"
parameters:
refresh-via: "header"
# Challenges with a redirect via Refresh meta (non-JS, requires HTML parsing and logic)
self-meta-refresh:
runtime: "refresh"
parameters:
refresh-via: "meta"
# Challenges with loading a random CSS or image document (non-JS, requires HTML parsing and logic)
self-resource-load:
runtime: "resource-load"
dnsbl:
runtime: dnsbl
parameters:
dnsbl-decay: 1h
dnsbl-timeout: 1s
conditions: conditions:
# Conditions will get replaced on rules AST when found as ($condition-name) # Conditions will get replaced on rules AST when found as ($condition-name)
# Checks to detect a headless chromium via headers only
is-headless-chromium:
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
- '"Sec-Ch-Ua" in headers && (headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium"))'
#- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (!("Accept-Language" in headers) || !("Accept-Encoding" in headers))'
is-generic-browser: # Conditions will get included from snippets
- 'userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")'
is-well-known-asset:
- 'path == "/robots.txt"'
- 'path.startsWith("/.well-known")'
is-static-asset: is-static-asset:
- 'path == "/favicon.ico"'
- 'path == "/apple-touch-icon.png"' - 'path == "/apple-touch-icon.png"'
- 'path == "/apple-touch-icon-precomposed.png"' - 'path == "/apple-touch-icon-precomposed.png"'
- 'path.matches("\\.(manifest|ttf|woff|woff2|jpg|jpeg|gif|png|webp|avif|svg|mp4|webm|css|js|mjs|wasm)$")' - 'path.matches("\\.(manifest|ttf|woff|woff2|jpg|jpeg|gif|png|webp|avif|svg|mp4|webm|css|js|mjs|wasm)$")'
is-generic-robot-ua:
- 'userAgent.matches("compatible[;)]") && !userAgent.contains("Trident/")'
- 'userAgent.matches("\\+https?://")'
- 'userAgent.contains("@")'
- 'userAgent.matches("[bB]ot/[0-9]")'
is-tool-ua:
- 'userAgent.startsWith("python-requests/")'
- 'userAgent.startsWith("Python-urllib/")'
- 'userAgent.startsWith("python-httpx/")'
- 'userAgent.contains("aoihttp/")'
- 'userAgent.startsWith("http.rb/")'
- 'userAgent.startsWith("curl/")'
- 'userAgent.startsWith("Wget/")'
- 'userAgent.startsWith("libcurl/")'
- 'userAgent.startsWith("okhttp/")'
- 'userAgent.startsWith("Java/")'
- 'userAgent.startsWith("Apache-HttpClient//")'
- 'userAgent.startsWith("Go-http-client/")'
- 'userAgent.startsWith("node-fetch/")'
- 'userAgent.startsWith("reqwest/")'
is-suspicious-crawler: is-suspicious-crawler:
- 'userAgent.contains("Presto/") || userAgent.contains("Trident/")' - 'userAgent.contains("Presto/") || userAgent.contains("Trident/")'
# Old IE browsers # Old IE browsers
@@ -203,20 +92,20 @@ rules:
- name: 1 - name: 1
action: check action: check
settings: settings:
challenges: [self-preload-link, self-resource-load] challenges: [preload-link, resource-load]
- name: 2 - name: 2
action: check action: check
settings: settings:
challenges: [self-header-refresh] challenges: [header-refresh]
- name: desired-crawlers - name: desired-crawlers
conditions: conditions:
- 'userAgent.contains("+https://kagi.com/bot") && remoteAddress.network("kagibot")' - *is-bot-googlebot
- '(userAgent.contains("+http://www.google.com/bot.html") || userAgent.contains("Google-PageRenderer") || userAgent.contains("Google-InspectionTool") || userAgent.contains("Googlebot")) && remoteAddress.network("googlebot")' - *is-bot-bingbot
- 'userAgent.contains("+http://www.bing.com/bingbot.htm") && remoteAddress.network("bingbot")' - *is-bot-duckduckbot
- 'userAgent.contains("+http://duckduckgo.com/duckduckbot.html") && remoteAddress.network("duckduckbot")' - *is-bot-kagibot
- 'userAgent.contains("+https://help.qwant.com/bot/") && remoteAddress.network("qwantbot")' - *is-bot-qwantbot
- 'userAgent.contains("+http://yandex.com/bots") && remoteAddress.network("yandexbot")' - *is-bot-yandexbot
action: pass action: pass
- name: homesite - name: homesite
@@ -251,14 +140,14 @@ rules:
- name: plaintext-browser - name: plaintext-browser
action: challenge action: challenge
settings: settings:
challenges: [self-meta-refresh, self-cookie] challenges: [meta-refresh, cookie]
conditions: conditions:
- 'userAgent.startsWith("Lynx/")' - 'userAgent.startsWith("Lynx/")'
- name: standard-tools - name: standard-tools
action: challenge action: challenge
settings: settings:
challenges: [self-cookie] challenges: [cookie]
conditions: conditions:
- '($is-generic-robot-ua)' - '($is-generic-robot-ua)'
- '($is-tool-ua)' - '($is-tool-ua)'
@@ -267,6 +156,6 @@ rules:
- name: standard-browser - name: standard-browser
action: challenge action: challenge
settings: settings:
challenges: [self-preload-link, self-meta-refresh, self-resource-load, js-pow-sha256] challenges: [preload-link, meta-refresh, resource-load, js-pow-sha256]
conditions: conditions:
- '($is-generic-browser)' - '($is-generic-browser)'

View File

@@ -0,0 +1,8 @@
networks:
bingbot:
- url: https://www.bing.com/toolbox/bingbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
conditions:
is-bot-bingbot:
- &is-bot-bingbot 'userAgent.contains("+http://www.bing.com/bingbot.htm") && remoteAddress.network("bingbot")'

View File

@@ -0,0 +1,8 @@
networks:
duckduckbot:
- url: https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
regex: "<li><div>(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)</div></li>"
conditions:
is-bot-duckduckbot:
- &is-bot-duckduckbot 'userAgent.contains("+http://duckduckgo.com/duckduckbot.html") && remoteAddress.network("duckduckbot")'

View File

@@ -0,0 +1,8 @@
networks:
googlebot:
- url: https://developers.google.com/static/search/apis/ipranges/googlebot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
conditions:
is-bot-googlebot:
- &is-bot-googlebot '(userAgent.contains("+http://www.google.com/bot.html") || userAgent.contains("Google-PageRenderer") || userAgent.contains("Google-InspectionTool") || userAgent.contains("Googlebot")) && remoteAddress.network("googlebot")'

View File

@@ -0,0 +1,8 @@
networks:
kagibot:
- url: https://kagi.com/bot
regex: "\\n(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+) "
conditions:
is-bot-kagibot:
- &is-bot-kagibot 'userAgent.contains("+https://kagi.com/bot") && remoteAddress.network("kagibot")'

View File

@@ -0,0 +1,8 @@
networks:
qwantbot:
- url: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
conditions:
is-bot-qwantbot:
- &is-bot-qwantbot 'userAgent.contains("+https://help.qwant.com/bot/") && remoteAddress.network("qwantbot")'

View File

@@ -0,0 +1,24 @@
networks:
yandexbot:
# todo: detected as bot
# - url: https://yandex.com/ips
# regex: "<span>(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+)[ \\\\t]*</span><br/>"
- prefixes:
- "5.45.192.0/18"
- "5.255.192.0/18"
- "37.9.64.0/18"
- "37.140.128.0/18"
- "77.88.0.0/18"
- "84.252.160.0/19"
- "87.250.224.0/19"
- "90.156.176.0/22"
- "93.158.128.0/18"
- "95.108.128.0/17"
- "141.8.128.0/18"
- "178.154.128.0/18"
- "185.32.187.0/24"
- "2a02:6b8::/29"
conditions:
is-bot-yandexbot:
- &is-bot-yandexbot 'userAgent.contains("+http://yandex.com/bots") && remoteAddress.network("yandexbot")'

View File

@@ -0,0 +1,6 @@
challenges:
dnsbl:
runtime: dnsbl
parameters:
dnsbl-decay: 1h
dnsbl-timeout: 1s

View File

@@ -0,0 +1,15 @@
challenges:
js-pow-sha256:
runtime: js
parameters:
# specifies the folder path that assets are under
# can be either embedded or external path
# defaults to name of challenge
path: "js-pow-sha256"
# needs to be under static folder
js-loader: load.mjs
# needs to be under runtime folder
wasm-runtime: runtime.wasm
wasm-runtime-settings:
difficulty: 20
verify-probability: 0.02

View File

@@ -0,0 +1,28 @@
challenges:
# Challenges with a cookie, self redirect (non-JS, requires HTTP parsing)
cookie:
runtime: "cookie"
# Challenges with a redirect via Link header with rel=preload and early hints (non-JS, requires HTTP parsing, fetching and logic)
# Works on HTTP/2 and above!
preload-link:
condition: '"Sec-Fetch-Mode" in headers && headers["Sec-Fetch-Mode"] == "navigate"'
runtime: "preload-link"
parameters:
preload-early-hint-deadline: 3s
# Challenges with a redirect via Refresh header (non-JS, requires HTTP parsing and logic)
header-refresh:
runtime: "refresh"
parameters:
refresh-via: "header"
# Challenges with a redirect via Refresh meta (non-JS, requires HTML parsing and logic)
meta-refresh:
runtime: "refresh"
parameters:
refresh-via: "meta"
# Challenges with loading a random CSS or image document (non-JS, requires HTML parsing and logic)
resource-load:
runtime: "resource-load"

View File

@@ -0,0 +1,45 @@
conditions:
is-well-known-asset:
- 'path == "/robots.txt"'
- 'path == "/favicon.ico"'
- 'path.startsWith("/.well-known")'
is-git-ua:
- 'userAgent.startsWith("git/") || userAgent.contains("libgit")'
- 'userAgent.startsWith("go-git")'
- 'userAgent.startsWith("JGit/") || userAgent.startsWith("JGit-")'
# Golang proxy and initial fetch
- 'userAgent.startsWith("GoModuleMirror/")'
- 'userAgent.startsWith("Go-http-client/") && "go-get" in query && query["go-get"] == "1"'
- '"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"'
is-generic-browser:
- 'userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")'
is-generic-robot-ua:
- 'userAgent.matches("compatible[;)]") && !userAgent.contains("Trident/")'
- 'userAgent.matches("\\+https?://")'
- 'userAgent.contains("@")'
- 'userAgent.matches("[bB]ot/[0-9]")'
is-tool-ua:
- 'userAgent.startsWith("python-requests/")'
- 'userAgent.startsWith("Python-urllib/")'
- 'userAgent.startsWith("python-httpx/")'
- 'userAgent.contains("aoihttp/")'
- 'userAgent.startsWith("http.rb/")'
- 'userAgent.startsWith("curl/")'
- 'userAgent.startsWith("Wget/")'
- 'userAgent.startsWith("libcurl/")'
- 'userAgent.startsWith("okhttp/")'
- 'userAgent.startsWith("Java/")'
- 'userAgent.startsWith("Apache-HttpClient//")'
- 'userAgent.startsWith("Go-http-client/")'
- 'userAgent.startsWith("node-fetch/")'
- 'userAgent.startsWith("reqwest/")'
# Checks to detect a headless chromium via headers only
is-headless-chromium:
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
- '"Sec-Ch-Ua" in headers && (headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium"))'
#- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (!("Accept-Language" in headers) || !("Accept-Encoding" in headers))'

View File

@@ -0,0 +1,37 @@
networks:
# aws-cloud:
# - url: https://ip-ranges.amazonaws.com/ip-ranges.json
# jq-path: '(.prefixes[] | select(has("ip_prefix")) | .ip_prefix), (.prefixes[] | select(has("ipv6_prefix")) | .ipv6_prefix)'
# google-cloud:
# - url: https://www.gstatic.com/ipranges/cloud.json
# jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
# oracle-cloud:
# - url: https://docs.oracle.com/en-us/iaas/tools/public_ip_ranges.json
# jq-path: '.regions[] | .cidrs[] | .cidr'
# azure-cloud:
# # todo: https://www.microsoft.com/en-us/download/details.aspx?id=56519 does not provide direct JSON
# - url: https://raw.githubusercontent.com/femueller/cloud-ip-ranges/refs/heads/master/microsoft-azure-ip-ranges.json
# jq-path: '.values[] | .properties.addressPrefixes[]'
#
# digitalocean:
# - url: https://www.digitalocean.com/geo/google.csv
# regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
# linode:
# - url: https://geoip.linode.com/
# regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
# vultr:
# - url: "https://geofeed.constant.com/?json"
# jq-path: '.subnets[] | .ip_prefix'
# cloudflare:
# - url: https://www.cloudflare.com/ips-v4
# regex: "(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+/[0-9]+)"
# - url: https://www.cloudflare.com/ips-v6
# regex: "(?P<prefix>[0-9a-f:]+::/[0-9]+)"
#
# icloud-private-relay:
# - url: https://mask-api.icloud.com/egress-ip-ranges.csv
# regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
# tunnelbroker-relay:
# # HE Tunnelbroker
# - url: https://tunnelbroker.net/export/google
# regex: "(?P<prefix>([0-9a-f:]+::)/[0-9]+),"

View File

@@ -115,3 +115,30 @@ func (n Network) FetchPrefixes(c *http.Client) (output []net.IPNet, err error) {
} }
return output, nil return output, nil
} }
func parseCIDROrIP(value string) (net.IPNet, error) {
_, ipNet, err := net.ParseCIDR(value)
if err != nil {
ip := net.ParseIP(value)
if ip == nil {
return net.IPNet{}, fmt.Errorf("failed to parse CIDR: %s", err)
}
if ip4 := ip.To4(); ip4 != nil {
return net.IPNet{
IP: ip4,
// single ip
Mask: net.CIDRMask(len(ip4)*8, len(ip4)*8),
}, nil
}
return net.IPNet{
IP: ip,
// single ip
Mask: net.CIDRMask(len(ip)*8, len(ip)*8),
}, nil
} else if ipNet != nil {
return *ipNet, nil
} else {
return net.IPNet{}, errors.New("invalid CIDR")
}
}

View File

@@ -1,38 +1,13 @@
package policy package policy
import ( import (
"errors" "bytes"
"fmt" "github.com/goccy/go-yaml"
"net" "io"
"os"
"path"
) )
func parseCIDROrIP(value string) (net.IPNet, error) {
_, ipNet, err := net.ParseCIDR(value)
if err != nil {
ip := net.ParseIP(value)
if ip == nil {
return net.IPNet{}, fmt.Errorf("failed to parse CIDR: %s", err)
}
if ip4 := ip.To4(); ip4 != nil {
return net.IPNet{
IP: ip4,
// single ip
Mask: net.CIDRMask(len(ip4)*8, len(ip4)*8),
}, nil
}
return net.IPNet{
IP: ip,
// single ip
Mask: net.CIDRMask(len(ip)*8, len(ip)*8),
}, nil
} else if ipNet != nil {
return *ipNet, nil
} else {
return net.IPNet{}, errors.New("invalid CIDR")
}
}
type Policy struct { type Policy struct {
// Networks map of networks and prefixes to be loaded // Networks map of networks and prefixes to be loaded
@@ -43,8 +18,70 @@ type Policy struct {
Challenges map[string]Challenge `yaml:"challenges"` Challenges map[string]Challenge `yaml:"challenges"`
Rules []Rule `yaml:"rules"` Rules []Rule `yaml:"rules"`
}
// Backends
// Deprecated func NewPolicy(r io.Reader, snippetsDirectory string) (*Policy, error) {
Backends map[string]string `json:"backends"` var p Policy
p.Networks = make(map[string][]Network)
p.Conditions = make(map[string][]string)
p.Challenges = make(map[string]Challenge)
if snippetsDirectory == "" {
err := yaml.NewDecoder(r).Decode(&p)
if err != nil {
return nil, err
}
} else {
err := yaml.NewDecoder(r, yaml.ReferenceDirs(snippetsDirectory)).Decode(&p)
if err != nil {
return nil, err
}
// add specific entries from snippets
entries, err := os.ReadDir(snippetsDirectory)
if err != nil {
return nil, err
}
for _, entry := range entries {
var entryPolicy Policy
if !entry.IsDir() {
entryData, err := os.ReadFile(path.Join(snippetsDirectory, entry.Name()))
if err != nil {
return nil, err
}
err = yaml.NewDecoder(bytes.NewReader(entryData), yaml.ReferenceDirs(snippetsDirectory)).Decode(&entryPolicy)
if err != nil {
return nil, err
}
// add networks / conditions / challenges definitions if they don't exist already
for k, v := range entryPolicy.Networks {
// add network if policy entry does not exist
_, ok := p.Networks[k]
if !ok {
p.Networks[k] = v
}
}
for k, v := range entryPolicy.Conditions {
// add condition if policy entry does not exist
_, ok := p.Conditions[k]
if !ok {
p.Conditions[k] = v
}
}
for k, v := range entryPolicy.Challenges {
// add challenge if policy entry does not exist
_, ok := p.Challenges[k]
if !ok {
p.Challenges[k] = v
}
}
}
}
}
return &p, nil
} }