Implement policy snippets

This commit is contained in:
WeebDataHoarder
2025-04-23 17:16:26 +02:00
parent d83fe3653a
commit 3b11792594
18 changed files with 343 additions and 349 deletions

View File

@@ -1,12 +1,12 @@
# Example cmdline (forward requests from upstream to port :8080)
# $ go-away --bind :8080 --backend git.example.com=http://forgejo:3000 --policy examples/forgejo.yml --challenge-template forgejo --challenge-template-theme forgejo-auto
# $ go-away --bind :8080 --backend git.example.com=http://forgejo:3000 --policy examples/forgejo.yml --policy-snippets examples/snippets/ --challenge-template forgejo --challenge-template-theme forgejo-auto
# Define networks to be used later below
networks:
# todo: support direct ASN lookups
# todo: cache these values
# Networks will get included from snippets
huawei-cloud:
# AS136907
- url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/136907/aggregated.json
@@ -19,123 +19,10 @@ networks:
# AS21859
- url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/21859/aggregated.json
jq-path: '.subnets.ipv4[], .subnets.ipv6[]'
# aws-cloud:
# - url: https://ip-ranges.amazonaws.com/ip-ranges.json
# jq-path: '(.prefixes[] | select(has("ip_prefix")) | .ip_prefix), (.prefixes[] | select(has("ipv6_prefix")) | .ipv6_prefix)'
# google-cloud:
# - url: https://www.gstatic.com/ipranges/cloud.json
# jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
# oracle-cloud:
# - url: https://docs.oracle.com/en-us/iaas/tools/public_ip_ranges.json
# jq-path: '.regions[] | .cidrs[] | .cidr'
# azure-cloud:
# # todo: https://www.microsoft.com/en-us/download/details.aspx?id=56519 does not provide direct JSON
# - url: https://raw.githubusercontent.com/femueller/cloud-ip-ranges/refs/heads/master/microsoft-azure-ip-ranges.json
# jq-path: '.values[] | .properties.addressPrefixes[]'
#
# digitalocean:
# - url: https://www.digitalocean.com/geo/google.csv
# regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
# linode:
# - url: https://geoip.linode.com/
# regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
# vultr:
# - url: "https://geofeed.constant.com/?json"
# jq-path: '.subnets[] | .ip_prefix'
# cloudflare:
# - url: https://www.cloudflare.com/ips-v4
# regex: "(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+/[0-9]+)"
# - url: https://www.cloudflare.com/ips-v6
# regex: "(?P<prefix>[0-9a-f:]+::/[0-9]+)"
#
# icloud-private-relay:
# - url: https://mask-api.icloud.com/egress-ip-ranges.csv
# regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
# tunnelbroker-relay:
# # HE Tunnelbroker
# - url: https://tunnelbroker.net/export/google
# regex: "(?P<prefix>([0-9a-f:]+::)/[0-9]+),"
# Googlebot address ranges, loaded from a remote JSON document.
# jq-path emits every ipv4Prefix and ipv6Prefix value found under .prefixes[].
googlebot:
- url: https://developers.google.com/static/search/apis/ipranges/googlebot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
# Bingbot address ranges, loaded from a remote JSON document.
# jq-path emits every ipv4Prefix and ipv6Prefix value found under .prefixes[].
bingbot:
- url: https://www.bing.com/toolbox/bingbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
# Qwantbot address ranges, loaded from a remote JSON document.
# jq-path emits every ipv4Prefix and ipv6Prefix value found under .prefixes[].
qwantbot:
- url: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
# DuckDuckBot addresses, scraped from an HTML help page.
# The regex captures dotted-quad IPv4 addresses wrapped in <li><div>…</div></li>;
# no prefix length and no IPv6 form is captured.
# NOTE(review): presumably a bare address is treated as a single host — confirm.
duckduckbot:
- url: https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
regex: "<li><div>(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)</div></li>"
# YandexBot address ranges, given as a static list of prefixes.
# The URL-based source below is disabled (see the todo) and kept for reference.
yandexbot:
# todo: detected as bot
# - url: https://yandex.com/ips
# regex: "<span>(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+)[ \\\\t]*</span><br/>"
- prefixes:
- "5.45.192.0/18"
- "5.255.192.0/18"
- "37.9.64.0/18"
- "37.140.128.0/18"
- "77.88.0.0/18"
- "84.252.160.0/19"
- "87.250.224.0/19"
- "90.156.176.0/22"
- "93.158.128.0/18"
- "95.108.128.0/17"
- "141.8.128.0/18"
- "178.154.128.0/18"
- "185.32.187.0/24"
- "2a02:6b8::/29"
# Kagi bot addresses, scraped from the bot information page.
# The regex captures a dotted-quad IPv4 address that follows a newline and is
# followed by a space; no prefix length is captured.
kagibot:
- url: https://kagi.com/bot
regex: "\\n(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+) "
challenges:
# Challenge on the "js" runtime: serves the load.mjs loader and runs runtime.wasm.
js-pow-sha256:
runtime: js
parameters:
# specifies the folder path that assets are under
# can be either embedded or external path
# defaults to name of challenge
path: "js-pow-sha256"
# needs to be under static folder
js-loader: load.mjs
# needs to be under runtime folder
wasm-runtime: runtime.wasm
# Settings handed to the WASM runtime
wasm-runtime-settings:
difficulty: 20
# NOTE(review): presumably the fraction of requests whose solution is re-verified — confirm
verify-probability: 0.02
# Cookie-based challenge with a self redirect (non-JS, requires HTTP parsing)
self-cookie:
runtime: "cookie"
# Challenges with a redirect via Link header with rel=preload and early hints (non-JS, requires HTTP parsing, fetching and logic)
# Works on HTTP/2 and above!
self-preload-link:
# Applies only when the Sec-Fetch-Mode header is present and equals "navigate"
condition: '"Sec-Fetch-Mode" in headers && headers["Sec-Fetch-Mode"] == "navigate"'
runtime: "preload-link"
parameters:
# NOTE(review): presumably the time allowed for the client to act on the early hint — confirm
preload-early-hint-deadline: 3s
# Challenges with a redirect via the Refresh HTTP header (non-JS, requires HTTP parsing and logic)
# Uses the same "refresh" runtime as self-meta-refresh; only refresh-via differs.
self-header-refresh:
runtime: "refresh"
parameters:
refresh-via: "header"
# Challenges with a redirect via a Refresh meta tag (non-JS, requires HTML parsing and logic)
# Uses the same "refresh" runtime as self-header-refresh; only refresh-via differs.
self-meta-refresh:
runtime: "refresh"
parameters:
refresh-via: "meta"
# Challenges by requiring the client to load a random CSS or image document (non-JS, requires HTML parsing and logic)
self-resource-load:
runtime: "resource-load"
# Challenges will get included from snippets
# Verifies the existence of a cookie and confirms it against some backend request, passing the entire client cookie contents
http-cookie-check:
@@ -149,29 +36,12 @@ challenges:
http-code: 200
verify-probability: 0.1
# Challenge on the "dnsbl" runtime.
# NOTE(review): dnsbl-decay and dnsbl-timeout presumably set how long a lookup
# result is remembered and how long a lookup may take — confirm against the runtime.
dnsbl:
runtime: dnsbl
parameters:
dnsbl-decay: 1h
dnsbl-timeout: 1s
conditions:
# Conditions will get replaced on rules AST when found as ($condition-name)
# Checks to detect a headless chromium via headers only
is-headless-chromium:
# User-Agent announces a headless build
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
# Sec-Ch-Ua client hint, when present, announces a headless build
- '"Sec-Ch-Ua" in headers && (headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium"))'
# Disabled: Chrome/Chromium UA that lacks Accept-Language or Accept-Encoding
#- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (!("Accept-Language" in headers) || !("Accept-Encoding" in headers))'
# User-Agent starts with the "Mozilla/" or "Opera/" product token
is-generic-browser:
- 'userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")'
# Request path is exactly /robots.txt or begins with /.well-known
is-well-known-asset:
- 'path == "/robots.txt"'
- 'path.startsWith("/.well-known")'
# Conditions will get included from snippets
is-static-asset:
- 'path == "/favicon.ico"'
- 'path == "/apple-touch-icon.png"'
- 'path == "/apple-touch-icon-precomposed.png"'
- 'path.startsWith("/assets/")'
@@ -181,39 +51,9 @@ conditions:
- 'path.startsWith("/user/avatar/")'
- 'path.startsWith("/attachments/")'
# Identifies git clients and Go module tooling by User-Agent, query or header
is-git-ua:
# git command line client and libgit-based clients
- 'userAgent.startsWith("git/") || userAgent.contains("libgit")'
- 'userAgent.startsWith("go-git")'
- 'userAgent.startsWith("JGit/") || userAgent.startsWith("JGit-")'
# Go module mirror, and Go HTTP client requests carrying the go-get=1 query
- 'userAgent.startsWith("GoModuleMirror/")'
- 'userAgent.startsWith("Go-http-client/") && "go-get" in query && query["go-get"] == "1"'
# Any client sending the Git-Protocol header with value version=2
- '"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"'
# Paths of the form /<segment>/<segment>/ followed by a git endpoint:
# git-upload-pack, git-receive-pack, HEAD, info/refs, info/lfs or objects.
# The pattern is anchored at the start only, so longer paths also match.
is-git-path:
- 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")'
# Heuristics for User-Agent strings that look like crawlers
is-generic-robot-ua:
# Contains "compatible;" or "compatible)" and does not contain "Trident/"
# NOTE(review): the Trident/ exclusion presumably spares Internet Explorer — confirm
- 'userAgent.matches("compatible[;)]") && !userAgent.contains("Trident/")'
# Contains a "+http://" or "+https://" URL marker
- 'userAgent.matches("\\+https?://")'
# Contains an "@" character (presumably a contact e-mail address)
- 'userAgent.contains("@")'
# Contains "bot/" or "Bot/" immediately followed by a digit
- 'userAgent.matches("[bB]ot/[0-9]")'
# Identifies common HTTP client libraries and command-line tools by User-Agent.
# Every entry is a prefix match on the product token, except aiohttp.
is-tool-ua:
- 'userAgent.startsWith("python-requests/")'
- 'userAgent.startsWith("Python-urllib/")'
- 'userAgent.startsWith("python-httpx/")'
# aiohttp's default User-Agent is "Python/<ver> aiohttp/<ver>", so the token
# is not at the start of the string and has to be matched anywhere.
- 'userAgent.contains("aiohttp/")'
- 'userAgent.startsWith("http.rb/")'
- 'userAgent.startsWith("curl/")'
- 'userAgent.startsWith("Wget/")'
- 'userAgent.startsWith("libcurl/")'
- 'userAgent.startsWith("okhttp/")'
- 'userAgent.startsWith("Java/")'
# Apache HttpClient sends "Apache-HttpClient/<ver> (Java/<ver>)": one slash only
- 'userAgent.startsWith("Apache-HttpClient/")'
- 'userAgent.startsWith("Go-http-client/")'
- 'userAgent.startsWith("node-fetch/")'
- 'userAgent.startsWith("reqwest/")'
is-suspicious-crawler:
# TLS Fingerprint for specific agent without ALPN
- '(userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")) && ("ja4" in fp && fp.ja4.matches("^t[0-9a-z]+00_"))'
@@ -305,11 +145,11 @@ rules:
- name: 1
action: check
settings:
challenges: [self-preload-link, self-resource-load]
challenges: [preload-link, resource-load]
- name: 2
action: check
settings:
challenges: [self-header-refresh]
challenges: [header-refresh]
- name: always-pow-challenge
conditions:
@@ -388,12 +228,12 @@ rules:
- name: desired-crawlers
conditions:
- 'userAgent.contains("+https://kagi.com/bot") && remoteAddress.network("kagibot")'
- '(userAgent.contains("+http://www.google.com/bot.html") || userAgent.contains("Google-PageRenderer") || userAgent.contains("Google-InspectionTool") || userAgent.contains("Googlebot")) && remoteAddress.network("googlebot")'
- 'userAgent.contains("+http://www.bing.com/bingbot.htm") && remoteAddress.network("bingbot")'
- 'userAgent.contains("+http://duckduckgo.com/duckduckbot.html") && remoteAddress.network("duckduckbot")'
- 'userAgent.contains("+https://help.qwant.com/bot/") && remoteAddress.network("qwantbot")'
- 'userAgent.contains("+http://yandex.com/bots") && remoteAddress.network("yandexbot")'
- *is-bot-googlebot
- *is-bot-bingbot
- *is-bot-duckduckbot
- *is-bot-kagibot
- *is-bot-qwantbot
- *is-bot-yandexbot
action: pass
# check a sequence of challenges
@@ -404,16 +244,16 @@ rules:
- name: 0
action: check
settings:
challenges: [self-preload-link, self-header-refresh, js-pow-sha256, http-cookie-check]
challenges: [preload-link, header-refresh, js-pow-sha256, http-cookie-check]
- name: 1
action: check
settings:
challenges: [ self-resource-load, js-pow-sha256, http-cookie-check ]
challenges: [ resource-load, js-pow-sha256, http-cookie-check ]
- name: standard-bots
action: check
settings:
challenges: [self-meta-refresh, self-resource-load]
challenges: [meta-refresh, resource-load]
conditions:
- '($is-generic-robot-ua)'
@@ -454,14 +294,14 @@ rules:
- name: plaintext-browser
action: challenge
settings:
challenges: [http-cookie-check, self-meta-refresh, self-cookie]
challenges: [http-cookie-check, meta-refresh, cookie]
conditions:
- 'userAgent.startsWith("Lynx/")'
- name: standard-tools
action: challenge
settings:
challenges: [self-cookie]
challenges: [cookie]
conditions:
- '($is-tool-ua)'
- '!($is-generic-browser)'
@@ -469,6 +309,6 @@ rules:
- name: standard-browser
action: challenge
settings:
challenges: [http-cookie-check, self-preload-link, self-meta-refresh, self-resource-load, js-pow-sha256]
challenges: [http-cookie-check, preload-link, meta-refresh, resource-load, js-pow-sha256]
conditions:
- '($is-generic-browser)'