Move policy examples to examples folder, rename to forgejo
This commit is contained in:
416
examples/forgejo.yml
Normal file
416
examples/forgejo.yml
Normal file
@@ -0,0 +1,416 @@
|
||||
# Example cmdline (forward requests from upstream to port :8080)
|
||||
# $ go-away --bind :8080 --backend git.example.com:http://forgejo:3000 --policy forgejo.yml --challenge-template forgejo --challenge-template-theme forgejo-dark
|
||||
|
||||
|
||||
|
||||
# Define networks to be used later below
|
||||
networks:
|
||||
# todo: support direct ASN lookups
|
||||
# todo: cache these values
|
||||
huawei-cloud:
|
||||
# AS136907
|
||||
- url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/136907/aggregated.json
|
||||
jq-path: '.subnets.ipv4[], .subnets.ipv6[]'
|
||||
alibaba-cloud:
|
||||
# AS45102
|
||||
- url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/45102/aggregated.json
|
||||
jq-path: '.subnets.ipv4[], .subnets.ipv6[]'
|
||||
zenlayer-inc:
|
||||
# AS21859
|
||||
- url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/21859/aggregated.json
|
||||
jq-path: '.subnets.ipv4[], .subnets.ipv6[]'
|
||||
aws-cloud:
|
||||
- url: https://ip-ranges.amazonaws.com/ip-ranges.json
|
||||
jq-path: '(.prefixes[] | select(has("ip_prefix")) | .ip_prefix), (.prefixes[] | select(has("ipv6_prefix")) | .ipv6_prefix)'
|
||||
google-cloud:
|
||||
- url: https://www.gstatic.com/ipranges/cloud.json
|
||||
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
|
||||
oracle-cloud:
|
||||
- url: https://docs.oracle.com/en-us/iaas/tools/public_ip_ranges.json
|
||||
jq-path: '.regions[] | .cidrs[] | .cidr'
|
||||
azure-cloud:
|
||||
# todo: https://www.microsoft.com/en-us/download/details.aspx?id=56519 does not provide direct JSON
|
||||
- url: https://raw.githubusercontent.com/femueller/cloud-ip-ranges/refs/heads/master/microsoft-azure-ip-ranges.json
|
||||
jq-path: '.values[] | .properties.addressPrefixes[]'
|
||||
|
||||
digitalocean:
|
||||
- url: https://www.digitalocean.com/geo/google.csv
|
||||
regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
|
||||
linode:
|
||||
- url: https://geoip.linode.com/
|
||||
regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
|
||||
vultr:
|
||||
- url: "https://geofeed.constant.com/?json"
|
||||
jq-path: '.subnets[] | .ip_prefix'
|
||||
cloudflare:
|
||||
- url: https://www.cloudflare.com/ips-v4
|
||||
regex: "(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+/[0-9]+)"
|
||||
- url: https://www.cloudflare.com/ips-v6
|
||||
regex: "(?P<prefix>[0-9a-f:]+::/[0-9]+)"
|
||||
|
||||
icloud-private-relay:
|
||||
- url: https://mask-api.icloud.com/egress-ip-ranges.csv
|
||||
regex: "(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+),"
|
||||
tunnelbroker-relay:
|
||||
# HE Tunnelbroker
|
||||
- url: https://tunnelbroker.net/export/google
|
||||
regex: "(?P<prefix>([0-9a-f:]+::)/[0-9]+),"
|
||||
|
||||
|
||||
googlebot:
|
||||
- url: https://developers.google.com/static/search/apis/ipranges/googlebot.json
|
||||
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
|
||||
bingbot:
|
||||
- url: https://www.bing.com/toolbox/bingbot.json
|
||||
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
|
||||
qwantbot:
|
||||
- url: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
|
||||
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
|
||||
duckduckbot:
|
||||
- url: https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/
|
||||
regex: "<li>(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)</li>"
|
||||
yandexbot:
|
||||
# todo: detected as bot
|
||||
# - url: https://yandex.com/ips
|
||||
# regex: "<span>(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+)[ \\\\t]*</span><br/>"
|
||||
- prefixes:
|
||||
- "5.45.192.0/18"
|
||||
- "5.255.192.0/18"
|
||||
- "37.9.64.0/18"
|
||||
- "37.140.128.0/18"
|
||||
- "77.88.0.0/18"
|
||||
- "84.252.160.0/19"
|
||||
- "87.250.224.0/19"
|
||||
- "90.156.176.0/22"
|
||||
- "93.158.128.0/18"
|
||||
- "95.108.128.0/17"
|
||||
- "141.8.128.0/18"
|
||||
- "178.154.128.0/18"
|
||||
- "185.32.187.0/24"
|
||||
- "2a02:6b8::/29"
|
||||
kagibot:
|
||||
- url: https://kagi.com/bot
|
||||
regex: "\\n(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+) "
|
||||
|
||||
|
||||
# todo: define interface
|
||||
challenges:
|
||||
js-pow-sha256:
|
||||
# Asset must be under challenges/{name}/static/{asset}
|
||||
# Other files here will be available under that path
|
||||
mode: js
|
||||
asset: load.mjs
|
||||
parameters:
|
||||
difficulty: 20
|
||||
runtime:
|
||||
mode: wasm
|
||||
# Verify must be under challenges/{name}/runtime/{asset}
|
||||
asset: runtime.wasm
|
||||
probability: 0.02
|
||||
|
||||
# Challenges with a cookie, self redirect (non-JS, requires HTTP parsing)
|
||||
self-cookie:
|
||||
mode: "cookie"
|
||||
|
||||
# Challenges with a redirect via header (non-JS, requires HTTP parsing and logic)
|
||||
self-header-refresh:
|
||||
mode: "header-refresh"
|
||||
runtime:
|
||||
# verifies that result = key
|
||||
mode: "key"
|
||||
probability: 0.1
|
||||
|
||||
# Challenges with a redirect via meta (non-JS, requires HTML parsing and logic)
|
||||
self-meta-refresh:
|
||||
mode: "meta-refresh"
|
||||
runtime:
|
||||
# verifies that result = key
|
||||
mode: "key"
|
||||
probability: 0.1
|
||||
|
||||
# Challenges with loading a random CSS or image document (non-JS, requires HTML parsing and logic)
|
||||
self-resource-load:
|
||||
mode: "resource-load"
|
||||
runtime:
|
||||
# verifies that result = key
|
||||
mode: "key"
|
||||
probability: 0.1
|
||||
parameters:
|
||||
key-code: 200
|
||||
key-mime: text/css
|
||||
key-content: ""
|
||||
|
||||
|
||||
# Verifies the existence of a cookie and confirms it against some backend request, passing the entire client cookie contents
|
||||
http-cookie-check:
|
||||
mode: http
|
||||
url: http://forgejo:3000/user/stopwatches
|
||||
# url: http://forgejo:3000/repo/search
|
||||
# url: http://forgejo:3000/notifications/new
|
||||
parameters:
|
||||
http-method: GET
|
||||
http-cookie: gammaspectra_session
|
||||
http-code: 200
|
||||
# todo: archive value of session within token to bind it
|
||||
|
||||
conditions:
|
||||
# Conditions will get replaced on rules AST when found as ($condition-name)
|
||||
# Checks to detect a headless chromium via headers only
|
||||
is-headless-chromium:
|
||||
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
|
||||
- '"Sec-Ch-Ua" in headers && (headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium"))'
|
||||
#- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (!("Accept-Language" in headers) || !("Accept-Encoding" in headers))'
|
||||
|
||||
is-generic-browser:
|
||||
- 'userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")'
|
||||
|
||||
is-well-known-asset:
|
||||
- 'path == "/robots.txt"'
|
||||
- 'path.startsWith("/.well-known")'
|
||||
|
||||
is-static-asset:
|
||||
- 'path == "/favicon.ico"'
|
||||
- 'path == "/apple-touch-icon.png"'
|
||||
- 'path == "/apple-touch-icon-precomposed.png"'
|
||||
- 'path.startsWith("/assets/")'
|
||||
- 'path.startsWith("/repo-avatars/")'
|
||||
- 'path.startsWith("/avatars/")'
|
||||
- 'path.startsWith("/avatar/")'
|
||||
- 'path.startsWith("/attachments/")'
|
||||
|
||||
is-git-ua:
|
||||
- 'userAgent.startsWith("git/")'
|
||||
- 'userAgent.startsWith("go-git")'
|
||||
- 'userAgent.startsWith("JGit/") || userAgent.startsWith("JGit-")'
|
||||
# Golang proxy and initial fetch
|
||||
- 'userAgent.startsWith("GoModuleMirror/")'
|
||||
- 'userAgent.startsWith("Go-http-client/") && "go-get" in query && query["go-get"] == "1" && (path.matches("^/[^/]+/[^/]+$") || path.matches("^/[^/]+/[^/]+/v[0-9]+$"))'
|
||||
is-git-path:
|
||||
- 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")'
|
||||
|
||||
is-generic-robot-ua:
|
||||
- 'userAgent.contains("compatible;") && !userAgent.contains("Trident/")'
|
||||
- 'userAgent.matches("\\+https?://")'
|
||||
- 'userAgent.matches("[bB]ot/[0-9]")'
|
||||
|
||||
is-tool-ua:
|
||||
- 'userAgent.startsWith("python-requests/")'
|
||||
- 'userAgent.startsWith("Python-urllib/")'
|
||||
- 'userAgent.startsWith("python-httpx/")'
|
||||
- 'userAgent.contains("aoihttp/")'
|
||||
- 'userAgent.startsWith("http.rb/")'
|
||||
- 'userAgent.startsWith("curl/")'
|
||||
- 'userAgent.startsWith("Wget/")'
|
||||
- 'userAgent.startsWith("libcurl/")'
|
||||
- 'userAgent.startsWith("okhttp/")'
|
||||
- 'userAgent.startsWith("Java/")'
|
||||
- 'userAgent.startsWith("Apache-HttpClient//")'
|
||||
- 'userAgent.startsWith("Go-http-client/")'
|
||||
- 'userAgent.startsWith("node-fetch/")'
|
||||
- 'userAgent.startsWith("reqwest/")'
|
||||
|
||||
is-suspicious-crawler:
|
||||
- 'userAgent.contains("Presto/") || userAgent.contains("Trident/")'
|
||||
# Old IE browsers
|
||||
- 'userAgent.matches("MSIE ([2-9]|10|11)\\.")'
|
||||
# Old Linux browsers
|
||||
- 'userAgent.contains("Linux i686")'
|
||||
# Old Windows browsers
|
||||
- 'userAgent.matches("Windows (3|95|98|CE)") || userAgent.matches("Windows NT [1-5]\\.")'
|
||||
# Old mobile browsers
|
||||
- 'userAgent.matches("Android [1-5]\\.") || userAgent.matches("(iPad|iPhone) OS [1-9]_")'
|
||||
# Old generic browsers
|
||||
- 'userAgent.startsWith("Opera/")'
|
||||
#- 'userAgent.matches("Gecko/(201[0-9]|200[0-9])")'
|
||||
- 'userAgent.matches("^Mozilla/[1-4]")'
|
||||
is-heavy-resource:
|
||||
- 'path.startsWith("/explore/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/src/commit/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/compare/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/commits/commit/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/blame/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/search/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/find/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/activity")'
|
||||
# any search with a custom query
|
||||
- '"q" in query && query.q != ""'
|
||||
# user activity tab
|
||||
- 'path.matches("^/[^/]") && "tab" in query && query.tab == "activity"'
|
||||
|
||||
# Rules and conditions are served this environment
|
||||
# remoteAddress (net.IP) - Connecting client remote address from headers or properties
|
||||
# host (string) - HTTP Host
|
||||
# method (string) - HTTP Method/Verb
|
||||
# userAgent (string) - HTTP User-Agent header
|
||||
# path (string) - HTTP request Path
|
||||
# query (map[string]string) - HTTP request Query arguments
|
||||
# headers (map[string]string) - HTTP request headers
|
||||
#
|
||||
# Additionally these functions are available
|
||||
# inNetwork(networkName string, address net.IP) bool
|
||||
# inNetwork(networkCIDR string, address net.IP) bool
|
||||
rules:
|
||||
- name: allow-well-known-resources
|
||||
conditions:
|
||||
- '($is-well-known-asset)'
|
||||
action: pass
|
||||
|
||||
- name: undesired-networks
|
||||
conditions:
|
||||
- 'inNetwork("huawei-cloud", remoteAddress) || inNetwork("alibaba-cloud", remoteAddress) || inNetwork("zenlayer-inc", remoteAddress)'
|
||||
action: poison
|
||||
|
||||
- name: undesired-crawlers
|
||||
conditions:
|
||||
- '($is-headless-chromium)'
|
||||
- 'userAgent.startsWith("Lightpanda/")'
|
||||
- 'userAgent.startsWith("masscan/")'
|
||||
# Typo'd opera botnet
|
||||
- 'userAgent.matches("^Opera/[0-9.]+\\.\\(")'
|
||||
# AI bullshit stuff, they do not respect robots.txt even while they read it
|
||||
# TikTok Bytedance AI training
|
||||
- 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider")'
|
||||
# Meta AI training; The Meta-ExternalAgent crawler crawls the web for use cases such as training AI models or improving products by indexing content directly.
|
||||
- 'userAgent.contains("meta-externalagent/") || userAgent.contains("meta-externalfetcher/") || userAgent.contains("FacebookBot")'
|
||||
# Anthropic AI training and usage
|
||||
- 'userAgent.contains("ClaudeBot") || userAgent.contains("Claude-User")|| userAgent.contains("Claude-SearchBot")'
|
||||
# Common Crawl AI crawlers
|
||||
- 'userAgent.contains("CCBot")'
|
||||
# ChatGPT AI crawlers https://platform.openai.com/docs/bots
|
||||
- 'userAgent.contains("GPTBot") || userAgent.contains("OAI-SearchBot") || userAgent.contains("ChatGPT-User")'
|
||||
# Other AI crawlers
|
||||
- 'userAgent.contains("Amazonbot") || userAgent.contains("Google-Extended") || userAgent.contains("PanguBot") || userAgent.contains("AI2Bot") || userAgent.contains("Diffbot") || userAgent.contains("cohere-training-data-crawler") || userAgent.contains("Applebot-Extended")'
|
||||
action: poison
|
||||
|
||||
- name: unknown-crawlers
|
||||
conditions:
|
||||
# No user agent set
|
||||
- 'userAgent == ""'
|
||||
action: deny
|
||||
|
||||
# check a sequence of challenges for non logged in
|
||||
- name: suspicious-crawlers/0
|
||||
conditions: ['($is-suspicious-crawler)']
|
||||
action: check
|
||||
challenges: [js-pow-sha256, http-cookie-check]
|
||||
- name: suspicious-crawlers/1
|
||||
conditions: ['($is-suspicious-crawler)']
|
||||
action: check
|
||||
challenges: [self-header-refresh]
|
||||
- name: suspicious-crawlers/2
|
||||
conditions: ['($is-suspicious-crawler)']
|
||||
action: check
|
||||
challenges: [self-resource-load]
|
||||
|
||||
- name: allow-static-resources
|
||||
conditions:
|
||||
- '($is-static-asset)'
|
||||
action: pass
|
||||
|
||||
- name: always-pow-challenge
|
||||
conditions:
|
||||
# login paths
|
||||
- 'path.startsWith("/user/sign_up") || path.startsWith("/user/login") || path.startsWith("/user/oauth2/")'
|
||||
# repo / org / mirror creation paths
|
||||
- 'path == "/repo/create" || path == "/repo/migrate" || path == "/org/create"'
|
||||
# user profile info edit paths
|
||||
- 'path == "/user/settings" || path.startsWith("/user/settings/hooks/")'
|
||||
# issue creation
|
||||
- 'path.matches("^/[^/]+/[^/]+/issues/new")'
|
||||
# Match archive downloads from browsers and not tools
|
||||
- 'path.matches("^/[^/]+/[^/]+/archive/.*\\.(bundle|zip|tar\\.gz)") && ($is-generic-browser)'
|
||||
action: challenge
|
||||
challenges: [ js-pow-sha256 ]
|
||||
|
||||
- name: allow-git-operations
|
||||
conditions:
|
||||
- '($is-git-path)'
|
||||
- 'path.matches("^/[^/]+/[^/]+\\.git")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/") && ($is-git-ua)'
|
||||
action: pass
|
||||
|
||||
- name: sitemap
|
||||
conditions:
|
||||
- 'path == "/sitemap.xml" || path.matches("^/explore/(users|repos)/sitemap-[0-9]+\\.xml$")'
|
||||
action: pass
|
||||
|
||||
# TODO: rss
|
||||
|
||||
- name: api-call
|
||||
conditions:
|
||||
- 'path.startsWith("/api/v1/") || path.startsWith("/api/forgejo/v1/")'
|
||||
- 'path.startsWith("/login/oauth/")'
|
||||
- 'path.startsWith("/captcha/")'
|
||||
- 'path.startsWith("/metrics/")'
|
||||
# todo: post only
|
||||
- 'path == "/-/markup"'
|
||||
- 'path == "/user/events"'
|
||||
- 'path == "/ssh_info"'
|
||||
- 'path == "/api/healthz"'
|
||||
# user pubkeys
|
||||
- 'path.matches("^/[^/]+\\.keys$")'
|
||||
- 'path.matches("^/[^/]+\\.gpg")'
|
||||
action: pass
|
||||
|
||||
- name: preview-fetchers
|
||||
conditions:
|
||||
- 'path.endsWith("/-/summary-card")'
|
||||
#- 'userAgent.contains("facebookexternalhit/")'
|
||||
- 'userAgent.contains("Twitterbot/")'
|
||||
- '"X-Purpose" in headers && headers["X-Purpose"] == "preview"'
|
||||
action: pass
|
||||
|
||||
- name: desired-crawlers
|
||||
conditions:
|
||||
- 'userAgent.contains("+https://kagi.com/bot") && inNetwork("kagibot", remoteAddress)'
|
||||
- '(userAgent.contains("+http://www.google.com/bot.html") || userAgent.contains("Google-InspectionTool")) && inNetwork("googlebot", remoteAddress)'
|
||||
- 'userAgent.contains("+http://www.bing.com/bingbot.htm") && inNetwork("bingbot", remoteAddress)'
|
||||
- 'userAgent.contains("+http://duckduckgo.com/duckduckbot.html") && inNetwork("duckduckbot", remoteAddress)'
|
||||
- 'userAgent.contains("+https://help.qwant.com/bot/") && inNetwork("qwantbot", remoteAddress)'
|
||||
- 'userAgent.contains("+http://yandex.com/bots") && inNetwork("yandexbot", remoteAddress)'
|
||||
action: pass
|
||||
|
||||
- name: homesite
|
||||
conditions:
|
||||
- 'path == "/"'
|
||||
# generic /*/*/ match gave too many options for scrapers to trigger random endpoints
|
||||
- 'path.matches("(?i)^/(WeebDataHoarder|P2Pool|mirror|git|S\\.O\\.N\\.G|FM10K|Sillycom|pwgen2155|kaitou|metonym)/[^/]+$")'
|
||||
action: pass
|
||||
|
||||
- name: suspicious-fetchers
|
||||
action: challenge
|
||||
challenges: [js-pow-sha256, http-cookie-check]
|
||||
conditions:
|
||||
- 'userAgent.contains("facebookexternalhit/") || userAgent.contains("facebookcatalog/")'
|
||||
|
||||
# check a sequence of challenges
|
||||
- name: heavy-operations/0
|
||||
action: check
|
||||
challenges: [self-header-refresh, js-pow-sha256, http-cookie-check]
|
||||
conditions: ['($is-heavy-resource)']
|
||||
- name: heavy-operations/1
|
||||
action: check
|
||||
challenges: [self-resource-load, js-pow-sha256, http-cookie-check]
|
||||
conditions: ['($is-heavy-resource)']
|
||||
|
||||
# Allow all source downloads not caught in browser above
|
||||
# todo: limit this as needed?
|
||||
- name: source-download
|
||||
conditions:
|
||||
- 'path.matches("^/[^/]+/[^/]+/raw/branch/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/archive/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/media/")'
|
||||
- 'path.matches("^/[^/]+/[^/]+/releases/download/")'
|
||||
action: pass
|
||||
|
||||
# Allow PUT/DELETE/PATCH/POST requests in general
|
||||
- name: non-get-request
|
||||
action: pass
|
||||
conditions:
|
||||
- '!(method == "HEAD" || method == "GET")'
|
||||
|
||||
- name: standard-browser
|
||||
action: challenge
|
||||
challenges: [http-cookie-check, self-meta-refresh, self-resource-load, js-pow-sha256]
|
||||
conditions:
|
||||
- '($is-generic-browser)'
|
Reference in New Issue
Block a user