331 lines
12 KiB
YAML
331 lines
12 KiB
YAML
# Example cmdline (forward requests from upstream to port :8080)
|
|
# $ go-away --bind :8080 --backend git.example.com=http://forgejo:3000 --policy examples/forgejo.yml --policy-snippets examples/snippets/ --challenge-template forgejo --challenge-template-theme forgejo-auto
|
|
|
|
|
|
|
|
# Named networks; the rules further down refer to them by name
# (see remoteAddress.network("...") in the undesired-networks rule)
networks:
  # More networks get included from snippets

  huawei-cloud:
  - asn: 136907
  alibaba-cloud:
  - asn: 45102
  zenlayer-inc:
  - asn: 21859
|
|
|
|
|
|
challenges:
  # More challenges get included from snippets

  # Checks that a cookie exists and confirms it against a backend request,
  # handing over the entire client cookie contents
  http-cookie-check:
    runtime: http
    parameters:
      http-url: http://forgejo:3000/user/stopwatches
      # http-url: http://forgejo:3000/repo/search
      # http-url: http://forgejo:3000/notifications/new
      http-method: GET
      http-cookie: i_like_gitea
      http-code: 200
      verify-probability: 0.1
|
|
|
|
conditions:
  # A condition is substituted into the rules AST wherever a rule
  # references it as ($condition-name)

  # More conditions get included from snippets

  is-static-asset:
  - 'path == "/apple-touch-icon.png"'
  - 'path == "/apple-touch-icon-precomposed.png"'
  - 'path.startsWith("/assets/")'
  - 'path.startsWith("/repo-avatars/")'
  - 'path.startsWith("/avatars/")'
  - 'path.startsWith("/avatar/")'
  - 'path.startsWith("/user/avatar/")'
  - 'path.startsWith("/attachments/")'

  is-git-path:
  - 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")'

  is-suspicious-crawler:
  # TLS Fingerprint for specific agent without ALPN
  - '(userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")) && ("ja4" in fp && fp.ja4.matches("^t[0-9a-z]+00_")) && !(userAgent.contains("compatible;") || userAgent.contains("+http") || userAgent.contains("facebookexternalhit/") || userAgent.contains("Twitterbot/"))'
  # Old engines
  - 'userAgent.contains("Presto/") || userAgent.contains("Trident/")'
  # Old IE browsers
  - 'userAgent.matches("MSIE ([2-9]|10|11)\\.")'
  # Old Linux browsers
  - 'userAgent.matches("Linux i[63]86") || userAgent.matches("FreeBSD i[63]86")'
  # Old Windows browsers
  - 'userAgent.matches("Windows (3|95|98|CE)") || userAgent.matches("Windows NT [1-5]\\.")'
  # Old mobile browsers
  - 'userAgent.matches("Android [1-5]\\.") || userAgent.matches("(iPad|iPhone) OS [1-9]_")'
  # Old generic browsers
  - 'userAgent.startsWith("Opera/")'
  # - 'userAgent.matches("Gecko/(201[0-9]|200[0-9])")'
  - 'userAgent.matches("^Mozilla/[1-4]")'

  is-heavy-resource:
  - 'path.startsWith("/explore/")'
  - 'path.matches("^/[^/]+/[^/]+/src/commit/")'
  - 'path.matches("^/[^/]+/[^/]+/compare/")'
  - 'path.matches("^/[^/]+/[^/]+/commits/commit/")'
  - 'path.matches("^/[^/]+/[^/]+/blame/")'
  - 'path.matches("^/[^/]+/[^/]+/search/")'
  - 'path.matches("^/[^/]+/[^/]+/find/")'
  - 'path.matches("^/[^/]+/[^/]+/activity")'
  - 'path.matches("^/[^/]+/[^/]+/graph$")'
  # any search with a custom query
  - '"q" in query && query.q != ""'
  # user activity tab
  - 'path.matches("^/[^/]+$") && "tab" in query && query.tab == "activity"'
|
|
|
|
|
|
# Rules are evaluated one after another, from top to bottom
rules:
# Pass anything matching is-well-known-asset; that condition is not defined
# in this file (presumably it comes from a snippet - confirm)
- name: allow-well-known-resources
  conditions:
  - '($is-well-known-asset)'
  action: pass
|
|
|
|
# Pass icons, assets, avatars and attachments (see is-static-asset)
- name: allow-static-resources
  conditions:
  - '($is-static-asset)'
  action: pass
|
|
|
|
# Pass the crawlers we want; each alias refers to an anchor that is not
# defined in this file (presumably declared in a snippet - confirm)
- name: desired-crawlers
  conditions:
  - *is-bot-googlebot
  - *is-bot-bingbot
  - *is-bot-duckduckbot
  - *is-bot-kagibot
  - *is-bot-qwantbot
  - *is-bot-yandexbot
  action: pass
|
|
|
|
# Matches private networks and localhost.
# Uncomment this rule if you want to let your own tools through this way
# - name: allow-private-networks
#   conditions:
#   # Allows localhost and private networks CIDR
#   - *is-network-localhost
#   - *is-network-private
#   action: pass
|
|
|
|
# Drop requests whose remote address falls inside one of the named networks
- name: undesired-networks
  conditions:
  - 'remoteAddress.network("huawei-cloud") || remoteAddress.network("alibaba-cloud") || remoteAddress.network("zenlayer-inc")'
  action: drop
|
|
|
|
# Drop user agents of crawlers that are not wanted at all
- name: undesired-crawlers
  conditions:
  - '($is-headless-chromium)'
  - 'userAgent.startsWith("Lightpanda/")'
  - 'userAgent.startsWith("masscan/")'
  # Typo'd opera botnet
  - 'userAgent.matches("^Opera/[0-9.]+\\.\\(")'
  # AI crawlers below: they do not respect robots.txt even though they read it
  # TikTok Bytedance AI training
  - 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider") || userAgent.contains("TikTokSpider")'
  # Meta AI training; the Meta-ExternalAgent crawler crawls the web for use
  # cases such as training AI models or improving products by indexing
  # content directly.
  - 'userAgent.contains("meta-externalagent/") || userAgent.contains("meta-externalfetcher/") || userAgent.contains("FacebookBot")'
  # Anthropic AI training and usage
  - 'userAgent.contains("ClaudeBot") || userAgent.contains("Claude-User")|| userAgent.contains("Claude-SearchBot")'
  # Common Crawl AI crawlers
  - 'userAgent.contains("CCBot")'
  # ChatGPT AI crawlers https://platform.openai.com/docs/bots
  - 'userAgent.contains("GPTBot") || userAgent.contains("OAI-SearchBot") || userAgent.contains("ChatGPT-User")'
  # Other AI crawlers
  - 'userAgent.contains("Amazonbot") || userAgent.contains("Google-Extended") || userAgent.contains("PanguBot") || userAgent.contains("AI2Bot") || userAgent.contains("Diffbot") || userAgent.contains("cohere-training-data-crawler") || userAgent.contains("Applebot-Extended")'
  # SEO / Ads and marketing
  - 'userAgent.contains("BLEXBot")'
  action: drop
|
|
|
|
# Deny clients that do not identify themselves
- name: unknown-crawlers
  conditions:
  # Empty User-Agent
  - 'userAgent == ""'
  action: deny
|
|
|
|
# Run a sequence of challenge checks for clients matching
# is-suspicious-crawler. The parent rule itself takes no action
# (action: none); the work happens in its children.
- name: suspicious-crawlers
  conditions: ['($is-suspicious-crawler)']
  action: none
  children:
  # Child names are quoted: a plain 0 / 1 / 2 resolves to an integer under
  # YAML implicit typing, while every other rule name is a string.
  - name: "0"
    action: check
    settings:
      challenges: [js-refresh, http-cookie-check]
  - name: "1"
    action: check
    settings:
      challenges: [preload-link, resource-load]
  - name: "2"
    action: check
    settings:
      challenges: [header-refresh]
|
|
|
|
# Paths that always get a challenge (sign-in, creation and settings pages)
- name: always-pow-challenge
  conditions:
  # login and sign up paths
  - 'path.startsWith("/user/sign_up")'
  - 'path.startsWith("/user/login") || path.startsWith("/user/oauth2/")'
  - 'path.startsWith("/user/activate")'
  # repo / org / mirror creation paths
  - 'path == "/repo/create" || path == "/repo/migrate" || path == "/org/create"'
  # user profile info edit paths
  - 'path == "/user/settings" || path.startsWith("/user/settings/hooks/")'
  # issue creation
  - 'path.matches("^/[^/]+/[^/]+/issues/new")'
  # Archive downloads requested by browsers rather than tools
  - 'path.matches("^/[^/]+/[^/]+/archive/.*\\.(bundle|zip|tar\\.gz)") && ($is-generic-browser)'
  action: challenge
  settings:
    challenges: [js-refresh]
|
|
|
|
# Pass git protocol traffic without challenges
- name: allow-git-operations
  conditions:
  - '($is-git-path)'
  # Covers both repository and wiki git endpoints
  - 'path.matches("^/[^/]+/[^/]+\\.git")'
  - 'path.matches("^/[^/]+/[^/]+/") && ($is-git-ua)'
  action: pass
|
|
|
|
# Pass the sitemap index and the numbered user/repo sitemap pages
- name: sitemap
  conditions:
  - 'path == "/sitemap.xml" || path.matches("^/explore/(users|repos)/sitemap-[0-9]+\\.xml$")'
  action: pass
|
|
|
|
# TODO: rss
|
|
|
|
# Pass machine-facing endpoints (API, OAuth, captcha, package registries,
# public keys, ...) without challenges
- name: api-call
  conditions:
  - 'path.startsWith("/api/v1/") || path.startsWith("/api/forgejo/v1/")'
  - 'path.startsWith("/login/oauth/")'
  - 'path.startsWith("/captcha/")'
  # Also match the bare /metrics path, the same way the /v2 and
  # /api/packages entries below cover both the prefix and the exact path
  - 'path.startsWith("/metrics/") || path == "/metrics"'
  # todo: post only
  - 'path == "/-/markup"'
  - 'path == "/user/events"'
  - 'path == "/ssh_info"'
  - 'path == "/api/healthz"'
  # actions
  - 'path.startsWith("/api/actions/") || path.startsWith("/api/actions_pipeline/")'
  # user pubkeys; both patterns are anchored at the end so that only
  # /<name>.keys and /<name>.gpg match, not longer paths containing them
  - 'path.matches("^/[^/]+\\.keys$")'
  - 'path.matches("^/[^/]+\\.gpg$")'
  # OCI packages API and package managers
  - 'path.startsWith("/api/packages/") || path == "/api/packages"'
  - 'path.startsWith("/v2/") || path == "/v2"'
  - 'path.endsWith("/branches/list") || path.endsWith("/tags/list")'
  action: pass
|
|
|
|
# Pass the summary-card images that link previews request
- name: preview-fetchers
  conditions:
  # Most previews include these summary cards at the end of the url
  - 'path.endsWith("/-/summary-card") || path.matches("^/[^/]+/[^/]+/releases/summary-card/[^/]+$")'
  # - 'userAgent.contains("facebookexternalhit/")'
  # - 'userAgent.contains("Twitterbot/")'
  action: pass
|
|
|
|
# Core pages may be loaded and embedded without challenges.
# Extended pages (links to files, tabs, ...) are not covered here, but other
# challenges might include them.
- name: homesite
  conditions:
  # Site root
  - 'path == "/"'

  # Root of any repository or user, or a single issue or pull request.
  # A generic /*/*/ match gave scrapers too many ways to trigger random
  # endpoints, so this is a negative match against the names Forgejo keeps
  # reserved and does not allow for users or orgs,
  # see https://codeberg.org/forgejo/forgejo/src/branch/forgejo/models/user/user.go#L582
  - '(path.matches("^/[^/]+/[^/]+/?$") || path.matches("^/[^/]+/[^/]+/badges/") || path.matches("^/[^/]+/[^/]+/(issues|pulls)/[0-9]+$") || (path.matches("^/[^/]+/?$") && size(query) == 0)) && !path.matches("(?i)^/(api|metrics|v2|assets|attachments|avatar|avatars|repo-avatars|captcha|login|org|repo|user|admin|devtest|explore|issues|pulls|milestones|notifications|ghost)(/|$)")'
  action: pass
|
|
|
|
# Run a sequence of challenge checks for requests matching
# is-heavy-resource. The parent rule itself takes no action
# (action: none); the work happens in its children.
- name: heavy-operations
  conditions: ['($is-heavy-resource)']
  action: none
  children:
  # Child names are quoted: a plain 0 / 1 resolves to an integer under YAML
  # implicit typing, while every other rule name is a string.
  - name: "0"
    action: check
    settings:
      challenges: [preload-link, header-refresh, js-refresh, http-cookie-check]
  - name: "1"
    action: check
    settings:
      challenges: [resource-load, js-refresh, http-cookie-check]
|
|
|
|
# Pass every source download that the browser-only archive match in an
# earlier rule did not catch
# todo: limit this as needed?
- name: source-download
  conditions:
  - 'path.matches("^/[^/]+/[^/]+/raw/branch/")'
  - 'path.matches("^/[^/]+/[^/]+/archive/")'
  - 'path.matches("^/[^/]+/[^/]+/releases/download/")'
  - 'path.matches("^/[^/]+/[^/]+/media/") && ($is-generic-browser)'
  action: pass
|
|
|
|
# Check the DNSBL and serve harder challenges on failure
# todo: make this specific to score
- name: undesired-dnsbl
  action: check
  settings:
    challenges: [dnsbl]
    # when the DNSBL challenge fails, check these additional challenges
    fail: check
    fail-settings:
      challenges: [js-refresh, http-cookie-check]
|
|
|
|
# Pass every request whose method is neither HEAD nor GET
# (PUT/DELETE/PATCH/POST in general)
- name: non-get-request
  conditions:
  - '!(method == "HEAD" || method == "GET")'
  action: pass
|
|
|
|
# Turn on fetching of OpenGraph and other tags from the backend for the
# clients matched here
- name: enable-meta-tags
  conditions:
  - 'userAgent.contains("facebookexternalhit/") || userAgent.contains("Facebot/") || userAgent.contains("Twitterbot/")'
  - '($is-generic-robot-ua)'
  - '!($is-generic-browser)'
  action: context
  settings:
    context-set:
      # Copy OpenGraph or similar <meta> tags into the reply, even when the
      # request is denied/challenged
      proxy-meta-tags: "true"
      # proxy-safe-link-tags: "true"

    # Set additional response headers
    # response-headers:
    #   X-Clacks-Overhead:
    #   - GNU Terry Pratchett
|
|
|
|
|
|
|
|
# Challenge the Lynx text browser with challenges that need no JavaScript
- name: plaintext-browser
  conditions:
  - 'userAgent.startsWith("Lynx/")'
  action: challenge
  settings:
    challenges: [http-cookie-check, meta-refresh, cookie]
|
|
|
|
# Comment out this rule to stop challenging tool-like user agents
- name: standard-tools
  conditions:
  - '($is-tool-ua)'
  - '!($is-generic-browser)'
  action: challenge
  settings:
    challenges: [cookie]
|
|
|
|
# Challenge anything that matches is-generic-browser
- name: standard-browser
  conditions:
  - '($is-generic-browser)'
  action: challenge
  settings:
    challenges: [http-cookie-check, preload-link, meta-refresh, resource-load, js-refresh, js-pow-sha256]
|
|
|
|
# If end of rules is reached, default is PASS
|