# Example cmdline (listen on port :8080 and forward requests to the backend)
# $ go-away --bind :8080 --backend git.example.com=http://forgejo:3000 --policy examples/forgejo.yml --policy-snippets examples/snippets/ --challenge-template forgejo --challenge-template-theme forgejo-auto

# Define networks to be referenced in rules below
networks:
  # Networks will get included from snippets
  huawei-cloud:
    - asn: 136907
  alibaba-cloud:
    - asn: 45102
  zenlayer-inc:
    - asn: 21859

challenges:
  # Challenges will get included from snippets

  # Verifies the existence of a cookie and confirms it against some backend request, passing the entire client cookie contents
  http-cookie-check:
    runtime: http
    parameters:
      http-url: http://forgejo:3000/user/stopwatches
      # http-url: http://forgejo:3000/repo/search
      # http-url: http://forgejo:3000/notifications/new
      http-method: GET
      http-cookie: i_like_gitea
      http-code: 200
      verify-probability: 0.1

conditions:
  # Conditions get substituted into the rules AST when referenced as ($condition-name)
  # Conditions will get included from snippets
  is-static-asset:
    - 'path == "/apple-touch-icon.png"'
    - 'path == "/apple-touch-icon-precomposed.png"'
    - 'path.startsWith("/assets/")'
    - 'path.startsWith("/repo-avatars/")'
    - 'path.startsWith("/avatars/")'
    - 'path.startsWith("/avatar/")'
    - 'path.startsWith("/user/avatar/")'
    - 'path.startsWith("/attachments/")'
  is-git-path:
    - 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")'
  is-suspicious-crawler:
    # TLS fingerprint for specific agents without ALPN
    - '(userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")) && ("ja4" in fp && fp.ja4.matches("^t[0-9a-z]+00_")) && !(userAgent.contains("compatible;") || userAgent.contains("+http") || userAgent.contains("facebookexternalhit/") || userAgent.contains("Twitterbot/"))'
    # Old engines
    - 'userAgent.contains("Presto/") || userAgent.contains("Trident/")'
    # Old IE browsers
    - 'userAgent.matches("MSIE ([2-9]|10|11)\\.")'
    # Old Linux browsers
    - 'userAgent.matches("Linux i[63]86") || userAgent.matches("FreeBSD i[63]86")'
    # Old Windows browsers
    - 'userAgent.matches("Windows (3|95|98|CE)") || userAgent.matches("Windows NT [1-5]\\.")'
    # Old mobile browsers
    - 'userAgent.matches("Android [1-5]\\.") || userAgent.matches("(iPad|iPhone) OS [1-9]_")'
    # Old generic browsers
    - 'userAgent.startsWith("Opera/")'
    #- 'userAgent.matches("Gecko/(201[0-9]|200[0-9])")'
    - 'userAgent.matches("^Mozilla/[1-4]")'
  is-heavy-resource:
    - 'path.startsWith("/explore/")'
    - 'path.matches("^/[^/]+/[^/]+/src/commit/")'
    - 'path.matches("^/[^/]+/[^/]+/compare/")'
    - 'path.matches("^/[^/]+/[^/]+/commits/commit/")'
    - 'path.matches("^/[^/]+/[^/]+/blame/")'
    - 'path.matches("^/[^/]+/[^/]+/search/")'
    - 'path.matches("^/[^/]+/[^/]+/find/")'
    - 'path.matches("^/[^/]+/[^/]+/activity")'
    - 'path.matches("^/[^/]+/[^/]+/graph$")'
    # any search with a custom query
    - '"q" in query && query.q != ""'
    # user activity tab
    - 'path.matches("^/[^/]+$") && "tab" in query && query.tab == "activity"'

# Rules are checked sequentially in order, from top to bottom
rules:
  - name: allow-well-known-resources
    conditions:
      - '($is-well-known-asset)'
    action: pass

  - name: allow-static-resources
    conditions:
      - '($is-static-asset)'
    action: pass

  - name: desired-crawlers
    conditions:
      - *is-bot-googlebot
      - *is-bot-bingbot
      - *is-bot-duckduckbot
      - *is-bot-kagibot
      - *is-bot-qwantbot
      - *is-bot-yandexbot
    action: pass
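  # Example (commented out): the same rule shape also works for allowing your
  # own tooling by user agent. The agent string below is hypothetical;
  # substitute whatever your tools actually send.
  # - name: allow-own-tools
  #   conditions:
  #     - 'userAgent.startsWith("my-uptime-checker/")'
  #   action: pass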
  # Matches private networks and localhost.
  # Uncomment this if you want to let your own tools in this way
  # - name: allow-private-networks
  #   conditions:
  #     # Allows localhost and private networks CIDR
  #     - *is-network-localhost
  #     - *is-network-private
  #   action: pass

  - name: undesired-networks
    conditions:
      - 'remoteAddress.network("huawei-cloud") || remoteAddress.network("alibaba-cloud") || remoteAddress.network("zenlayer-inc")'
    action: drop

  - name: undesired-crawlers
    conditions:
      - '($is-headless-chromium)'
      - 'userAgent.startsWith("Lightpanda/")'
      - 'userAgent.startsWith("masscan/")'
      # Typo'd opera botnet
      - 'userAgent.matches("^Opera/[0-9.]+\\.\\(")'
      # AI bullshit stuff, they do not respect robots.txt even while they read it
      # TikTok Bytedance AI training
      - 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider") || userAgent.contains("TikTokSpider")'
      # Meta AI training; the Meta-ExternalAgent crawler crawls the web for use cases such as training AI models or improving products by indexing content directly
      - 'userAgent.contains("meta-externalagent/") || userAgent.contains("meta-externalfetcher/") || userAgent.contains("FacebookBot")'
      # Anthropic AI training and usage
      - 'userAgent.contains("ClaudeBot") || userAgent.contains("Claude-User") || userAgent.contains("Claude-SearchBot")'
      # Common Crawl AI crawlers
      - 'userAgent.contains("CCBot")'
      # ChatGPT AI crawlers https://platform.openai.com/docs/bots
      - 'userAgent.contains("GPTBot") || userAgent.contains("OAI-SearchBot") || userAgent.contains("ChatGPT-User")'
      # Other AI crawlers
      - 'userAgent.contains("Amazonbot") || userAgent.contains("Google-Extended") || userAgent.contains("PanguBot") || userAgent.contains("AI2Bot") || userAgent.contains("Diffbot") || userAgent.contains("cohere-training-data-crawler") || userAgent.contains("Applebot-Extended")'
      # SEO / Ads and marketing
      - 'userAgent.contains("BLEXBot")'
    action: drop

  - name: unknown-crawlers
    conditions:
      # No user agent set
      - 'userAgent == ""'
    action: deny

  # check a sequence of challenges for non-logged-in clients
  - name: suspicious-crawlers
    conditions: ['($is-suspicious-crawler)']
    action: none
    children:
      - name: 0
        action: check
        settings:
          challenges: [js-refresh, http-cookie-check]
      - name: 1
        action: check
        settings:
          challenges: [preload-link, resource-load]
      - name: 2
        action: check
        settings:
          challenges: [header-refresh]

  - name: always-pow-challenge
    conditions:
      # login and sign up paths
      - 'path.startsWith("/user/sign_up")'
      - 'path.startsWith("/user/login") || path.startsWith("/user/oauth2/")'
      - 'path.startsWith("/user/activate")'
      # repo / org / mirror creation paths
      - 'path == "/repo/create" || path == "/repo/migrate" || path == "/org/create"'
      # user profile info edit paths
      - 'path == "/user/settings" || path.startsWith("/user/settings/hooks/")'
      # issue creation
      - 'path.matches("^/[^/]+/[^/]+/issues/new")'
      # Match archive downloads from browsers and not tools
      - 'path.matches("^/[^/]+/[^/]+/archive/.*\\.(bundle|zip|tar\\.gz)") && ($is-generic-browser)'
    action: challenge
    settings:
      challenges: [js-refresh]

  - name: allow-git-operations
    conditions:
      - '($is-git-path)'
      # Includes repository and wiki git endpoints
      - 'path.matches("^/[^/]+/[^/]+\\.git")'
      - 'path.matches("^/[^/]+/[^/]+/") && ($is-git-ua)'
    action: pass

  - name: sitemap
    conditions:
      - 'path == "/sitemap.xml" || path.matches("^/explore/(users|repos)/sitemap-[0-9]+\\.xml$")'
    action: pass

  # TODO: rss (see the commented sketch below)
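  # A possible shape for the missing rss rule (commented out, untested):
  # Forgejo serves Atom/RSS feeds on ".atom"/".rss" suffixed user and
  # repository paths, but verify the exact paths against your instance
  # before enabling this.
  # - name: feeds
  #   conditions:
  #     - 'path.matches("^/[^/]+(/[^/]+)?\\.(rss|atom)$")'
  #   action: pass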
  - name: api-call
    conditions:
      - 'path.startsWith("/api/v1/") || path.startsWith("/api/forgejo/v1/")'
      - 'path.startsWith("/login/oauth/")'
      - 'path.startsWith("/captcha/")'
      - 'path.startsWith("/metrics/")'
      # todo: post only
      - 'path == "/-/markup"'
      - 'path == "/user/events"'
      - 'path == "/ssh_info"'
      - 'path == "/api/healthz"'
      # actions
      - 'path.startsWith("/api/actions/") || path.startsWith("/api/actions_pipeline/")'
      # user pubkeys
      - 'path.matches("^/[^/]+\\.keys$")'
      - 'path.matches("^/[^/]+\\.gpg")'
      # OCI packages API and package managers
      - 'path.startsWith("/api/packages/") || path == "/api/packages"'
      - 'path.startsWith("/v2/") || path == "/v2"'
      - 'path.endsWith("/branches/list") || path.endsWith("/tags/list")'
    action: pass

  - name: preview-fetchers
    conditions:
      # These summary cards are included in most previews at the end of the URL
      - 'path.endsWith("/-/summary-card") || path.matches("^/[^/]+/[^/]+/releases/summary-card/[^/]+$")'
      #- 'userAgent.contains("facebookexternalhit/")'
      #- 'userAgent.contains("Twitterbot/")'
    action: pass

  # Allow loading and embedding of core pages without challenges
  # Extended pages like linking to files or tabs are not covered here, but might be included in other challenges
  - name: homesite
    conditions:
      # Match root of site
      - 'path == "/"'
      # Match root of any repository or user, or issue or PR
      # generic /*/*/ match gave too many options for scrapers to trigger random endpoints
      # this is a negative match of endpoints that Forgejo holds as reserved as users or orgs
      # see https://codeberg.org/forgejo/forgejo/src/branch/forgejo/models/user/user.go#L582
      - '(path.matches("^/[^/]+/[^/]+/?$") || path.matches("^/[^/]+/[^/]+/badges/") || path.matches("^/[^/]+/[^/]+/(issues|pulls)/[0-9]+$") || (path.matches("^/[^/]+/?$") && size(query) == 0)) && !path.matches("(?i)^/(api|metrics|v2|assets|attachments|avatar|avatars|repo-avatars|captcha|login|org|repo|user|admin|devtest|explore|issues|pulls|milestones|notifications|ghost)(/|$)")'
    action: pass

  # check a sequence of challenges
  - name: heavy-operations
    conditions: ['($is-heavy-resource)']
    action: none
    children:
      - name: 0
        action: check
        settings:
          challenges: [preload-link, header-refresh, js-refresh, http-cookie-check]
      - name: 1
        action: check
        settings:
          challenges: [resource-load, js-refresh, http-cookie-check]
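  # Example (commented out): a third, harder tier could be appended to the
  # children sequence above, reusing the proof-of-work challenge that
  # standard-browser further down already references. Untested sketch:
  #      - name: 2
  #        action: check
  #        settings:
  #          challenges: [js-pow-sha256]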
  # Allow all source downloads not caught in browser above
  # todo: limit this as needed?
  - name: source-download
    conditions:
      - 'path.matches("^/[^/]+/[^/]+/raw/branch/")'
      - 'path.matches("^/[^/]+/[^/]+/archive/")'
      - 'path.matches("^/[^/]+/[^/]+/releases/download/")'
      - 'path.matches("^/[^/]+/[^/]+/media/") && ($is-generic-browser)'
    action: pass

  # check DNSBL and serve harder challenges
  # todo: make this specific to score
  - name: undesired-dnsbl
    action: check
    settings:
      challenges: [dnsbl]
      # if DNSBL fails, check additional challenges
      fail: check
      fail-settings:
        challenges: [js-refresh, http-cookie-check]

  # Allow PUT/DELETE/PATCH/POST requests in general
  - name: non-get-request
    action: pass
    conditions:
      - '!(method == "HEAD" || method == "GET")'

  # Enable fetching OpenGraph and other tags from backend on these paths
  - name: enable-meta-tags
    action: context
    conditions:
      - 'userAgent.contains("facebookexternalhit/") || userAgent.contains("Facebot/") || userAgent.contains("Twitterbot/")'
      - '($is-generic-robot-ua)'
      - '!($is-generic-browser)'
    settings:
      context-set:
        # Map OpenGraph or similar tags back to the reply, even if denied/challenged
        proxy-meta-tags: "true"
        # proxy-safe-link-tags: "true"
      # Set additional response headers
      #response-headers:
      #  X-Clacks-Overhead:
      #    - GNU Terry Pratchett

  - name: plaintext-browser
    action: challenge
    settings:
      challenges: [http-cookie-check, meta-refresh, cookie]
    conditions:
      - 'userAgent.startsWith("Lynx/")'

  # Comment this rule out to not challenge tool-like user agents
  - name: standard-tools
    action: challenge
    settings:
      challenges: [cookie]
    conditions:
      - '($is-tool-ua)'
      - '!($is-generic-browser)'

  - name: standard-browser
    action: challenge
    settings:
      challenges: [http-cookie-check, preload-link, meta-refresh, resource-load, js-refresh, js-pow-sha256]
    conditions:
      - '($is-generic-browser)'

# If end of rules is reached, default is PASS
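# Example (commented out): to make the default explicit instead, end the list
# with an unconditional catch-all rule; a rule without conditions matches every
# request, as undesired-dnsbl above does.
# - name: default-deny
#   action: deny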