Create negative match based on Forgejo reserved word list for embedding homesites

This commit is contained in:
WeebDataHoarder
2025-04-16 18:48:34 +02:00
parent f690cfaac3
commit 6458e6d019

View File

@@ -376,10 +376,22 @@ rules:
# Rule with action `pass` aimed at link-preview fetchers.
- name: preview-fetchers
conditions:
# Most link previews include a summary card; its URL path ends with "/-/summary-card"
- 'path.endsWith("/-/summary-card")'
# Disabled: user-agent check for "facebookexternalhit/"
#- 'userAgent.contains("facebookexternalhit/")'
- 'userAgent.contains("Twitterbot/")'
# Request carries an X-Purpose header whose value is exactly "preview"
- '"X-Purpose" in headers && headers["X-Purpose"] == "preview"'
# Disabled duplicate of the active Twitterbot check above
#- 'userAgent.contains("Twitterbot/")'
action: pass
# Rule with action `pass` for the site root and for two-segment paths (repository roots).
- name: homesite
conditions:
# Match the root of the site
- 'path == "/"'
# Match the root of any repository, i.e. a path of exactly two segments (/<owner>/<repo>).
# A generic /*/*/ match gave scrapers too many options to trigger random endpoints,
# so this is a negative match: paths whose first segment is a name Forgejo reserves
# (and therefore cannot be a user or org) are excluded, case-insensitively via (?i).
# See https://codeberg.org/forgejo/forgejo/src/branch/forgejo/models/user/user.go#L582
# NOTE(review): the alternation below is a manual copy of that upstream list; confirm it is
# complete and re-check it when Forgejo is upgraded.
- 'path.matches("^/[^/]+/[^/]+$") && !path.matches("(?i)^/(api|metrics|v2|assets|attachments|avatar|avatars|repo-avatars|captcha|login|org|repo|user|admin|devtest|explore|issues|pulls|milestones|notifications|ghost)/[^/]+$")'
action: pass
- name: desired-crawlers
@@ -392,15 +404,6 @@ rules:
- 'userAgent.contains("+http://yandex.com/bots") && inNetwork("yandexbot", remoteAddress)'
action: pass
# Rule with action `pass` for the site root and for repository roots of selected owners.
- name: homesite
conditions:
# Match the root of the site
- 'path == "/"'
# A generic /*/*/ match gave scrapers too many options to trigger random endpoints,
# so only two-segment paths under the listed users/orgs are matched (case-insensitively via (?i)).
# Edit this list with the preferred users/orgs for now.
# TODO: create a negative match instead of this allowlist?
- 'path.matches("(?i)^/(WeebDataHoarder|P2Pool|mirror|git|S\\.O\\.N\\.G|FM10K|Sillycom|pwgen2155|kaitou|metonym)/[^/]+$")'
action: pass
# check a sequence of challenges
- name: heavy-operations/0
action: check