examples: update bot matches, allow badges to be fetched

This commit is contained in:
WeebDataHoarder
2025-04-23 07:25:06 +02:00
parent 1cc95a5fa7
commit d83fe3653a
2 changed files with 3 additions and 5 deletions

View File

@@ -193,7 +193,7 @@ conditions:
- 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")'
is-generic-robot-ua:
- 'userAgent.contains("compatible;") && !userAgent.contains("Trident/")'
- 'userAgent.matches("compatible[;)]") && !userAgent.contains("Trident/")'
- 'userAgent.matches("\\+https?://")'
- 'userAgent.contains("@")'
- 'userAgent.matches("[bB]ot/[0-9]")'
@@ -383,7 +383,7 @@ rules:
# a generic /*/*/ match gave scrapers too many ways to trigger random endpoints
# so this also negatively matches the top-level names that Forgejo reserves (they cannot be used as user or org names)
# see https://codeberg.org/forgejo/forgejo/src/branch/forgejo/models/user/user.go#L582
- '(path.matches("^/[^/]+/[^/]+/?$") || path.matches("^/[^/]+/[^/]+/(issues|pulls)/[0-9]+$") || (path.matches("^/[^/]+/?$") && size(query) == 0)) && !path.matches("(?i)^/(api|metrics|v2|assets|attachments|avatar|avatars|repo-avatars|captcha|login|org|repo|user|admin|devtest|explore|issues|pulls|milestones|notifications|ghost)(/|$)")'
- '(path.matches("^/[^/]+/[^/]+/?$") || path.matches("^/[^/]+/[^/]+/badges/") || path.matches("^/[^/]+/[^/]+/(issues|pulls)/[0-9]+$") || (path.matches("^/[^/]+/?$") && size(query) == 0)) && !path.matches("(?i)^/(api|metrics|v2|assets|attachments|avatar|avatars|repo-avatars|captcha|login|org|repo|user|admin|devtest|explore|issues|pulls|milestones|notifications|ghost)(/|$)")'
action: pass
- name: desired-crawlers
@@ -409,8 +409,6 @@ rules:
action: check
settings:
challenges: [ self-resource-load, js-pow-sha256, http-cookie-check ]
settings:
challenges: [self-preload-link, self-header-refresh, js-pow-sha256, http-cookie-check]
- name: standard-bots
action: check

View File

@@ -112,7 +112,7 @@ conditions:
is-generic-robot-ua:
- 'userAgent.contains("compatible;") && !userAgent.contains("Trident/")'
- 'userAgent.matches("compatible[;)]") && !userAgent.contains("Trident/")'
- 'userAgent.matches("\\+https?://")'
- 'userAgent.contains("@")'
- 'userAgent.matches("[bB]ot/[0-9]")'