remove reading reddit collection for community threads collection (#16)

This commit is contained in:
Bhupesh Varshney 2024-02-25 14:21:15 +05:30 committed by GitHub
parent e41335edc3
commit 4325ecc230
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 68 additions and 88 deletions

View File

@@ -1,35 +1,37 @@
name : Community Threads Wiki Updater name : Community Threads Wiki Updater
on: on:
schedule: workflow_dispatch:
- cron: '0 0 * * *' # This cron expression triggers the workflow every day at midnight UTC inputs:
workflow_dispatch: post_url:
description: 'The URL of the Reddit post to add'
required: true
permissions: permissions:
contents: read contents: read
jobs: jobs:
build: build:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Set up Python 3.10 - name: Set up Python 3.10
uses: actions/setup-python@v3 uses: actions/setup-python@v3
with: with:
python-version: "3.10" python-version: "3.10"
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Update Wiki - name: Update Wiki
env: env:
REDDIT_CLIENT_ID: ${{ secrets.REDDIT_CLIENT_ID }} REDDIT_CLIENT_ID: ${{ secrets.REDDIT_CLIENT_ID }}
REDDIT_CLIENT_SECRET: ${{ secrets.REDDIT_CLIENT_SECRET }} REDDIT_CLIENT_SECRET: ${{ secrets.REDDIT_CLIENT_SECRET }}
REDDIT_PASSWORD: ${{ secrets.REDDIT_PASSWORD }} REDDIT_PASSWORD: ${{ secrets.REDDIT_PASSWORD }}
REDDIT_USERNAME: ${{ secrets.REDDIT_USERNAME }} REDDIT_USERNAME: ${{ secrets.REDDIT_USERNAME }}
GIST_ID: ${{ secrets.GIST_ID }} GIST_ID: ${{ secrets.GIST_ID }}
GIST_TOKEN: ${{ secrets.GIST_TOKEN }} GIST_TOKEN: ${{ secrets.GIST_TOKEN }}
run: | run: |
cd community-threads cd community-threads
python main.py python main.py ${{ github.event.inputs.post_url }}

View File

@@ -1,5 +1,6 @@
import praw import praw
import os import os
import argparse
from datetime import datetime from datetime import datetime
import json import json
from collections import defaultdict from collections import defaultdict
@@ -36,13 +37,26 @@ def update_gist(gist_id, filename, content, description=""):
) )
return response.json() return response.json()
# farewell, reddit collections
# def get_collection(reddit):
# collection = reddit.subreddit(sub).collections(
# permalink="https://reddit.com/r/developersIndia/collection/958aef35-f9cb-414d-ab33-08bc639e47de"
# )
# return collection
def get_collection(reddit): def get_post_data(reddit, post_url):
collection = reddit.subreddit(sub).collections( submission = reddit.submission(url=post_url)
permalink="https://reddit.com/r/developersIndia/collection/958aef35-f9cb-414d-ab33-08bc639e47de" post = {
) "title": submission.title,
return collection "url": submission.url,
"id": submission.id,
"num_comments": submission.num_comments,
"created_at": datetime.utcfromtimestamp(
submission.created_utc
).isoformat(),
"flair_text": submission.link_flair_text,
}
return post
def update_wiki(reddit, wikipage, posts): def update_wiki(reddit, wikipage, posts):
# Group posts by year # Group posts by year
@@ -78,6 +92,10 @@ def update_wiki(reddit, wikipage, posts):
def main(): def main():
parser = argparse.ArgumentParser(description='Update Community Threads Collection.')
parser.add_argument('post_url', help='The URL of the Reddit post to add.')
args = parser.parse_args()
reddit = praw.Reddit( reddit = praw.Reddit(
client_id=client_id, client_id=client_id,
client_secret=client_secret, client_secret=client_secret,
@@ -86,70 +104,30 @@ def main():
user_agent=f"Automod reader by u/{username}", user_agent=f"Automod reader by u/{username}",
) )
collection = get_collection(reddit)
saved_collection_posts = json.loads(get_gist_content(gist_id)) saved_collection_posts = json.loads(get_gist_content(gist_id))
saved_collection_ids = [post["id"] for post in saved_collection_posts["posts"]] saved_collection_ids = [post["id"] for post in saved_collection_posts["posts"]]
print(f"Database was last updated on {saved_collection_posts['collection_last_updated']}") print(f"Database was last updated on {saved_collection_posts['collection_last_updated']}")
print(f"Collection was last updated on {datetime.utcfromtimestamp(collection.last_update_utc).isoformat()}")
if ( posts = []
saved_collection_posts["collection_last_updated"] for submission_id in saved_collection_posts["posts"]:
!= datetime.utcfromtimestamp(collection.last_update_utc).isoformat() post = {
): "title": submission_id["title"],
print("Collection was updated, getting new posts data...") "url": submission_id["url"],
"id": submission_id["id"],
"num_comments": submission_id["num_comments"],
"created_at": submission_id["created_at"],
"flair_text": submission_id["flair_text"],
}
posts.append(post)
# given 2 lists find non-common elements new_post = get_post_data(reddit, args.post_url)
db_posts = set(saved_collection_ids) if new_post["id"] not in saved_collection_ids:
collection_posts = [] posts.append(new_post)
for submission in collection:
collection_posts.append(submission.id)
collection_posts = set(collection_posts)
new_posts = list(collection_posts - db_posts)
deleted_posts = list(db_posts - collection_posts)
print(f"Found {len(new_posts)} new posts!")
print(f"Found {len(deleted_posts)} deleted posts!")
posts = []
# load the saved collection posts data
for submission_id in saved_collection_posts["posts"]:
if submission_id["id"] in deleted_posts:
continue
post = {
"title": submission_id["title"],
"url": submission_id["url"],
"id": submission_id["id"],
"num_comments": submission_id["num_comments"],
"created_at": submission_id["created_at"],
"flair_text": submission_id["flair_text"],
}
posts.append(post)
# get the new posts data
for submission_id in new_posts:
submission = reddit.submission(submission_id)
post = {
"title": submission.title,
"url": submission.url,
"id": submission.id,
"num_comments": submission.num_comments,
"created_at": datetime.utcfromtimestamp(
submission.created_utc
).isoformat(),
"flair_text": submission.link_flair_text,
}
posts.append(post)
# sort the posts by created_at
posts = sorted(posts, key=lambda k: k["created_at"]) posts = sorted(posts, key=lambda k: k["created_at"])
collection_json = { collection_json = {
"collection_last_updated": datetime.utcfromtimestamp( "collection_last_updated": datetime.utcnow().isoformat(),
collection.last_update_utc
).isoformat(),
"posts": posts, "posts": posts,
} }
@@ -157,7 +135,7 @@ def main():
print("Internal database updated successfully!") print("Internal database updated successfully!")
update_wiki(reddit, "community-threads", posts) update_wiki(reddit, "community-threads", posts)
else: else:
print("Wiki is up to date!") print("Post is already in the collection. No changes were made.")
if __name__ == "__main__": if __name__ == "__main__":