strip html

This commit is contained in:
git-bruh 2023-02-15 15:34:30 +05:30
parent 4d029e7dfb
commit 51c33b2913
No known key found for this signature in database

View File

@ -1,5 +1,6 @@
import json import json
import logging import logging
import re
from copy import deepcopy from copy import deepcopy
from dataclasses import dataclass from dataclasses import dataclass
from os import environ, fsync from os import environ, fsync
@ -129,6 +130,10 @@ class Job:
permalink: str permalink: str
def strip_html(text):
    """Return *text* with every tag-like ``<...>`` span removed.

    A span is an opening ``<``, followed by one or more characters
    other than ``<`` (matched as few as possible), followed by a
    closing ``>``. Everything outside such spans is kept verbatim;
    in particular, character entities such as ``&amp;`` are left
    as they are.
    """
    tag_pattern = re.compile("<[^<]+?>")
    return tag_pattern.sub("", text)
def get_job_entries(feed_url): def get_job_entries(feed_url):
entries = feedparser.parse(feed_url).entries entries = feedparser.parse(feed_url).entries
@ -140,7 +145,7 @@ def get_job_entries(feed_url):
location=entry.get("job_listing_location", "N/A"), location=entry.get("job_listing_location", "N/A"),
job_type=entry["job_listing_job_type"], job_type=entry["job_listing_job_type"],
salary=entry.get("job_listing_salary", "N/A"), salary=entry.get("job_listing_salary", "N/A"),
summary=entry["summary"], summary=strip_html(entry["summary"]),
permalink=entry["link"], permalink=entry["link"],
) )
for entry in entries for entry in entries