import re
import WebSiteParser
class ReceivesmsCo(WebSiteParser.WebSiteParser):
def __init__(self):
    """Initialise the base WebSiteParser with the string "receivesms.co"."""
    # Presumably the host name of the site this parser targets; verify
    # against WebSiteParser.WebSiteParser.__init__.
    site_name = "receivesms.co"
    super().__init__(site_name)
def Parse(self):
"""Perform parsing of entire web site.

Works in three stages:

1. Call ``self.ParseAllFromPage`` without a ``location`` to collect the
   country URIs (the main page, per the comment below).
2. For every country URI, call ``self.ParseAllFromPage`` again with that
   URI as ``location`` to collect the URIs of individual number pages.
   Each one is stored as ``{"uri_raw": <uri without a leading "/">}``.
3. Merge the dict returned by ``self.ParseNumberPage`` into every stored
   entry, hand the list to ``self.ProcessRawNumbers``, set
   ``self.ParseDone`` to True and log "parsing done".
"""
# Parsing main page with list of countries
# NOTE(review): the first pattern below is split across two physical lines
# inside a plain double-quoted literal, and all patterns in this method look
# truncated -- presumably HTML markup was lost from them; restore them from
# version control before relying on this code.
# NOTE(review): the patterns use "\s" in non-raw strings; raw strings
# (r"...") are the recommended form for regular expressions.
# presumably ParseAllFromPage returns the pattern matches with the
# ``to_remove`` part stripped -- confirm against WebSiteParser.
country_uris = self.ParseAllFromPage(
"
[\s]*.*",
to_remove="([\s]*.*)"
)
# Accumulates one dict per phone-number page found on the site.
numbers = []
for country_uri in country_uris:
country_number_uris = self.ParseAllFromPage(
"[\s]*.*[\s]* | ",
to_remove="([\s]*.*[\s]* | )",
# Drop a single leading "/" from the URI before using it as the location.
# NOTE(review): indexing [0] assumes the URI is a non-empty string; an
# empty match would raise IndexError.
location=(country_uri if country_uri[0]!="/" else country_uri[1:])
)
for num_uri in country_number_uris:
numbers.append({
# Same leading-"/" normalisation as for the country URI above.
"uri_raw": (num_uri if num_uri[0]!="/" else num_uri[1:])
})
# NOTE(review): leftover debugging short-circuit (see its own TODO); it cuts
# the crawl short after the first iteration of whichever loop encloses it.
# The nesting cannot be determined from this view -- confirm, then remove.
break # TODO: remove
# Enrich every collected entry with the fields parsed from its own page.
for i in range(len(numbers)):
numbers[i].update(
self.ParseNumberPage(numbers[i]["uri_raw"])
)
# Pass the enriched entries on, then flag completion and log it.
self.ProcessRawNumbers(numbers)
self.ParseDone = True
self.Log("parsing done")
def ParseNumberPage(self, uri):
"""Parse page with history of messages, related to single number.
"""
def die(text):
self.Log(text, f"ParseNumberPage(self at {id(self)}, \"{uri}\")")
result = {
"number": self.EMPTY,
"country": self.EMPTY,
"last_use_raw": self.EMPTY,
"born_raw": self.EMPTY,
"times_used": self.EMPTY
}
markup = self.RequestPage(uri).text
country = re.findall(
"[\s]*[\s]*.*[\s]-[\s].*Phone Number[\s]*
",
markup
)
country = re.sub(
"([\s]*[\s]*[\s]*)|([\s]-[\s].*Phone Number[\s]*
)",
"",
str(country[0]) if country else ""
)
if not country:
die("error: page parsing failed, country is empty")
return result
result["country"] = country
number = re.findall(
"+.*)|([\s]*From[\s]*.*[\s]*.*[(].+ago[)][\s]*[\s]*From[\s]*.*[\s]*.*[(])|([)][\s]*[\s]* 1:
pages_amount -= 2
last_page_uri = nav_links[-1]
last_page_uri = re.sub(
"(^[\s]*