import re import WebSiteParser class ReceivesmsCo(WebSiteParser.WebSiteParser): def __init__(self): super().__init__("receivesms.co") def Parse(self): """Perform parsing of entire web site. """ # Parsing main page with list of countries country_uris = self.ParseAllFromPage( "[\s]*.*", to_remove="([\s]*.*)" ) numbers = [] for country_uri in country_uris: country_number_uris = self.ParseAllFromPage( "[\s]*.*[\s]*", to_remove="([\s]*.*[\s]*)", location=(country_uri if country_uri[0]!="/" else country_uri[1:]) ) for num_uri in country_number_uris: numbers.append({ "uri_raw": (num_uri if num_uri[0]!="/" else num_uri[1:]) }) break # TODO: remove for i in range(len(numbers)): numbers[i].update( self.ParseNumberPage(numbers[i]["uri_raw"]) ) self.ProcessRawNumbers(numbers) self.ParseDone = True self.Log("parsing done") def ParseNumberPage(self, uri): """Parse page with history of messages, related to single number. """ def die(text): self.Log(text, f"ParseNumberPage(self at {id(self)}, \"{uri}\")") result = { "number": self.EMPTY, "country": self.EMPTY, "last_use_raw": self.EMPTY, "born_raw": self.EMPTY, "times_used": self.EMPTY } markup = self.RequestPage(uri).text country = re.findall( "

[\s]*[\s]*.*[\s]-[\s].*Phone Number[\s]*

", markup ) country = re.sub( "(

[\s]*[\s]*[\s]*)|([\s]-[\s].*Phone Number[\s]*

)", "", str(country[0]) if country else "" ) if not country: die("error: page parsing failed, country is empty") return result result["country"] = country number = re.findall( "+.*)|([\s]*From[\s]*.*[\s]*.*[(].+ago[)][\s]*[\s]*From[\s]*.*[\s]*.*[(])|([)][\s]*[\s]* 1: pages_amount -= 2 last_page_uri = nav_links[-1] last_page_uri = re.sub( "(^
  • [\s]*