On ajoute une analyse des scripts JavaScript (ce qui permet de supprimer la pause)
This commit is contained in:
parent
2fe2611788
commit
ea0e7a3db9
@ -15,6 +15,31 @@ logger = logging.getLogger(__name__)
|
|||||||
# such as https://www.facebook.com/laJeteeClermont/events
|
# such as https://www.facebook.com/laJeteeClermont/events
|
||||||
class CExtractor(TwoStepsExtractor):
|
class CExtractor(TwoStepsExtractor):
|
||||||
|
|
||||||
|
def find_event_id_fragment_in_array(self, array, first=True):
    """Search a decoded JSON value for Facebook event descriptions.

    A dict is treated as an event when its ``"__typename"`` equals ``"Event"``
    and it carries a usable ``"id"`` (a string or an integer). For each such
    dict, ``https://www.facebook.com/events/<id>/`` is passed to
    ``self.add_event_url`` and the dict is not searched any deeper. Every
    other dict or list is searched recursively; scalar values are ignored.

    Args:
        array: A decoded JSON value (dict, list or scalar).
        first: Not read by this method; recursive calls pass ``False``.
            Kept so that existing callers keep working.

    Returns:
        True if at least one event URL was added, False otherwise.
    """
    if isinstance(array, dict):
        event_id = array.get("id")
        # Only strings and plain integers make a meaningful URL fragment.
        # bool is a subclass of int, so it has to be excluded explicitly.
        has_usable_id = isinstance(event_id, (str, int)) and not isinstance(
            event_id, bool
        )
        if array.get("__typename") == "Event" and has_usable_id:
            # str() keeps integer ids from raising TypeError on concatenation.
            self.add_event_url(
                "https://www.facebook.com/events/" + str(event_id) + "/"
            )
            return True
        children = array.values()
    elif isinstance(array, list):
        children = array
    else:
        return False

    found = False
    for child in children:
        # Recurse first so every child is visited even once found is True.
        found = self.find_event_id_fragment_in_array(child, False) or found
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def find_in_js(self, soup):
    """Look for event descriptions in the page's embedded JSON scripts.

    Every ``<script type="application/json">`` element returned by
    ``soup.find_all`` is decoded with ``json.loads`` and handed to
    ``self.find_event_id_fragment_in_array``. Script bodies that are not
    valid JSON are skipped.

    Args:
        soup: Parsed document exposing ``find_all``; each returned element
            must expose ``get_text()``.

    Returns:
        True if at least one decoded script yielded an event, False otherwise.
    """
    found = False

    for json_script in soup.find_all("script", type="application/json"):
        try:
            json_struct = json.loads(json_script.get_text())
        except json.JSONDecodeError:
            # Best effort: one empty or malformed script must not abort the
            # scan of the remaining scripts.
            continue
        found = self.find_event_id_fragment_in_array(json_struct) or found

    return found
|
||||||
|
|
||||||
|
|
||||||
def build_event_url_list(self, content):
|
def build_event_url_list(self, content):
|
||||||
soup = BeautifulSoup(content, "html.parser")
|
soup = BeautifulSoup(content, "html.parser")
|
||||||
@ -28,6 +53,8 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
self.add_event_url(link.get('href').split('?')[0])
|
self.add_event_url(link.get('href').split('?')[0])
|
||||||
found = True
|
found = True
|
||||||
|
|
||||||
|
found = self.find_in_js(soup) or found
|
||||||
|
|
||||||
if not found and debug:
|
if not found and debug:
|
||||||
directory = "errors/"
|
directory = "errors/"
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user