# Original listing: 154 lines, 3.9 KiB, Python.
import argparse
from itertools import combinations
from urllib.parse import urlparse

import mastodon
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import seaborn as sns

# Parse the command-line arguments of the script.
parser = argparse.ArgumentParser(description="Produit un graphique des tags")
parser.add_argument(
    "--access-token",
    type=str,
    help="The access token of your mastodon app",
    required=True,
)
parser.add_argument(
    "--hashtag",
    type=str,
    help="The hashtag to search for (required)",
    required=True,
)
# Optional overrides; the defaults are the values that used to be hard-coded.
parser.add_argument(
    "--instance",
    type=str,
    default="https://mastodon.cipherbliss.com",
    help="Base URL of the Mastodon instance to query (default: %(default)s)",
)
parser.add_argument(
    "--max-results",
    type=int,
    default=500,
    help="Maximum number of posts to fetch (default: %(default)s)",
)
args = parser.parse_args()

# Run configuration, taken from the command line.
MASTODON_INSTANCE = args.instance
ACCESS_TOKEN = args.access_token
# The hashtag timeline call expects the bare tag name, so drop any leading
# '#'. This also keeps the character out of the output file names.
hashtag = args.hashtag.lstrip("#")
MAX_RESULTS = args.max_results

# Connect to the instance. The client gets its own name so that it does not
# shadow the imported `mastodon` module.
client = mastodon.Mastodon(
    access_token=ACCESS_TOKEN,
    api_base_url=MASTODON_INSTANCE,
)

# The server caps `limit` on hashtag timelines (40 statuses per request on
# stock Mastodon), so a single call can never return MAX_RESULTS posts.
# Walk the pagination until enough posts are collected or the timeline ends.
API_PAGE_LIMIT = 40
posts = []
page = client.timeline_hashtag(hashtag, limit=min(MAX_RESULTS, API_PAGE_LIMIT))
while page:
    posts.extend(page)
    if len(posts) >= MAX_RESULTS:
        break
    page = client.fetch_next(page)  # None once there is no further page
posts = posts[:MAX_RESULTS]

print('posts trouvés:', len(posts))

##################################
# Bar chart of the domains linked from the fetched posts.

# Parallel lists: urls[i] is the link kept for a post, domains[i] its host.
urls = []
domains = []

for status in posts:
    # Candidate links: the URL of every media attachment, followed by the URL
    # of the link preview. A status carries a single `card` (or None).
    candidates = [item['url'] for item in status.get('media_attachments') or []]
    card = status.get('card')
    if card:
        candidates.append(card['url'])
    candidates = [url for url in candidates if url]  # skip missing URLs
    if not candidates:
        continue
    # One entry per post: keep the last candidate and record the URL next to
    # its domain so both dataframe columns always have the same length.
    url = candidates[-1]
    urls.append(url)
    domains.append(urlparse(url).netloc)

df = pd.DataFrame({'URL': urls, 'Domain': domains})

fig, ax = plt.subplots(figsize=(8, 6))
if not df.empty:
    sns.countplot(data=df, x='Domain', ax=ax)
ax.set_title('Occurrences of Domains in Fetched Posts')
# Rotate the existing category labels instead of replacing them with the
# numeric tick positions.
ax.tick_params(axis='x', labelrotation=90)

# Save the chart as a PNG file.
plt.savefig(f'barres_liens_tag_{hashtag}.png')

##################################
# Dataframe associating each post with its tags.
# The tags of a post are stored as one comma-separated string. They are
# joined with a bare ',' so that splitting on ',' gives back the exact tag
# names, with no leading spaces that would make " tag" differ from "tag".
rows = [
    [post['id'], ','.join(tag['name'] for tag in post['tags'])]
    for post in posts
]
df = pd.DataFrame(rows, columns=['post_id', 'tags'])

# One list of tag names per post, and the same data as a ragged table
# (one row per post, one column per tag position).
df_list = [tags.split(',') for tags in df['tags']]
df_tags = pd.DataFrame(df_list)

#########################
# Tag co-occurrence network: one node per tag, one edge between every pair
# of tags that appear together on a post.
G = nx.Graph()

for _, row in df.iterrows():
    # Strip padding and drop empty or repeated names so that "tag" and " tag"
    # map to the same node and no self-loop is created.
    tags = list(dict.fromkeys(
        name.strip() for name in row['tags'].split(',') if name.strip()
    ))
    G.add_nodes_from(tags)
    G.add_edges_from(combinations(tags, 2))

plt.figure(figsize=(20, 15))

# Compute the node positions.
pos = nx.spring_layout(G)

# Node size follows the number of neighbours of the node.
degrees = dict(G.degree())
node_sizes = [degrees[node] for node in G.nodes()]

# Largest degree, used as the scaling reference. Falls back to 1 when the
# graph is empty or has no edge at all, to avoid max() on an empty list and
# a division by zero below.
max_node_size = max(node_sizes, default=0) or 1

# Scale so that the best-connected tag is drawn with size 1000.
node_sizes_scaled = [size / max_node_size * 1000 for size in node_sizes]

# Draw the nodes.
nx.draw_networkx_nodes(G, pos, node_size=node_sizes_scaled, node_color='skyblue', alpha=0.7)

# Draw the edges. `width` needs one value per edge, in G.edges() order, so
# each edge is weighted by the mean degree of its two endpoints (divided by
# 3, the factor previously applied to node degrees).
edge_widths = [(degrees[u] + degrees[v]) / 6 for u, v in G.edges()]
nx.draw_networkx_edges(G, pos, width=edge_widths, edge_color='gray')

# Label every node with its tag name.
nx.draw_networkx_labels(G, pos, font_size=14, font_color='black')

print('longueur du graphe: ', len(G))

plt.savefig(f'network_graph_tag_{hashtag}.svg')

##############################
# Bar chart of the tag occurrences.
# Strip the names after splitting so that "tag" and " tag" are counted as
# the same tag, and ignore empty names coming from posts without tags.
tag_names = df['tags'].str.split(',').explode().str.strip()
# value_counts() already returns the counts sorted in descending order.
tag_counts = tag_names[tag_names != ''].value_counts()

plt.figure(figsize=(10, 6))
plt.barh(tag_counts.index, tag_counts.values)
plt.xlabel('Occurrences')
plt.ylabel('Tags')
plt.title('Tag Occurrences')

# Save the chart as a PNG file.
plt.savefig(f'barres_tag_{hashtag}.png')