more fields for gynandco

This commit is contained in:
Tykayn 2024-10-22 00:19:28 +02:00 committed by tykayn
parent e13c22924b
commit 2c3cb0a5eb
2 changed files with 2007 additions and 111 deletions

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,15 @@ def extraire_numero_telephone(line):
else: else:
return None return None
def extraire_code_postal(line):
    """Return the first standalone five-digit number found in ``line``.

    The text is searched for a run of exactly five digits that is delimited
    by word boundaries on both sides; that run is treated as the postal code.

    Args:
        line: Text to scan.

    Returns:
        The five-digit string of the first match, or ``None`` when the line
        contains no such run.
    """
    hit = re.search(r'\b(\d{5})\b', line)
    # Guard clause: nothing that looks like a postal code on this line.
    if hit is None:
        return None
    # Group 1 holds the five digits themselves.
    return hit.group(1)
# trouver si la ligne est une adresse en cherchant deux numéros distincts et une virgule # trouver si la ligne est une adresse en cherchant deux numéros distincts et une virgule
def extraire_addr_line(line): def extraire_addr_line(line):
# Extraction des nombres dans la ligne # Extraction des nombres dans la ligne
@ -69,22 +78,42 @@ for article in articles:
# Recherche d'un numéro de téléphone dans l'article # Recherche d'un numéro de téléphone dans l'article
phone_number = None phone_number = None
gender = 'unknown' gender = 'unknown'
trans_friendly = 'unknown' trans_friendly = ''
handles_violence = 'unknown' handles_violence = ''
visio_meeting = 'no' visio_meeting = 'no'
pseudo_science = 'unknown' pseudo_science = ''
premenstrual_syndrome = 'unknown' premenstrual_syndrome = ''
accessible_cabinet = 'unknown' accessible_cabinet = ''
tatoo = 'unknown' tatoo = ''
toxico = 'unknown' toxico = ''
sterilisation = 'unknown' sterilisation = ''
abortion = 'unknown' abortion = ''
endometriosis = 'unknown' endometriosis = ''
bigbody = 'unknown' bigbody = ''
poil = 'unknown' poil = ''
bi = 'unknown' bi = ''
lesbian = ''
pma = ''
ist = ''
pregnancy = ''
ivg = ''
generaliste = ''
gyneco = ''
sage_femme = ''
auto_prelev = ''
mycoses = ''
dyspareunie = ''
spoken = '' # langues parlées
diu = '' # dispositif intra utérin
puma= '' # PUMA (ex-CMU)
ame= '' # AME (Aide médicale d'État)
code_postal = ''
for line in article.stripped_strings: for line in article.stripped_strings:
found = extraire_code_postal(line)
if found:
code_postal = found
address = line
found = extraire_numero_telephone(line) found = extraire_numero_telephone(line)
if found: if found:
print(found) print(found)
@ -111,6 +140,38 @@ for article in articles:
bi = 'yes' bi = 'yes'
if 'Stérilisation' in line: if 'Stérilisation' in line:
sterilisation = 'yes' sterilisation = 'yes'
if 'DIU' in line:
diu = 'yes'
if 'Accompagnement grossesse' in line:
pregnancy = 'yes'
if 'suivi des IST' in line:
ist = 'yes'
if 'IVG' in line:
ivg = 'yes'
if 'Médecin généraliste' in line:
generaliste = 'yes'
if 'Sage-femme' in line:
sage_femme = 'yes'
if 'auto prélèvement' in line:
auto_prelev = 'yes'
if 'Conseils mycoses' in line:
mycoses = 'yes'
if 'Lesbiennes friendly' in line:
lesbian = 'yes'
if 'PMA' in line:
pma = 'yes'
if 'PUMA' in line:
puma = 'yes'
if 'AME' in line:
ame = 'yes'
if 'français, anglais' in line:
spoken = 'french;english'
if 'espagnol' in line:
spoken = (spoken+';spanish')
if 'Dyspareunie' in line:
dyspareunie = 'yes'
if 'Gynécologue' in line:
gyneco = 'yes'
# chercher une adresse si on en a pas # chercher une adresse si on en a pas
if not address: if not address:
found = extraire_addr_line(line) found = extraire_addr_line(line)
@ -123,6 +184,7 @@ for article in articles:
doctor = { doctor = {
"name": name, "name": name,
"address": address, "address": address,
"address:code_postal": code_postal,
"ref:FR:convention_secteur": sector, "ref:FR:convention_secteur": sector,
"contact:phone": phone_number, "contact:phone": phone_number,
"contact:website": url, "contact:website": url,
@ -131,8 +193,10 @@ for article in articles:
"handles:gender:trans": trans_friendly, "handles:gender:trans": trans_friendly,
"handles:violence": handles_violence, "handles:violence": handles_violence,
"handles:premenstrual_syndrome": premenstrual_syndrome, "handles:premenstrual_syndrome": premenstrual_syndrome,
"handles:IST": ist,
"accessible_cabinet": accessible_cabinet, "accessible_cabinet": accessible_cabinet,
"pseudo_science": pseudo_science, "pseudo_science": pseudo_science,
"speaks": spoken,
"handles:tatoo": tatoo, "handles:tatoo": tatoo,
"handles:toxico": toxico, "handles:toxico": toxico,
"handles:sterilisation": sterilisation, "handles:sterilisation": sterilisation,
@ -142,6 +206,18 @@ for article in articles:
"handles:hairy": poil, "handles:hairy": poil,
"handles:bigbody": bigbody, "handles:bigbody": bigbody,
"handles:gender:bi": bi, "handles:gender:bi": bi,
"handles:diu": diu,
"handles:mycoses": mycoses,
"handles:pregnancy": pregnancy,
"handles:abortion": ivg,
"handles:auto_prelevement": auto_prelev,
"handles:pma": pma,
"handles:dyspareunie": dyspareunie,
"handles:gender:lesbian": lesbian,
"healcare:generaliste": generaliste,
"healcare:sage_femme": sage_femme,
"healcare:gynecologist": gyneco,
} }
doctors.append(doctor) doctors.append(doctor)