scrape one list of books

This commit is contained in:
Tykayn 2023-08-17 12:40:49 +02:00 committed by tykayn
parent 757528b952
commit 31d8fd53c8
3 changed files with 178 additions and 0 deletions

101
scraping/main.ts Normal file
View File

@ -0,0 +1,101 @@
/**
Scraping the book catalogue of the Briis-sous-Forges media library (médiathèque).
**/
// @ts-ignore
import https from 'https';
import WriteFile from "./utils";
const axios = require('axios');
const cheerio = require('cheerio');
// Host name of the library web site (no scheme); passed as `hostname` to the HTTPS request.
const url: string = "www.mediatheque-de-briis-sous-forges.net";
// Path and query string of the catalogue search page to scrape (currently page 2).
const fetching_path: string = "/mediatheque-de-briis-sous-forges.net/opac/recherche/catalogue?node=0&value=0&page=2";
/** One catalogue entry as stored in the scraped output. */
interface Book {
  /** Author name as it appears in the notice text. */
  author: string;
  /** Title part of the notice line (the text before the `/` separator). */
  title: string;
  /** Remainder of the notice line (the text after the `/` separator). */
  description: string;
  /** Value of the cover image's `src` attribute. */
  img: string;
}
// Accumulator for every book extracted during the run; serialised to JSON at the end.
const books: Book[] = [];
// Other pages:
// http://www.mediatheque-de-briis-sous-forges.net/mediatheque-de-briis-sous-forges.net/opac/recherche/catalogue?node=0&value=0&page=2
// NOTE(review): not referenced in the visible code; presumably the number of the last catalogue page — confirm.
const page_max = 1927
/**
 * Parses an HTML document and selects every `table.notice` element in it.
 *
 * @param html - Raw HTML markup to parse.
 * @returns The cheerio selection holding the matching tables.
 */
const getTables = (html: string): any => cheerio.load(html)("table.notice");
/**
 * Downloads a page over HTTPS and resolves with its body decoded as UTF-8 text.
 *
 * @param hostname - Host to contact, without scheme (e.g. `www.example.org`).
 * @param path - Path and query string of the resource.
 * @returns A promise resolving with the complete response body.
 * @throws Rejects when the request or the response stream fails, or when the
 *         server answers with a non-2xx status code.
 */
const getHtml = async (hostname: string, path: string): Promise<string> =>
  new Promise((resolve, reject) => {
    https
      .get(
        {
          hostname,
          path,
          method: "GET",
        },
        (res) => {
          const status = res.statusCode ?? 0;
          if (status < 200 || status >= 300) {
            // Drain the body so the socket is released, then surface the failure
            // instead of handing an error/redirect page to the HTML parser.
            res.resume();
            reject(
              new Error(`GET https://${hostname}${path} failed with HTTP status ${status}`)
            );
            return;
          }

          // Decode at the stream level: concatenating raw Buffer chunks would corrupt
          // any multi-byte character that straddles a chunk boundary.
          res.setEncoding("utf8");

          let html = "";
          res.on("data", (chunk: string) => {
            html += chunk;
          });
          res.on("end", () => {
            resolve(html);
          });
          // A failure while the body is being received must not leave the promise pending.
          res.on("error", reject);
        }
      )
      .on("error", (error) => {
        console.error(error);
        reject(error);
      });
  });
// fetchData(url).then((res: any) => {
// const html = res.data;
// const $ = cheerio.load(html);
// const statsTable :any = $('table.notice');
// console.log('statsTable', statsTable)
// statsTable.each(function(){
// let elem:any = this;
// let author = $(elem).find('td').eq(2).text();
// // let img = $(this).find('img').attr('src');
// // console.log(elem);
// console.log(author);
// });
// })
/** Serialises the collected books as 2-space-indented JSON and writes them to `books.json`. */
function writeBookScrapping() {
  const serializedBooks = JSON.stringify(books, null, 2);
  WriteFile('books.json', serializedBooks);
}
// Scrape one catalogue page: download it, pick every notice table, turn each
// table into a Book and finally dump the collected books to disk.
getHtml(url, fetching_path)
  .then(getTables)
  .then((tables: any) =>
    tables.each((_: any, table: any) => {
      const $ = cheerio.load(table);

      // The second cell holds the notice text: the author on the first line,
      // then "title / description" on the second line.
      const text_description: string = $(table).find('td').eq(1).text();
      const [author = '', titleLine] = text_description.split('\n');
      if (titleLine === undefined) {
        // A notice without a second line cannot be parsed; skip it rather than
        // throwing, which would abort the whole run and write nothing.
        // Returning undefined (not false) keeps the `.each` iteration going.
        console.warn('skipping notice with unexpected layout:', text_description);
        return;
      }

      // Split on the FIRST slash only, so a description that itself contains
      // '/' is kept whole instead of being truncated.
      const slashAt = titleLine.indexOf('/');
      const title = slashAt === -1 ? titleLine : titleLine.slice(0, slashAt);
      const description = slashAt === -1 ? '' : titleLine.slice(slashAt + 1);

      const img_src: string | undefined = $(table).find('td img').attr('src');
      console.log(img_src);

      books.push({
        author,
        title,
        description,
        // Notices without a cover have no <img>; keep the field a string as Book requires.
        img: img_src ?? '',
      });
    })
  )
  .then(writeBookScrapping)
  .catch((error) => console.log(error));

View File

@ -0,0 +1,62 @@
[
{
"author": "ALLENDE, Isabel",
"title": "Portrait sépia ",
"description": " Isabel Allende ; Trad. de l'espagnol par Claude de Frayssinet. - Paris : Grasset et Fasquelle, 2001. - 1 vol. , 391 p. : couv. ill. ; 24 x 15 cm.",
"img": "http://images-eu.amazon.com/images/P/2246617715.08.MZZZZZZZ.jpg"
},
{
"author": "AMETTE, Jacques-Pierre",
"title": "La Maîtresse de Brecht : roman ",
"description": " Jacques-Pierre Amette. - Paris : Albin Michel, 2003. - 300 p. : jaquette ill. ; 20 cm.",
"img": "http://images-eu.amazon.com/images/P/2226141634.08.MZZZZZZZ.jpg"
},
{
"author": "ANDRIÂC, Ivo",
"title": "Mara la courtisane : et autres nouvelles ",
"description": " Ivo Andriâc ; Trad. du serbo-croate par Pascale Delpech. - Paris : Belfond, 1999. - 234 p. : couv. ill. en coul. ; 23 cm. - (Littérature étrangère).",
"img": "http://images-eu.amazon.com/images/P/2714435572.08.MZZZZZZZ.jpg"
},
{
"author": "ANGLADE, Jean",
"title": "Un Lit d'aubépine : roman ",
"description": " Jean Anglade. - Paris : Presses de la Cité, 1995. - 325 p. : couv. ill. en coul. ; 23 cm. - (Production Jeannine Balland).",
"img": "http://images-eu.amazon.com/images/P/2258039568.08.MZZZZZZZ.jpg"
},
{
"author": "ARNOTHY, Christine",
"title": "J'ai quinze ans et je ne veux pas mourir ; (suivi de) Il n'est pas si facile de vivre ",
"description": " Christine Arnothy. - Paris : France loisirs, 1981. - 330 p ; 23 cm.",
"img": "http://images-eu.amazon.com/images/P/2724211065.08.MZZZZZZZ.jpg"
},
{
"author": "CHAUVIN, Rémy",
"title": "Le Monde animal et ses comportements complexes ",
"description": " Rémy Chauvin, Bernadette Chauvin. - Paris : Plon, 1977. - 282 p : ill ; 21 cm.",
"img": "http://images-eu.amazon.com/images/P/2259002331.08.MZZZZZZZ.jpg"
},
{
"author": "D'ARZO, Silvio",
"title": "Maison des autres , (Contient) Un moment comme ça ",
"description": " texte de Silvio D'Arzo ; Trad. de l'italien par Bernard Simeone, Philippe Renard ; Préf. Attilio Bertolucci. - Lagrasse : Verdier, 1997. - 1 vol. , 86 p. : - ; 22 x 14 cm. - (Terra d'altri, ISSN 0989-4160).",
"img": "http://images-eu.amazon.com/images/P/2864322838.08.MZZZZZZZ.jpg"
},
{
"author": "ASSOULINE, Pierre",
"title": "La Cliente : roman ",
"description": " Pierre Assouline. - Paris : Gallimard, 1998. - 191 p. ; 21 cm.",
"img": "http://images-eu.amazon.com/images/P/207075278X.08.MZZZZZZZ.jpg"
},
{
"author": "ATKINSON, Kate",
"title": "Dans les replis du temps ",
"description": " texte de Kate Atkinson ; Trad. de l'anglais par Jean Bourdier. - Paris : Librairie générale française, 1999. - 1 vol. , 403 p. : ill., couv. ill. en coul. ; 18 x 11 cm. - (Le livre de poche ; 14687).",
"img": "http://images-eu.amazon.com/images/P/2253146870.08.MZZZZZZZ.jpg"
},
{
"author": "ATKINSON, Kate",
"title": "Dans les coulisses du musée : roman ",
"description": " Kate Atkinson ; Trad. de l'anglais par Jean Bourdier. - Paris : Bernard de Fallois, 1996. - 348 p. : couv. ill. en coul. ; 23 cm.",
"img": "http://images-eu.amazon.com/images/P/2877062775.08.MZZZZZZZ.jpg"
}
]

15
scraping/utils.ts Normal file
View File

@ -0,0 +1,15 @@
import * as fs from "node:fs";
/**
 * Writes `fileContent` as UTF-8 to `./output/<fileName>`, relative to the
 * current working directory.
 *
 * The `./output` directory is created first when it does not exist, so a
 * fresh checkout does not fail with ENOENT. The write itself is asynchronous;
 * failures are logged and never thrown.
 *
 * @param fileName - Name of the file to create inside `./output`.
 * @param fileContent - Data handed to `fs.writeFile` (typically a string).
 */
export default function WriteFile(fileName: string, fileContent: any) {
  console.log('write file', fileName)

  const outputDir = './output';
  try {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(outputDir, { recursive: true });
  } catch (err) {
    // Keep the best-effort contract: report the problem, do not throw.
    console.log(`Error writing file: ${err}`)
    return;
  }

  return fs.writeFile(
    `${outputDir}/${fileName}`,
    fileContent,
    'utf8',
    (err) => {
      if (err) {
        console.log(`Error writing file: ${err}`)
      }
    }
  )
}