diff --git a/scripts/import-json.ts b/scripts/import-json.ts index e1947d8..8385e94 100644 --- a/scripts/import-json.ts +++ b/scripts/import-json.ts @@ -119,6 +119,7 @@ function transformCharacterData(item: CharacterRecord) { return { id: item.id, name: item.name, + frName: toNullable(item.frName), gender: toNullable(item.gender), age: toNullable(item.age), affiliations: toJsonArray(item.affiliations), @@ -137,7 +138,8 @@ function transformCharacterData(item: CharacterRecord) { frEpithets: toJsonArray(item.frEpithets), status: toNullable(item.status), arcId: toNullable(item.arcId), - url: toNullable(item.url) + url: toNullable(item.url), + frUrl: toNullable(item.frUrl) }; } diff --git a/scripts/scrape-onepiece.ts b/scripts/scrape-onepiece.ts index 4c63bb5..7b1e4b6 100644 --- a/scripts/scrape-onepiece.ts +++ b/scripts/scrape-onepiece.ts @@ -276,13 +276,12 @@ async function saveArcsToCSV(arcs: Arc[]): Promise { } /** - * Fetch all cannon characters from One Piece fandom using API + * Fetch all canon characters from One Piece fandom, including their full data. 
*/ -async function fetchAllCharactersUrl(): Promise { +async function fetchAllCharacters(arcsList: Arc[]): Promise { try { - const apiUrl = `${FANDOM_API_BASE}List_of_Canon_Characters`; console.log('Fetching character list via API...'); - const response = await fetchWithRetry(apiUrl); + const response = await fetchWithRetry(`${FANDOM_API_BASE}List_of_Canon_Characters`); const jsonData = await response.json(); // Extract HTML from API response @@ -292,7 +291,7 @@ async function fetchAllCharactersUrl(): Promise { } const $ = cheerio.load(htmlContent); - const characters: CharacterListItem[] = []; + const characterList: CharacterListItem[] = []; $('table.fandom-table tbody tr').each((index, element) => { if (index === 0) return; // Skip header row let charUrl = $(element).find('td:nth-child(2) a').attr('href'); @@ -304,27 +303,99 @@ async function fetchAllCharactersUrl(): Promise { charChapter = charChapter.replace(/\D/g, ''); // If charChapter is empty, skip the character as it means they don't have a proper page and are just mentioned in the list - if (!charChapter) { - return; - } - - if (parseInt(charChapter, 10) === 0) { + if (!charChapter || parseInt(charChapter, 10) === 0) { return; } if (charUrl) { charUrl = charUrl.replace('/wiki/', ''); - characters.push({ + characterList.push({ name: charName, url: charUrl, chapter: parseInt(charChapter, 10) }); } }); - console.log(`Found ${characters.length} characters.`); + + if (characterList.length === 0) { + console.error('No characters found.'); + return []; + } + console.log(`Found ${characterList.length} characters.`); + + // Fetch the French character list to get the picture URLs + console.log('Fetching French character list via API...'); + const frResponse = await fetchWithRetry(`${FR_FANDOM_API_BASE}Liste_des_Personnages_Canon`); + const frJsonData = await frResponse.json(); + + // Create a map of character name to picture URL from the French list + const frHtmlContent = frJsonData.parse?.text?.['*']; + const fr$ 
= cheerio.load(frHtmlContent); + const frCharacterPictureMap: Record = {}; + fr$('table.wikitable tbody tr').each((index, element) => { + if (index === 0) return; // Skip header row + const charName = fr$(element).find('td:nth-child(2) a').text().trim(); + const pictureUrl = fr$(element).find('td:nth-child(1) img').attr('data-src') || fr$(element).find('td:nth-child(1) img').attr('src') || null; + if (charName && pictureUrl) { + frCharacterPictureMap[charName] = pictureUrl; + } + }); + + const characters: Character[] = []; + let failedCharacters: CharacterListItem[] = [...characterList]; + + while (failedCharacters.length > 0) { + const nextFailedCharacters: CharacterListItem[] = []; + console.log(`\nFetching ${failedCharacters.length} characters...`); + + for (let i = 0; i < failedCharacters.length; i += FETCH_CONCURRENCY) { + const batch = failedCharacters.slice(i, i + FETCH_CONCURRENCY); + const batchResults = await Promise.all( + batch.map(async (char) => { + const data = await fetchCharacter(char.url, char.name, char.chapter, arcsList, frCharacterPictureMap); + return { char, data }; + }) + ); + + for (const { char, data } of batchResults) { + if (data) { + console.table({ + ID: data.id, + Name: data.name, + Gender: data.gender, + Age: data.age, + Status: data.status, + Epithets: data.epithets.join(', '), + Affiliations: data.affiliations.join(', '), + DevilFruitId: data.devilFruitId, + DevilFruitUrl: data.devilFruitUrl, + HakiObservation: data.hakiObservation ? 'Yes' : 'No', + HakiArmament: data.hakiArmament ? 'Yes' : 'No', + HakiConqueror: data.hakiConqueror ? 
'Yes' : 'No', + Height: data.height, + Bounty: data.bounty, + Origin: data.origin, + FirstAppearance: data.firstAppearance, + pictureUrl: data.pictureUrl, + FandomURL: data.url + }); + characters.push(data); + } else { + nextFailedCharacters.push(char); + } + } + } + + failedCharacters = nextFailedCharacters; + if (failedCharacters.length > 0) { + console.log(`⚠️ ${failedCharacters.length} characters failed. Retrying...`); + } + } + + console.log(`\n✓ Scraped ${characters.length} characters\n`); return characters; } catch (error) { - console.error('Error fetching character list:', (error as Error).message); + console.error('Error fetching characters:', (error as Error).message); return []; } } @@ -336,7 +407,8 @@ async function fetchCharacter( characterUrl: string, characterName: string, characterChapter: number, - arcsList: Arc[] + arcsList: Arc[], + frCharacterPictureMap: Record ): Promise { try { console.log(`Fetching: ${characterName}...`); @@ -453,6 +525,8 @@ async function fetchCharacter( frName = name; } + const pictureUrl = frCharacterPictureMap[frName || ''] || null; + return { id: finalCharacterId, name, @@ -475,7 +549,7 @@ async function fetchCharacter( firstAppearance, arcId, status, - pictureUrl: 'Image_Non_Disponible', + pictureUrl, url: characterUrl, frUrl }; @@ -934,72 +1008,17 @@ async function main(): Promise { } // Step 2: Scraping Characters - console.log('=== Step 1: Scraping Characters ===\n'); - const characterList = await fetchAllCharactersUrl(); + console.log('=== Step 2: Scraping Characters ===\n'); + const characters = await fetchAllCharacters(arcsList); - if (characterList.length === 0) { + if (characters.length === 0) { console.error('No characters found. 
Exiting.'); return; } - const characters: Character[] = []; - const devilFruitUrls = new Set(); - let failedCharacters: CharacterListItem[] = [...characterList]; - - while (failedCharacters.length > 0) { - const nextFailedCharacters: CharacterListItem[] = []; - console.log(`\nFetching ${failedCharacters.length} characters...`); - - for (let i = 0; i < failedCharacters.length; i += FETCH_CONCURRENCY) { - const batch = failedCharacters.slice(i, i + FETCH_CONCURRENCY); - const batchResults = await Promise.all( - batch.map(async (char) => { - const data = await fetchCharacter(char.url, char.name, char.chapter, arcsList); - return { char, data }; - }) - ); - - for (const { char, data } of batchResults) { - if (data) { - console.table({ - ID: data.id, - Name: data.name, - Gender: data.gender, - Age: data.age, - Status: data.status, - Epithets: data.epithets.join(', '), - Affiliations: data.affiliations.join(', '), - DevilFruitId: data.devilFruitId, - DevilFruitUrl: data.devilFruitUrl, - HakiObservation: data.hakiObservation ? 'Yes' : 'No', - HakiArmament: data.hakiArmament ? 'Yes' : 'No', - HakiConqueror: data.hakiConqueror ? 'Yes' : 'No', - Height: data.height, - Bounty: data.bounty, - Origin: data.origin, - FirstAppearance: data.firstAppearance, - pictureUrl: data.pictureUrl, - FandomURL: data.url - }); - - if (data.devilFruitUrl) { - devilFruitUrls.add(data.devilFruitUrl); - } - - characters.push(data); - } else { - nextFailedCharacters.push(char); - } - } - } - - failedCharacters = nextFailedCharacters; - if (failedCharacters.length > 0) { - console.log(`⚠️ ${failedCharacters.length} characters failed. Retrying...`); - } - } - - console.log(`\n✓ Scraped ${characters.length} characters\n`); + const devilFruitUrls = new Set( + characters.filter((c) => c.devilFruitUrl).map((c) => c.devilFruitUrl!) 
+ ); console.log(`✓ Found ${devilFruitUrls.size} unique devil fruits\n`); // Step 3: Scraping Devil Fruits diff --git a/src/routes/(admin)/admin/+layout.svelte b/src/routes/(admin)/admin/+layout.svelte index 6ba6ae6..49c5ade 100644 --- a/src/routes/(admin)/admin/+layout.svelte +++ b/src/routes/(admin)/admin/+layout.svelte @@ -1,6 +1,7 @@