import * as cheerio from 'cheerio';
import fs from 'fs';
import { createObjectCsvWriter } from 'csv-writer';

// ---------------------------------------------------------------------------
// Type definitions
// ---------------------------------------------------------------------------

/** A story arc, as listed on the "Chapitres_et_Tomes" page. */
interface Arc {
  id: string;
  name: string;
  startChapter: number;
  /** `null` when the chapter range has no numeric end (arc still running). */
  endChapter: number | null;
  url: string;
}

/** One scraped character record. */
interface Character {
  id: string;
  name: string;
  gender: string | null;
  age: number | null;
  height: number | null;
  origin: string | null;
  devilFruitId: string | null;
  devilFruitUrl: string | null;
  affiliations: string[];
  bounty: number | null;
  hakiObservation: boolean;
  hakiArmament: boolean;
  hakiConqueror: boolean;
  epithets: string[];
  firstAppearance: number;
  status: string | null;
  pictureUrl: string | null;
  url: string;
  arcId?: string;
}

/** One row of the canon character list page. */
interface CharacterListItem {
  name: string;
  url: string;
  pictureUrl: string | null;
  /** Digits-only chapter number, still as text. */
  chapter: string;
}

interface DevilFruitData {
  devilFruitId: string;
  devilFruitUrl: string;
}

interface DevilFruit {
  id: string;
  name: string;
  type: string | null;
  url: string;
}

/** The subset of the `action=parse` JSON payload that this script reads. */
interface ParseApiResponse {
  parse?: {
    title?: string;
    text?: { '*'?: string };
    categories?: Array<{ '*'?: string }>;
    links?: Array<{ '*'?: string }>;
  };
}

type OutputFormat = 'json' | 'csv' | 'all';

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

const FANDOM_API_BASE = 'https://onepiece.fandom.com/fr/api.php?action=parse&format=json&page=';
const OUTPUT_DIR = './scraped-data';
const MAX_RETRIES = 0; // Set to 0 to disable retries, can be increased if needed
const INITIAL_RETRY_DELAY = 1000;
const CHARACTER_FETCH_CONCURRENCY = 50;
// Upper bound on full passes over the failed-character list, so a page that
// always fails cannot keep the script running forever.
const MAX_CHARACTER_FETCH_ROUNDS = 3;

// Store cookies across requests (simulate browser behavior).
// Key: cookie name. Value: the full Set-Cookie string as received.
const cookies = new Map<string, string>();

// Create output directory (recursive mkdir is a no-op when it already exists)
fs.mkdirSync(OUTPUT_DIR, { recursive: true });

// ---------------------------------------------------------------------------
// Small utilities
// ---------------------------------------------------------------------------

/** Human-readable message for anything caught in a `catch` clause. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** `decodeURIComponent` that returns the input unchanged instead of throwing on a malformed `%`. */
function safeDecode(value: string): string {
  try {
    return decodeURIComponent(value);
  } catch {
    return value;
  }
}

/**
 * Build the parse-API URL for a page.
 *
 * Accepts either a percent-encoded wiki href fragment or a raw page title:
 * the value is decoded first, then encoded exactly once, so characters such
 * as `&`, `#` or `+` in a title cannot corrupt the query string.
 */
function pageApiUrl(page: string): string {
  return `${FANDOM_API_BASE}${encodeURIComponent(safeDecode(page))}`;
}

/** Fetch a page through the parse API and return the decoded JSON payload. */
async function fetchParsedPage(page: string): Promise<ParseApiResponse> {
  const response = await fetchWithRetry(pageApiUrl(page));
  return (await response.json()) as ParseApiResponse;
}

/**
 * Return the rendered HTML of a parse-API payload.
 * @throws Error when the payload carries no HTML.
 */
function requirePageHtml(data: ParseApiResponse): string {
  const html = data.parse?.text?.['*'];
  if (!html) {
    throw new Error('Unable to extract HTML content from API response');
  }
  return html;
}

/**
 * Heuristic used throughout this script: a parsed page that exposes exactly
 * one link is treated as a redirect stub, and that link as its target.
 * NOTE(review): a real page with a single link would also match — confirm
 * whether the API's `redirects` parameter would be a safer way to do this.
 */
function redirectTarget(data: ParseApiResponse): string | null {
  const links = data.parse?.links;
  if (!links || links.length !== 1) return null;
  return links[0]?.['*'] || null;
}

/** Category names of a parsed page (empty string where the name is missing). */
function categoryNames(data: ParseApiResponse): string[] {
  return (data.parse?.categories ?? []).map((category) => category['*'] ?? '');
}

// ---------------------------------------------------------------------------
// HTTP
// ---------------------------------------------------------------------------

/** Build the `Cookie` request header from the stored cookies ('' when none). */
function getCookieHeader(): string {
  const pairs: string[] = [];
  for (const cookie of cookies.values()) {
    // Only the leading "name=value" part is sent back; attributes are dropped.
    pairs.push(cookie.split(';')[0] ?? '');
  }
  return pairs.join('; ');
}

/** Remember the cookies from one or more `Set-Cookie` header values. */
function saveCookies(setCookieHeader: string | string[] | null): void {
  if (!setCookieHeader) return;

  const received = Array.isArray(setCookieHeader) ? setCookieHeader : [setCookieHeader];
  for (const cookie of received) {
    const nameValue = cookie.split(';')[0] ?? '';
    const name = nameValue.split('=')[0] ?? '';
    if (name) cookies.set(name, cookie);
  }
}

/**
 * Read every `Set-Cookie` value of a response.
 *
 * `Headers.get('set-cookie')` folds several cookies into one comma-joined
 * string that cannot be split reliably, so `getSetCookie()` is used when the
 * runtime provides it; the folded value is only a fallback.
 */
function readSetCookies(headers: Headers): string[] {
  const withList = headers as Headers & { getSetCookie?: () => string[] };
  if (typeof withList.getSetCookie === 'function') {
    return withList.getSetCookie();
  }
  const folded = headers.get('set-cookie');
  return folded ? [folded] : [];
}

/**
 * Retry a fetch request with exponential backoff
 *
 * @param url      Absolute URL to request.
 * @param options  Extra `fetch` options. Headers given here override the
 *                 defaults; the stored cookies are always sent.
 * @param retries  Number of attempts already made (used by the recursion).
 * @returns The first response with a 2xx status.
 * @throws The last failure (network error or `HTTP <status>` error) once
 *         `MAX_RETRIES` retries have been used.
 */
async function fetchWithRetry(url: string, options: RequestInit = {}, retries: number = 0): Promise<Response> {
  const headers: Record<string, string> = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:150.0) Firefox/150.0',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    // NOTE(review): only plain-object headers are merged; a `Headers` instance
    // spreads to nothing.
    ...((options.headers as Record<string, string> | undefined) ?? {}),
  };

  // Add cookies from previous requests
  const cookieHeader = getCookieHeader();
  if (cookieHeader) {
    headers['Cookie'] = cookieHeader;
  }

  let failure: unknown = undefined;
  let reason = '';
  try {
    // `headers` goes last so the merged headers (including Cookie) are not
    // replaced by `options.headers`.
    const response = await fetch(url, { ...options, headers });

    // Save cookies from response
    saveCookies(readSetCookies(response.headers));

    // Check if response is OK (status 200-299)
    if (response.ok) {
      return response;
    }
    failure = new Error(`HTTP ${response.status}: ${response.statusText}`);
    reason = `HTTP ${response.status} for ${url}`;
  } catch (error) {
    failure = error;
    reason = `Network error: ${errorMessage(error)}`;
  }

  // If we've exhausted retries, throw error
  if (retries >= MAX_RETRIES) {
    throw failure;
  }

  const delay = INITIAL_RETRY_DELAY * Math.pow(2, retries);
  console.log(`⚠️ ${reason}, retrying in ${delay}ms...`);
  await new Promise((resolve) => setTimeout(resolve, delay));
  return fetchWithRetry(url, options, retries + 1);
}

/**
 * Normalize a string into an ID: URI-decode it, apply Unicode NFD
 * normalization, drop `, : . ( )`, replace whitespace runs with underscores,
 * and lowercase it. Malformed percent-escapes are left as they are.
 */
function normalizeId(str: string): string {
  return safeDecode(str)
    .normalize('NFD')
    .replace(/[,:.\(\)]/g, '')
    .replace(/\s+/g, '_')
    .toLowerCase();
}

// ---------------------------------------------------------------------------
// Arcs
// ---------------------------------------------------------------------------

/**
 * Fetch all arcs from One Piece fandom using API
 *
 * @returns The arcs found; an empty array when the request or parsing fails
 *          (the error is logged, not thrown).
 */
async function fetchAllArcs(): Promise<Arc[]> {
  try {
    console.log('Fetching arcs list via API...');
    const $ = cheerio.load(requirePageHtml(await fetchParsedPage('Chapitres_et_Tomes')));
    const arcs: Arc[] = [];

    // Find all arc links in the table
    $('table.wikitable td a').each((_index, element) => {
      const text = $(element).text().trim();
      const href = $(element).attr('href');

      // Check if it's an arc link (contains "Arc" and chapter info)
      if (!href || !text.includes('Arc') || !text.includes('Ch.')) return;

      // Extract arc name and chapter range
      // Example text: "Arc Ville d'Orange(Ch.8 à 21)[T.1 à 3]"
      console.log(`Processing arc link: ${text} (${href})`);
      // The end of the range is either a number or any three-character
      // placeholder (e.g. "...") for an arc without a final chapter yet.
      const nameMatch = text.match(/^(.*?Arc.*?)\s*\(Ch\.(\d+)(?:\s*à\s*(?:(\d+)|(?:...)))?\)/);
      if (!nameMatch) return;

      const page = href.replace('/fr/wiki/', '');
      arcs.push({
        // ID is the normalized page name without its "arc_" prefix
        id: normalizeId(page).replace(/^arc_/i, ''),
        // Display name without its leading "Arc "
        name: (nameMatch[1] ?? '').trim().replace(/^Arc\s+/i, ''),
        startChapter: parseInt(nameMatch[2] ?? '', 10),
        endChapter: nameMatch[3] ? parseInt(nameMatch[3], 10) : null,
        url: page,
      });
    });

    console.log(`Found ${arcs.length} arcs.`);
    return arcs;
  } catch (error) {
    console.error('Error fetching arcs list:', errorMessage(error));
    return [];
  }
}

/**
 * Save arcs to JSON
 */
async function saveArcsToJSON(arcs: Arc[]): Promise<void> {
  const filepath = `${OUTPUT_DIR}/arcs.json`;
  fs.writeFileSync(filepath, JSON.stringify(arcs, null, 2));
  console.log(`✓ Saved to ${filepath}`);
}

/**
 * Save arcs to CSV
 */
async function saveArcsToCSV(arcs: Arc[]): Promise<void> {
  const filepath = `${OUTPUT_DIR}/arcs.csv`;
  const csvWriter = createObjectCsvWriter({
    path: filepath,
    header: [
      { id: 'id', title: 'ID' },
      { id: 'name', title: 'Name' },
      { id: 'startChapter', title: 'Start Chapter' },
      { id: 'endChapter', title: 'End Chapter' },
      { id: 'url', title: 'URL' },
    ],
  });

  // Missing values are written as empty cells.
  const records = arcs.map((arc) => ({
    id: arc.id || '',
    name: arc.name || '',
    startChapter: arc.startChapter || '',
    endChapter: arc.endChapter || '',
    url: arc.url || '',
  }));

  await csvWriter.writeRecords(records);
  console.log(`✓ Saved to ${filepath}`);
}

// ---------------------------------------------------------------------------
// Characters
// ---------------------------------------------------------------------------

/**
 * Fetch all canon characters from One Piece fandom using API
 *
 * @returns One item per table row that has a link and a chapter number;
 *          an empty array when the request or parsing fails (error logged).
 */
async function fetchAllCharactersUrl(): Promise<CharacterListItem[]> {
  try {
    console.log('Fetching character list via API...');
    const $ = cheerio.load(requirePageHtml(await fetchParsedPage('Liste_des_Personnages_Canon')));
    const characters: CharacterListItem[] = [];

    $('table.wikitable tbody tr').each((index, element) => {
      if (index === 0) return; // Skip header row

      const row = $(element);
      const picture = row.find('td:nth-child(1) a img');
      const link = row.find('td:nth-child(2) a');

      // Lazy-loaded images keep the real URL in data-src
      const pictureUrl = picture.attr('data-src') || picture.attr('src');
      const href = link.attr('href');
      const name = link.text().trim();

      // Remove parentheses and their content from chapter info
      // (e.g. "1 (flashback)" becomes "1"), then keep digits only
      const chapter = row
        .find('td:nth-child(3)')
        .text()
        .trim()
        .replace(/\([^)]*\)/g, '')
        .replace(/\D/g, '');

      // If the chapter is empty, skip the character as it means they don't
      // have a proper page and are just mentioned in the list
      if (!chapter || !href) return;

      characters.push({
        name,
        url: href.replace('/fr/wiki/', ''),
        pictureUrl: pictureUrl || null,
        chapter,
      });
    });

    console.log(`Found ${characters.length} characters.`);
    return characters;
  } catch (error) {
    console.error('Error fetching character list:', errorMessage(error));
    return [];
  }
}

/**
 * Fetch character data from fandom using provided URL
 *
 * @param characterUrl         Page name taken from the character list href.
 * @param characterName        Display name from the character list.
 * @param characterpictureUrl  Thumbnail URL from the character list, if any.
 * @param characterChapter     Digits-only first-appearance chapter.
 * @returns The character record, or `null` when anything fails (error logged).
 */
async function fetchCharacter(
  characterUrl: string,
  characterName: string,
  characterpictureUrl: string | null,
  characterChapter: string
): Promise<Character | null> {
  try {
    console.log(`Fetching: ${characterName}...`);

    // Use API to fetch character page
    let pageData = await fetchParsedPage(characterUrl);

    // If the page looks like a redirect stub, query the API again with the
    // target so we get the real content, and keep the target as the final URL.
    let finalCharacterUrl = characterUrl;
    const target = redirectTarget(pageData);
    if (target) {
      finalCharacterUrl = target;
      pageData = await fetchParsedPage(finalCharacterUrl);
    }

    const categories = categoryNames(pageData);
    const $ = cheerio.load(requirePageHtml(pageData));
    const name = characterName;

    // Generate character ID from URL + name combination
    const finalCharacterId = normalizeId(finalCharacterUrl + '_' + name);

    // Gender: the first gender category listed wins
    let gender: string | null = null;
    for (const category of categories) {
      if (category === 'Personnages_Masculins') {
        gender = 'Male';
        break;
      }
      if (category === 'Personnages_Féminins') {
        gender = 'Female';
        break;
      }
    }

    const age = extractAge($);
    const affiliations = extractAffiliations($);
    const epithets = extractEpithets($);

    const devilFruitData = await extractDevilFruit($);
    const devilFruitId = devilFruitData?.devilFruitId || null;
    const devilFruitUrl = devilFruitData?.devilFruitUrl || null;

    // Haki flags come from the page categories
    const hakiObservation = categories.includes("Utilisateurs_du_Haki_de_l'observation");
    const hakiArmament = categories.includes("Utilisateurs_du_Haki_de_l'armement");
    const hakiConqueror = categories.includes('Utilisateurs_du_Haki_des_rois');

    const bounty = extractBounty($);
    const height = extractHeight($);

    // Use chapter from character list, cast to int
    const firstAppearance = parseInt(characterChapter, 10);

    const origin = extractOrigin($);
    const status = extractStatus($);

    // Drop the wiki's "image not available" placeholder
    const pictureUrl =
      characterpictureUrl && characterpictureUrl.includes('Image_Non_Disponible')
        ? null
        : characterpictureUrl;

    return {
      id: finalCharacterId,
      name,
      gender,
      age,
      height,
      origin,
      devilFruitId,
      devilFruitUrl,
      affiliations,
      bounty,
      hakiObservation,
      hakiArmament,
      hakiConqueror,
      epithets,
      firstAppearance,
      status,
      pictureUrl,
      url: finalCharacterUrl,
    };
  } catch (error) {
    console.error(`Error fetching ${characterName}:`, errorMessage(error));
    return null;
  }
}

// ---------------------------------------------------------------------------
// Infobox extraction helpers
// ---------------------------------------------------------------------------

/** Inner HTML of the infobox value for `source`, or `null` when absent/empty. */
function infoboxHtml($: cheerio.CheerioAPI, source: string): string | null {
  const value = $(`[data-source="${source}"] .pi-data-value`);
  if (value.length === 0) return null;
  return value.html() || null;
}

/** Remove `<sup>…</sup>` citation markers, including multi-line ones. */
function stripCitations(html: string): string {
  return html.replace(/<sup[^>]*>[\s\S]*?<\/sup>/gi, '');
}

/** Split an HTML fragment on `<br>`, `<br/>` and `<br />`. */
function splitOnLineBreaks(html: string): string[] {
  return html.split(/<br\s*\/?>/i);
}

/** Remove every tag from an HTML fragment and trim the result. */
function stripTags(html: string): string {
  return html.replace(/<[^>]*>/g, '').trim();
}

/** Remove parenthesized notes such as "(avant l'ellipse)". */
function stripParenthesized(text: string): string {
  return text.replace(/\([^)]*\)/g, '');
}

/**
 * Extract age from infobox
 *
 * Uses the last `<br>`-separated entry of the "âge" field and keeps its
 * digits. Returns `null` when the field is missing or has no non-zero number.
 */
function extractAge($: cheerio.CheerioAPI): number | null {
  const html = infoboxHtml($, 'âge');
  if (!html) return null;

  const entries = splitOnLineBreaks(stripCitations(html));
  const latest = stripParenthesized(stripTags(entries[entries.length - 1] ?? ''));
  const digitsOnly = latest.replace(/\D/g, '');
  return parseInt(digitsOnly, 10) || null;
}

/**
 * Extract affiliations from infobox
 *
 * Prefers the text of the links inside the field; when there are none, falls
 * back to splitting the plain text on newlines, `;` and `,`.
 */
function extractAffiliations($: cheerio.CheerioAPI): string[] {
  const div = $('[data-source="affiliation"] .pi-data-value');
  if (div.length === 0) return [];

  const cleanedDiv = div.clone();
  cleanedDiv.find('sup').remove();
  const html = cleanedDiv.html();
  if (!html) return [];

  // Extract all link values
  const linkValues = cleanedDiv
    .find('a')
    .map((_i, el) => $(el).text().trim())
    .get();
  if (linkValues.length > 0) {
    return linkValues;
  }

  // Fallback to parsing text
  return stripTags(html)
    .split(/\s*\n\s*|\s*;\s*|\s*,\s*/)
    .filter(Boolean);
}

/**
 * Extract epithets from infobox
 * Epithets are always between quotes: straight ("…"), curly (“…”) or
 * guillemets («…»).
 */
function extractEpithets($: cheerio.CheerioAPI): string[] {
  const div = $('[data-source="épithète"] .pi-data-value');
  if (div.length === 0) return [];

  const cleanedDiv = div.clone();
  cleanedDiv.find('sup').remove();
  const text = cleanedDiv.text();
  if (!text) return [];

  const matches = text.match(/["«“]([^"»”]+)["»”]/g);
  if (!matches) return [];

  // Remove the quotes and trim
  return matches
    .map((match) => match.replace(/^["«“]|["»”]$/g, '').trim())
    .filter(Boolean);
}

/**
 * Extract devil fruit from infobox
 * Returns both normalized ID and URL
 *
 * The linked page is fetched once so a redirect stub can be replaced by its
 * target; when that request fails, the original link is used.
 * Returns `null` when the infobox has no wiki link in its "dfnom" field.
 */
async function extractDevilFruit($: cheerio.CheerioAPI): Promise<DevilFruitData | null> {
  const link = $('[data-source="dfnom"] .pi-data-value a').first();
  if (link.length === 0) return null;

  const href = link.attr('href');
  if (!href || !href.startsWith('/fr/wiki/')) return null;

  const cleanUrl = href.replace('/fr/wiki/', '');
  let finalPath = cleanUrl;
  try {
    // Fetch the page via API to follow redirects
    finalPath = redirectTarget(await fetchParsedPage(cleanUrl)) ?? cleanUrl;
  } catch (error) {
    // Best effort: keep the original href
    console.error(`Error fetching devil fruit page: ${errorMessage(error)}`);
  }

  return {
    devilFruitId: normalizeId(finalPath),
    devilFruitUrl: finalPath,
  };
}

/**
 * Extract bounty from infobox
 *
 * Reads the first `<br>`-separated entry of the "prime" field, or the second
 * one when the first holds no digit. Returns 0 (not `null`) when the field is
 * missing or holds no number.
 */
function extractBounty($: cheerio.CheerioAPI): number | null {
  const html = infoboxHtml($, 'prime');
  if (!html) return 0;

  const entries = splitOnLineBreaks(stripCitations(html));
  let cleanText = stripTags(entries[0] ?? '');

  // If the first entry has no digits, try the second one
  if (!/\d/.test(cleanText) && entries[1]) {
    cleanText = stripTags(entries[1]);
  }

  // Remove all non-digits (thousands separators, currency, ...)
  const digitsOnly = cleanText.replace(/\D/g, '');
  return digitsOnly ? parseInt(digitsOnly, 10) : 0;
}

/**
 * Extract height from infobox
 *
 * Uses the content of the first `<p>` in the "taille" field when there is
 * one, otherwise the last `<br>`-separated entry. Values containing "cm" are
 * divided by 100; "1m74"-style values become 1.74. Returns `null` when the
 * field is missing or no number can be read.
 */
function extractHeight($: cheerio.CheerioAPI): number | null {
  const rawHtml = infoboxHtml($, 'taille');
  if (!rawHtml) return null;

  const html = stripCitations(rawHtml);
  const paragraph = html.match(/<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/i);
  const content = paragraph ? paragraph[1] ?? '' : splitOnLineBreaks(html).pop() ?? '';

  // Normalize units for meters or centimeters
  const normalized = stripParenthesized(stripTags(content)).toLowerCase().replace(/\s/g, '');

  let value: number;
  if (normalized.includes('cm')) {
    value = parseFloat(normalized.replace(/\D/g, '')) / 100;
  } else if (normalized.includes('m')) {
    // "1m74" -> "1.74"; a French decimal comma ("1,74m") is read as a dot
    value = parseFloat(normalized.split('m').filter(Boolean).join('.').replace(',', '.'));
  } else {
    value = parseFloat(normalized.replace(/\D/g, ''));
  }

  // Never leak NaN (or a meaningless 0 from the cm branch) to callers
  if (Number.isNaN(value)) return null;
  if (value === 0 && normalized.includes('cm')) return null;
  return value;
}

/**
 * Extract origin from infobox
 *
 * Returns the first `<br>`-separated entry of the "origine" field without
 * tags or parenthesized notes, or `null` when nothing is left.
 */
function extractOrigin($: cheerio.CheerioAPI): string | null {
  const html = infoboxHtml($, 'origine');
  if (!html) return null;

  const firstEntry = splitOnLineBreaks(stripCitations(html))[0] ?? '';
  const cleanText = stripParenthesized(stripTags(firstEntry)).trim();
  return cleanText || null;
}

/**
 * Extract status from infobox
 *
 * Maps "vivant" / "décédé" / "inconnu" to 'Alive' / 'Dead' / 'Unknown'.
 * Any other text in the field is reported as 'Alive'; a missing field is
 * `null`.
 */
function extractStatus($: cheerio.CheerioAPI): string | null {
  const div = $('[data-source="statut"] .pi-data-value');
  if (div.length === 0) return null;

  const statusText = div.text().trim().toLowerCase();
  if (statusText.includes('vivant')) return 'Alive';
  if (statusText.includes('décédé')) return 'Dead';
  if (statusText.includes('inconnu')) return 'Unknown';
  return 'Alive';
}

/**
 * Save data to JSON
 */
async function saveToJSON(characters: Character[]): Promise<void> {
  const filepath = `${OUTPUT_DIR}/characters.json`;
  fs.writeFileSync(filepath, JSON.stringify(characters, null, 2));
  console.log(`✓ Saved to ${filepath}`);
}

/**
 * Save data to CSV
 *
 * List fields are joined with ", ", haki flags are written as 1/0 and missing
 * values as empty cells (bounty defaults to 0).
 */
async function saveToCSV(characters: Character[]): Promise<void> {
  const filepath = `${OUTPUT_DIR}/characters.csv`;
  const csvWriter = createObjectCsvWriter({
    path: filepath,
    header: [
      { id: 'id', title: 'ID' },
      { id: 'name', title: 'Name' },
      { id: 'gender', title: 'Gender' },
      { id: 'age', title: 'Age' },
      { id: 'height', title: 'Height' },
      { id: 'origin', title: 'Origin' },
      { id: 'status', title: 'Status' },
      { id: 'epithets', title: 'Epithets' },
      { id: 'devilFruitId', title: 'Devil Fruit ID' },
      { id: 'affiliations', title: 'Affiliations' },
      { id: 'bounty', title: 'Bounty' },
      { id: 'hakiObservation', title: 'Haki Observation' },
      { id: 'hakiArmament', title: 'Haki Armament' },
      { id: 'hakiConqueror', title: 'Haki Conqueror' },
      { id: 'firstAppearance', title: 'First Appearance' },
      { id: 'arcId', title: 'Arc ID' },
      { id: 'pictureUrl', title: 'Image URL' },
      { id: 'url', title: 'Fandom URL' },
    ],
  });

  const records = characters.map((c) => ({
    id: c.id || '',
    name: c.name || '',
    gender: c.gender || '',
    age: c.age || '',
    height: c.height || '',
    origin: c.origin || '',
    status: c.status || '',
    epithets: c.epithets.join(', '),
    devilFruitId: c.devilFruitId || '',
    affiliations: c.affiliations.join(', '),
    bounty: c.bounty ?? 0,
    hakiObservation: c.hakiObservation ? 1 : 0,
    hakiArmament: c.hakiArmament ? 1 : 0,
    hakiConqueror: c.hakiConqueror ? 1 : 0,
    firstAppearance: c.firstAppearance || '',
    arcId: c.arcId || '',
    pictureUrl: c.pictureUrl || '',
    url: c.url || '',
  }));

  await csvWriter.writeRecords(records);
  console.log(`✓ Saved to ${filepath}`);
}

// ---------------------------------------------------------------------------
// Devil fruits
// ---------------------------------------------------------------------------

/**
 * Fetch devil fruit data from fandom using provided URL
 *
 * The type is the first of Paramecia, Zoan, Logia, Smile (in that order)
 * whose name appears in any page category; `null` when none does.
 *
 * @returns The devil fruit record, or `null` on failure (error logged).
 */
async function fetchDevilFruit(devilFruitUrl: string, devilFruitId: string): Promise<DevilFruit | null> {
  try {
    console.log(`Fetching devil fruit: ${devilFruitUrl}...`);

    // Use API to fetch devil fruit page
    const pageData = await fetchParsedPage(devilFruitUrl);
    requirePageHtml(pageData); // a page without content counts as a failure

    const name = pageData.parse?.title || devilFruitId.replace(/_/g, ' ');

    const categories = categoryNames(pageData).map((category) => category.toLowerCase());
    const knownTypes = ['Paramecia', 'Zoan', 'Logia', 'Smile'];
    const type =
      knownTypes.find((candidate) =>
        categories.some((category) => category.includes(candidate.toLowerCase()))
      ) ?? null;

    return {
      id: devilFruitId,
      name,
      type,
      url: devilFruitUrl,
    };
  } catch (error) {
    console.error(`Error fetching devil fruit ${devilFruitUrl}:`, errorMessage(error));
    return null;
  }
}

/**
 * Save devil fruits to JSON
 */
async function saveDevilFruitsToJSON(devilFruits: DevilFruit[]): Promise<void> {
  const filepath = `${OUTPUT_DIR}/devil-fruits.json`;
  fs.writeFileSync(filepath, JSON.stringify(devilFruits, null, 2));
  console.log(`✓ Saved to ${filepath}`);
}

/**
 * Save devil fruits to CSV
 */
async function saveDevilFruitsToCSV(devilFruits: DevilFruit[]): Promise<void> {
  const filepath = `${OUTPUT_DIR}/devil-fruits.csv`;
  const csvWriter = createObjectCsvWriter({
    path: filepath,
    header: [
      { id: 'id', title: 'ID' },
      { id: 'name', title: 'Name' },
      { id: 'type', title: 'Type' },
      { id: 'url', title: 'URL' },
    ],
  });

  const records = devilFruits.map((df) => ({
    id: df.id || '',
    name: df.name || '',
    type: df.type || '',
    url: df.url || '',
  }));

  await csvWriter.writeRecords(records);
  console.log(`✓ Saved to ${filepath}`);
}

// ---------------------------------------------------------------------------
// Entry point
// ---------------------------------------------------------------------------

/** Narrow a command-line argument to a supported output format. */
function isOutputFormat(value: string): value is OutputFormat {
  return value === 'json' || value === 'csv' || value === 'all';
}

/** Print one scraped character as a table. */
function logCharacter(data: Character): void {
  console.table({
    ID: data.id,
    Name: data.name,
    Gender: data.gender,
    Age: data.age,
    Status: data.status,
    Epithets: data.epithets.join(', '),
    Affiliations: data.affiliations.join(', '),
    DevilFruitId: data.devilFruitId,
    DevilFruitUrl: data.devilFruitUrl,
    HakiObservation: data.hakiObservation ? 'Yes' : 'No',
    HakiArmament: data.hakiArmament ? 'Yes' : 'No',
    HakiConqueror: data.hakiConqueror ? 'Yes' : 'No',
    Height: data.height,
    Bounty: data.bounty,
    Origin: data.origin,
    FirstAppearance: data.firstAppearance,
    pictureUrl: data.pictureUrl,
    FandomURL: data.url,
  });
}

/**
 * Main execution
 *
 * Usage: `<script> [json|csv|all]` (default `all`). Scrapes arcs, characters
 * and the devil fruits referenced by characters, then writes the requested
 * files into `OUTPUT_DIR`.
 */
async function main(): Promise<void> {
  const format = process.argv[2] || 'all'; // json, csv, or all
  if (!isOutputFormat(format)) {
    // Fail before scraping: an unknown format would otherwise write nothing.
    console.error(`Unknown output format "${format}". Expected "json", "csv" or "all".`);
    process.exitCode = 1;
    return;
  }
  const wantsJson = format !== 'csv';
  const wantsCsv = format !== 'json';

  console.log(`\nOne Piece Scraper - Mode: ${format}\n`);

  // Step 1: Scraping Arcs
  console.log('=== Step 1: Scraping Arcs ===\n');
  const arcsList = await fetchAllArcs();

  if (arcsList.length > 0) {
    // Display arcs in table format
    for (const arc of arcsList) {
      console.table({
        ID: arc.id,
        Name: arc.name,
        StartChapter: arc.startChapter,
        EndChapter: arc.endChapter || 'Ongoing',
        URL: arc.url,
      });
    }
    console.log(`\n✓ Found ${arcsList.length} arcs\n`);

    if (wantsJson) await saveArcsToJSON(arcsList);
    if (wantsCsv) await saveArcsToCSV(arcsList);
  } else {
    console.warn('No arcs found, continuing...\n');
  }

  // Step 2: Scraping Characters
  console.log('=== Step 2: Scraping Characters ===\n');
  const characterList = await fetchAllCharactersUrl();

  if (characterList.length === 0) {
    console.error('No characters found. Exiting.');
    return;
  }

  const characters: Character[] = [];
  const devilFruitUrls = new Set<string>();

  // Characters still to fetch; failures are retried for a bounded number of
  // rounds so a page that always fails cannot loop forever.
  let pending: CharacterListItem[] = [...characterList];

  for (let round = 1; pending.length > 0 && round <= MAX_CHARACTER_FETCH_ROUNDS; round++) {
    const failed: CharacterListItem[] = [];
    console.log(`\nFetching ${pending.length} characters...`);

    for (let i = 0; i < pending.length; i += CHARACTER_FETCH_CONCURRENCY) {
      const batch = pending.slice(i, i + CHARACTER_FETCH_CONCURRENCY);
      // fetchCharacter never rejects (it returns null on error), so
      // Promise.all cannot abort the batch.
      const batchResults = await Promise.all(
        batch.map(async (char) => ({
          char,
          data: await fetchCharacter(char.url, char.name, char.pictureUrl, char.chapter),
        }))
      );

      for (const { char, data } of batchResults) {
        if (!data) {
          failed.push(char);
          continue;
        }

        logCharacter(data);

        if (data.devilFruitUrl) {
          devilFruitUrls.add(data.devilFruitUrl);
        }

        // Attach the arc whose chapter range contains the first appearance
        if (data.firstAppearance) {
          const arc = arcsList.find(
            (a) =>
              a.startChapter <= data.firstAppearance &&
              (a.endChapter === null || a.endChapter >= data.firstAppearance)
          );
          if (arc) {
            data.arcId = arc.id;
          }
        }

        characters.push(data);
      }
    }

    pending = failed;
    if (pending.length > 0 && round < MAX_CHARACTER_FETCH_ROUNDS) {
      console.log(`⚠️ ${pending.length} characters failed. Retrying...`);
    }
  }

  if (pending.length > 0) {
    console.warn(
      `⚠️ Giving up on ${pending.length} characters after ${MAX_CHARACTER_FETCH_ROUNDS} attempts: ` +
        pending.map((char) => char.name).join(', ')
    );
  }

  console.log(`\n✓ Scraped ${characters.length} characters\n`);
  console.log(`✓ Found ${devilFruitUrls.size} unique devil fruits\n`);

  // Step 3: Scraping Devil Fruits
  console.log('=== Step 3: Scraping Devil Fruits ===\n');

  if (devilFruitUrls.size === 0) {
    console.warn('No devil fruits found from characters, skipping...\n');
  } else {
    const devilFruits: DevilFruit[] = [];

    for (const url of devilFruitUrls) {
      const data = await fetchDevilFruit(url, normalizeId(url));
      if (data) {
        console.table({
          ID: data.id,
          Name: data.name,
          Type: data.type,
          URL: data.url,
        });
        devilFruits.push(data);
      }
    }

    console.log(`\n✓ Scraped ${devilFruits.length} devil fruits\n`);

    if (wantsJson) await saveDevilFruitsToJSON(devilFruits);
    if (wantsCsv) await saveDevilFruitsToCSV(devilFruits);

    // Make every character's devil fruit ID the normalized form of its URL,
    // which is also how the devil fruit records above are keyed.
    for (const char of characters) {
      if (char.devilFruitUrl) {
        char.devilFruitId = normalizeId(char.devilFruitUrl);
      }
    }
  }

  // Save characters after devil fruit IDs are updated
  if (wantsJson) await saveToJSON(characters);
  if (wantsCsv) await saveToCSV(characters);

  console.log('\n✓ Done!\n');
}

main().catch(console.error);