refactor: update package.json and scripts for One Piece scraper
- Changed the scrape script to use tsx for TypeScript execution. - Added new TypeScript script for scraping One Piece data. - Refactored package.json to include dependencies for the new scraper. - Removed unused dependencies and organized devDependencies. feat: implement One Piece data scraping functionality - Added functionality to scrape arcs, characters, and devil fruits from One Piece fandom. - Implemented data extraction methods for character attributes and devil fruit details. - Added JSON and CSV export capabilities for scraped data. fix: update auth configuration to handle missing secret - Modified the auth configuration to use a default secret if BETTER_AUTH_SECRET is not set. fix: improve database client initialization - Updated database client creation to use a local database file if DATABASE_URL is not set. chore: switch Svelte adapter to node - Changed Svelte adapter from auto to node for better server-side rendering support.
This commit is contained in:
952
scripts/scrape-onepiece.ts
Normal file
952
scripts/scrape-onepiece.ts
Normal file
@@ -0,0 +1,952 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
import fs from 'fs';
|
||||
import { createObjectCsvWriter } from 'csv-writer';
|
||||
|
||||
// Type definitions

/** A story arc of the manga, parsed from the "Chapitres et Tomes" wiki page. */
interface Arc {
  id: string; // normalized slug derived from the wiki URL (see normalizeId)
  name: string; // arc name with the leading "Arc " prefix removed
  startChapter: number;
  endChapter: number | null; // null when the arc is still ongoing
  url: string; // wiki path relative to FANDOM_BASE_URL
}

/** A fully scraped character record. */
interface Character {
  id: string; // normalized from the canonical URL + display name
  name: string;
  gender: string | null; // 'Male' | 'Female' | null, derived from page categories
  age: number | null;
  height: number | null; // NOTE(review): units vary with source markup — see extractHeight
  origin: string | null;
  devilFruitId: string | null;
  devilFruitUrl: string | null; // wiki path of the fruit page, if any
  affiliations: string[];
  bounty: number | null; // extractBounty currently yields 0 (not null) when absent
  hakiObservation: boolean;
  hakiArmament: boolean;
  hakiConqueror: boolean;
  epithets: string[];
  firstAppearance: number; // first chapter number, taken from the character list page
  status: string | null; // 'Alive' | 'Dead' | null
  pictureUrl: string | null;
  url: string; // wiki path relative to FANDOM_BASE_URL
  arcId?: string; // filled in later by matching firstAppearance against the arc list
}

/** Lightweight entry parsed from the canon character list page. */
interface CharacterListItem {
  name: string;
  url: string; // wiki path relative to FANDOM_BASE_URL
  pictureUrl: string | null;
  chapter: string; // digits-only first-appearance chapter
}

/** Devil fruit reference extracted from a character infobox. */
interface DevilFruitData {
  devilFruitId: string;
  devilFruitUrl: string;
}

/** A scraped devil fruit record. */
interface DevilFruit {
  id: string;
  name: string;
  type: string | null; // 'Zoan' | 'Paramecia' | 'Logia' | null
  url: string;
}
|
||||
|
||||
// Base URL of the French One Piece fandom wiki; all scraped pages live under it.
const FANDOM_BASE_URL = 'https://onepiece.fandom.com/fr/wiki';
// Directory where JSON/CSV exports are written (created at startup if missing).
const OUTPUT_DIR = './scraped-data';
const MAX_RETRIES = 0; // Set to 0 to disable retries, can be increased if needed
const INITIAL_RETRY_DELAY = 1000; // ms; doubled on each retry (exponential backoff)
|
||||
|
||||
// Store cookies across requests (simulate browser behavior)
|
||||
const cookies = new Map<string, string>();
|
||||
|
||||
function getCookieHeader(): string {
|
||||
const cookieArray = Array.from(cookies.values()).map(c => c.split(';')[0]);
|
||||
return cookieArray.length > 0 ? cookieArray.join('; ') : '';
|
||||
}
|
||||
|
||||
function saveCookies(setCookieHeader: string | string[] | null): void {
|
||||
if (setCookieHeader) {
|
||||
const cookiesList = Array.isArray(setCookieHeader) ? setCookieHeader : [setCookieHeader];
|
||||
cookiesList.forEach(cookie => {
|
||||
const [nameValue] = cookie.split(';');
|
||||
const [name] = nameValue.split('=');
|
||||
if (name) cookies.set(name, cookie);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Create output directory up-front so every save function below can write
// unconditionally.
if (!fs.existsSync(OUTPUT_DIR)) {
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });
}
|
||||
|
||||
/**
|
||||
* Retry a fetch request with exponential backoff
|
||||
*/
|
||||
async function fetchWithRetry(url: string, options: RequestInit = {}, retries: number = 0): Promise<Response> {
|
||||
try {
|
||||
const headers: Record<string, string> = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:150.0) Firefox/150.0',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Connection': 'keep-alive',
|
||||
...((options.headers as Record<string, string>) || {})
|
||||
};
|
||||
|
||||
// Add cookies from previous requests
|
||||
const cookieHeader = getCookieHeader();
|
||||
if (cookieHeader) {
|
||||
headers['Cookie'] = cookieHeader;
|
||||
}
|
||||
|
||||
const response = await fetch(url, {
|
||||
headers,
|
||||
...options
|
||||
} as any);
|
||||
|
||||
// Save cookies from response
|
||||
const setCookie = response.headers.get('set-cookie');
|
||||
if (setCookie) {
|
||||
saveCookies(setCookie);
|
||||
}
|
||||
|
||||
// Check if response is OK (status 200-299)
|
||||
if (response.ok) {
|
||||
return response;
|
||||
}
|
||||
|
||||
// If not OK and we have retries left, retry
|
||||
if (retries < MAX_RETRIES) {
|
||||
const delay = INITIAL_RETRY_DELAY * Math.pow(2, retries);
|
||||
console.log(`⚠️ HTTP ${response.status} for ${url}, retrying in ${delay}ms...`);
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
return fetchWithRetry(url, options, retries + 1);
|
||||
}
|
||||
|
||||
// If we've exhausted retries, throw error
|
||||
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
||||
} catch (error) {
|
||||
// If it's a network error and we have retries left, retry
|
||||
if (retries < MAX_RETRIES) {
|
||||
const delay = INITIAL_RETRY_DELAY * Math.pow(2, retries);
|
||||
console.log(`⚠️ Network error: ${(error as Error).message}, retrying in ${delay}ms...`);
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
return fetchWithRetry(url, options, retries + 1);
|
||||
}
|
||||
|
||||
// If we've exhausted retries, throw error
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Normalize string by decoding URI components, punctuation, and replacing spaces with underscores
|
||||
*/
|
||||
function normalizeId(str: string): string {
|
||||
return decodeURIComponent(str)
|
||||
.normalize('NFD')
|
||||
.replace(/[,:.\(\)]/g, '')
|
||||
.replace(/\s+/g, '_')
|
||||
.toLowerCase();
|
||||
}
|
||||
|
||||
/**
 * Fetch all arcs from the "Chapitres et Tomes" page of the One Piece fandom wiki.
 *
 * Scans every link in the chapter tables, keeps those whose text looks like
 * "Arc <name>(Ch.<start> à <end>)", and builds one Arc record per match.
 * Returns an empty array on any fetch/parse error (logged to stderr).
 */
async function fetchAllArcs(): Promise<Arc[]> {
  try {
    const url = `${FANDOM_BASE_URL}/Chapitres_et_Tomes`;
    console.log('Fetching arcs list...');
    const response = await fetchWithRetry(url);
    const data = await response.text();
    const $ = cheerio.load(data);
    const arcs: Arc[] = [];

    // Find all arc links in the table
    $('table.wikitable td a').each((index, element) => {
      const text = $(element).text().trim();
      const href = $(element).attr('href');

      // Check if it's an arc link (contains "Arc" and chapter info)
      if (text.includes('Arc') && text.includes('Ch.') && href) {
        // Extract arc name and chapter range
        // Example text: "Arc Ville d'Orange(Ch.8 à 21)[T.1 à 3]"
        console.log(`Processing arc link: ${text} (${href})`);
        // Group 1 = arc name, group 2 = start chapter, group 3 = end chapter
        // (left undefined for ongoing arcs).
        // NOTE(review): the `(?:...)` alternative is a literal any-three-chars
        // match (presumably intended for "à ..." open-ended ranges), not a
        // regex ellipsis token — confirm this is intended.
        const nameMatch = text.match(/^(.*?Arc.*?)\s*\(Ch\.(\d+)(?:\s*à\s*(?:(\d+)|(?:...)))?\)/);
        if (nameMatch) {
          let arcName = nameMatch[1].trim();
          // Remove "Arc " from the name
          arcName = arcName.replace(/^Arc\s+/i, '');

          const startChapter = parseInt(nameMatch[2]);
          const endChapter = nameMatch[3] ? parseInt(nameMatch[3]) : null;

          // Generate arc ID by normalizing the url
          let arcId = normalizeId(href.replace('/fr/wiki/', ''));
          // Remove "Arc_" from the id
          arcId = arcId.replace(/^arc_/i, '');

          arcs.push({
            id: arcId,
            name: arcName,
            startChapter,
            endChapter,
            url: href.replace('/fr/wiki/', '')
          });
        }
      }
    });

    console.log(`Found ${arcs.length} arcs.`);
    return arcs;
  } catch (error) {
    console.error('Error fetching arcs list:', (error as Error).message);
    return [];
  }
}
|
||||
|
||||
/**
|
||||
* Save arcs to JSON
|
||||
*/
|
||||
async function saveArcsToJSON(arcs: Arc[]): Promise<void> {
|
||||
const filepath = `${OUTPUT_DIR}/arcs.json`;
|
||||
fs.writeFileSync(filepath, JSON.stringify(arcs, null, 2));
|
||||
console.log(`✓ Saved to ${filepath}`);
|
||||
}
|
||||
|
||||
/**
 * Export the arc list as <OUTPUT_DIR>/arcs.csv via csv-writer.
 * Null endChapter values (ongoing arcs) are written as empty cells.
 */
async function saveArcsToCSV(arcs: Arc[]): Promise<void> {
  const filepath = `${OUTPUT_DIR}/arcs.csv`;
  const csvWriter = createObjectCsvWriter({
    path: filepath,
    header: [
      { id: 'id', title: 'ID' },
      { id: 'name', title: 'Name' },
      { id: 'startChapter', title: 'Start Chapter' },
      { id: 'endChapter', title: 'End Chapter' },
      { id: 'url', title: 'URL' }
    ],
  });

  // Coalesce null/undefined fields to empty strings so cells never read "null".
  const records = arcs
    .filter((arc) => arc !== null)
    .map((arc) => ({
      id: arc.id || '',
      name: arc.name || '',
      startChapter: arc.startChapter || '',
      endChapter: arc.endChapter || '',
      url: arc.url || ''
    }));

  await csvWriter.writeRecords(records);
  console.log(`✓ Saved to ${filepath}`);
}
|
||||
|
||||
/**
 * Fetch the list of canon characters from the One Piece fandom wiki.
 *
 * Parses every row of the list table into a CharacterListItem holding the
 * character's name, wiki path, thumbnail URL and first-appearance chapter.
 * Rows without a usable chapter number (characters merely mentioned, with no
 * proper page) are skipped. Returns [] on any fetch/parse error (logged).
 */
async function fetchAllCharactersUrl(): Promise<CharacterListItem[]> {
  try {
    const url = `${FANDOM_BASE_URL}/Liste_des_Personnages_Canon`;
    console.log('Fetching character list...');
    const response = await fetchWithRetry(url);
    const data = await response.text();
    const $ = cheerio.load(data);
    const characters: CharacterListItem[] = [];
    $('table.wikitable tbody tr').each((index, element) => {
      if (index === 0) return; // Skip header row
      // Lazy-loaded images keep the real URL in data-src; fall back to src.
      let charpictureUrl = $(element).find('td:nth-child(1) a img').attr('data-src') || $(element).find('td:nth-child(1) a img').attr('src');
      let charUrl = $(element).find('td:nth-child(2) a').attr('href');
      let charName = $(element).find('td:nth-child(2) a').text().trim();
      let charChapter = $(element).find('td:nth-child(3)').text().trim();

      // Remove parentheses and their content from chapter info (e.g. "1 (flashback)" becomes "1")
      charChapter = charChapter.replace(/\([^)]*\)/g, '');
      // Then keep digits only.
      charChapter = charChapter.replace(/\D/g, '');

      // If charChapter is empty, skip the character as it means they don't have a proper page and are just mentioned in the list
      if (!charChapter) {
        return;
      }

      if (charUrl) {
        // Store the wiki path relative to FANDOM_BASE_URL.
        charUrl = charUrl.replace('/fr/wiki/', '');
        characters.push({
          name: charName,
          url: charUrl,
          pictureUrl: charpictureUrl || null,
          chapter: charChapter,
        });
      }
    });
    console.log(`Found ${characters.length} characters.`);
    return characters;
  } catch (error) {
    console.error('Error fetching character list:', (error as Error).message);
    return [];
  }
}
|
||||
|
||||
/**
 * Fetch and parse one character page.
 *
 * Follows redirects to the canonical page, then pulls every attribute the
 * Character record needs from the infobox and page categories via the
 * extract* helpers below.
 *
 * @param characterUrl - Wiki path (relative to FANDOM_BASE_URL) from the list page.
 * @param characterName - Display name from the list page (fallback when the page has no title).
 * @param characterpictureUrl - Thumbnail URL from the list page; may be null.
 * @param characterChapter - Digits-only first-appearance chapter from the list page.
 * @returns The parsed Character, or null when fetching/parsing failed (logged).
 */
async function fetchCharacter(
  characterUrl: string,
  characterName: string,
  characterpictureUrl: string | null,
  characterChapter: string
): Promise<Character | null> {
  try {
    console.log(`Fetching: ${characterName}...`);

    const response = await fetchWithRetry(`${FANDOM_BASE_URL}/${characterUrl}`, {
      redirect: 'follow'
    });

    // Use final URL after redirects (canonical character page)
    let finalCharacterUrl = characterUrl;
    let finalCharacterId = normalizeId(characterUrl);
    try {
      const finalUrl = new URL(response.url);
      const characterUrlPath = finalUrl.pathname.replace('/fr/wiki/', '');
      if (characterUrlPath) {
        finalCharacterUrl = characterUrlPath;
        finalCharacterId = normalizeId(characterUrlPath);
      }
    } catch {
      // NOTE(review): this branch only runs when `new URL(response.url)` throws,
      // yet it re-checks response.ok — and fetchWithRetry already throws on
      // non-ok responses, so this looks like dead/misplaced error handling.
      // Confirm before restructuring.
      // If HTTP is not ok or redirected URL, throw an error to be caught in the outer block
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }
    }

    const data = await response.text();

    const $ = cheerio.load(data);

    // Extract character name; fall back to the list-page name with underscores undone.
    const name = $('h1.mw-page-title-main').text().trim() || characterName.replace(/_/g, ' ');

    // Generate character ID from URL + name combination
    finalCharacterId = normalizeId(finalCharacterUrl + '_' + name);

    // Gender comes from the page's category links, not the infobox.
    let gender: string | null = null;
    if ($('.page-header__categories a[title="Catégorie:Personnages Masculins"]').length > 0) {
      gender = 'Male';
    } else if ($('.page-header__categories a[title="Catégorie:Personnages Féminins"]').length > 0) {
      gender = 'Female';
    }

    // Extract age
    const age = extractAge($);

    // Extract affiliations
    const affiliations = extractAffiliations($);

    // Extract epithets
    const epithets = extractEpithets($);

    // Extract devil fruit (follows redirects, hence the await)
    const devilFruitData = await extractDevilFruit($);
    const devilFruitId = devilFruitData?.devilFruitId || null;
    const devilFruitUrl = devilFruitData?.devilFruitUrl || null;

    // Haki mastery is flagged through category membership, one per haki type.
    const hakiObservation = $('.page-header__categories a[title="Catégorie:Utilisateurs du Haki de l\'observation"]').length > 0;
    const hakiArmament = $('.page-header__categories a[title="Catégorie:Utilisateurs du Haki de l\'armement"]').length > 0;
    const hakiConqueror = $('.page-header__categories a[title="Catégorie:Utilisateurs du Haki des rois"]').length > 0;

    // Extract bounty
    const bounty = extractBounty($);

    // Extract height
    const height = extractHeight($);

    // Use chapter from character list, cast to int
    let firstAppearance = parseInt(characterChapter);

    // Extract origin
    const origin = extractOrigin($);

    // Extract status
    const status = extractStatus($);

    // Extract image URL and clean it: the wiki uses a shared placeholder image
    // when no portrait exists, which counts as "no picture".
    let pictureUrl = characterpictureUrl;
    if (pictureUrl && pictureUrl.includes('Image_Non_Disponible')) {
      pictureUrl = null;
    }

    return {
      id: finalCharacterId,
      name,
      gender,
      age,
      height,
      origin,
      devilFruitId,
      devilFruitUrl,
      affiliations,
      bounty,
      hakiObservation,
      hakiArmament,
      hakiConqueror,
      epithets,
      firstAppearance,
      status,
      pictureUrl,
      url: finalCharacterUrl
    };
  } catch (error) {
    console.error(`Error fetching ${characterName}:`, (error as Error).message);
    return null;
  }
}
|
||||
|
||||
|
||||
/**
|
||||
* Extract age from infobox
|
||||
*/
|
||||
function extractAge($: cheerio.CheerioAPI): number | null {
|
||||
const div = $('[data-source="âge"] .pi-data-value');
|
||||
if (div.length === 0) return null;
|
||||
|
||||
let text = div.html();
|
||||
if (!text) return null;
|
||||
|
||||
// Remove all sup blocks (citations)
|
||||
text = text.replace(/<sup[^>]*>.*?<\/sup>/gi, '');
|
||||
|
||||
// Get the last element and extract only digits
|
||||
const parts = text.split('<br');
|
||||
const lastPart = parts[parts.length - 1];
|
||||
let cleanText = lastPart.replace(/<[^>]*>/g, '').trim();
|
||||
|
||||
// Remove content with parentheses
|
||||
cleanText = cleanText.replace(/\([^)]*\)/g, '');
|
||||
|
||||
const digitsOnly = cleanText.replace(/\D/g, '');
|
||||
return parseInt(digitsOnly) || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract affiliations from infobox
|
||||
*/
|
||||
function extractAffiliations($: cheerio.CheerioAPI): string[] {
|
||||
const div = $('[data-source="affiliation"] .pi-data-value');
|
||||
if (div.length === 0) return [];
|
||||
|
||||
const cleanedDiv = div.clone();
|
||||
cleanedDiv.find('sup').remove();
|
||||
|
||||
let text = cleanedDiv.html();
|
||||
if (!text) return [];
|
||||
|
||||
// Extract all link values
|
||||
const linkValues = cleanedDiv.find('a').map((i, el) => $(el).text().trim()).get();
|
||||
if (linkValues.length > 0) {
|
||||
return linkValues;
|
||||
}
|
||||
|
||||
// Fallback to parsing text
|
||||
const cleanText = text.replace(/<[^>]*>/g, '').trim();
|
||||
const parts = cleanText.split(/\s*\n\s*|\s*;\s*|\s*,\s*/).filter(Boolean);
|
||||
return parts.length > 0 ? parts : [];
|
||||
}
|
||||
|
||||
/**
 * Extract epithets from infobox
 * Epithets are always between double quotes (straight, curly, or guillemets),
 * so each quoted span becomes one epithet. Citation <sup> blocks are removed
 * first. Returns [] when the row is absent or nothing is quoted.
 */
function extractEpithets($: cheerio.CheerioAPI): string[] {
  const div = $('[data-source="épithète"] .pi-data-value');
  if (div.length === 0) return [];

  // Work on a clone so removing citation markers doesn't mutate the document.
  const cleanedDiv = div.clone();
  cleanedDiv.find('sup').remove();

  let text = cleanedDiv.text();
  if (!text) return [];

  // Extract all text between double quotes (both straight and curly quotes)
  const matches = text.match(/["«"]([^"»"]+)["»"]/g);
  if (!matches) return [];

  // Remove the quotes and trim
  const epithets = matches.map(match =>
    match.replace(/^["«"]|["»"]$/g, '').trim()
  ).filter(Boolean);

  return epithets;
}
|
||||
|
||||
/**
 * Extract the character's devil fruit from the infobox "dfnom" row.
 *
 * Follows the linked fruit page's redirects so the returned URL/ID refer to
 * the canonical page; falls back to the raw link target when that request
 * fails. Returns both the normalized ID and the wiki path, or null when the
 * row has no usable wiki link.
 */
async function extractDevilFruit($: cheerio.CheerioAPI): Promise<DevilFruitData | null> {
  const link = $('[data-source="dfnom"] .pi-data-value a').first();
  if (link.length === 0) return null;

  const href = link.attr('href');
  // Ignore external or malformed links — only same-wiki paths qualify.
  if (!href || !href.startsWith('/fr/wiki/')) return null;

  const cleanUrl = href.replace('/fr/wiki/', '');

  try {
    // Fetch the page to follow redirects
    const response = await fetchWithRetry(`${FANDOM_BASE_URL}/${cleanUrl}`, {
      redirect: 'follow' // Explicitly follow redirects
    });

    // Use the final URL after redirects
    const finalUrl = new URL(response.url);
    const pathname = finalUrl.pathname;
    const finalPath = pathname.replace('/fr/wiki/', '');

    if (finalPath) {
      return {
        devilFruitId: normalizeId(finalPath),
        devilFruitUrl: finalPath
      };
    }
  } catch (error) {
    console.error(`Error fetching devil fruit page: ${(error as Error).message}`);
  }

  // Fallback to the original href
  return {
    devilFruitId: normalizeId(cleanUrl),
    devilFruitUrl: cleanUrl
  };
}
|
||||
|
||||
/**
|
||||
* Extract bounty from infobox
|
||||
*/
|
||||
function extractBounty($: cheerio.CheerioAPI): number | null {
|
||||
const div = $('[data-source="prime"] .pi-data-value');
|
||||
if (div.length === 0) return 0;
|
||||
|
||||
let text = div.html();
|
||||
if (!text) return 0;
|
||||
|
||||
// Remove all sup blocks (citations)
|
||||
text = text.replace(/<sup[^>]*>.*?<\/sup>/gi, '');
|
||||
|
||||
// Extract the first value before any <br> tag
|
||||
const firstValue = text.split('<br')[0].trim();
|
||||
let cleanText = firstValue.replace(/<[^>]*>/g, '').trim();
|
||||
|
||||
// Check if cleanText contains digits
|
||||
if (!/\d/.test(cleanText)) {
|
||||
// If no digits, try second value after <br>
|
||||
const secondValue = text.split('<br>')[1];
|
||||
if (secondValue) {
|
||||
cleanText = secondValue.replace(/<[^>]*>/g, '').trim();
|
||||
}
|
||||
}
|
||||
|
||||
// Remove all non-digits
|
||||
cleanText = cleanText.replace(/\D/g, '');
|
||||
|
||||
return cleanText ? parseInt(cleanText) : 0;
|
||||
}
|
||||
|
||||
/**
 * Extract the character's height from the infobox "taille" row.
 *
 * Prefers the content of a <p> tag when present, otherwise the value after
 * the last <br>. Citation <sup> blocks, tags and parenthesised notes are
 * stripped before parsing.
 *
 * NOTE(review): units are inconsistent — a "cm" value parses to centimeters
 * (e.g. 172) while an "m" value parses to meters (e.g. "1m72" -> 1.72), so
 * Character.height mixes scales. Confirm intent before relying on the value.
 */
function extractHeight($: cheerio.CheerioAPI): number | null {
  const div = $('[data-source="taille"] .pi-data-value');
  if (div.length === 0) return null;

  let text = div.html();
  if (!text) return null;

  // Remove all sup blocks (citations)
  text = text.replace(/<sup[^>]*>.*?<\/sup>/gi, '');

  // Check if there's a <p> tag - if yes, use content from <p>
  let content;
  const pMatch = text.match(/<p[^>]*>(.*?)<\/p>/i);
  if (pMatch) {
    // Extract content from the <p> tag
    content = pMatch[1];
  } else {
    // Use the last value method (after any <br> tag)
    // NOTE(review): splits on the literal '<br>' only; a self-closing '<br/>'
    // is not treated as a separator here (unlike extractBounty/extractOrigin).
    content = text.split('<br>').pop();
  }

  let cleanText = (content || '').replace(/<[^>]*>/g, '').trim();

  // Remove content with parentheses
  cleanText = cleanText.replace(/\([^)]*\)/g, '');

  // Normalize units for meters or centimeters
  const normalized = cleanText.toLowerCase().replace(/\s/g, '');
  if (normalized.includes('cm')) {
    const digitsOnly = normalized.replace(/\D/g, '');
    return parseFloat(digitsOnly) || null;
  }

  if (normalized.includes('m')) {
    // e.g. "1m72" -> ['1', '72'] -> parseFloat('1.72')
    const parts = normalized.split('m').filter(Boolean);
    return parts.length > 0 ? parseFloat(parts.join('.')) : null;
  }

  return normalized.length > 0 ? parseFloat(normalized.replace(/\D/g, '')) : null;
}
|
||||
|
||||
/**
|
||||
* Extract origin from infobox
|
||||
*/
|
||||
function extractOrigin($: cheerio.CheerioAPI): string | null {
|
||||
const div = $('[data-source="origine"] .pi-data-value');
|
||||
if (div.length === 0) return null;
|
||||
|
||||
let text = div.html();
|
||||
if (!text) return null;
|
||||
|
||||
// Remove all sup blocks (citations)
|
||||
text = text.replace(/<sup[^>]*>.*?<\/sup>/gi, '');
|
||||
|
||||
// Extract the first value before any <br> tag
|
||||
const firstValue = text.split('<br')[0].trim();
|
||||
let cleanText = firstValue.replace(/<[^>]*>/g, '').trim();
|
||||
|
||||
// Remove content with parentheses
|
||||
cleanText = cleanText.replace(/\([^)]*\)/g, '').trim();
|
||||
|
||||
return cleanText || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract status from infobox
|
||||
*/
|
||||
function extractStatus($: cheerio.CheerioAPI): string | null {
|
||||
const div = $('[data-source="statut"] .pi-data-value');
|
||||
if (div.length === 0) return null;
|
||||
|
||||
const statusText = div.text().trim().toLowerCase();
|
||||
|
||||
if (statusText.includes('vivant')) {
|
||||
return 'Alive';
|
||||
} else if (statusText.includes('décédé')) {
|
||||
return 'Dead';
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Save data to JSON
|
||||
*/
|
||||
async function saveToJSON(characters: Character[]): Promise<void> {
|
||||
const filepath = `${OUTPUT_DIR}/characters.json`;
|
||||
fs.writeFileSync(filepath, JSON.stringify(characters, null, 2));
|
||||
console.log(`✓ Saved to ${filepath}`);
|
||||
}
|
||||
|
||||
/**
 * Export all characters as <OUTPUT_DIR>/characters.csv via csv-writer.
 * Array fields are joined with ', ', booleans become 1/0, null/undefined
 * fields become empty cells, and a missing bounty defaults to 0.
 */
async function saveToCSV(characters: Character[]): Promise<void> {
  const filepath = `${OUTPUT_DIR}/characters.csv`;
  const csvWriter = createObjectCsvWriter({
    path: filepath,
    header: [
      { id: 'id', title: 'ID' },
      { id: 'name', title: 'Name' },
      { id: 'gender', title: 'Gender' },
      { id: 'age', title: 'Age' },
      { id: 'height', title: 'Height' },
      { id: 'origin', title: 'Origin' },
      { id: 'status', title: 'Status' },
      { id: 'epithets', title: 'Epithets' },
      { id: 'devilFruitId', title: 'Devil Fruit ID' },
      { id: 'affiliations', title: 'Affiliations' },
      { id: 'bounty', title: 'Bounty' },
      { id: 'hakiObservation', title: 'Haki Observation' },
      { id: 'hakiArmament', title: 'Haki Armament' },
      { id: 'hakiConqueror', title: 'Haki Conqueror' },
      { id: 'firstAppearance', title: 'First Appearance' },
      { id: 'arcId', title: 'Arc ID' },
      { id: 'pictureUrl', title: 'Image URL' },
      { id: 'url', title: 'Fandom URL' }
    ],
  });

  // Flatten each Character into CSV-friendly scalar cells.
  const records = characters
    .filter((c) => c !== null)
    .map((c) => ({
      id: c.id || '',
      name: c.name || '',
      gender: c.gender || '',
      age: c.age || '',
      height: c.height || '',
      origin: c.origin || '',
      status: c.status || '',
      epithets: Array.isArray(c.epithets) ? c.epithets.join(', ') : (c.epithets || ''),
      devilFruitId: c.devilFruitId || '',
      affiliations: Array.isArray(c.affiliations) ? c.affiliations.join(', ') : (c.affiliations || ''),
      bounty: c.bounty ?? 0,
      hakiObservation: c.hakiObservation ? 1 : 0,
      hakiArmament: c.hakiArmament ? 1 : 0,
      hakiConqueror: c.hakiConqueror ? 1 : 0,
      firstAppearance: c.firstAppearance || '',
      arcId: c.arcId || '',
      pictureUrl: c.pictureUrl || '',
      url: c.url || ''
    }));

  await csvWriter.writeRecords(records);
  console.log(`✓ Saved to ${filepath}`);
}
|
||||
|
||||
/**
 * Fetch and parse one devil fruit page.
 *
 * @param devilFruitUrl - Wiki path relative to FANDOM_BASE_URL.
 * @param devilFruitId - Pre-normalized identifier to store on the record.
 * @returns The parsed DevilFruit, or null when the request/parse failed (logged).
 */
async function fetchDevilFruit(devilFruitUrl: string, devilFruitId: string): Promise<DevilFruit | null> {
  try {
    console.log(`Fetching devil fruit: ${devilFruitId}...`);

    const response = await fetchWithRetry(`${FANDOM_BASE_URL}/${devilFruitUrl}`);
    const data = await response.text();
    const $ = cheerio.load(data);

    // NOTE(review): character pages read 'h1.mw-page-title-main' while this
    // uses a span selector — confirm fruit pages really title with a span.
    const name = $('span.mw-page-title-main').text().trim();

    // Map the infobox "type" row onto the three canonical fruit classes.
    let type: string | null = null;
    const typeDiv = $('[data-source="type"] .pi-data-value');
    if (typeDiv.length > 0) {
      const typeText = typeDiv.text().trim().toLowerCase();
      if (typeText.includes('zoan')) {
        type = 'Zoan';
      } else if (typeText.includes('paramecia')) {
        type = 'Paramecia';
      } else if (typeText.includes('logia')) {
        type = 'Logia';
      }
    }

    return {
      id: devilFruitId,
      name,
      type,
      url: devilFruitUrl
    };
  } catch (error) {
    console.error(`Error fetching devil fruit ${devilFruitUrl}:`, (error as Error).message);
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Save devil fruits to JSON
|
||||
*/
|
||||
async function saveDevilFruitsToJSON(devilFruits: DevilFruit[]): Promise<void> {
|
||||
const filepath = `${OUTPUT_DIR}/devil-fruits.json`;
|
||||
fs.writeFileSync(filepath, JSON.stringify(devilFruits, null, 2));
|
||||
console.log(`✓ Saved to ${filepath}`);
|
||||
}
|
||||
|
||||
/**
 * Export the devil fruit list as <OUTPUT_DIR>/devil-fruits.csv via csv-writer.
 * Null types are written as empty cells.
 */
async function saveDevilFruitsToCSV(devilFruits: DevilFruit[]): Promise<void> {
  const filepath = `${OUTPUT_DIR}/devil-fruits.csv`;
  const csvWriter = createObjectCsvWriter({
    path: filepath,
    header: [
      { id: 'id', title: 'ID' },
      { id: 'name', title: 'Name' },
      { id: 'type', title: 'Type' },
      { id: 'url', title: 'URL' }
    ],
  });

  // Coalesce null/undefined fields to empty strings so cells never read "null".
  const records = devilFruits
    .filter((df) => df !== null)
    .map((df) => ({
      id: df.id || '',
      name: df.name || '',
      type: df.type || '',
      url: df.url || ''
    }));

  await csvWriter.writeRecords(records);
  console.log(`✓ Saved to ${filepath}`);
}
|
||||
|
||||
/**
|
||||
* Main execution
|
||||
*/
|
||||
async function main(): Promise<void> {
|
||||
const format = process.argv[2] || 'all'; // json, csv, or all
|
||||
|
||||
console.log(`\nOne Piece Scraper - Mode: ${format}\n`);
|
||||
|
||||
// Step 1: Scraping Arcs
|
||||
console.log('=== Step 1: Scraping Arcs ===\n');
|
||||
const arcsList = await fetchAllArcs();
|
||||
|
||||
if (arcsList.length > 0) {
|
||||
// Display arcs in table format
|
||||
arcsList.forEach((arc) => {
|
||||
console.table({
|
||||
ID: arc.id,
|
||||
Name: arc.name,
|
||||
StartChapter: arc.startChapter,
|
||||
EndChapter: arc.endChapter || 'Ongoing',
|
||||
URL: arc.url
|
||||
});
|
||||
});
|
||||
|
||||
console.log(`\n✓ Found ${arcsList.length} arcs\n`);
|
||||
|
||||
if (format === 'json' || format === 'all') {
|
||||
await saveArcsToJSON(arcsList);
|
||||
}
|
||||
if (format === 'csv' || format === 'all') {
|
||||
await saveArcsToCSV(arcsList);
|
||||
}
|
||||
} else {
|
||||
console.warn('No arcs found, continuing...\n');
|
||||
}
|
||||
|
||||
// Step 2: Scraping Characters
|
||||
console.log('=== Step 1: Scraping Characters ===\n');
|
||||
const characterList = await fetchAllCharactersUrl();
|
||||
|
||||
if (characterList.length === 0) {
|
||||
console.error('No characters found. Exiting.');
|
||||
return;
|
||||
}
|
||||
|
||||
const characters: Character[] = [];
|
||||
const devilFruitUrls = new Set<string>();
|
||||
let failedCharacters: CharacterListItem[] = [...characterList];
|
||||
|
||||
while (failedCharacters.length > 0) {
|
||||
const nextFailedCharacters: CharacterListItem[] = [];
|
||||
console.log(`\nFetching ${failedCharacters.length} characters...`);
|
||||
|
||||
for (let i = 0; i < failedCharacters.length; i++) {
|
||||
const char = failedCharacters[i];
|
||||
const data = await fetchCharacter(char.url, char.name, char.pictureUrl, char.chapter);
|
||||
|
||||
if (data) {
|
||||
console.table({
|
||||
ID: data.id,
|
||||
Name: data.name,
|
||||
Gender: data.gender,
|
||||
Age: data.age,
|
||||
Status: data.status,
|
||||
Epithets: data.epithets.join(', '),
|
||||
Affiliations: data.affiliations.join(', '),
|
||||
DevilFruitId: data.devilFruitId,
|
||||
DevilFruitUrl: data.devilFruitUrl,
|
||||
HakiObservation: data.hakiObservation ? 'Yes' : 'No',
|
||||
HakiArmament: data.hakiArmament ? 'Yes' : 'No',
|
||||
HakiConqueror: data.hakiConqueror ? 'Yes' : 'No',
|
||||
Height: data.height,
|
||||
Bounty: data.bounty,
|
||||
Origin: data.origin,
|
||||
FirstAppearance: data.firstAppearance,
|
||||
pictureUrl: data.pictureUrl,
|
||||
FandomURL: data.url
|
||||
});
|
||||
|
||||
// Collect devil fruit URLs
|
||||
if (data.devilFruitUrl) {
|
||||
devilFruitUrls.add(data.devilFruitUrl);
|
||||
}
|
||||
|
||||
// Add arc IDs to character data
|
||||
if (data.firstAppearance) {
|
||||
const arc = arcsList.find(a => a.startChapter <= data.firstAppearance && (a.endChapter === null || a.endChapter >= data.firstAppearance));
|
||||
if (arc) {
|
||||
data.arcId = arc.id;
|
||||
}
|
||||
}
|
||||
|
||||
characters.push(data);
|
||||
} else {
|
||||
// Add to retry list and wait before next character
|
||||
nextFailedCharacters.push(char);
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
}
|
||||
}
|
||||
|
||||
failedCharacters = nextFailedCharacters;
|
||||
if (failedCharacters.length > 0) {
|
||||
console.log(`⚠️ ${failedCharacters.length} characters failed. Retrying...`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\n✓ Scraped ${characters.length} characters\n`);
|
||||
console.log(`✓ Found ${devilFruitUrls.size} unique devil fruits\n`);
|
||||
|
||||
// Step 3: Scraping Devil Fruits
|
||||
console.log('=== Step 2: Scraping Devil Fruits ===\n');
|
||||
|
||||
if (devilFruitUrls.size === 0) {
|
||||
console.warn('No devil fruits found from characters, skipping...\n');
|
||||
} else {
|
||||
const devilFruits: DevilFruit[] = [];
|
||||
const devilFruitUrlArray = Array.from(devilFruitUrls);
|
||||
|
||||
for (let i = 0; i < devilFruitUrlArray.length; i++) {
|
||||
const url = devilFruitUrlArray[i];
|
||||
const data = await fetchDevilFruit(url, normalizeId(url));
|
||||
|
||||
if (data) {
|
||||
console.table({
|
||||
ID: data.id,
|
||||
Name: data.name,
|
||||
Type: data.type,
|
||||
URL: data.url
|
||||
});
|
||||
|
||||
devilFruits.push(data);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\n✓ Scraped ${devilFruits.length} devil fruits\n`);
|
||||
|
||||
if (format === 'json' || format === 'all') {
|
||||
await saveDevilFruitsToJSON(devilFruits);
|
||||
}
|
||||
if (format === 'csv' || format === 'all') {
|
||||
await saveDevilFruitsToCSV(devilFruits);
|
||||
}
|
||||
|
||||
// Update characters with normalized devil fruit IDs
|
||||
const devilFruitMap = new Map<string, string>(devilFruits.map(df => [df.id, df.id]));
|
||||
characters.forEach(char => {
|
||||
if (char.devilFruitUrl) {
|
||||
const normalizedId = normalizeId(char.devilFruitUrl);
|
||||
char.devilFruitId = devilFruitMap.get(normalizedId) || normalizedId;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Save characters after devil fruit IDs are updated
|
||||
if (format === 'json' || format === 'all') {
|
||||
await saveToJSON(characters);
|
||||
}
|
||||
if (format === 'csv' || format === 'all') {
|
||||
await saveToCSV(characters);
|
||||
}
|
||||
|
||||
console.log('\n✓ Done!\n');
|
||||
}
|
||||
|
||||
// Entry point: run the scraper; any unhandled rejection is logged to stderr.
main().catch(console.error);
|
||||
Reference in New Issue
Block a user