import * as cheerio from 'cheerio';
import fs from 'fs';
import https from 'https';
import { createObjectCsvWriter } from 'csv-writer';

// Base URL of the French One Piece wiki; page paths are appended to it
const FANDOM_BASE_URL = 'https://onepiece.fandom.com/fr/wiki';
// Directory that receives the generated JSON/CSV files
const OUTPUT_DIR = './scraped-data';
const MAX_RETRIES = 0; // Set to 0 to disable retries, can be increased if needed
// Delay before the first retry, in milliseconds; fetchWithRetry doubles it on every further retry
const INITIAL_RETRY_DELAY = 1000;

// Keep same HTTP session like a normal browser - maintain connection pool and allow cookie persistence
// NOTE(review): `maxConnections` does not appear among the documented http.Agent options
// (`maxTotalSockets` looks like the closest one) - confirm whether it has any effect.
// NOTE(review): this agent is handed to fetch() as the `agent` option; verify that the fetch
// implementation in use actually honours it.
const httpsAgent = new https.Agent({
    keepAlive: true,
    keepAliveMsecs: 1000,
    maxFreeSockets: 10,
    maxSockets: 50,
    maxConnections: 50,
    timeout: 30000
});

// Store cookies across requests (simulate browser behavior)
// Keyed by cookie name; each value is the raw Set-Cookie string, attributes included
const cookies = new Map();

/**
 * Build the value of the `Cookie` request header from the stored cookies.
 *
 * Only the leading `name=value` part of each stored Set-Cookie string is
 * sent; attributes such as Path or HttpOnly are dropped.
 *
 * @returns {string} Pairs joined with "; ", or an empty string when no cookie is stored.
 */
function getCookieHeader() {
    const pairs = [];
    for (const rawCookie of cookies.values()) {
        const [nameValuePair] = rawCookie.split(';');
        pairs.push(nameValuePair);
    }
    return pairs.join('; ');
}

/**
 * Remember the cookies announced by a response.
 *
 * Each cookie is stored under its name (the text before the first "=" of the
 * leading `name=value` part), so a later cookie with the same name replaces
 * the earlier one. Cookies without a name are ignored.
 *
 * @param {string|string[]|null|undefined} setCookieHeader - One raw Set-Cookie
 *   string or an array of them; falsy values are ignored.
 */
function saveCookies(setCookieHeader) {
    if (!setCookieHeader) {
        return;
    }

    const rawCookies = Array.isArray(setCookieHeader) ? setCookieHeader : [setCookieHeader];
    for (const rawCookie of rawCookies) {
        const nameValuePair = rawCookie.split(';')[0];
        const cookieName = nameValuePair.split('=')[0];
        if (cookieName) {
            cookies.set(cookieName, rawCookie);
        }
    }
}

// Make sure the output directory exists before any file is written into it
if (fs.existsSync(OUTPUT_DIR) === false) {
    fs.mkdirSync(OUTPUT_DIR, { recursive: true });
}

/**
 * Fetch a URL with browser-like headers, the stored cookies and optional retries.
 *
 * A failed attempt (a rejected request or a non-2xx status) is retried while
 * fewer than MAX_RETRIES retries have been made, waiting INITIAL_RETRY_DELAY
 * milliseconds before the first retry and doubling the wait each time.
 *
 * @param {string} url - URL to request.
 * @param {object} [options] - Extra options forwarded to `fetch`. `options.headers`
 *   is merged over the default headers (it no longer replaces them); the stored
 *   cookies, when there are any, are sent as the `Cookie` header.
 * @param {number} [retries] - Number of retries already made; used by the
 *   recursive calls, callers normally omit it.
 * @returns {Promise<Response>} The first response whose status is 2xx.
 * @throws {Error} `HTTP <status>: <statusText>` for a non-2xx response, or the
 *   error raised by the request itself, once no retry is left.
 */
async function fetchWithRetry(url, options = {}, retries = 0) {
    // Split caller headers from the other options so that spreading the options
    // below cannot overwrite the merged header set.
    const { headers: extraHeaders, ...fetchOptions } = options;
    const headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:150.0) Firefox/150.0',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        ...extraHeaders
    };

    // Add cookies from previous requests
    const cookieHeader = getCookieHeader();
    if (cookieHeader) {
        headers['Cookie'] = cookieHeader;
    }

    let failure;
    let failureNotice;
    try {
        const response = await fetch(url, {
            agent: httpsAgent,
            ...fetchOptions,
            headers
        });

        // Headers.get('set-cookie') folds several cookies into one comma-joined
        // string; getSetCookie() keeps them apart, so prefer it when it exists.
        const setCookie = typeof response.headers.getSetCookie === 'function'
            ? response.headers.getSetCookie()
            : response.headers.get('set-cookie');
        if (setCookie && setCookie.length > 0) {
            saveCookies(setCookie);
        }

        // Status 200-299: done
        if (response.ok) {
            return response;
        }

        failure = new Error(`HTTP ${response.status}: ${response.statusText}`);
        failureNotice = `⚠️  HTTP ${response.status} for ${url}`;

        // Best effort: discard the unread body so the connection can be released.
        try {
            await response.body?.cancel();
        } catch {
            // Nothing useful can be done if the body cannot be cancelled.
        }
    } catch (error) {
        failure = error;
        failureNotice = `⚠️  Network error: ${error.message}`;
    }

    if (retries < MAX_RETRIES) {
        const delay = INITIAL_RETRY_DELAY * Math.pow(2, retries);
        console.log(`${failureNotice}, retrying in ${delay}ms...`);
        await new Promise(resolve => setTimeout(resolve, delay));
        return fetchWithRetry(url, options, retries + 1);
    }

    // Retries exhausted
    throw failure;
}


/**
 * Turn a wiki path or title into a normalized identifier.
 *
 * Steps: percent-decode, apply Unicode NFD normalization (accented letters are
 * decomposed; the combining marks are kept), remove the characters , : . ( ),
 * replace every run of whitespace with "_" and lower-case the result.
 *
 * A string that is not valid percent-encoding (for example a stray "%") is
 * normalized as-is instead of raising a URIError.
 *
 * @param {string} str - Raw path segment or title.
 * @returns {string} The normalized identifier.
 */
function normalizeId(str) {
    let decoded;
    try {
        decoded = decodeURIComponent(str);
    } catch (error) {
        if (!(error instanceof URIError)) {
            throw error;
        }
        // Malformed escape sequence: keep the text undecoded
        decoded = String(str);
    }

    return decoded
        .normalize('NFD')
        .replace(/[,:.()]/g, '')
        .replace(/\s+/g, '_')
        .toLowerCase();
}

/**
 * Parse one link of the chapter table into an arc record.
 *
 * @param {string} text - Trimmed link text, e.g. "Arc Ville d'Orange(Ch.8 à 21)[T.1 à 3]".
 * @param {string} href - Link target, e.g. "/fr/wiki/Arc_Ville_d%27Orange".
 * @returns {{id: string, name: string, startChapter: number, endChapter: (number|null), url: string}|null}
 *   The arc, or null when the text does not have the expected shape.
 *   `endChapter` is null when the range has no numeric end.
 */
function parseArcLink(text, href) {
    // NOTE(review): the "..." near the end of the pattern are regex wildcards (any
    // three characters), which also matches a literal "..."; presumably meant for
    // open-ended ranges such as "(Ch.1 à ...)" - confirm before tightening.
    const nameMatch = text.match(/^(.*?Arc.*?)\s*\(Ch\.(\d+)(?:\s*à\s*(?:(\d+)|(?:...)))?\)/);
    if (!nameMatch) {
        return null;
    }

    const url = href.replace('/fr/wiki/', '');
    return {
        // The id is the normalized page path without its leading "arc_"
        id: normalizeId(url).replace(/^arc_/i, ''),
        // The name is the link label without its leading "Arc "
        name: nameMatch[1].trim().replace(/^Arc\s+/i, ''),
        startChapter: Number.parseInt(nameMatch[2], 10),
        endChapter: nameMatch[3] ? Number.parseInt(nameMatch[3], 10) : null,
        url
    };
}

/**
 * Fetch all arcs from One Piece fandom
 *
 * Reads the "Chapitres_et_Tomes" page and keeps every table link whose text
 * contains both "Arc" and "Ch.". A link without an href is skipped on its own
 * instead of aborting the whole list.
 *
 * @returns {Promise<Array<object>>} The arcs in page order; an empty array when
 *   the page cannot be fetched or parsed (the error is logged).
 */
async function fetchAllArcs() {
    try {
        const url = `${FANDOM_BASE_URL}/Chapitres_et_Tomes`;
        console.log('Fetching arcs list...');
        const response = await fetchWithRetry(url);
        const data = await response.text();
        const $ = cheerio.load(data);
        const arcs = [];

        // Find all arc links in the table
        $('table.wikitable td a').each((index, element) => {
            const link = $(element);
            const text = link.text().trim();
            const href = link.attr('href');

            // Only links that mention an arc and a chapter range are of interest
            if (!text.includes('Arc') || !text.includes('Ch.')) {
                return;
            }
            console.log(`Processing arc link: ${text} (${href})`);

            // Without a target there is nothing to build the id and url from
            if (!href) {
                return;
            }

            const arc = parseArcLink(text, href);
            if (arc) {
                arcs.push(arc);
            }
        });

        console.log(`Found ${arcs.length} arcs.`);
        return arcs;
    } catch (error) {
        console.error('Error fetching arcs list:', error.message);
        return [];
    }
}

/**
 * Write the arcs to `arcs.json` in the output directory.
 *
 * @param {Array<object>} arcs - Arc records; serialized with a 2-space indent.
 * @returns {Promise<void>}
 */
async function saveArcsToJSON(arcs) {
    const targetPath = `${OUTPUT_DIR}/arcs.json`;
    const serialized = JSON.stringify(arcs, null, 2);
    fs.writeFileSync(targetPath, serialized);
    console.log(`✓ Saved to ${targetPath}`);
}

/**
 * Write the arcs to `arcs.csv` in the output directory.
 *
 * `null` entries are skipped and every falsy field is written as an empty string.
 *
 * @param {Array<object|null>} arcs - Arc records.
 * @returns {Promise<void>}
 */
async function saveArcsToCSV(arcs) {
    const filepath = `${OUTPUT_DIR}/arcs.csv`;

    // Column key -> CSV title, in output order
    const columns = [
        ['id', 'ID'],
        ['name', 'Name'],
        ['startChapter', 'Start Chapter'],
        ['endChapter', 'End Chapter'],
        ['url', 'URL']
    ];

    const csvWriter = createObjectCsvWriter({
        path: filepath,
        header: columns.map(([id, title]) => ({ id, title })),
    });

    const toRecord = (arc) => Object.fromEntries(
        columns.map(([key]) => [key, arc[key] || ''])
    );
    const records = arcs.filter((arc) => arc !== null).map(toRecord);

    await csvWriter.writeRecords(records);
    console.log(`✓ Saved to ${filepath}`);
}

/**
 * Fetch the list of canon characters from One Piece fandom
 *
 * Reads the "Liste_des_Personnages_Canon" page. For every table row except the
 * very first one it takes the picture from column 1, the link and name from
 * column 2 and the chapter from column 3. Rows whose chapter cell contains no
 * digit, or whose name cell has no link, are left out.
 *
 * @returns {Promise<Array<{name: string, url: string, pictureUrl: (string|undefined), chapter: string}>>}
 *   The characters in page order; an empty array when the page cannot be
 *   fetched or parsed (the error is logged).
 */
async function fetchAllCharactersUrl() {
    try {
        const listUrl = `${FANDOM_BASE_URL}/Liste_des_Personnages_Canon`;
        console.log('Fetching character list...');
        const response = await fetchWithRetry(listUrl);
        const html = await response.text();
        const $ = cheerio.load(html);
        const characters = [];

        $('table.wikitable tbody tr').each((rowIndex, rowElement) => {
            // The first row is the table header
            if (rowIndex === 0) {
                return;
            }
            const row = $(rowElement);

            // Keep only the digits of the chapter cell, after dropping parenthesised
            // remarks (e.g. "1 (flashback)" becomes "1")
            const chapter = row.find('td:nth-child(3)').text().trim()
                .replace(/\([^)]*\)/g, '')
                .replace(/\D/g, '');
            // No chapter means the character is only mentioned in the list
            if (!chapter) {
                return;
            }

            const nameLink = row.find('td:nth-child(2) a');
            const href = nameLink.attr('href');
            if (!href) {
                return;
            }

            // Prefer the lazy-loading attribute over the plain src
            const picture = row.find('td:nth-child(1) a img');
            characters.push({
                name: nameLink.text().trim(),
                url: href.replace('/fr/wiki/', ''),
                pictureUrl: picture.attr('data-src') || picture.attr('src'),
                chapter,
            });
        });

        console.log(`Found ${characters.length} characters.`);
        return characters;
    } catch (error) {
        console.error('Error fetching character list:', error.message);
        return [];
    }
}

/**
 * Fetch character data from fandom using provided URL
 *
 * Downloads the character page (following redirects) and combines the values
 * read from it with the values already known from the character list.
 *
 * @param {string} characterUrl - Page path relative to the wiki base URL.
 * @param {string} characterName - Name from the character list; used in logs and
 *   as the fallback name (underscores become spaces) when the page has no title.
 * @param {string} [characterpictureUrl] - Picture URL from the character list;
 *   replaced by null when it contains "Image_Non_Disponible".
 * @param {string} characterChapter - First-appearance chapter from the list;
 *   parsed as a base-10 integer.
 * @returns {Promise<object|null>} The character record, or null when fetching or
 *   parsing failed (the error is logged).
 */
async function fetchCharacter(characterUrl, characterName, characterpictureUrl, characterChapter) {
    try {
        console.log(`Fetching: ${characterName}...`);

        const response = await fetchWithRetry(`${FANDOM_BASE_URL}/${characterUrl}`, {
            redirect: 'follow'
        });

        // Prefer the path reached after redirects (canonical character page)
        let finalCharacterUrl = characterUrl;
        try {
            const redirectedPath = new URL(response.url).pathname.replace('/fr/wiki/', '');
            if (redirectedPath) {
                finalCharacterUrl = redirectedPath;
            }
        } catch {
            // response.url is missing or not an absolute URL: keep the listed path
        }

        const data = await response.text();
        const $ = cheerio.load(data);

        // True when the page header links to the given category
        const hasCategory = (category) =>
            $(`.page-header__categories a[title="Catégorie:${category}"]`).length > 0;

        // Extract character name
        const name = $('h1.mw-page-title-main').text().trim() || characterName.replace(/_/g, ' ');

        // Generate character ID from URL + name combination
        const id = normalizeId(`${finalCharacterUrl}_${name}`);

        // Extract gender from the specific categories link
        let gender = null;
        if (hasCategory('Personnages Masculins')) {
            gender = 'Male';
        } else if (hasCategory('Personnages Féminins')) {
            gender = 'Female';
        }

        const age = extractAge($);
        const affiliations = extractAffiliations($);
        const epithets = extractEpithets($);

        // Extract devil fruit
        const devilFruitData = await extractDevilFruit($);
        const devilFruitId = devilFruitData?.devilFruitId || null;
        const devilFruitUrl = devilFruitData?.devilFruitUrl || null;

        // Extract haki
        const hakiObservation = hasCategory("Utilisateurs du Haki de l'observation");
        const hakiArmament = hasCategory("Utilisateurs du Haki de l'armement");
        const hakiConqueror = hasCategory('Utilisateurs du Haki des rois');

        const bounty = extractBounty($);
        const height = extractHeight($);

        // Use chapter from character list, cast to int
        const firstAppearance = Number.parseInt(characterChapter, 10);

        const origin = extractOrigin($);
        const status = extractStatus($);

        // Drop the placeholder used when the wiki has no picture
        const pictureUrl = characterpictureUrl && characterpictureUrl.includes('Image_Non_Disponible')
            ? null
            : characterpictureUrl;

        return {
            id,
            name,
            gender,
            age,
            height,
            origin,
            devilFruitId,
            devilFruitUrl,
            affiliations,
            bounty,
            hakiObservation,
            hakiArmament,
            hakiConqueror,
            epithets,
            firstAppearance,
            status,
            pictureUrl,
            url: finalCharacterUrl
        };
    } catch (error) {
        console.error(`Error fetching ${characterName}:`, error.message);
        return null;
    }
}


/**
 * Read the age from the infobox.
 *
 * When the field lists several values separated by line breaks, the last one
 * is used. Citations (<sup>), markup and parenthesised remarks are ignored and
 * only the digits of the remaining text are kept.
 *
 * @param {Function} $ - Cheerio root of the character page.
 * @returns {number|null} The age, or null when the field is missing, empty,
 *   has no digits, or evaluates to 0.
 */
function extractAge($) {
    const valueNode = $('[data-source="âge"] .pi-data-value');
    if (valueNode.length === 0) {
        return null;
    }

    const rawHtml = valueNode.html();
    if (!rawHtml) {
        return null;
    }

    // Drop citation blocks, then keep the segment after the last line break
    const withoutCitations = rawHtml.replace(/<sup[^>]*>.*?<\/sup>/gi, '');
    const segments = withoutCitations.split('<br');
    const latestSegment = segments[segments.length - 1];

    const digits = latestSegment
        .replace(/<[^>]*>/g, '')
        .trim()
        .replace(/\([^)]*\)/g, '')
        .replace(/\D/g, '');

    return Number.parseInt(digits, 10) || null;
}

/**
 * Read the affiliations from the infobox.
 *
 * Citations (<sup>) are removed from a copy of the field first. When the field
 * contains links, the trimmed link texts are returned; otherwise the plain
 * text is split on line breaks, semicolons and commas.
 *
 * @param {Function} $ - Cheerio root of the character page.
 * @returns {string[]} The affiliations; empty when the field is missing or empty.
 */
function extractAffiliations($) {
    const field = $('[data-source="affiliation"] .pi-data-value');
    if (field.length === 0) {
        return [];
    }

    // Work on a copy so the page itself keeps its citations
    const withoutCitations = field.clone();
    withoutCitations.find('sup').remove();

    const html = withoutCitations.html();
    if (!html) {
        return [];
    }

    // Preferred source: the text of every link in the field
    const linkTexts = withoutCitations
        .find('a')
        .map((position, anchor) => $(anchor).text().trim())
        .get();
    if (linkTexts.length > 0) {
        return linkTexts;
    }

    // Fallback: split the plain text on its separators
    const plainText = html.replace(/<[^>]*>/g, '').trim();
    return plainText.split(/\s*\n\s*|\s*;\s*|\s*,\s*/).filter(Boolean);
}

/**
 * Extract epithets from infobox
 *
 * Epithets are the quoted parts of the field. Straight quotes ("..."), curly
 * quotes (U+201C ... U+201D) and guillemets (« ... ») are all recognised.
 * Citations (<sup>) are removed from a copy of the field before reading it.
 *
 * @param {Function} $ - Cheerio root of the character page.
 * @returns {string[]} The trimmed epithets without their quotes; empty when the
 *   field is missing, empty, or contains no quoted text.
 */
function extractEpithets($) {
    const div = $('[data-source="épithète"] .pi-data-value');
    if (div.length === 0) return [];

    const cleanedDiv = div.clone();
    cleanedDiv.find('sup').remove();

    const text = cleanedDiv.text();
    if (!text) return [];

    // Opening quote, one or more non-quote characters, closing quote.
    // \u00AB \u00BB are the guillemets, \u201C \u201D the curly double quotes.
    const matches = text.match(/["\u00AB\u201C]([^"\u00BB\u201D]+)["\u00BB\u201D]/g);
    if (!matches) return [];

    // Remove the surrounding quotes and trim
    return matches
        .map((match) => match.replace(/^["\u00AB\u201C]|["\u00BB\u201D]$/g, '').trim())
        .filter(Boolean);
}

/**
 * Find the devil fruit linked from the infobox.
 *
 * The first link of the "dfnom" field is used when it points into the wiki
 * ("/fr/wiki/..."). The linked page is requested so that the path reached
 * after redirects is used; when that request fails (the error is logged) or
 * yields an empty path, the linked path itself is used.
 *
 * @param {Function} $ - Cheerio root of the character page.
 * @returns {Promise<{devilFruitId: string, devilFruitUrl: string}|null>} The
 *   normalized id and the page path, or null when there is no usable link.
 */
async function extractDevilFruit($) {
    const fruitLink = $('[data-source="dfnom"] .pi-data-value a').first();
    if (fruitLink.length === 0) {
        return null;
    }

    const href = fruitLink.attr('href');
    const pointsIntoWiki = Boolean(href) && href.startsWith('/fr/wiki/');
    if (!pointsIntoWiki) {
        return null;
    }

    const describeFruit = (pagePath) => ({
        devilFruitId: normalizeId(pagePath),
        devilFruitUrl: pagePath
    });
    const linkedPath = href.replace('/fr/wiki/', '');

    try {
        // Request the page so redirects are resolved
        const response = await fetchWithRetry(`${FANDOM_BASE_URL}/${linkedPath}`, {
            redirect: 'follow'
        });

        const redirectedPath = new URL(response.url).pathname.replace('/fr/wiki/', '');
        if (redirectedPath) {
            return describeFruit(redirectedPath);
        }
    } catch (error) {
        console.error(`Error fetching devil fruit page: ${error.message}`);
    }

    // Fall back to the path found in the infobox
    return describeFruit(linkedPath);
}

/**
 * Extract bounty from infobox
 *
 * The field may list several values separated by line breaks. The first value
 * is used; when it contains no digit, the second value is used instead.
 * Citations (<sup>) and markup are ignored and only the digits are kept. Line
 * breaks are recognised in every spelling (<br>, <br/>, <br />, with or without
 * attributes) for both the first and the second value.
 *
 * @param {Function} $ - Cheerio root of the character page.
 * @returns {string|number} The bounty as a string of digits, or the number 0
 *   when the field is missing, empty, or contains no digit.
 */
function extractBounty($) {
    const div = $('[data-source="prime"] .pi-data-value');
    if (div.length === 0) return 0;

    const html = div.html();
    if (!html) return 0;

    // Remove citations, split on line breaks, then strip the remaining markup
    const values = html
        .replace(/<sup[^>]*>.*?<\/sup>/gi, '')
        .split(/<br\b[^>]*>/i)
        .map((segment) => segment.replace(/<[^>]*>/g, '').trim());

    let bountyText = values[0];
    // No digits in the first value: try the second one
    if (!/\d/.test(bountyText) && values[1]) {
        bountyText = values[1];
    }

    // Remove all non-digits
    return bountyText.replace(/\D/g, '') || 0;
}

/**
 * Read the height from the infobox.
 *
 * The value is taken from the first <p> element of the field when there is
 * one, otherwise from the text after the last "<br>". Citations (<sup>),
 * markup, parenthesised remarks and whitespace are ignored. The result depends
 * on the unit found in the lower-cased text:
 *   - contains "cm": the digits only (e.g. "174 cm" gives "174");
 *   - contains "m":  the non-empty pieces around each "m" joined with "."
 *                    (e.g. "1m74" gives "1.74");
 *   - otherwise:     the digits only.
 *
 * @param {Function} $ - Cheerio root of the character page.
 * @returns {string|null} The height text, or null when the field is missing,
 *   empty, or yields nothing.
 */
function extractHeight($) {
    const field = $('[data-source="taille"] .pi-data-value');
    if (field.length === 0) {
        return null;
    }

    const rawHtml = field.html();
    if (!rawHtml) {
        return null;
    }

    const withoutCitations = rawHtml.replace(/<sup[^>]*>.*?<\/sup>/gi, '');

    // A <p> wins; otherwise use what follows the last <br>
    const paragraph = withoutCitations.match(/<p[^>]*>(.*?)<\/p>/i);
    const selected = paragraph ? paragraph[1] : withoutCitations.split('<br>').pop();

    const compact = selected
        .replace(/<[^>]*>/g, '')
        .trim()
        .replace(/\([^)]*\)/g, '')
        .toLowerCase()
        .replace(/\s/g, '');

    const digitsOrNull = () => compact.replace(/\D/g, '') || null;

    if (compact.includes('cm')) {
        return digitsOrNull();
    }

    if (compact.includes('m')) {
        const pieces = compact.split('m').filter(Boolean);
        return pieces.length > 0 ? pieces.join('.') : null;
    }

    return digitsOrNull();
}

/**
 * Read the origin from the infobox.
 *
 * Only the value before the first line break is used; citations (<sup>),
 * markup and parenthesised remarks are removed from it.
 *
 * @param {Function} $ - Cheerio root of the character page.
 * @returns {string|null} The origin, or null when the field is missing, empty,
 *   or nothing is left after cleaning.
 */
function extractOrigin($) {
    const field = $('[data-source="origine"] .pi-data-value');
    if (field.length === 0) {
        return null;
    }

    const rawHtml = field.html();
    if (!rawHtml) {
        return null;
    }

    const [firstValue] = rawHtml
        .replace(/<sup[^>]*>.*?<\/sup>/gi, '')
        .split('<br');

    const origin = firstValue
        .trim()
        .replace(/<[^>]*>/g, '')
        .trim()
        .replace(/\([^)]*\)/g, '')
        .trim();

    return origin || null;
}

/**
 * Read the status from the infobox.
 *
 * The lower-cased field text is searched for "vivant" first, then "décédé".
 *
 * @param {Function} $ - Cheerio root of the character page.
 * @returns {'Alive'|'Dead'|null} The status, or null when the field is missing
 *   or contains neither keyword.
 */
function extractStatus($) {
    const field = $('[data-source="statut"] .pi-data-value');
    if (field.length === 0) {
        return null;
    }

    // Keyword -> status, checked in this order
    const statusByKeyword = [
        ['vivant', 'Alive'],
        ['décédé', 'Dead']
    ];

    const fieldText = field.text().trim().toLowerCase();
    const found = statusByKeyword.find(([keyword]) => fieldText.includes(keyword));
    return found ? found[1] : null;
}


/**
 * Write the characters to `characters.json` in the output directory.
 *
 * @param {Array<object>} characters - Character records; serialized with a 2-space indent.
 * @returns {Promise<void>}
 */
async function saveToJSON(characters) {
    const targetPath = `${OUTPUT_DIR}/characters.json`;
    const serialized = JSON.stringify(characters, null, 2);
    fs.writeFileSync(targetPath, serialized);
    console.log(`✓ Saved to ${targetPath}`);
}

/**
 * Write the characters to `characters.csv` in the output directory.
 *
 * `null` entries are skipped. Per column: text columns write falsy values as
 * an empty string, list columns (epithets, affiliations) are joined with ", ",
 * the haki columns are written as 1 or 0, and a missing bounty is written as 0.
 *
 * @param {Array<object|null>} characters - Character records.
 * @returns {Promise<void>}
 */
async function saveToCSV(characters) {
    const filepath = `${OUTPUT_DIR}/characters.csv`;

    // Value formatters shared by the columns below
    const asText = (value) => value || '';
    const asList = (value) => (Array.isArray(value) ? value.join(', ') : asText(value));
    const asFlag = (value) => (value ? 1 : 0);
    const asBounty = (value) => value ?? 0;

    // One entry per CSV column, in output order
    const columns = [
        { id: 'id', title: 'ID', format: asText },
        { id: 'name', title: 'Name', format: asText },
        { id: 'gender', title: 'Gender', format: asText },
        { id: 'age', title: 'Age', format: asText },
        { id: 'height', title: 'Height', format: asText },
        { id: 'origin', title: 'Origin', format: asText },
        { id: 'status', title: 'Status', format: asText },
        { id: 'epithets', title: 'Epithets', format: asList },
        { id: 'devilFruitId', title: 'Devil Fruit ID', format: asText },
        { id: 'affiliations', title: 'Affiliations', format: asList },
        { id: 'bounty', title: 'Bounty', format: asBounty },
        { id: 'hakiObservation', title: 'Haki Observation', format: asFlag },
        { id: 'hakiArmament', title: 'Haki Armament', format: asFlag },
        { id: 'hakiConqueror', title: 'Haki Conqueror', format: asFlag },
        { id: 'firstAppearance', title: 'First Appearance', format: asText },
        { id: 'arcId', title: 'Arc ID', format: asText },
        { id: 'pictureUrl', title: 'Image URL', format: asText },
        { id: 'url', title: 'Fandom URL', format: asText }
    ];

    const csvWriter = createObjectCsvWriter({
        path: filepath,
        header: columns.map(({ id, title }) => ({ id, title })),
    });

    const toRecord = (character) => Object.fromEntries(
        columns.map(({ id, format }) => [id, format(character[id])])
    );
    const records = characters.filter((c) => c !== null).map(toRecord);

    await csvWriter.writeRecords(records);
    console.log(`✓ Saved to ${filepath}`);
}

/**
 * Fetch one devil fruit page and read its name and type.
 *
 * The name is the trimmed page title. The type comes from the "type" infobox
 * field, whose lower-cased text is searched for "zoan", "paramecia" and
 * "logia", in that order.
 *
 * @param {string} devilFruitUrl - Page path relative to the wiki base URL.
 * @param {string} devilFruitId - Identifier stored as the record id.
 * @returns {Promise<{id: string, name: string, type: (string|null), url: string}|null>}
 *   The record, or null when fetching or parsing failed (the error is logged).
 */
async function fetchDevilFruit(devilFruitUrl, devilFruitId) {
    // Infobox keyword -> type label, checked in this order
    const typeByKeyword = [
        ['zoan', 'Zoan'],
        ['paramecia', 'Paramecia'],
        ['logia', 'Logia']
    ];

    try {
        console.log(`Fetching devil fruit: ${devilFruitId}...`);

        const response = await fetchWithRetry(`${FANDOM_BASE_URL}/${devilFruitUrl}`);
        const html = await response.text();
        const $ = cheerio.load(html);

        const name = $('span.mw-page-title-main').text().trim();

        let type = null;
        const typeField = $('[data-source="type"] .pi-data-value');
        if (typeField.length > 0) {
            const typeText = typeField.text().trim().toLowerCase();
            const known = typeByKeyword.find(([keyword]) => typeText.includes(keyword));
            type = known ? known[1] : null;
        }

        return { id: devilFruitId, name, type, url: devilFruitUrl };
    } catch (error) {
        console.error(`Error fetching devil fruit ${devilFruitUrl}:`, error.message);
        return null;
    }
}

/**
 * Write the devil fruits to `devil-fruits.json` in the output directory.
 *
 * @param {Array<object>} devilFruits - Devil fruit records; serialized with a 2-space indent.
 * @returns {Promise<void>}
 */
async function saveDevilFruitsToJSON(devilFruits) {
    const targetPath = `${OUTPUT_DIR}/devil-fruits.json`;
    const serialized = JSON.stringify(devilFruits, null, 2);
    fs.writeFileSync(targetPath, serialized);
    console.log(`✓ Saved to ${targetPath}`);
}

/**
 * Write the devil fruits to `devil-fruits.csv` in the output directory.
 *
 * `null` entries are skipped and every falsy field is written as an empty string.
 *
 * @param {Array<object|null>} devilFruits - Devil fruit records.
 * @returns {Promise<void>}
 */
async function saveDevilFruitsToCSV(devilFruits) {
    const filepath = `${OUTPUT_DIR}/devil-fruits.csv`;

    // Column key -> CSV title, in output order
    const columns = [
        ['id', 'ID'],
        ['name', 'Name'],
        ['type', 'Type'],
        ['url', 'URL']
    ];

    const csvWriter = createObjectCsvWriter({
        path: filepath,
        header: columns.map(([id, title]) => ({ id, title })),
    });

    const toRecord = (fruit) => Object.fromEntries(
        columns.map(([key]) => [key, fruit[key] || ''])
    );
    const records = devilFruits.filter((df) => df !== null).map(toRecord);

    await csvWriter.writeRecords(records);
    console.log(`✓ Saved to ${filepath}`);
}

/**
 * Main execution
 *
 * Runs the three scraping steps in order and saves each result in the formats
 * selected by the first command-line argument ("json", "csv" or "all", the
 * default):
 *   1. arcs;
 *   2. characters - each one is linked to the first arc whose chapter range
 *      contains its first appearance;
 *   3. the devil fruits referenced by the characters.
 *
 * Characters that could not be fetched are retried in further passes, but only
 * up to a fixed number of passes, so a page that fails permanently can no
 * longer keep the scraper running forever; the characters given up on are
 * reported. An unrecognised mode only produces a warning: scraping still runs
 * but nothing is saved.
 *
 * @returns {Promise<void>}
 */
async function main() {
    // Upper bound on passes over the characters that are still missing
    const MAX_CHARACTER_PASSES = 5;

    const format = process.argv[2] || 'all'; // json, csv, or all
    const wantsJson = format === 'json' || format === 'all';
    const wantsCsv = format === 'csv' || format === 'all';

    console.log(`\nOne Piece Scraper - Mode: ${format}\n`);
    if (!wantsJson && !wantsCsv) {
        console.warn(`Unknown mode "${format}" (expected json, csv or all): nothing will be saved.\n`);
    }

    // Step 1: Scraping Arcs
    console.log('=== Step 1: Scraping Arcs ===\n');
    const arcsList = await fetchAllArcs();

    if (arcsList.length > 0) {
        // Display arcs in table format
        arcsList.forEach((arc) => {
            console.table({
                ID: arc.id,
                Name: arc.name,
                StartChapter: arc.startChapter,
                EndChapter: arc.endChapter || 'Ongoing',
                URL: arc.url
            });
        });

        console.log(`\n✓ Found ${arcsList.length} arcs\n`);

        if (wantsJson) {
            await saveArcsToJSON(arcsList);
        }
        if (wantsCsv) {
            await saveArcsToCSV(arcsList);
        }
    } else {
        console.warn('No arcs found, continuing...\n');
    }

    // Step 2: Scraping Characters
    console.log('=== Step 2: Scraping Characters ===\n');
    const characterList = await fetchAllCharactersUrl();

    if (characterList.length === 0) {
        console.error('No characters found. Exiting.');
        return;
    }

    const characters = [];
    const devilFruitUrls = new Set();
    let pendingCharacters = [...characterList];

    for (let pass = 1; pendingCharacters.length > 0 && pass <= MAX_CHARACTER_PASSES; pass++) {
        const failedCharacters = [];
        console.log(`\nFetching ${pendingCharacters.length} characters...`);

        for (const char of pendingCharacters) {
            const data = await fetchCharacter(char.url, char.name, char.pictureUrl, char.chapter);

            if (!data) {
                // Keep it for the next pass and wait before the next character
                failedCharacters.push(char);
                await new Promise(resolve => setTimeout(resolve, 1000));
                continue;
            }

            console.table({
                ID: data.id,
                Name: data.name,
                Gender: data.gender,
                Age: data.age,
                Status: data.status,
                Epithets: data.epithets.join(', '),
                Affiliations: data.affiliations.join(', '),
                DevilFruitId: data.devilFruitId,
                DevilFruitUrl: data.devilFruitUrl,
                HakiObservation: data.hakiObservation ? 'Yes' : 'No',
                HakiArmament: data.hakiArmament ? 'Yes' : 'No',
                HakiConqueror: data.hakiConqueror ? 'Yes' : 'No',
                Height: data.height,
                Bounty: data.bounty,
                Origin: data.origin,
                FirstAppearance: data.firstAppearance,
                pictureUrl: data.pictureUrl,
                FandomURL: data.url
            });

            // Collect devil fruit URLs
            if (data.devilFruitUrl) {
                devilFruitUrls.add(data.devilFruitUrl);
            }

            // Link the character to the arc containing its first appearance;
            // an arc without an end chapter is treated as still running
            if (data.firstAppearance) {
                const chapter = Number.parseInt(data.firstAppearance, 10);
                const arc = arcsList.find((a) =>
                    a.startChapter <= chapter && (a.endChapter === null || a.endChapter >= chapter));
                if (arc) {
                    data.arcId = arc.id;
                }
            }

            characters.push(data);
        }

        pendingCharacters = failedCharacters;
        if (pendingCharacters.length > 0 && pass < MAX_CHARACTER_PASSES) {
            console.log(`⚠️  ${pendingCharacters.length} characters failed. Retrying...`);
        }
    }

    if (pendingCharacters.length > 0) {
        const skippedNames = pendingCharacters.map((char) => char.name).join(', ');
        console.error(`✗ Giving up on ${pendingCharacters.length} characters after ${MAX_CHARACTER_PASSES} passes: ${skippedNames}`);
    }

    console.log(`\n✓ Scraped ${characters.length} characters\n`);
    console.log(`✓ Found ${devilFruitUrls.size} unique devil fruits\n`);

    // Step 3: Scraping Devil Fruits
    console.log('=== Step 3: Scraping Devil Fruits ===\n');

    if (devilFruitUrls.size === 0) {
        console.warn('No devil fruits found from characters, skipping...\n');
    } else {
        const devilFruits = [];

        for (const url of devilFruitUrls) {
            const data = await fetchDevilFruit(url, normalizeId(url));

            if (data) {
                console.table({
                    ID: data.id,
                    Name: data.name,
                    Type: data.type,
                    URL: data.url
                });

                devilFruits.push(data);
            }
        }

        console.log(`\n✓ Scraped ${devilFruits.length} devil fruits\n`);

        if (wantsJson) {
            await saveDevilFruitsToJSON(devilFruits);
        }
        if (wantsCsv) {
            await saveDevilFruitsToCSV(devilFruits);
        }

        // Make every character use the same id that the devil fruit records were given
        characters.forEach((char) => {
            if (char.devilFruitUrl) {
                char.devilFruitId = normalizeId(char.devilFruitUrl);
            }
        });
    }

    // Save characters after devil fruit IDs are updated
    if (wantsJson) {
        await saveToJSON(characters);
    }
    if (wantsCsv) {
        await saveToCSV(characters);
    }

    console.log('\n✓ Done!\n');
}

// Run the scraper; log any unhandled failure and make the process exit with a non-zero status
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});