- Added a new script to scrape devil fruits and characters from the One Piece Fandom wiki.
- Implemented functions to fetch, normalize, and save the data in JSON, CSV, and SQL formats.
- Created a structured output directory for the scraped data.

feat(database): update schema for devil fruits and characters
- Defined new types for devil fruits and haki in the database schema.
- Updated the character table to include fields for age, affiliations, devil fruit, haki, bounty, height, origin, first appearance, and picture URL.

feat(ui): enhance main page and daily mode layout
- Redesigned the main page with a new layout and styling for the OnePieceDle game.
- Created a new daily mode page with sections for clues and user input for guesses.
- Removed demo authentication routes and pages to streamline the application.
673 lines
22 KiB
JavaScript
673 lines
22 KiB
JavaScript
import * as cheerio from 'cheerio';
|
|
import fs from 'fs';
|
|
import { createObjectCsvWriter } from 'csv-writer';
|
|
|
|
// Base URL of the French One Piece Fandom wiki; page slugs are appended to it.
const FANDOM_BASE_URL = 'https://onepiece.fandom.com/fr/wiki';
// Directory that receives every generated JSON / CSV / SQL file.
const OUTPUT_DIR = './scraped-data';
// Number of detail pages requested in parallel within one batch.
const DEVIL_FRUIT_CONCURRENCY = 5;
const CHARACTER_CONCURRENCY = 10;

// Create the output directory. With `recursive: true`, mkdirSync succeeds when
// the directory already exists, so no separate existence check is needed (and
// there is no gap between "check" and "create" for another process to hit).
fs.mkdirSync(OUTPUT_DIR, { recursive: true });
|
|
|
|
/**
 * Build a stable identifier from a wiki page slug.
 *
 * The slug is percent-decoded, stripped of diacritics (NFD decomposition
 * followed by removal of the combining marks U+0300-U+036F), stripped of
 * commas and colons, and lower-cased.
 *
 * @param {string} str - Page slug, possibly percent-encoded.
 * @returns {string} The normalized identifier.
 */
function normalizeId(str) {
  let decoded;
  try {
    decoded = decodeURIComponent(str);
  } catch (error) {
    // decodeURIComponent throws URIError on a malformed escape (e.g. a bare
    // "%"). Fall back to the raw slug so one odd link cannot abort a scrape.
    if (!(error instanceof URIError)) throw error;
    decoded = String(str);
  }

  return decoded
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[,:]/g, '')
    .toLowerCase();
}
|
|
|
|
/**
 * Collect the devil fruit pages linked from the "Fruits du Démon" navigation
 * box of the wiki.
 *
 * Only the Paramecia / Zoan / Logia / "Type Inconnu" sub-tables are read, and
 * within them only rows whose header mentions Canon, Standards, Antiques,
 * Mythiques or Hors-Série. Classification pages, category pages and the
 * artificial-fruit overview pages are skipped.
 *
 * @returns {Promise<Array<{id: string, name: string, type: string, url: string}>>}
 *   One entry per distinct fruit id, or an empty array when the request or
 *   the parsing fails (the error is logged, not thrown).
 */
async function fetchAllDevilFruitsUrl() {
  const WIKI_PREFIX = '/fr/wiki/';
  // [substring of the sub-table header, type stored on the fruit]; first match wins.
  const TYPE_BY_HEADER = [
    ['Paramecia', 'Paramecia'],
    ['Zoan', 'Zoan'],
    ['Logia', 'Logia'],
    ['Type Inconnu', 'Unknown'],
  ];
  // A row is kept when its header contains one of these.
  const ACCEPTED_CATEGORIES = ['Canon', 'Standards', 'Antiques', 'Mythiques', 'Hors-Série'];
  // Overview pages that appear in the box but are not fruits themselves.
  const NON_FRUIT_PAGES = ['Fruits_du_Démon_Artificiels', 'SMILE'];

  // Links may be percent-encoded, so the accented names above only match the
  // decoded slug. Returns null when the slug is not decodable.
  const decodeSlug = (slug) => {
    try {
      return decodeURIComponent(slug);
    } catch {
      return null;
    }
  };

  try {
    const url = `${FANDOM_BASE_URL}/Fruits_du_Démon`;
    console.log('Fetching devil fruits list...');
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
      },
    });
    // Do not try to parse an error page as if it were the list.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} for ${url}`);
    }

    const $ = cheerio.load(await response.text());
    const devilFruits = [];
    const seenIds = new Set();

    // Find the main navibox table
    $('table.navibox.toccolours').each((mainTableIndex, mainTable) => {
      const mainHeader = $(mainTable).find('th[colspan="3"]').first().find('span').last().text().trim();
      if (mainHeader !== 'Fruits du Démon') return;

      $(mainTable).find('table.collapsible').each((typeTableIndex, typeTable) => {
        const typeHeader = $(typeTable).find('th[colspan="3"]').first().text().trim();
        const typeEntry = TYPE_BY_HEADER.find(([needle]) => typeHeader.includes(needle));
        if (!typeEntry) return;
        const type = typeEntry[1];

        $(typeTable).find('tr.navibox-row').each((rowIndex, row) => {
          const categoryHeader = $(row).find('th').text().trim();
          const isAccepted = ACCEPTED_CATEGORIES.some((category) => categoryHeader.includes(category));
          if (!isAccepted) return;

          // Find all links in the row
          $(row).find('td .hlist ul li a').each((linkIndex, link) => {
            const name = $(link).text().trim();
            const href = $(link).attr('href');
            if (!name || !href || !href.startsWith(WIKI_PREFIX)) return;

            const cleanUrl = href.replace(WIKI_PREFIX, '');
            const slug = decodeSlug(cleanUrl);
            if (slug === null) {
              console.warn(`Skipping link with malformed URL: ${href}`);
              return;
            }

            // Skip classification pages, category pages and overview pages
            const isNonFruitPage =
              slug.includes('Classification') ||
              slug.includes('Catégorie:') ||
              NON_FRUIT_PAGES.includes(slug);
            if (isNonFruitPage) return;

            // The same fruit can be linked from several rows; keep the first.
            const id = normalizeId(cleanUrl);
            if (seenIds.has(id)) return;
            seenIds.add(id);

            devilFruits.push({ id, name, type, url: cleanUrl });
          });
        });
      });
    });

    console.log(`Found ${devilFruits.length} devil fruits.`);
    return devilFruits;
  } catch (error) {
    console.error('Error fetching devil fruits list:', error.message);
    return [];
  }
}
|
|
|
|
/**
 * Fetch one devil fruit page and build its record.
 *
 * The page is only used to refine the display name (its main title). When the
 * server answers with an error status the page is not parsed and the name from
 * the list page is kept, so the fruit is still recorded.
 *
 * @param {string} devilFruitUrl - Page slug relative to the wiki base URL.
 * @param {string} devilFruitId - Identifier to store on the record.
 * @param {string} devilFruitName - Name found on the list page (fallback).
 * @param {string} devilFruitType - Type found on the list page.
 * @returns {Promise<{id: string, name: string, type: string} | null>}
 *   The record, or null when the request itself fails (the error is logged).
 */
async function fetchDevilFruit(devilFruitUrl, devilFruitId, devilFruitName, devilFruitType) {
  try {
    console.log(`Fetching: ${devilFruitName}...`);

    const response = await fetch(`${FANDOM_BASE_URL}/${devilFruitUrl}`, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
      },
    });

    let name = devilFruitName;
    if (response.ok) {
      const $ = cheerio.load(await response.text());
      // Prefer the page title when the page provides one
      name = $('h1.mw-page-title-main').text().trim() || devilFruitName;
    } else {
      // The title of an error page is not the fruit's name.
      console.warn(`HTTP ${response.status} for ${devilFruitName}; keeping the name from the list page.`);
    }

    // The type always comes from the list page
    return {
      id: devilFruitId,
      name,
      type: devilFruitType,
    };
  } catch (error) {
    console.error(`Error fetching ${devilFruitName}:`, error.message);
    return null;
  }
}
|
|
|
|
/**
 * Write the devil fruits to `devil-fruits.json` inside the output directory,
 * pretty-printed with a two-space indent.
 *
 * @param {Array<object>} devilFruits - Records to serialize.
 * @returns {Promise<void>} Resolves once the (synchronous) write is done.
 */
async function saveDevilFruitsToJSON(devilFruits) {
  const target = `${OUTPUT_DIR}/devil-fruits.json`;
  const payload = JSON.stringify(devilFruits, null, 2);

  fs.writeFileSync(target, payload);
  console.log(`✓ Saved to ${target}`);
}
|
|
|
|
/**
 * Write the devil fruits to `devil-fruits.sql` inside the output directory as
 * one upsert statement (INSERT ... ON CONFLICT(id) DO UPDATE) per fruit.
 *
 * @param {Array<{id: string, name: string, type: string}>} devilFruits
 * @returns {void}
 */
function saveDevilFruitsToSQL(devilFruits) {
  const filepath = `${OUTPUT_DIR}/devil-fruits.sql`;

  // Render a value as a SQL string literal (embedded quotes doubled). Only
  // missing values and the empty string become NULL; 0 and false are real
  // values and must not be turned into NULL by a truthiness test.
  const escapeSql = (value) =>
    value === null || value === undefined || value === ''
      ? 'NULL'
      : `'${String(value).replace(/'/g, "''")}'`;

  const statements = devilFruits.map((df) =>
    [
      'INSERT INTO devilFruit (id, name, type) ',
      `VALUES (${escapeSql(df.id)}, ${escapeSql(df.name)}, ${escapeSql(df.type)}) `,
      'ON CONFLICT(id) DO UPDATE SET ',
      ' name = excluded.name,',
      ' type = excluded.type;',
      '',
      '',
    ].join('\n'),
  );

  fs.writeFileSync(filepath, statements.join(''));
  console.log(`✓ Saved to ${filepath}`);
}
|
|
|
|
/**
 * Collect the canon characters listed on the "Liste des Personnages Canon"
 * page of the wiki.
 *
 * Each table row contributes the thumbnail from its first cell and the first
 * wiki link from its second cell. Rows without an internal wiki link are
 * ignored.
 *
 * @returns {Promise<Array<{id: string, name: string, url: string, pictureUrl: (string|undefined)}>>}
 *   The characters found, or an empty array when the request or the parsing
 *   fails (the error is logged, not thrown).
 */
async function fetchAllCharactersUrl() {
  const WIKI_PREFIX = '/fr/wiki/';

  try {
    const url = `${FANDOM_BASE_URL}/Liste_des_Personnages_Canon`;
    console.log('Fetching character list...');
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
      },
    });
    // Do not try to parse an error page as if it were the list.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} for ${url}`);
    }

    const $ = cheerio.load(await response.text());
    const characters = [];

    $('table.wikitable tbody tr').each((index, element) => {
      if (index === 0) return; // Skip header row

      // Lazy-loaded images keep the real address in data-src.
      const thumbnail = $(element).find('td:nth-child(1) a img');
      const charpictureUrl = thumbnail.attr('data-src') || thumbnail.attr('src');

      // Read href and name from the same (first) link; fall back to the text
      // of all links only when the first one has no text of its own.
      const nameLinks = $(element).find('td:nth-child(2) a');
      const primaryLink = nameLinks.first();
      const charLink = primaryLink.attr('href');
      const charName = primaryLink.text().trim() || nameLinks.text().trim();

      // Only internal wiki pages can be scraped later on.
      if (!charLink || !charLink.startsWith(WIKI_PREFIX)) return;

      const cleanUrl = charLink.replace(WIKI_PREFIX, '');
      let id;
      try {
        id = normalizeId(cleanUrl);
      } catch (error) {
        // One undecodable link must not discard the whole list.
        console.warn(`Skipping ${charLink}: ${error.message}`);
        return;
      }

      characters.push({
        id,
        name: charName,
        url: cleanUrl,
        pictureUrl: charpictureUrl,
      });
    });

    console.log(`Found ${characters.length} characters.`);
    return characters;
  } catch (error) {
    console.error('Error fetching character list:', error.message);
    return [];
  }
}
|
|
|
|
/**
 * Fetch one character page and build the character record from it.
 *
 * Gender and haki come from the category links in the page header; the other
 * fields come from the infobox through the extract* helpers. The devil fruit
 * lookup performs one additional request.
 *
 * @param {string} characterUrl - Page slug relative to the wiki base URL.
 * @param {string} characterId - Identifier to store on the record.
 * @param {string} characterName - Name from the list page (fallback name).
 * @param {string} [characterpictureUrl] - Thumbnail address from the list page.
 * @returns {Promise<object|null>} The record, or null when the page could not
 *   be fetched or parsed (the error is logged).
 */
async function fetchCharacter(characterUrl, characterId, characterName, characterpictureUrl) {
  // [title of the category link, haki label stored on the record]
  const HAKI_BY_CATEGORY = [
    ["Catégorie:Utilisateurs du Haki de l'observation", 'Observation'],
    ["Catégorie:Utilisateurs du Haki de l'armement", 'Armament'],
    ['Catégorie:Utilisateurs du Haki des rois', 'Conqueror'],
  ];

  try {
    console.log(`Fetching: ${characterName}...`);

    const response = await fetch(`${FANDOM_BASE_URL}/${characterUrl}`, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
      },
    });
    // An error page would produce a record whose fields are all empty, and the
    // SQL upsert would then overwrite good data with NULLs. Skip it instead.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const $ = cheerio.load(await response.text());
    const hasCategory = (title) => $(`.page-header__categories a[title="${title}"]`).length > 0;

    // Page title, or the list name with underscores turned into spaces
    const name = $('h1.mw-page-title-main').text().trim() || characterName.replace(/_/g, ' ');

    let gender = null;
    if (hasCategory('Catégorie:Personnages Masculins')) {
      gender = 'Male';
    } else if (hasCategory('Catégorie:Personnages Féminins')) {
      gender = 'Female';
    }

    const haki = HAKI_BY_CATEGORY
      .filter(([title]) => hasCategory(title))
      .map(([, label]) => label);

    return {
      id: characterId,
      name,
      gender,
      age: extractAge($),
      height: extractHeight($),
      origin: extractOrigin($),
      devilFruit: await extractDevilFruit($),
      affiliations: extractAffiliations($),
      bounty: extractBounty($),
      haki,
      firstAppearance: extractFirstAppearance($),
      pictureUrl: characterpictureUrl,
    };
  } catch (error) {
    console.error(`Error fetching ${characterName}:`, error.message);
    return null;
  }
}
|
|
|
|
|
|
/**
 * Extract the age from the infobox field `âge`.
 *
 * When the field holds several values separated by line breaks, the last one
 * is used. Citations and parenthesised remarks are dropped and only the digits
 * of what remains are kept.
 *
 * @param {Function} $ - Cheerio instance loaded with the character page.
 * @returns {string|null} The digits of the age, or null when none are found.
 */
function extractAge($) {
  const div = $('[data-source="âge"] .pi-data-value');
  if (div.length === 0) return null;

  const html = div.html();
  if (!html) return null;

  // Remove citations. [\s\S] is used because "." stops at line breaks, which
  // would leave the digits of a multi-line citation in the result.
  const withoutCitations = html.replace(/<sup[^>]*>[\s\S]*?<\/sup>/gi, '');

  // Keep the value after the last line break (whole <br ...> tag removed)
  const lastEntry = withoutCitations.split(/<br\b[^>]*>/i).pop();

  const digitsOnly = lastEntry
    .replace(/<[^>]*>/g, '') // remaining tags
    .trim()
    .replace(/\([^)]*\)/g, '') // parenthesised remarks
    .replace(/\D/g, '');

  return digitsOnly || null;
}
|
|
|
|
/**
 * Extract the affiliations from the infobox field `affiliation`.
 *
 * The texts of the links inside the field are preferred. When the field has
 * no link with text, its plain text is split on line breaks, semicolons and
 * commas instead. Citations are ignored in both cases.
 *
 * @param {Function} $ - Cheerio instance loaded with the character page.
 * @returns {string[]} The affiliation names; empty when the field is missing.
 */
function extractAffiliations($) {
  const div = $('[data-source="affiliation"] .pi-data-value');
  if (div.length === 0) return [];

  // Work on a copy so that removing citations does not alter the page
  const cleanedDiv = div.clone();
  cleanedDiv.find('sup').remove();

  // Links that only wrap an image have no text; they are not affiliations.
  const linkValues = cleanedDiv
    .find('a')
    .map((i, el) => $(el).text().trim())
    .get()
    .filter(Boolean);
  if (linkValues.length > 0) {
    return linkValues;
  }

  // Fallback to parsing text
  const html = cleanedDiv.html();
  if (!html) return [];

  return html
    .replace(/<[^>]*>/g, '')
    .trim()
    .split(/\s*\n\s*|\s*;\s*|\s*,\s*/)
    .filter(Boolean);
}
|
|
|
|
/**
 * Extract the id of the character's devil fruit from the infobox field
 * `dfnom`.
 *
 * The first link of the field is requested so that the address the server
 * finally answers from is used for the id. When that request fails, the id is
 * derived from the link itself.
 *
 * @param {Function} $ - Cheerio instance loaded with the character page.
 * @returns {Promise<string|null>} The normalized fruit id, or null when the
 *   field has no internal wiki link.
 */
async function extractDevilFruit($) {
  const WIKI_PREFIX = '/fr/wiki/';

  const link = $('[data-source="dfnom"] .pi-data-value a').first();
  if (link.length === 0) return null;

  const href = link.attr('href');
  if (!href || !href.startsWith(WIKI_PREFIX)) return null;

  const cleanUrl = href.replace(WIKI_PREFIX, '');

  try {
    // With redirect: 'follow' the response is the final one, so its status is
    // never 301/302; response.url already holds the address after redirects.
    const response = await fetch(`${FANDOM_BASE_URL}/${cleanUrl}`, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
      },
      redirect: 'follow',
    });

    const finalPath = new URL(response.url).pathname.replace(WIKI_PREFIX, '');

    // Only the address is needed: discard the body instead of leaving it
    // pending on the connection.
    await response.body?.cancel();

    if (finalPath) {
      return normalizeId(finalPath);
    }
  } catch (error) {
    console.error(`Error fetching devil fruit page: ${error.message}`);
  }

  // Fallback to the original href
  return normalizeId(cleanUrl);
}
|
|
|
|
/**
 * Extract the bounty from the infobox field `prime`.
 *
 * When the field holds several values separated by line breaks, the first one
 * is used. Citations and markup are dropped, then whitespace and dots are
 * removed from the remaining text.
 *
 * @param {Function} $ - Cheerio instance loaded with the character page.
 * @returns {string|null} The cleaned text, or null when nothing remains.
 */
function extractBounty($) {
  const div = $('[data-source="prime"] .pi-data-value');
  if (div.length === 0) return null;

  const html = div.html();
  if (!html) return null;

  // Remove citations. [\s\S] is used because "." stops at line breaks, which
  // would leave the text of a multi-line citation in the result.
  const withoutCitations = html.replace(/<sup[^>]*>[\s\S]*?<\/sup>/gi, '');

  // Extract the first value before any <br> tag
  const firstValue = withoutCitations.split('<br')[0];

  const cleanText = firstValue
    .replace(/<[^>]*>/g, '') // remaining tags
    .replace(/[\s.]/g, ''); // spaces and thousands separators

  return cleanText || null;
}
|
|
|
|
/**
 * Extract the height from the infobox field `taille`.
 *
 * When the field holds several values separated by line breaks, the last one
 * is used. A value in centimetres is returned as its digits ("174 cm" ->
 * "174"); a value in metres has its parts around "m" joined with a dot
 * ("1m74" -> "1.74"); a value without unit is returned as its digits.
 *
 * @param {Function} $ - Cheerio instance loaded with the character page.
 * @returns {string|null} The height as described above, or null.
 */
function extractHeight($) {
  const div = $('[data-source="taille"] .pi-data-value');
  if (div.length === 0) return null;

  const html = div.html();
  if (!html) return null;

  // Remove citations. [\s\S] is used because "." stops at line breaks, which
  // would leave the digits of a multi-line citation in the result.
  const withoutCitations = html.replace(/<sup[^>]*>[\s\S]*?<\/sup>/gi, '');

  // Keep the value after the last line break, whichever way the tag is
  // written (<br>, <br/>, <br />).
  const lastValue = withoutCitations.split(/<br\b[^>]*>/i).pop();

  const normalized = lastValue
    .replace(/<[^>]*>/g, '') // remaining tags
    .replace(/\([^)]*\)/g, '') // parenthesised remarks
    .toLowerCase()
    .replace(/\s/g, '');

  // "cm" must be tested first because it also contains "m"
  if (normalized.includes('cm')) {
    return normalized.replace(/\D/g, '') || null;
  }

  if (normalized.includes('m')) {
    const parts = normalized.split('m').filter(Boolean);
    return parts.length > 0 ? parts.join('.') : null;
  }

  return normalized.replace(/\D/g, '') || null;
}
|
|
|
|
/**
 * Extract the chapter of first appearance from the infobox field `première`.
 *
 * @param {Function} $ - Cheerio instance loaded with the character page.
 * @returns {string|null} The number following the first "Chapitre" found in
 *   the field, or null when there is none.
 */
function extractFirstAppearance($) {
  const div = $('[data-source="première"] .pi-data-value');
  if (div.length === 0) return null;

  const html = div.html();
  if (!html) return null;

  // Remove citations. [\s\S] is used because "." stops at line breaks, which
  // would let a chapter mentioned inside a multi-line citation be picked up.
  const withoutCitations = html.replace(/<sup[^>]*>[\s\S]*?<\/sup>/gi, '');

  // Extract digits after "Chapitre"
  const plainText = withoutCitations.replace(/<[^>]*>/g, '').trim();
  const match = plainText.match(/Chapitre\s+(\d+)/i);

  return match ? match[1] : null;
}
|
|
|
|
/**
 * Extract the origin from the infobox field `origine`.
 *
 * When the field holds several values separated by line breaks, the first one
 * is used. Citations, markup and parenthesised remarks are dropped.
 *
 * @param {Function} $ - Cheerio instance loaded with the character page.
 * @returns {string|null} The origin, or null when nothing remains.
 */
function extractOrigin($) {
  const div = $('[data-source="origine"] .pi-data-value');
  if (div.length === 0) return null;

  const html = div.html();
  if (!html) return null;

  // Remove citations. [\s\S] is used because "." stops at line breaks, which
  // would leave the text of a multi-line citation in the result.
  const withoutCitations = html.replace(/<sup[^>]*>[\s\S]*?<\/sup>/gi, '');

  // Extract the first value before any <br> tag
  const firstValue = withoutCitations.split('<br')[0].trim();

  const cleanText = firstValue
    .replace(/<[^>]*>/g, '') // remaining tags
    .trim()
    .replace(/\([^)]*\)/g, '') // parenthesised remarks
    .trim();

  return cleanText || null;
}
|
|
|
|
|
|
/**
 * Write the characters to `characters.json` inside the output directory,
 * pretty-printed with a two-space indent.
 *
 * Null entries (characters that could not be fetched) are left out, as they
 * are in the CSV and SQL outputs.
 *
 * @param {Array<object|null>} characters - Records to serialize.
 * @returns {Promise<void>} Resolves once the (synchronous) write is done.
 */
async function saveToJSON(characters) {
  const filepath = `${OUTPUT_DIR}/characters.json`;
  const records = characters.filter((c) => c !== null);

  fs.writeFileSync(filepath, JSON.stringify(records, null, 2));
  console.log(`✓ Saved to ${filepath}`);
}
|
|
|
|
/**
 * Write the characters to `characters.csv` inside the output directory.
 *
 * Null entries are skipped, empty fields are written as empty strings, and the
 * list fields (affiliations, haki) are joined with ", ".
 *
 * @param {Array<object|null>} characters - Records to write.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function saveToCSV(characters) {
  const filepath = `${OUTPUT_DIR}/characters.csv`;

  // [record key, column title], in output order
  const columns = [
    ['id', 'ID'],
    ['name', 'Name'],
    ['gender', 'Gender'],
    ['age', 'Age'],
    ['height', 'Height'],
    ['origin', 'Origin'],
    ['devilFruit', 'Devil Fruit'],
    ['affiliations', 'Affiliations'],
    ['bounty', 'Bounty'],
    ['haki', 'Haki'],
    ['firstAppearance', 'First Appearance'],
    ['pictureUrl', 'Image URL'],
  ];
  // Fields that hold arrays and are flattened into one cell
  const listColumns = ['affiliations', 'haki'];

  const csvWriter = createObjectCsvWriter({
    path: filepath,
    header: columns.map(([id, title]) => ({ id, title })),
  });

  const toCell = (key, value) =>
    listColumns.includes(key) && Array.isArray(value) ? value.join(', ') : value || '';

  const records = [];
  for (const character of characters) {
    if (character === null) continue;
    records.push(
      Object.fromEntries(columns.map(([key]) => [key, toCell(key, character[key])])),
    );
  }

  await csvWriter.writeRecords(records);
  console.log(`✓ Saved to ${filepath}`);
}
|
|
|
|
/**
 * Write the characters to `characters.sql` inside the output directory as one
 * upsert statement (INSERT ... ON CONFLICT(id) DO UPDATE) per character.
 *
 * Null entries are skipped. Affiliations are stored joined with ", "; haki is
 * stored as a JSON array, or NULL when the character has none.
 *
 * @param {Array<object|null>} characters - Records to write.
 * @returns {void}
 */
function saveToSQL(characters) {
  const filepath = `${OUTPUT_DIR}/characters.sql`;

  // Columns in statement order; every column except id is updated on conflict.
  const columns = [
    'id',
    'name',
    'gender',
    'age',
    'height',
    'origin',
    'devilFruit',
    'affiliations',
    'bounty',
    'haki',
    'firstAppearance',
    'pictureUrl',
  ];

  // Render a value as a SQL string literal (embedded quotes doubled). Only
  // missing values and the empty string become NULL; 0 and false are real
  // values and must not be turned into NULL by a truthiness test.
  const escapeSql = (value) =>
    value === null || value === undefined || value === ''
      ? 'NULL'
      : `'${String(value).replace(/'/g, "''")}'`;

  const assignments = columns
    .filter((column) => column !== 'id')
    .map((column) => ` ${column} = excluded.${column}`)
    .join(',\n');

  const statements = characters
    .filter((c) => c !== null)
    .map((c) => {
      const row = {
        ...c,
        affiliations: Array.isArray(c.affiliations) ? c.affiliations.join(', ') : c.affiliations,
        haki: Array.isArray(c.haki) && c.haki.length > 0 ? JSON.stringify(c.haki) : null,
      };
      const values = columns.map((column) => escapeSql(row[column])).join(', ');

      return (
        `INSERT INTO character (${columns.join(', ')}) \n` +
        `VALUES (${values}) \n` +
        `ON CONFLICT(id) DO UPDATE SET \n` +
        `${assignments};\n\n`
      );
    });

  fs.writeFileSync(filepath, statements.join(''));
  console.log(`✓ Saved to ${filepath}`);
}
|
|
|
|
/**
 * Process `items` in consecutive batches: the items of one batch are fetched
 * in parallel, and the next batch starts once the whole batch has settled.
 * Every non-empty result is printed with console.table and collected.
 *
 * @param {Array<object>} items - Entries to process.
 * @param {number} batchSize - Number of parallel fetches per batch.
 * @param {Function} fetchOne - Async function turning an entry into a record
 *   (or a falsy value on failure).
 * @param {Function} toRow - Builds the object shown by console.table.
 * @returns {Promise<Array<object>>} The records, in input order.
 */
async function scrapeInBatches(items, batchSize, fetchOne, toRow) {
  const scraped = [];

  for (let start = 0; start < items.length; start += batchSize) {
    const batch = items.slice(start, start + batchSize);
    const results = await Promise.all(batch.map((item) => fetchOne(item)));

    for (const data of results) {
      if (!data) continue; // failed fetches are reported by fetchOne itself
      console.table(toRow(data));
      scraped.push(data);
    }
  }

  return scraped;
}

/**
 * Main execution: scrape the devil fruits, then the characters, and save each
 * set in the format given as first command-line argument (`json`, `csv`, `sql`
 * or `all`; default `all`). Devil fruits have no CSV output.
 *
 * An unknown format, or an empty character list, ends the run with a non-zero
 * exit code.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const SUPPORTED_FORMATS = ['json', 'csv', 'sql', 'all'];
  const format = process.argv[2] || 'all';

  // Reject a mistyped format before spending time on scraping whose result
  // would never be saved.
  if (!SUPPORTED_FORMATS.includes(format)) {
    console.error(`Unknown format "${format}". Expected one of: ${SUPPORTED_FORMATS.join(', ')}.`);
    process.exitCode = 1;
    return;
  }
  const wants = (kind) => format === kind || format === 'all';

  console.log(`\nOne Piece Scraper - Mode: ${format}\n`);

  // Step 1: Scraping Devil Fruits
  console.log('=== Step 1: Scraping Devil Fruits ===\n');
  const devilFruitList = await fetchAllDevilFruitsUrl();

  if (devilFruitList.length === 0) {
    console.warn('No devil fruits found, continuing with characters...\n');
  } else {
    const devilFruits = await scrapeInBatches(
      devilFruitList,
      DEVIL_FRUIT_CONCURRENCY,
      (df) => fetchDevilFruit(df.url, df.id, df.name, df.type),
      (data) => ({
        ID: data.id,
        Name: data.name,
        Type: data.type,
      }),
    );

    console.log(`\n✓ Scraped ${devilFruits.length} devil fruits\n`);

    if (wants('json')) {
      await saveDevilFruitsToJSON(devilFruits);
    }
    if (wants('sql')) {
      saveDevilFruitsToSQL(devilFruits);
    }
  }

  // Step 2: Scraping Characters
  console.log('=== Step 2: Scraping Characters ===\n');
  const characterList = await fetchAllCharactersUrl();

  if (characterList.length === 0) {
    console.error('No characters found. Exiting.');
    process.exitCode = 1;
    return;
  }

  const characters = await scrapeInBatches(
    characterList,
    CHARACTER_CONCURRENCY,
    (char) => fetchCharacter(char.url, char.id, char.name, char.pictureUrl),
    (data) => ({
      ID: data.id,
      Name: data.name,
      Gender: data.gender,
      Age: data.age,
      Affiliations: data.affiliations.join(', '),
      DevilFruit: data.devilFruit,
      Haki: data.haki.join(', '),
      Height: data.height,
      Bounty: data.bounty,
      Origin: data.origin,
      FirstAppearance: data.firstAppearance,
      pictureUrl: data.pictureUrl,
    }),
  );

  console.log(`\n✓ Scraped ${characters.length} characters\n`);

  if (wants('json')) {
    await saveToJSON(characters);
  }
  if (wants('csv')) {
    await saveToCSV(characters);
  }
  if (wants('sql')) {
    saveToSQL(characters);
  }

  console.log('\n✓ Done!\n');
}
|
|
|
|
// Run the scraper. An uncaught failure is reported and also reflected in the
// exit code, so that shells and CI jobs can detect it.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|