feat(scraper): implement One Piece data scraper for devil fruits and characters

- Added a new script to scrape devil fruits and characters from One Piece fandom.
- Implemented functions to fetch, normalize, and save data in JSON, CSV, and SQL formats.
- Created a structured output directory for scraped data.

feat(database): update schema for devil fruits and characters

- Defined new types for devil fruits and haki in the database schema.
- Updated the character table to include fields for age, affiliations, devil fruit, haki, bounty, height, origin, first appearance, and picture URL.

feat(ui): enhance main page and daily mode layout

- Redesigned the main page with a new layout and styling for the OnePieceDle game.
- Created a new daily mode page with sections for clues and user input for guesses.
- Removed demo authentication routes and pages to streamline the application.
This commit is contained in:
2026-02-27 01:14:44 +01:00
parent c494866a70
commit 6f7bae2307
17 changed files with 2407 additions and 165 deletions

104
scripts/import-sql.js Normal file
View File

@@ -0,0 +1,104 @@
import { createClient } from '@libsql/client';
import fs from 'fs';
// Database location: taken from the DATABASE_URL environment variable, falling
// back to the 'file:local.db' URL when the variable is unset or empty.
const DATABASE_URL = process.env.DATABASE_URL || 'file:local.db';
// Single client instance shared by every statement executed in importSQL().
const client = createClient({
url: DATABASE_URL
});
/**
 * Execute every statement of one generated SQL dump with the shared client.
 *
 * Statements are separated by a semicolon followed by a blank line. Both LF
 * and CRLF line endings are accepted, and a trailing semicolon left on the
 * last statement is removed so that no statement is sent with ";;".
 * A failing statement is reported and counted; the remaining ones still run.
 *
 * @param {string} filePath - Path of the .sql file to import.
 * @param {string} title - Section title used in the banner lines.
 * @param {string} noun - Singular label used in the "Found N ..." line.
 * @returns {Promise<{successCount: number, errorCount: number}>} Zero counts
 *   when the file does not exist.
 */
async function importSqlFile(filePath, title, noun) {
  if (!fs.existsSync(filePath)) {
    console.log(`\n⚠ No ${filePath.split('/').pop()} found, skipping...\n`);
    return { successCount: 0, errorCount: 0 };
  }

  console.log(`\n=== Importing ${title} ===\n`);
  const statements = fs
    .readFileSync(filePath, 'utf-8')
    .split(/;\r?\n\r?\n/)
    .map((piece) => piece.trim().replace(/;$/, ''))
    .filter(Boolean);
  console.log(`Found ${statements.length} ${noun} statements\n`);

  let successCount = 0;
  let errorCount = 0;
  for (const [index, statement] of statements.entries()) {
    try {
      await client.execute(`${statement};`);
      successCount++;
      process.stdout.write(`\rExecuted: ${successCount}/${statements.length}`);
    } catch (error) {
      errorCount++;
      // Only used for the log line: shows the VALUES list up to its first ")".
      const valuesMatch = statement.match(/VALUES\s*\(([^)]+)\)/);
      const values = valuesMatch ? valuesMatch[1] : 'N/A';
      console.error(`\n✗ Error at statement ${index + 1}:`);
      console.error(` Values: ${values}`);
      console.error(` Message: ${error.message}`);
    }
  }

  console.log(`\n\n✓ ${title} imported!`);
  console.log(` Success: ${successCount}`);
  console.log(` Errors: ${errorCount}`);
  return { successCount, errorCount };
}

/**
 * Import the scraped SQL dumps into the database.
 *
 * Devil fruits are imported before characters. Each dump is optional: a
 * missing file is reported and skipped. Individual statement failures are
 * counted and logged without stopping the run; any other failure (for
 * example an unreadable file) terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function importSQL() {
  const dumps = [
    ['./scraped-data/devil-fruits.sql', 'Devil Fruits', 'devil fruit'],
    ['./scraped-data/characters.sql', 'Characters', 'character'],
  ];
  try {
    let totalSuccess = 0;
    let totalErrors = 0;
    for (const [filePath, title, noun] of dumps) {
      const { successCount, errorCount } = await importSqlFile(filePath, title, noun);
      totalSuccess += successCount;
      totalErrors += errorCount;
    }
    console.log(`\n=== Total Import Summary ===`);
    console.log(` Total Success: ${totalSuccess}`);
    console.log(` Total Errors: ${totalErrors}\n`);
  } catch (error) {
    console.error('✗ Import failed:', error.message);
    process.exit(1);
  }
}
// Script entry point. importSQL() reports its own failures; this handler is a
// last-resort net that logs an unexpected rejection and marks the run as failed
// instead of letting the process exit with status 0.
importSQL().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

672
scripts/scrape-onepiece.js Normal file
View File

@@ -0,0 +1,672 @@
import * as cheerio from 'cheerio';
import fs from 'fs';
import { createObjectCsvWriter } from 'csv-writer';
// Base URL of the French One Piece Fandom wiki; page names are appended to it.
const FANDOM_BASE_URL = 'https://onepiece.fandom.com/fr/wiki';
// Directory that receives every generated JSON, CSV and SQL file.
const OUTPUT_DIR = './scraped-data';
// Number of pages requested in parallel within one batch.
const DEVIL_FRUIT_CONCURRENCY = 5;
const CHARACTER_CONCURRENCY = 10;
// Create the output directory. With `recursive: true` the call succeeds when
// the directory already exists, so no separate existence check is needed.
fs.mkdirSync(OUTPUT_DIR, { recursive: true });
/**
 * Build a stable identifier from a wiki URL segment.
 *
 * The segment is percent-decoded, stripped of diacritics (NFD decomposition
 * followed by removal of the combining marks U+0300-U+036F), stripped of
 * commas and colons, and lower-cased. Every other character is kept.
 *
 * @param {string} str - URL path segment, possibly percent-encoded.
 * @returns {string} The normalized identifier.
 */
function normalizeId(str) {
  const raw = String(str);
  let decoded;
  try {
    decoded = decodeURIComponent(raw);
  } catch (error) {
    if (!(error instanceof URIError)) {
      throw error;
    }
    // A stray "%" makes the segment undecodable; keep the raw text so that one
    // odd link cannot abort the scrape that is calling this helper.
    decoded = raw;
  }
  return decoded
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[,:]/g, '')
    .toLowerCase();
}
/**
 * Collect every devil fruit linked from the "Fruits du Démon" navigation box.
 *
 * Only the navibox whose main header reads "Fruits du Démon" is used. Inside
 * it, each collapsible table gives the fruit type through its header, and only
 * rows whose category header matches one of the accepted categories are read.
 * Classification pages, category pages and two overview pages are skipped.
 *
 * @returns {Promise<Array<{id: string, name: string, type: string, url: string}>>}
 *   The fruits found, or an empty array when the page cannot be fetched or parsed.
 */
async function fetchAllDevilFruitsUrl() {
  // Header fragment -> stored type; the first matching fragment wins.
  const typeByHeader = [
    ['Paramecia', 'Paramecia'],
    ['Zoan', 'Zoan'],
    ['Logia', 'Logia'],
    ['Type Inconnu', 'Unknown'],
  ];
  const acceptedCategories = ['Canon', 'Standards', 'Antiques', 'Mythiques', 'Hors-Série'];
  const excludedPages = new Set(['Fruits_du_Démon_Artificiels', 'SMILE']);
  // hrefs are percent-encoded, so they must be decoded before being compared
  // with the accented page names above; undecodable segments are used as-is.
  const decodeSegment = (segment) => {
    try {
      return decodeURIComponent(segment);
    } catch {
      return segment;
    }
  };

  try {
    const url = `${FANDOM_BASE_URL}/Fruits_du_Démon`;
    console.log('Fetching devil fruits list...');
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
      },
    });
    if (!response.ok) {
      // Without this check an error page would be parsed and silently yield 0 fruits.
      throw new Error(`HTTP ${response.status} for ${url}`);
    }
    const $ = cheerio.load(await response.text());
    const devilFruits = [];

    $('table.navibox.toccolours').each((mainTableIndex, mainTable) => {
      const mainHeader = $(mainTable).find('th[colspan="3"]').first().find('span').last().text().trim();
      if (mainHeader !== 'Fruits du Démon') return;

      $(mainTable).find('table.collapsible').each((typeTableIndex, typeTable) => {
        const typeHeader = $(typeTable).find('th[colspan="3"]').first().text().trim();
        const type = typeByHeader.find(([fragment]) => typeHeader.includes(fragment))?.[1];
        if (!type) return;

        $(typeTable).find('tr.navibox-row').each((rowIndex, row) => {
          const categoryHeader = $(row).find('th').text().trim();
          if (!acceptedCategories.some((category) => categoryHeader.includes(category))) return;

          $(row).find('td .hlist ul li a').each((linkIndex, link) => {
            const name = $(link).text().trim();
            const href = $(link).attr('href');
            if (!name || !href || !href.startsWith('/fr/wiki/')) return;

            const cleanUrl = href.replace('/fr/wiki/', '');
            const pageName = decodeSegment(cleanUrl);
            if (
              pageName.includes('Classification') ||
              pageName.includes('Catégorie:') ||
              excludedPages.has(pageName)
            ) {
              return;
            }
            devilFruits.push({
              id: normalizeId(cleanUrl),
              name,
              type,
              url: cleanUrl,
            });
          });
        });
      });
    });

    console.log(`Found ${devilFruits.length} devil fruits.`);
    return devilFruits;
  } catch (error) {
    console.error('Error fetching devil fruits list:', error.message);
    return [];
  }
}
/**
 * Download one devil fruit page and build its record.
 *
 * The name is read from the page title and falls back to the name found on the
 * list page when the title is missing; the type is always the one supplied by
 * the caller.
 *
 * @param {string} devilFruitUrl - Page name appended to the wiki base URL.
 * @param {string} devilFruitId - Identifier stored in the record.
 * @param {string} devilFruitName - Fallback name, also used in log messages.
 * @param {string} devilFruitType - Type stored in the record.
 * @returns {Promise<{id: string, name: string, type: string}|null>} The record,
 *   or null when the request or the parsing throws.
 */
async function fetchDevilFruit(devilFruitUrl, devilFruitId, devilFruitName, devilFruitType) {
  const requestOptions = {
    headers: {
      'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
    },
  };
  try {
    console.log(`Fetching: ${devilFruitName}...`);
    const page = await fetch(`${FANDOM_BASE_URL}/${devilFruitUrl}`, requestOptions);
    const html = await page.text();
    const $ = cheerio.load(html);
    const pageTitle = $('h1.mw-page-title-main').text().trim();
    return {
      id: devilFruitId,
      name: pageTitle || devilFruitName,
      type: devilFruitType,
    };
  } catch (error) {
    console.error(`Error fetching ${devilFruitName}:`, error.message);
    return null;
  }
}
/**
 * Write the devil fruit records to `devil-fruits.json` in the output directory,
 * pretty-printed with a two-space indent.
 *
 * @param {Array<object>} devilFruits - Records to serialize.
 * @returns {Promise<void>}
 */
async function saveDevilFruitsToJSON(devilFruits) {
  const target = `${OUTPUT_DIR}/devil-fruits.json`;
  const payload = JSON.stringify(devilFruits, null, 2);
  fs.writeFileSync(target, payload);
  console.log(`✓ Saved to ${target}`);
}
/**
 * Write the devil fruit records to `devil-fruits.sql` as one upsert per fruit.
 *
 * Each statement inserts into `devilFruit` and, on an `id` conflict, replaces
 * the stored name and type. Statements are terminated by a semicolon followed
 * by a blank line. Falsy values are written as NULL; other values are quoted
 * with single quotes doubled.
 *
 * @param {Array<{id: string, name: string, type: string}>} devilFruits
 * @returns {void}
 */
function saveDevilFruitsToSQL(devilFruits) {
  const filepath = `${OUTPUT_DIR}/devil-fruits.sql`;
  const toLiteral = (value) => {
    if (!value) {
      return 'NULL';
    }
    return `'${String(value).replace(/'/g, "''")}'`;
  };
  const statements = devilFruits.map((fruit) =>
    [
      'INSERT INTO devilFruit (id, name, type) ',
      `VALUES (${toLiteral(fruit.id)}, ${toLiteral(fruit.name)}, ${toLiteral(fruit.type)}) `,
      'ON CONFLICT(id) DO UPDATE SET ',
      ' name = excluded.name,',
      ' type = excluded.type;',
      '',
      '',
    ].join('\n')
  );
  fs.writeFileSync(filepath, statements.join(''));
  console.log(`✓ Saved to ${filepath}`);
}
/**
 * Fetch the list of canon characters from the One Piece fandom.
 *
 * Every row of every `table.wikitable` is read except the very first row
 * (the header). The picture comes from the first cell (lazy-loaded `data-src`
 * preferred over `src`) and the page link and name from the second cell.
 * Rows without a link into `/fr/wiki/` are skipped.
 *
 * @returns {Promise<Array<{id: string, name: string, url: string, pictureUrl: (string|undefined)}>>}
 *   The characters found, or an empty array when the page cannot be fetched or parsed.
 */
async function fetchAllCharactersUrl() {
  try {
    const url = `${FANDOM_BASE_URL}/Liste_des_Personnages_Canon`;
    console.log('Fetching character list...');
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
      },
    });
    if (!response.ok) {
      // Without this check an error page would be parsed and silently yield 0 characters.
      throw new Error(`HTTP ${response.status} for ${url}`);
    }
    const $ = cheerio.load(await response.text());
    const characters = [];

    $('table.wikitable tbody tr').each((index, element) => {
      if (index === 0) return; // Skip header row

      const picture = $(element).find('td:nth-child(1) a img');
      const nameLink = $(element).find('td:nth-child(2) a');
      const charLink = nameLink.attr('href');
      // Same guard as the devil fruit list: only in-wiki article links can be
      // turned into a page name and an id.
      if (!charLink || !charLink.startsWith('/fr/wiki/')) return;

      const cleanUrl = charLink.replace('/fr/wiki/', '');
      characters.push({
        id: normalizeId(cleanUrl),
        name: nameLink.text().trim(),
        url: cleanUrl,
        pictureUrl: picture.attr('data-src') || picture.attr('src'),
      });
    });

    console.log(`Found ${characters.length} characters.`);
    return characters;
  } catch (error) {
    console.error('Error fetching character list:', error.message);
    return [];
  }
}
/**
 * Download one character page and build its record.
 *
 * Gender and haki are derived from the category links in the page header; the
 * remaining fields come from the infobox through the extract* helpers. The
 * picture URL is passed through unchanged from the list page.
 *
 * A non-2xx response is treated as a failure: the character is skipped rather
 * than stored as a record whose fields are all empty, which would otherwise
 * overwrite previously imported data through the SQL upsert.
 *
 * @param {string} characterUrl - Page name appended to the wiki base URL.
 * @param {string} characterId - Identifier stored in the record.
 * @param {string} characterName - Fallback name, also used in log messages.
 * @param {string} [characterpictureUrl] - Picture URL found on the list page.
 * @returns {Promise<object|null>} The character record, or null on any failure.
 */
async function fetchCharacter(characterUrl, characterId, characterName, characterpictureUrl) {
  const categorySelector = (category) => `.page-header__categories a[title="Catégorie:${category}"]`;
  // Category title -> stored value; for gender the first matching category wins.
  const genderCategories = [
    ['Personnages Masculins', 'Male'],
    ['Personnages Féminins', 'Female'],
  ];
  const hakiCategories = [
    ["Utilisateurs du Haki de l'observation", 'Observation'],
    ["Utilisateurs du Haki de l'armement", 'Armament'],
    ['Utilisateurs du Haki des rois', 'Conqueror'],
  ];

  try {
    console.log(`Fetching: ${characterName}...`);
    const url = `${FANDOM_BASE_URL}/${characterUrl}`;
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
      },
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} for ${url}`);
    }
    const $ = cheerio.load(await response.text());
    const hasCategory = (category) => $(categorySelector(category)).length > 0;

    const name = $('h1.mw-page-title-main').text().trim() || characterName.replace(/_/g, ' ');
    const gender = genderCategories.find(([category]) => hasCategory(category))?.[1] ?? null;
    const age = extractAge($);
    const affiliations = extractAffiliations($);
    // Performs its own request to resolve the devil fruit page behind the link.
    const devilFruit = await extractDevilFruit($);
    const haki = hakiCategories
      .filter(([category]) => hasCategory(category))
      .map(([, label]) => label);
    const bounty = extractBounty($);
    const height = extractHeight($);
    const firstAppearance = extractFirstAppearance($);
    const origin = extractOrigin($);

    return {
      id: characterId,
      name,
      gender,
      age,
      height,
      origin,
      devilFruit,
      affiliations,
      bounty,
      haki,
      firstAppearance,
      pictureUrl: characterpictureUrl,
    };
  } catch (error) {
    console.error(`Error fetching ${characterName}:`, error.message);
    return null;
  }
}
/**
 * Read the character's age from the infobox "âge" field.
 *
 * Citation markers are dropped, only the text after the last line break is
 * kept, parenthesised notes are removed, and every non-digit is discarded.
 *
 * @param {Function} $ - Cheerio-style query function for the character page.
 * @returns {string|null} The digits found, or null when there are none.
 */
function extractAge($) {
  const field = $('[data-source="âge"] .pi-data-value');
  if (field.length === 0) {
    return null;
  }
  const markup = field.html();
  if (!markup) {
    return null;
  }
  const latestEntry = markup
    .replace(/<sup[^>]*>.*?<\/sup>/gi, '')
    .split('<br')
    .pop();
  const digits = latestEntry
    .replace(/<[^>]*>/g, '')
    .trim()
    .replace(/\([^)]*\)/g, '')
    .replace(/\D/g, '');
  return digits === '' ? null : digits;
}
/**
 * Read the character's affiliations from the infobox "affiliation" field.
 *
 * Citation markers are removed first. When the field contains links, the
 * non-empty link labels are returned. Otherwise the plain text is split on
 * line breaks (including `<br>` tags), semicolons and commas.
 *
 * @param {Function} $ - Cheerio-style query function for the character page.
 * @returns {string[]} The affiliation names; empty when the field is missing.
 */
function extractAffiliations($) {
  const div = $('[data-source="affiliation"] .pi-data-value');
  if (div.length === 0) return [];

  const cleanedDiv = div.clone();
  cleanedDiv.find('sup').remove();
  const html = cleanedDiv.html();
  if (!html) return [];

  // Links that only wrap an image have an empty label and carry no name.
  const linkValues = cleanedDiv
    .find('a')
    .map((i, el) => $(el).text().trim())
    .get()
    .filter(Boolean);
  if (linkValues.length > 0) {
    return linkValues;
  }

  // Fallback to parsing text. <br> must become a separator before the tags are
  // stripped, otherwise consecutive values are glued into a single name.
  return html
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<[^>]*>/g, '')
    .split(/[\n;,]/)
    .map((part) => part.trim())
    .filter(Boolean);
}
/**
 * Resolve the identifier of the character's devil fruit from the infobox
 * "dfnom" field.
 *
 * The first link of the field is requested with redirects followed, and the
 * identifier is derived from the URL of the final response so that a link to
 * a redirect page resolves to the page it points to. When the request fails,
 * the identifier is derived from the original link instead.
 *
 * @param {Function} $ - Cheerio-style query function for the character page.
 * @returns {Promise<string|null>} The normalized devil fruit id, or null when
 *   the field has no link into `/fr/wiki/`.
 */
async function extractDevilFruit($) {
  const link = $('[data-source="dfnom"] .pi-data-value a').first();
  if (link.length === 0) return null;

  const href = link.attr('href');
  if (!href || !href.startsWith('/fr/wiki/')) return null;
  const cleanUrl = href.replace('/fr/wiki/', '');

  try {
    // With `redirect: 'follow'` the response seen here is always the final one,
    // so its status is never 301/302 and `response.url` is the resolved address.
    const response = await fetch(`${FANDOM_BASE_URL}/${cleanUrl}`, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)',
      },
      redirect: 'follow',
    });
    const finalPath = new URL(response.url).pathname.replace('/fr/wiki/', '');
    if (finalPath) {
      return normalizeId(finalPath);
    }
  } catch (error) {
    console.error(`Error fetching devil fruit page: ${error.message}`);
  }

  // Fallback to the original href
  return normalizeId(cleanUrl);
}
/**
 * Read the character's bounty from the infobox "prime" field.
 *
 * Citation markers are dropped, only the text before the first line break is
 * kept, and whitespace and dots (thousands separators) are removed. Serialized
 * non-breaking spaces are treated as whitespace.
 *
 * @param {Function} $ - Cheerio-style query function for the character page.
 * @returns {string|null} The cleaned bounty text, or null when it is empty.
 */
function extractBounty($) {
  const div = $('[data-source="prime"] .pi-data-value');
  if (div.length === 0) return null;

  const html = div.html();
  if (!html) return null;

  // Extract the first value before any <br> tag, without citations.
  const firstValue = html.replace(/<sup[^>]*>.*?<\/sup>/gi, '').split('<br')[0].trim();
  const cleanText = firstValue
    .replace(/<[^>]*>/g, '')
    // HTML serialization writes U+00A0 as an entity, which `\s` cannot match.
    .replace(/&nbsp;|&#160;|&#xa0;/gi, ' ')
    .replace(/[\s.]/g, '');
  return cleanText || null;
}
/**
 * Read the character's height from the infobox "taille" field.
 *
 * Citation markers are dropped, only the text after the last line break is
 * kept, and parenthesised notes are removed. A value containing "cm" yields
 * its digits; a value containing "m" yields the parts around each "m" joined
 * with a dot (so "1 m 74" gives "1.74"); anything else yields its digits.
 *
 * @param {Function} $ - Cheerio-style query function for the character page.
 * @returns {string|null} The extracted height, or null when nothing is found.
 */
function extractHeight($) {
  const div = $('[data-source="taille"] .pi-data-value');
  if (div.length === 0) return null;

  const html = div.html();
  if (!html) return null;

  // Split on every <br> spelling (<br>, <br/>, <br />) and keep the last value.
  const segments = html.replace(/<sup[^>]*>.*?<\/sup>/gi, '').split(/<br\s*\/?>/i);
  const lastValue = segments[segments.length - 1].trim();
  const cleanText = lastValue
    .replace(/<[^>]*>/g, '')
    // HTML serialization writes U+00A0 as an entity, which `\s` cannot match.
    .replace(/&nbsp;|&#160;|&#xa0;/gi, ' ')
    .trim()
    .replace(/\([^)]*\)/g, '');

  // Normalize units for meters or centimeters
  const normalized = cleanText.toLowerCase().replace(/\s/g, '');
  if (normalized.includes('cm')) {
    return normalized.replace(/\D/g, '') || null;
  }
  if (normalized.includes('m')) {
    const parts = normalized.split('m').filter(Boolean);
    return parts.length > 0 ? parts.join('.') : null;
  }
  return normalized.replace(/\D/g, '') || null;
}
/**
 * Read the chapter of the character's first appearance from the infobox
 * "première" field.
 *
 * Citation markers and tags are stripped, then the number following the word
 * "Chapitre" (case-insensitive) is returned.
 *
 * @param {Function} $ - Cheerio-style query function for the character page.
 * @returns {string|null} The chapter number as text, or null when absent.
 */
function extractFirstAppearance($) {
  const field = $('[data-source="première"] .pi-data-value');
  if (field.length === 0) {
    return null;
  }
  const markup = field.html();
  if (!markup) {
    return null;
  }
  const plainText = markup
    .replace(/<sup[^>]*>.*?<\/sup>/gi, '')
    .replace(/<[^>]*>/g, '')
    .trim();
  const chapter = /Chapitre\s+(\d+)/i.exec(plainText);
  if (chapter === null) {
    return null;
  }
  return chapter[1];
}
/**
 * Read the character's origin from the infobox "origine" field.
 *
 * Citation markers are dropped, only the text before the first line break is
 * kept, and tags and parenthesised notes are removed.
 *
 * @param {Function} $ - Cheerio-style query function for the character page.
 * @returns {string|null} The origin, or null when nothing remains.
 */
function extractOrigin($) {
  const field = $('[data-source="origine"] .pi-data-value');
  if (field.length === 0) {
    return null;
  }
  const markup = field.html();
  if (!markup) {
    return null;
  }
  const withoutCitations = markup.replace(/<sup[^>]*>.*?<\/sup>/gi, '');
  const [firstEntry] = withoutCitations.split('<br');
  const origin = firstEntry
    .trim()
    .replace(/<[^>]*>/g, '')
    .trim()
    .replace(/\([^)]*\)/g, '')
    .trim();
  return origin === '' ? null : origin;
}
/**
 * Write the character records to `characters.json` in the output directory,
 * pretty-printed with a two-space indent.
 *
 * @param {Array<object>} characters - Records to serialize.
 * @returns {Promise<void>}
 */
async function saveToJSON(characters) {
  const target = `${OUTPUT_DIR}/characters.json`;
  const payload = JSON.stringify(characters, null, 2);
  fs.writeFileSync(target, payload);
  console.log(`✓ Saved to ${target}`);
}
/**
 * Write the character records to `characters.csv` in the output directory.
 *
 * Null entries are skipped. Missing values become empty strings, and the
 * `affiliations` and `haki` arrays are flattened to comma-separated text.
 *
 * @param {Array<object|null>} characters - Records to export.
 * @returns {Promise<void>} Resolves once the writer has stored every record.
 */
async function saveToCSV(characters) {
  const filepath = `${OUTPUT_DIR}/characters.csv`;
  // [record field, column title] in output order.
  const columns = [
    ['id', 'ID'],
    ['name', 'Name'],
    ['gender', 'Gender'],
    ['age', 'Age'],
    ['height', 'Height'],
    ['origin', 'Origin'],
    ['devilFruit', 'Devil Fruit'],
    ['affiliations', 'Affiliations'],
    ['bounty', 'Bounty'],
    ['haki', 'Haki'],
    ['firstAppearance', 'First Appearance'],
    ['pictureUrl', 'Image URL'],
  ];
  const listFields = new Set(['affiliations', 'haki']);
  const toCell = (field, value) => {
    if (listFields.has(field) && Array.isArray(value)) {
      return value.join(', ');
    }
    return value || '';
  };

  const csvWriter = createObjectCsvWriter({
    path: filepath,
    header: columns.map(([id, title]) => ({ id, title })),
  });
  const records = [];
  for (const character of characters) {
    if (character === null) {
      continue;
    }
    records.push(
      Object.fromEntries(columns.map(([field]) => [field, toCell(field, character[field])]))
    );
  }
  await csvWriter.writeRecords(records);
  console.log(`✓ Saved to ${filepath}`);
}
/**
 * Write the character records to `characters.sql` as one upsert per character.
 *
 * Each statement inserts into `character` and, on an `id` conflict, replaces
 * every other column. Null entries are skipped. `affiliations` arrays are
 * stored as comma-separated text and non-empty `haki` arrays as a JSON string.
 * Falsy values are written as NULL; other values are quoted with single
 * quotes doubled.
 *
 * @param {Array<object|null>} characters - Records to export.
 * @returns {void}
 */
function saveToSQL(characters) {
  const filepath = `${OUTPUT_DIR}/characters.sql`;
  const columns = [
    'id',
    'name',
    'gender',
    'age',
    'height',
    'origin',
    'devilFruit',
    'affiliations',
    'bounty',
    'haki',
    'firstAppearance',
    'pictureUrl',
  ];
  const toLiteral = (value) => {
    if (!value) {
      return 'NULL';
    }
    return `'${String(value).replace(/'/g, "''")}'`;
  };
  const columnValue = (character, column) => {
    const raw = character[column];
    if (column === 'affiliations') {
      return Array.isArray(raw) ? raw.join(', ') : raw;
    }
    if (column === 'haki') {
      return Array.isArray(raw) && raw.length > 0 ? JSON.stringify(raw) : null;
    }
    return raw;
  };

  const insertLine = `INSERT INTO character (${columns.join(', ')}) `;
  // Every column except the conflict key is refreshed from the incoming row.
  const updateLines = columns
    .slice(1)
    .map((column) => ` ${column} = excluded.${column}`)
    .join(',\n');

  const statements = [];
  for (const character of characters) {
    if (character === null) {
      continue;
    }
    const values = columns.map((column) => toLiteral(columnValue(character, column))).join(', ');
    statements.push(
      `${insertLine}\nVALUES (${values}) \nON CONFLICT(id) DO UPDATE SET \n${updateLines};\n\n`
    );
  }
  fs.writeFileSync(filepath, statements.join(''));
  console.log(`✓ Saved to ${filepath}`);
}
/**
 * Process a list in consecutive batches, running each batch in parallel.
 *
 * @param {Array} items - Items to process.
 * @param {number} batchSize - Number of items handled concurrently.
 * @param {Function} worker - Async function called with one item; a falsy
 *   result means the item failed and is dropped.
 * @param {Function} report - Called with every kept result, in order.
 * @returns {Promise<Array>} The kept results.
 */
async function runInBatches(items, batchSize, worker, report) {
  const collected = [];
  for (let start = 0; start < items.length; start += batchSize) {
    const batch = items.slice(start, start + batchSize);
    const results = await Promise.all(batch.map((item) => worker(item)));
    for (const result of results) {
      if (!result) continue;
      report(result);
      collected.push(result);
    }
  }
  return collected;
}

/**
 * Main execution: scrape devil fruits, then characters, and save each data set
 * in the format given as the first command-line argument (`json`, `csv`,
 * `sql`, or `all`, the default). Devil fruits have no CSV export.
 *
 * An unknown format is rejected before any page is requested, because nothing
 * would be saved at the end of the run. An empty devil fruit list is only a
 * warning; an empty character list ends the run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const supportedFormats = ['json', 'csv', 'sql', 'all'];
  const format = process.argv[2] || 'all'; // json, csv, sql, or all
  if (!supportedFormats.includes(format)) {
    console.error(`Unknown format "${format}". Expected one of: ${supportedFormats.join(', ')}.`);
    process.exitCode = 1;
    return;
  }
  const wants = (kind) => format === kind || format === 'all';

  console.log(`\nOne Piece Scraper - Mode: ${format}\n`);

  // Step 1: Scraping Devil Fruits
  console.log('=== Step 1: Scraping Devil Fruits ===\n');
  const devilFruitList = await fetchAllDevilFruitsUrl();
  if (devilFruitList.length === 0) {
    console.warn('No devil fruits found, continuing with characters...\n');
  } else {
    const devilFruits = await runInBatches(
      devilFruitList,
      DEVIL_FRUIT_CONCURRENCY,
      (df) => fetchDevilFruit(df.url, df.id, df.name, df.type),
      (data) => {
        console.table({
          ID: data.id,
          Name: data.name,
          Type: data.type,
        });
      }
    );
    console.log(`\n✓ Scraped ${devilFruits.length} devil fruits\n`);
    if (wants('json')) {
      await saveDevilFruitsToJSON(devilFruits);
    }
    if (wants('sql')) {
      saveDevilFruitsToSQL(devilFruits);
    }
  }

  // Step 2: Scraping Characters
  console.log('=== Step 2: Scraping Characters ===\n');
  const characterList = await fetchAllCharactersUrl();
  if (characterList.length === 0) {
    console.error('No characters found. Exiting.');
    return;
  }
  const characters = await runInBatches(
    characterList,
    CHARACTER_CONCURRENCY,
    (char) => fetchCharacter(char.url, char.id, char.name, char.pictureUrl),
    (data) => {
      console.table({
        ID: data.id,
        Name: data.name,
        Gender: data.gender,
        Age: data.age,
        Affiliations: data.affiliations.join(', '),
        DevilFruit: data.devilFruit,
        Haki: data.haki.join(', '),
        Height: data.height,
        Bounty: data.bounty,
        Origin: data.origin,
        FirstAppearance: data.firstAppearance,
        pictureUrl: data.pictureUrl,
      });
    }
  );
  console.log(`\n✓ Scraped ${characters.length} characters\n`);
  if (wants('json')) {
    await saveToJSON(characters);
  }
  if (wants('csv')) {
    await saveToCSV(characters);
  }
  if (wants('sql')) {
    saveToSQL(characters);
  }
  console.log('\n✓ Done!\n');
}
// Script entry point. A rejection from main() (for example a failed file write)
// is logged and also marks the run as failed, instead of exiting with status 0.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});