refactor: update package.json and scripts for One Piece scraper

- Changed the scrape script to use tsx for TypeScript execution.
- Added new TypeScript script for scraping One Piece data.
- Refactored package.json to include dependencies for the new scraper.
- Removed unused dependencies and organized devDependencies.

feat: implement One Piece data scraping functionality

- Added functionality to scrape arcs, characters, and devil fruits from One Piece fandom.
- Implemented data extraction methods for character attributes and devil fruit details.
- Added JSON and CSV export capabilities for scraped data.

fix: update auth configuration to handle missing secret

- Modified the auth configuration to use a default secret if BETTER_AUTH_SECRET is not set.

fix: improve database client initialization

- Updated database client creation to use a local database file if DATABASE_URL is not set.

chore: switch Svelte adapter to node

- Changed Svelte adapter from auto to node for better server-side rendering support.
This commit is contained in:
2026-03-01 15:17:17 +01:00
parent b8b3f8bddc
commit 56bd6f5545
10 changed files with 1976 additions and 666 deletions

View File

@@ -10,4 +10,3 @@ coverage
.env
.env.*
local.db
drizzle/meta

View File

@@ -5,6 +5,7 @@ COPY package*.json ./
RUN npm ci
COPY . .
RUN npm run build
FROM node:24-alpine AS runner
@@ -12,15 +13,10 @@ WORKDIR /app
ENV NODE_ENV=production
COPY --from=builder /app ./
COPY docker-entrypoint.sh /app/docker-entrypoint.sh
RUN chmod +x /app/docker-entrypoint.sh
# Create non-root user
RUN addgroup -g 1000 node && adduser -D -u 1000 -G node node
RUN chown -R node:node /app
USER node
EXPOSE 4173
EXPOSE 3000
ENTRYPOINT ["/app/docker-entrypoint.sh"]

View File

@@ -1,9 +1,9 @@
#!/bin/bash
#!/bin/sh
set -e
# Migrate the database
npm run db:migrate
# Start the production server
exec npm run preview --host 0.0.0.0
node build/index.js

View File

@@ -1,11 +1,9 @@
import { defineConfig } from 'drizzle-kit';
if (!process.env.DATABASE_URL) throw new Error('DATABASE_URL is not set');
export default defineConfig({
schema: './src/lib/server/db/schema.ts',
dialect: 'sqlite',
dbCredentials: { url: process.env.DATABASE_URL },
dbCredentials: { url: process.env.DATABASE_URL || 'file:local.db' },
verbose: true,
strict: true
});

2419
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -19,24 +19,19 @@
"db:import": "npx tsx scripts/import-json.ts",
"db:set-daily-mode": "npx tsx scripts/set-daily-mode.ts",
"auth:schema": "npx @better-auth/cli generate --config src/lib/server/auth.ts --output src/lib/server/db/auth.schema.ts --yes",
"scrape": "node scripts/scrape-onepiece.js"
"scrape": "npx tsx scripts/scrape-onepiece.ts"
},
"devDependencies": {
"@eslint/compat": "^2.0.2",
"@eslint/js": "^9.39.2",
"@libsql/client": "^0.17.0",
"@sveltejs/adapter-auto": "^7.0.0",
"@sveltejs/kit": "^2.50.2",
"@sveltejs/vite-plugin-svelte": "^6.2.4",
"@tailwindcss/forms": "^0.5.11",
"@tailwindcss/typography": "^0.5.19",
"@tailwindcss/vite": "^4.1.18",
"@types/node": "^24",
"better-auth": "^1.4.18",
"cheerio": "^1.0.0-rc.12",
"csv-writer": "^1.6.0",
"drizzle-kit": "^0.31.8",
"drizzle-orm": "^0.45.1",
"eslint": "^9.39.2",
"eslint-config-prettier": "^10.1.8",
"eslint-plugin-svelte": "^3.14.0",
@@ -50,5 +45,13 @@
"typescript": "^5.9.3",
"typescript-eslint": "^8.54.0",
"vite": "^7.3.1"
},
"dependencies": {
"tsx": "^4.21.0",
"drizzle-orm": "^0.45.1",
"drizzle-kit": "^0.31.8",
"better-auth": "^1.4.18",
"@sveltejs/adapter-node": "^5.5.4",
"@libsql/client": "^0.17.0"
}
}

View File

@@ -1,32 +1,71 @@
import * as cheerio from 'cheerio';
import fs from 'fs';
import https from 'https';
import { createObjectCsvWriter } from 'csv-writer';
// Type definitions
interface Arc {
id: string;
name: string;
startChapter: number;
endChapter: number | null;
url: string;
}
interface Character {
id: string;
name: string;
gender: string | null;
age: number | null;
height: number | null;
origin: string | null;
devilFruitId: string | null;
devilFruitUrl: string | null;
affiliations: string[];
bounty: number | null;
hakiObservation: boolean;
hakiArmament: boolean;
hakiConqueror: boolean;
epithets: string[];
firstAppearance: number;
status: string | null;
pictureUrl: string | null;
url: string;
arcId?: string;
}
interface CharacterListItem {
name: string;
url: string;
pictureUrl: string | null;
chapter: string;
}
interface DevilFruitData {
devilFruitId: string;
devilFruitUrl: string;
}
interface DevilFruit {
id: string;
name: string;
type: string | null;
url: string;
}
const FANDOM_BASE_URL = 'https://onepiece.fandom.com/fr/wiki';
const OUTPUT_DIR = './scraped-data';
const MAX_RETRIES = 0; // Set to 0 to disable retries, can be increased if needed
const INITIAL_RETRY_DELAY = 1000;
// Keep the same HTTP session as a normal browser would - maintain a connection pool and allow cookie persistence
const httpsAgent = new https.Agent({
keepAlive: true,
keepAliveMsecs: 1000,
maxFreeSockets: 10,
maxSockets: 50,
maxConnections: 50,
timeout: 30000
});
// Store cookies across requests (simulate browser behavior)
const cookies = new Map();
const cookies = new Map<string, string>();
function getCookieHeader() {
function getCookieHeader(): string {
const cookieArray = Array.from(cookies.values()).map(c => c.split(';')[0]);
return cookieArray.length > 0 ? cookieArray.join('; ') : '';
}
function saveCookies(setCookieHeader) {
function saveCookies(setCookieHeader: string | string[] | null): void {
if (setCookieHeader) {
const cookiesList = Array.isArray(setCookieHeader) ? setCookieHeader : [setCookieHeader];
cookiesList.forEach(cookie => {
@@ -45,14 +84,14 @@ if (!fs.existsSync(OUTPUT_DIR)) {
/**
* Retry a fetch request with exponential backoff
*/
async function fetchWithRetry(url, options = {}, retries = 0) {
async function fetchWithRetry(url: string, options: RequestInit = {}, retries: number = 0): Promise<Response> {
try {
const headers = {
const headers: Record<string, string> = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:150.0) Firefox/150.0',
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'Connection': 'keep-alive',
...options.headers
...((options.headers as Record<string, string>) || {})
};
// Add cookies from previous requests
@@ -63,9 +102,8 @@ async function fetchWithRetry(url, options = {}, retries = 0) {
const response = await fetch(url, {
headers,
agent: httpsAgent,
...options
});
} as any);
// Save cookies from response
const setCookie = response.headers.get('set-cookie');
@@ -92,7 +130,7 @@ async function fetchWithRetry(url, options = {}, retries = 0) {
// If it's a network error and we have retries left, retry
if (retries < MAX_RETRIES) {
const delay = INITIAL_RETRY_DELAY * Math.pow(2, retries);
console.log(`⚠️ Network error: ${error.message}, retrying in ${delay}ms...`);
console.log(`⚠️ Network error: ${(error as Error).message}, retrying in ${delay}ms...`);
await new Promise(resolve => setTimeout(resolve, delay));
return fetchWithRetry(url, options, retries + 1);
}
@@ -106,7 +144,7 @@ async function fetchWithRetry(url, options = {}, retries = 0) {
/**
* Normalize string by decoding URI components, punctuation, and replacing spaces with underscores
*/
function normalizeId(str) {
function normalizeId(str: string): string {
return decodeURIComponent(str)
.normalize('NFD')
.replace(/[,:.\(\)]/g, '')
@@ -117,14 +155,14 @@ function normalizeId(str) {
/**
* Fetch all arcs from One Piece fandom
*/
async function fetchAllArcs() {
async function fetchAllArcs(): Promise<Arc[]> {
try {
const url = `${FANDOM_BASE_URL}/Chapitres_et_Tomes`;
console.log('Fetching arcs list...');
const response = await fetchWithRetry(url);
const data = await response.text();
const $ = cheerio.load(data);
const arcs = [];
const arcs: Arc[] = [];
// Find all arc links in the table
$('table.wikitable td a').each((index, element) => {
@@ -132,7 +170,7 @@ async function fetchAllArcs() {
const href = $(element).attr('href');
// Check if it's an arc link (contains "Arc" and chapter info)
if (text.includes('Arc') && text.includes('Ch.')) {
if (text.includes('Arc') && text.includes('Ch.') && href) {
// Extract arc name and chapter range
// Example text: "Arc Ville d'Orange(Ch.8 à 21)[T.1 à 3]"
console.log(`Processing arc link: ${text} (${href})`);
@@ -164,7 +202,7 @@ async function fetchAllArcs() {
console.log(`Found ${arcs.length} arcs.`);
return arcs;
} catch (error) {
console.error('Error fetching arcs list:', error.message);
console.error('Error fetching arcs list:', (error as Error).message);
return [];
}
}
@@ -172,7 +210,7 @@ async function fetchAllArcs() {
/**
* Save arcs to JSON
*/
async function saveArcsToJSON(arcs) {
async function saveArcsToJSON(arcs: Arc[]): Promise<void> {
const filepath = `${OUTPUT_DIR}/arcs.json`;
fs.writeFileSync(filepath, JSON.stringify(arcs, null, 2));
console.log(`✓ Saved to ${filepath}`);
@@ -181,7 +219,7 @@ async function saveArcsToJSON(arcs) {
/**
* Save arcs to CSV
*/
async function saveArcsToCSV(arcs) {
async function saveArcsToCSV(arcs: Arc[]): Promise<void> {
const filepath = `${OUTPUT_DIR}/arcs.csv`;
const csvWriter = createObjectCsvWriter({
path: filepath,
@@ -211,14 +249,14 @@ async function saveArcsToCSV(arcs) {
/**
 * Fetch all canon characters from One Piece fandom
*/
async function fetchAllCharactersUrl() {
async function fetchAllCharactersUrl(): Promise<CharacterListItem[]> {
try {
const url = `${FANDOM_BASE_URL}/Liste_des_Personnages_Canon`;
console.log('Fetching character list...');
const response = await fetchWithRetry(url);
const data = await response.text();
const $ = cheerio.load(data);
const characters = [];
const characters: CharacterListItem[] = [];
$('table.wikitable tbody tr').each((index, element) => {
if (index === 0) return; // Skip header row
let charpictureUrl = $(element).find('td:nth-child(1) a img').attr('data-src') || $(element).find('td:nth-child(1) a img').attr('src');
@@ -240,7 +278,7 @@ async function fetchAllCharactersUrl() {
characters.push({
name: charName,
url: charUrl,
pictureUrl: charpictureUrl,
pictureUrl: charpictureUrl || null,
chapter: charChapter,
});
}
@@ -248,7 +286,7 @@ async function fetchAllCharactersUrl() {
console.log(`Found ${characters.length} characters.`);
return characters;
} catch (error) {
console.error('Error fetching character list:', error.message);
console.error('Error fetching character list:', (error as Error).message);
return [];
}
}
@@ -256,7 +294,12 @@ async function fetchAllCharactersUrl() {
/**
* Fetch character data from fandom using provided URL
*/
async function fetchCharacter(characterUrl, characterName, characterpictureUrl, characterChapter) {
async function fetchCharacter(
characterUrl: string,
characterName: string,
characterpictureUrl: string | null,
characterChapter: string
): Promise<Character | null> {
try {
console.log(`Fetching: ${characterName}...`);
@@ -269,10 +312,10 @@ async function fetchCharacter(characterUrl, characterName, characterpictureUrl,
let finalCharacterId = normalizeId(characterUrl);
try {
const finalUrl = new URL(response.url);
const characterUrl = finalUrl.pathname.replace('/fr/wiki/', '');
if (characterUrl) {
finalCharacterUrl = characterUrl;
finalCharacterId = normalizeId(characterUrl);
const characterUrlPath = finalUrl.pathname.replace('/fr/wiki/', '');
if (characterUrlPath) {
finalCharacterUrl = characterUrlPath;
finalCharacterId = normalizeId(characterUrlPath);
}
} catch {
// If HTTP is not ok or redirected URL, throw an error to be caught in the outer block
@@ -292,7 +335,7 @@ async function fetchCharacter(characterUrl, characterName, characterpictureUrl,
finalCharacterId = normalizeId(finalCharacterUrl + '_' + name);
// Extract gender from the specific categories link
let gender = null;
let gender: string | null = null;
if ($('.page-header__categories a[title="Catégorie:Personnages Masculins"]').length > 0) {
gender = 'Male';
} else if ($('.page-header__categories a[title="Catégorie:Personnages Féminins"]').length > 0) {
@@ -360,7 +403,7 @@ async function fetchCharacter(characterUrl, characterName, characterpictureUrl,
url: finalCharacterUrl
};
} catch (error) {
console.error(`Error fetching ${characterName}:`, error.message);
console.error(`Error fetching ${characterName}:`, (error as Error).message);
return null;
}
}
@@ -369,7 +412,7 @@ async function fetchCharacter(characterUrl, characterName, characterpictureUrl,
/**
* Extract age from infobox
*/
function extractAge($) {
function extractAge($: cheerio.CheerioAPI): number | null {
const div = $('[data-source="âge"] .pi-data-value');
if (div.length === 0) return null;
@@ -394,7 +437,7 @@ function extractAge($) {
/**
* Extract affiliations from infobox
*/
function extractAffiliations($) {
function extractAffiliations($: cheerio.CheerioAPI): string[] {
const div = $('[data-source="affiliation"] .pi-data-value');
if (div.length === 0) return [];
@@ -420,7 +463,7 @@ function extractAffiliations($) {
* Extract epithets from infobox
* Epithets are always between double quotes
*/
function extractEpithets($) {
function extractEpithets($: cheerio.CheerioAPI): string[] {
const div = $('[data-source="épithète"] .pi-data-value');
if (div.length === 0) return [];
@@ -446,7 +489,7 @@ function extractEpithets($) {
* Extract devil fruit from infobox
* Returns both normalized ID and URL
*/
async function extractDevilFruit($) {
async function extractDevilFruit($: cheerio.CheerioAPI): Promise<DevilFruitData | null> {
const link = $('[data-source="dfnom"] .pi-data-value a').first();
if (link.length === 0) return null;
@@ -473,7 +516,7 @@ async function extractDevilFruit($) {
};
}
} catch (error) {
console.error(`Error fetching devil fruit page: ${error.message}`);
console.error(`Error fetching devil fruit page: ${(error as Error).message}`);
}
// Fallback to the original href
@@ -486,7 +529,7 @@ async function extractDevilFruit($) {
/**
* Extract bounty from infobox
*/
function extractBounty($) {
function extractBounty($: cheerio.CheerioAPI): number | null {
const div = $('[data-source="prime"] .pi-data-value');
if (div.length === 0) return 0;
@@ -512,13 +555,13 @@ function extractBounty($) {
// Remove all non-digits
cleanText = cleanText.replace(/\D/g, '');
return cleanText || 0;
return cleanText ? parseInt(cleanText) : 0;
}
/**
* Extract height from infobox
*/
function extractHeight($) {
function extractHeight($: cheerio.CheerioAPI): number | null {
const div = $('[data-source="taille"] .pi-data-value');
if (div.length === 0) return null;
@@ -539,7 +582,7 @@ function extractHeight($) {
content = text.split('<br>').pop();
}
let cleanText = content.replace(/<[^>]*>/g, '').trim();
let cleanText = (content || '').replace(/<[^>]*>/g, '').trim();
// Remove content with parentheses
cleanText = cleanText.replace(/\([^)]*\)/g, '');
@@ -548,21 +591,21 @@ function extractHeight($) {
const normalized = cleanText.toLowerCase().replace(/\s/g, '');
if (normalized.includes('cm')) {
const digitsOnly = normalized.replace(/\D/g, '');
return digitsOnly || null;
return parseFloat(digitsOnly) || null;
}
if (normalized.includes('m')) {
const parts = normalized.split('m').filter(Boolean);
return parts.length > 0 ? parts.join('.') : null;
return parts.length > 0 ? parseFloat(parts.join('.')) : null;
}
return normalized.replace(/\D/g, '') || null;
return normalized.length > 0 ? parseFloat(normalized.replace(/\D/g, '')) : null;
}
/**
* Extract origin from infobox
*/
function extractOrigin($) {
function extractOrigin($: cheerio.CheerioAPI): string | null {
const div = $('[data-source="origine"] .pi-data-value');
if (div.length === 0) return null;
@@ -585,7 +628,7 @@ function extractOrigin($) {
/**
* Extract status from infobox
*/
function extractStatus($) {
function extractStatus($: cheerio.CheerioAPI): string | null {
const div = $('[data-source="statut"] .pi-data-value');
if (div.length === 0) return null;
@@ -604,7 +647,7 @@ function extractStatus($) {
/**
* Save data to JSON
*/
async function saveToJSON(characters) {
async function saveToJSON(characters: Character[]): Promise<void> {
const filepath = `${OUTPUT_DIR}/characters.json`;
fs.writeFileSync(filepath, JSON.stringify(characters, null, 2));
console.log(`✓ Saved to ${filepath}`);
@@ -613,7 +656,7 @@ async function saveToJSON(characters) {
/**
* Save data to CSV
*/
async function saveToCSV(characters) {
async function saveToCSV(characters: Character[]): Promise<void> {
const filepath = `${OUTPUT_DIR}/characters.csv`;
const csvWriter = createObjectCsvWriter({
path: filepath,
@@ -669,7 +712,7 @@ async function saveToCSV(characters) {
/**
* Fetch devil fruit data from fandom using provided URL
*/
async function fetchDevilFruit(devilFruitUrl, devilFruitId) {
async function fetchDevilFruit(devilFruitUrl: string, devilFruitId: string): Promise<DevilFruit | null> {
try {
console.log(`Fetching devil fruit: ${devilFruitId}...`);
@@ -680,7 +723,7 @@ async function fetchDevilFruit(devilFruitUrl, devilFruitId) {
const name = $('span.mw-page-title-main').text().trim();
// Extract type from label in infobox
let type = null;
let type: string | null = null;
const typeDiv = $('[data-source="type"] .pi-data-value');
if (typeDiv.length > 0) {
const typeText = typeDiv.text().trim().toLowerCase();
@@ -700,7 +743,7 @@ async function fetchDevilFruit(devilFruitUrl, devilFruitId) {
url: devilFruitUrl
};
} catch (error) {
console.error(`Error fetching devil fruit ${devilFruitUrl}:`, error.message);
console.error(`Error fetching devil fruit ${devilFruitUrl}:`, (error as Error).message);
return null;
}
}
@@ -708,7 +751,7 @@ async function fetchDevilFruit(devilFruitUrl, devilFruitId) {
/**
* Save devil fruits to JSON
*/
async function saveDevilFruitsToJSON(devilFruits) {
async function saveDevilFruitsToJSON(devilFruits: DevilFruit[]): Promise<void> {
const filepath = `${OUTPUT_DIR}/devil-fruits.json`;
fs.writeFileSync(filepath, JSON.stringify(devilFruits, null, 2));
console.log(`✓ Saved to ${filepath}`);
@@ -717,7 +760,7 @@ async function saveDevilFruitsToJSON(devilFruits) {
/**
* Save devil fruits to CSV
*/
async function saveDevilFruitsToCSV(devilFruits) {
async function saveDevilFruitsToCSV(devilFruits: DevilFruit[]): Promise<void> {
const filepath = `${OUTPUT_DIR}/devil-fruits.csv`;
const csvWriter = createObjectCsvWriter({
path: filepath,
@@ -745,7 +788,7 @@ async function saveDevilFruitsToCSV(devilFruits) {
/**
* Main execution
*/
async function main() {
async function main(): Promise<void> {
const format = process.argv[2] || 'all'; // json, csv, or all
console.log(`\nOne Piece Scraper - Mode: ${format}\n`);
@@ -787,12 +830,12 @@ async function main() {
return;
}
const characters = [];
const devilFruitUrls = new Set();
let failedCharacters = [...characterList];
const characters: Character[] = [];
const devilFruitUrls = new Set<string>();
let failedCharacters: CharacterListItem[] = [...characterList];
while (failedCharacters.length > 0) {
const nextFailedCharacters = [];
const nextFailedCharacters: CharacterListItem[] = [];
console.log(`\nFetching ${failedCharacters.length} characters...`);
for (let i = 0; i < failedCharacters.length; i++) {
@@ -828,7 +871,7 @@ async function main() {
// Add arc IDs to character data
if (data.firstAppearance) {
const arc = arcsList.find(a => a.startChapter <= parseInt(data.firstAppearance) && (a.endChapter === null || a.endChapter >= parseInt(data.firstAppearance)));
const arc = arcsList.find(a => a.startChapter <= data.firstAppearance && (a.endChapter === null || a.endChapter >= data.firstAppearance));
if (arc) {
data.arcId = arc.id;
}
@@ -857,7 +900,7 @@ async function main() {
if (devilFruitUrls.size === 0) {
console.warn('No devil fruits found from characters, skipping...\n');
} else {
const devilFruits = [];
const devilFruits: DevilFruit[] = [];
const devilFruitUrlArray = Array.from(devilFruitUrls);
for (let i = 0; i < devilFruitUrlArray.length; i++) {
@@ -886,7 +929,7 @@ async function main() {
}
// Update characters with normalized devil fruit IDs
const devilFruitMap = new Map(devilFruits.map(df => [df.id, df.id]));
const devilFruitMap = new Map<string, string>(devilFruits.map(df => [df.id, df.id]));
characters.forEach(char => {
if (char.devilFruitUrl) {
const normalizedId = normalizeId(char.devilFruitUrl);

View File

@@ -7,7 +7,7 @@ import { db } from '$lib/server/db';
export const auth = betterAuth({
baseURL: env.ORIGIN,
secret: env.BETTER_AUTH_SECRET,
secret: env.BETTER_AUTH_SECRET || 'secret',
database: drizzleAdapter(db, { provider: 'sqlite' }),
emailAndPassword: { enabled: true },
plugins: [sveltekitCookies(getRequestEvent)] // make sure this is the last plugin in the array

View File

@@ -3,8 +3,6 @@ import { createClient } from '@libsql/client';
import * as schema from './schema';
import { env } from '$env/dynamic/private';
if (!env.DATABASE_URL) throw new Error('DATABASE_URL is not set');
const client = createClient({ url: env.DATABASE_URL });
const client = createClient({ url: env.DATABASE_URL || 'file:local.db' });
export const db = drizzle(client, { schema });

View File

@@ -1,4 +1,4 @@
import adapter from '@sveltejs/adapter-auto';
import adapter from '@sveltejs/adapter-node';
/** @type {import('@sveltejs/kit').Config} */
const config = {