refactor: enhance character data transformation and improve fetching logic in character-related scripts

This commit is contained in:
2026-03-14 18:32:43 +01:00
parent 8b08950719
commit b1cc691422
8 changed files with 129 additions and 102 deletions

View File

@@ -119,6 +119,7 @@ function transformCharacterData(item: CharacterRecord) {
return {
id: item.id,
name: item.name,
frName: toNullable(item.frName),
gender: toNullable(item.gender),
age: toNullable(item.age),
affiliations: toJsonArray(item.affiliations),
@@ -137,7 +138,8 @@ function transformCharacterData(item: CharacterRecord) {
frEpithets: toJsonArray(item.frEpithets),
status: toNullable(item.status),
arcId: toNullable(item.arcId),
url: toNullable(item.url)
url: toNullable(item.url),
frUrl: toNullable(item.frUrl)
};
}

View File

@@ -276,13 +276,12 @@ async function saveArcsToCSV(arcs: Arc[]): Promise<void> {
}
/**
* Fetch all canon characters from the One Piece Fandom wiki using the API
* Fetch all canon characters from the One Piece Fandom wiki, including their full data.
*/
async function fetchAllCharactersUrl(): Promise<CharacterListItem[]> {
async function fetchAllCharacters(arcsList: Arc[]): Promise<Character[]> {
try {
const apiUrl = `${FANDOM_API_BASE}List_of_Canon_Characters`;
console.log('Fetching character list via API...');
const response = await fetchWithRetry(apiUrl);
const response = await fetchWithRetry(`${FANDOM_API_BASE}List_of_Canon_Characters`);
const jsonData = await response.json();
// Extract HTML from API response
@@ -292,7 +291,7 @@ async function fetchAllCharactersUrl(): Promise<CharacterListItem[]> {
}
const $ = cheerio.load(htmlContent);
const characters: CharacterListItem[] = [];
const characterList: CharacterListItem[] = [];
$('table.fandom-table tbody tr').each((index, element) => {
if (index === 0) return; // Skip header row
let charUrl = $(element).find('td:nth-child(2) a').attr('href');
@@ -304,27 +303,99 @@ async function fetchAllCharactersUrl(): Promise<CharacterListItem[]> {
charChapter = charChapter.replace(/\D/g, '');
// If charChapter is empty, skip the character as it means they don't have a proper page and are just mentioned in the list
if (!charChapter) {
return;
}
if (parseInt(charChapter, 10) === 0) {
if (!charChapter || parseInt(charChapter, 10) === 0) {
return;
}
if (charUrl) {
charUrl = charUrl.replace('/wiki/', '');
characters.push({
characterList.push({
name: charName,
url: charUrl,
chapter: parseInt(charChapter, 10)
});
}
});
console.log(`Found ${characters.length} characters.`);
if (characterList.length === 0) {
console.error('No characters found.');
return [];
}
console.log(`Found ${characterList.length} characters.`);
// Fetch the French character list to get the picture URLs
console.log('Fetching French character list via API...');
const frResponse = await fetchWithRetry(`${FR_FANDOM_API_BASE}Liste_des_Personnages_Canon`);
const frJsonData = await frResponse.json();
// Extract the HTML from the French API response, then build a map from character name (as listed in the French table) to picture URL
const frHtmlContent = frJsonData.parse?.text?.['*'];
const fr$ = cheerio.load(frHtmlContent);
const frCharacterPictureMap: Record<string, string> = {};
fr$('table.wikitable tbody tr').each((index, element) => {
if (index === 0) return; // Skip header row
const charName = fr$(element).find('td:nth-child(2) a').text().trim();
const pictureUrl = fr$(element).find('td:nth-child(1) img').attr('data-src') || fr$(element).find('td:nth-child(1) img').attr('src') || null;
if (charName && pictureUrl) {
frCharacterPictureMap[charName] = pictureUrl;
}
});
const characters: Character[] = [];
let failedCharacters: CharacterListItem[] = [...characterList];
while (failedCharacters.length > 0) {
const nextFailedCharacters: CharacterListItem[] = [];
console.log(`\nFetching ${failedCharacters.length} characters...`);
for (let i = 0; i < failedCharacters.length; i += FETCH_CONCURRENCY) {
const batch = failedCharacters.slice(i, i + FETCH_CONCURRENCY);
const batchResults = await Promise.all(
batch.map(async (char) => {
const data = await fetchCharacter(char.url, char.name, char.chapter, arcsList, frCharacterPictureMap);
return { char, data };
})
);
for (const { char, data } of batchResults) {
if (data) {
console.table({
ID: data.id,
Name: data.name,
Gender: data.gender,
Age: data.age,
Status: data.status,
Epithets: data.epithets.join(', '),
Affiliations: data.affiliations.join(', '),
DevilFruitId: data.devilFruitId,
DevilFruitUrl: data.devilFruitUrl,
HakiObservation: data.hakiObservation ? 'Yes' : 'No',
HakiArmament: data.hakiArmament ? 'Yes' : 'No',
HakiConqueror: data.hakiConqueror ? 'Yes' : 'No',
Height: data.height,
Bounty: data.bounty,
Origin: data.origin,
FirstAppearance: data.firstAppearance,
pictureUrl: data.pictureUrl,
FandomURL: data.url
});
characters.push(data);
} else {
nextFailedCharacters.push(char);
}
}
}
failedCharacters = nextFailedCharacters;
if (failedCharacters.length > 0) {
console.log(`⚠️ ${failedCharacters.length} characters failed. Retrying...`);
}
}
console.log(`\n✓ Scraped ${characters.length} characters\n`);
return characters;
} catch (error) {
console.error('Error fetching character list:', (error as Error).message);
console.error('Error fetching characters:', (error as Error).message);
return [];
}
}
@@ -336,7 +407,8 @@ async function fetchCharacter(
characterUrl: string,
characterName: string,
characterChapter: number,
arcsList: Arc[]
arcsList: Arc[],
frCharacterPictureMap: Record<string, string>
): Promise<Character | null> {
try {
console.log(`Fetching: ${characterName}...`);
@@ -453,6 +525,8 @@ async function fetchCharacter(
frName = name;
}
const pictureUrl = frCharacterPictureMap[frName || ''] || null;
return {
id: finalCharacterId,
name,
@@ -475,7 +549,7 @@ async function fetchCharacter(
firstAppearance,
arcId,
status,
pictureUrl: 'Image_Non_Disponible',
pictureUrl,
url: characterUrl,
frUrl
};
@@ -934,72 +1008,17 @@ async function main(): Promise<void> {
}
// Step 2: Scraping Characters
console.log('=== Step 1: Scraping Characters ===\n');
const characterList = await fetchAllCharactersUrl();
console.log('=== Step 2: Scraping Characters ===\n');
const characters = await fetchAllCharacters(arcsList);
if (characterList.length === 0) {
if (characters.length === 0) {
console.error('No characters found. Exiting.');
return;
}
const characters: Character[] = [];
const devilFruitUrls = new Set<string>();
let failedCharacters: CharacterListItem[] = [...characterList];
while (failedCharacters.length > 0) {
const nextFailedCharacters: CharacterListItem[] = [];
console.log(`\nFetching ${failedCharacters.length} characters...`);
for (let i = 0; i < failedCharacters.length; i += FETCH_CONCURRENCY) {
const batch = failedCharacters.slice(i, i + FETCH_CONCURRENCY);
const batchResults = await Promise.all(
batch.map(async (char) => {
const data = await fetchCharacter(char.url, char.name, char.chapter, arcsList);
return { char, data };
})
);
for (const { char, data } of batchResults) {
if (data) {
console.table({
ID: data.id,
Name: data.name,
Gender: data.gender,
Age: data.age,
Status: data.status,
Epithets: data.epithets.join(', '),
Affiliations: data.affiliations.join(', '),
DevilFruitId: data.devilFruitId,
DevilFruitUrl: data.devilFruitUrl,
HakiObservation: data.hakiObservation ? 'Yes' : 'No',
HakiArmament: data.hakiArmament ? 'Yes' : 'No',
HakiConqueror: data.hakiConqueror ? 'Yes' : 'No',
Height: data.height,
Bounty: data.bounty,
Origin: data.origin,
FirstAppearance: data.firstAppearance,
pictureUrl: data.pictureUrl,
FandomURL: data.url
});
if (data.devilFruitUrl) {
devilFruitUrls.add(data.devilFruitUrl);
}
characters.push(data);
} else {
nextFailedCharacters.push(char);
}
}
}
failedCharacters = nextFailedCharacters;
if (failedCharacters.length > 0) {
console.log(`⚠️ ${failedCharacters.length} characters failed. Retrying...`);
}
}
console.log(`\n✓ Scraped ${characters.length} characters\n`);
const devilFruitUrls = new Set<string>(
characters.filter((c) => c.devilFruitUrl).map((c) => c.devilFruitUrl!)
);
console.log(`✓ Found ${devilFruitUrls.size} unique devil fruits\n`);
// Step 3: Scraping Devil Fruits

View File

@@ -1,6 +1,7 @@
<script lang="ts">
import { page } from '$app/stores';
import ProfileButton from '$lib/components/ProfileButton.svelte';
import { resolve } from '$app/paths';
let { children, data } = $props();
@@ -29,9 +30,9 @@
<h2 class="text-lg font-black uppercase tracking-[0.15em] text-amber-50">Admin</h2>
</div>
<nav class="flex-1 space-y-2 px-3">
{#each navItems as item}
{#each navItems as item (item.label)}
<a
href={item.href}
href={resolve(item.href)}
class={`flex items-center gap-3 rounded-lg px-4 py-3 text-sm font-medium transition-colors ${
isActive(item.href, $page.url.pathname)
? 'bg-amber-600 text-white'
@@ -45,7 +46,7 @@
</nav>
<div class="border-t border-white/5 p-3">
<a
href="/"
href={resolve('/')}
class="flex items-center gap-2 rounded-lg px-4 py-3 text-sm font-medium text-gray-300 transition-colors hover:bg-slate-800 hover:text-white"
title="Return to site"
>

View File

@@ -26,6 +26,7 @@ async function upsertCharacterFromScrapeValidation(characterId: string): Promise
.values({
id: scraped.id,
name: scraped.name,
frName: scraped.frName,
gender: scraped.gender,
age: scraped.age,
affiliations: scraped.affiliations,
@@ -36,17 +37,21 @@ async function upsertCharacterFromScrapeValidation(characterId: string): Promise
bounty: scraped.bounty,
height: scraped.height,
origin: scraped.origin,
frOrigin: scraped.frOrigin,
firstAppearance: scraped.firstAppearance,
pictureUrl: scraped.pictureUrl,
epithets: scraped.epithets,
frEpithets: scraped.frEpithets,
status: scraped.status,
arcId: scraped.arcId,
url: scraped.url
url: scraped.url,
frUrl: scraped.frUrl,
})
.onConflictDoUpdate({
target: character.id,
set: {
name: scraped.name,
frName: scraped.frName,
gender: scraped.gender,
age: scraped.age,
affiliations: scraped.affiliations,
@@ -57,12 +62,15 @@ async function upsertCharacterFromScrapeValidation(characterId: string): Promise
bounty: scraped.bounty,
height: scraped.height,
origin: scraped.origin,
frOrigin: scraped.frOrigin,
firstAppearance: scraped.firstAppearance,
pictureUrl: scraped.pictureUrl,
epithets: scraped.epithets,
frEpithets: scraped.frEpithets,
status: scraped.status,
arcId: scraped.arcId,
url: scraped.url
url: scraped.url,
frUrl: scraped.frUrl
}
});
@@ -101,6 +109,7 @@ export async function load() {
const differences: Record<string, { current: any; scraped: any }> = {};
const fieldsToCompare = [
'name',
'frName',
'gender',
'age',
'affiliations',
@@ -111,12 +120,15 @@ export async function load() {
'bounty',
'height',
'origin',
'frOrigin',
'firstAppearance',
'pictureUrl',
'epithets',
'frEpithets',
'status',
'arcId',
'url'
'url',
'frUrl'
];
for (const field of fieldsToCompare) {

View File

@@ -1,6 +1,4 @@
<script lang="ts">
import { page } from '$app/stores';
let { data, form } = $props();
const newCharacters = $derived(data.changes.filter((c: any) => c.type === 'new'));

View File

@@ -13,7 +13,10 @@
let { data }: Props = $props();
let configItems = $state<ConfigItem[]>([]);
let configItems = $derived(data.config.map((item) => ({
key: item.key,
value: item.value ?? ''
})));
let newKey = $state('');
let newValue = $state('');
let editingKey = $state<string | null>(null);
@@ -21,12 +24,7 @@
let isSaving = $state(false);
let saveMessage = $state<{ type: 'success' | 'error'; text: string } | null>(null);
$effect(() => {
configItems = data.config.map((item) => ({
key: item.key,
value: item.value ?? ''
}));
});
;
const startEdit = (item: ConfigItem) => {
editingKey = item.key;
@@ -70,6 +68,7 @@
saveMessage = { type: 'error', text: 'Failed to add config' };
}
} catch (error) {
console.error('Error adding config:', error);
saveMessage = { type: 'error', text: 'Error adding config' };
} finally {
isSaving = false;
@@ -99,6 +98,7 @@
saveMessage = { type: 'error', text: 'Failed to delete config' };
}
} catch (error) {
console.error('Error deleting config:', error);
saveMessage = { type: 'error', text: 'Error deleting config' };
} finally {
isSaving = false;
@@ -155,7 +155,7 @@
</tr>
</thead>
<tbody>
{#each configItems as item}
{#each configItems as item (item.key)}
{#if editingKey === item.key}
<tr class="border-b border-white/5 bg-slate-800/50">
<td class="px-6 py-4 text-sm text-white">{item.key}</td>

View File

@@ -11,7 +11,6 @@
let searchQuery = $state('');
let filterType = $state<'all' | 'Paramecia' | 'Zoan' | 'Logia' | 'Unknown'>('all');
let isEditModalOpen = $state(false);
let selectedFruitId = $state<string | null>(null);
let isSaving = $state(false);
let saveMessage = $state<{ type: 'success' | 'error'; text: string } | null>(null);
@@ -33,14 +32,12 @@
});
const openEditModal = (fruit: any) => {
selectedFruitId = fruit.id;
editForm = { ...fruit };
isEditModalOpen = true;
};
const closeModal = () => {
isEditModalOpen = false;
selectedFruitId = null;
editForm = {
id: '',
name: '',
@@ -88,6 +85,7 @@
}, 3000);
}
} catch (error) {
console.error('Error deleting devil fruit:', error);
saveMessage = {
type: 'error',
text: 'Error deleting devil fruit'
@@ -150,7 +148,7 @@
</tr>
</thead>
<tbody>
{#each filteredFruits as fruit}
{#each filteredFruits as fruit (fruit.id)}
<tr class="border-b border-white/5 hover:bg-slate-800/50">
<td class="px-6 py-4 text-sm text-white">{fruit.name}</td>
<td class="px-6 py-4 text-sm">
@@ -233,7 +231,7 @@
bind:value={editForm.type}
class="mt-1 w-full rounded-lg bg-slate-700 px-4 py-2 text-sm text-white outline-none transition focus:ring-2 focus:ring-amber-600"
>
{#each fruitTypes as type}
{#each fruitTypes as type (type)}
<option value={type}>{type}</option>
{/each}
</select>

View File

@@ -13,7 +13,6 @@
let isEditModalOpen = $state(false);
let isSaving = $state(false);
let saveMessage = $state<{ type: 'success' | 'error'; message: string } | null>(null);
let selectedUserId = $state<string | null>(null);
let editForm = $state<any>({
id: '',
@@ -35,7 +34,6 @@
});
const openEditModal = (usr: any) => {
selectedUserId = usr.id;
editForm = { ...usr };
isEditModalOpen = true;
saveMessage = null;
@@ -43,7 +41,6 @@
const closeModal = () => {
isEditModalOpen = false;
selectedUserId = null;
editForm = {
id: '',
name: '',
@@ -120,7 +117,7 @@
</tr>
</thead>
<tbody>
{#each filteredUsers as usr}
{#each filteredUsers as usr (usr.id)}
<tr class="border-b border-white/5 hover:bg-slate-800/50">
<td class="px-6 py-4 text-sm text-white">{usr.name}</td>
<td class="px-6 py-4 text-sm text-gray-400">{usr.email}</td>