agency-skills-suite/agency-archivist/scripts/extract_archive.js
AgentePotente 94642c3501 PORTING: Python → Node.js per agency-archivist scripts
Motivazione:
- Node.js già installato (v25.7.0), zero privilegi necessari
- Nessuna dipendenza npm richiesta (usa built-in modules)
- Tool di sistema per estrazione: unzip, tar, identify (ImageMagick)
- Più gestibile in ambienti senza sudo

Cambiamenti:
- extract_archive.py → extract_archive.js (11.6KB)
  - Usa execSync per unzip/tar/unrar
  - Stessa logica, zero dipendenze esterne

- scan_resources.py → scan_resources.js (13.4KB)
  - Usa ImageMagick identify per metadata immagini
  - ffprobe opzionale per video
  - Genera tag e use case automaticamente

- generate_catalog.py → generate_catalog.js (8.7KB)
  - Stesso output markdown
  - Zero dipendenze

- README.md aggiornato con comandi Node.js
- SKILL.md aggiornato con riferimenti corretti

Dipendenze opzionali (tool di sistema):
- unrar: Supporto archivi RAR
- ffmpeg/ffprobe: Metadata video avanzati
2026-03-10 23:44:14 +01:00

366 lines
11 KiB
JavaScript
Executable file
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* extract_archive.js — Estrae archivi (zip, tar, rar) e organizza risorse
*
* Usage:
* node extract_archive.js <path_or_url> --client <client_name>
* node extract_archive.js brand_assets.zip --client demo_co_srl
* node extract_archive.js https://example.com/assets.zip --client demo_co_srl
*
* Options:
* --keep-archive Mantieni file originale
* --verbose Log dettagliato
* --dry-run Simula senza estrazione
*/
const fs = require('fs');
const os = require('os');
const path = require('path');
const { execFileSync, execSync } = require('child_process');
// Keyword → folder mapping.
// Each key is a destination folder of the form "<type>/<subfolder>"; each value
// is the list of keywords that route a file into that folder. categorizeFile()
// does a substring match against the lowercased filename, only considers keys
// whose "<type>" prefix equals the file's detected type, and returns the first
// matching key in declaration order.
const CATEGORY_KEYWORDS = {
'images/logo': ['logo', 'marchio', 'brand', 'logotipo'],
'images/prodotto': ['prodotto', 'product', 'item', 'articolo'],
'images/team': ['team', 'staff', 'ufficio', 'office', 'persone', 'people'],
'images/stock': ['sfondo', 'background', 'texture', 'stock'],
'videos/promo': ['promo', 'reel', 'trailer', 'advertisement'],
'videos/tutorial': ['tutorial', 'howto', 'demo', 'dimostrazione', 'guida'],
'documents/brand': ['brand', 'guideline', 'manual', 'linee guida'],
'documents/product': ['scheda', 'datasheet', 'spec', 'specifiche'],
};
/**
 * Classify a file by its extension.
 *
 * @param {string} filename - File name or path; only the extension is inspected
 *   (case-insensitively).
 * @returns {string} 'images', 'videos' or 'documents', or 'other' when the
 *   extension is missing or not recognised.
 */
function getFileType(filename) {
  const extension = path.extname(filename).slice(1).toLowerCase();

  // Ordered [type, extensions] pairs; the first list containing the extension wins.
  const extensionsByType = [
    ['images', ['jpg', 'jpeg', 'png', 'gif', 'webp', 'svg', 'bmp', 'tiff']],
    ['videos', ['mp4', 'mov', 'avi', 'mkv', 'webm', 'wmv']],
    ['documents', ['pdf', 'doc', 'docx', 'txt', 'md', 'ppt', 'pptx', 'xls', 'xlsx']],
  ];

  const match = extensionsByType.find(([, extensions]) => extensions.includes(extension));
  return match ? match[0] : 'other';
}
/**
 * Pick the destination folder for a file from keywords found in its name.
 *
 * Only CATEGORY_KEYWORDS entries whose "<type>/" prefix equals `fileType` are
 * considered; the first entry (in declaration order) with a keyword contained
 * in the lowercased filename is returned.
 *
 * @param {string} filename - File name to inspect.
 * @param {string} fileType - Type as returned by getFileType().
 * @returns {string} The matching category key, otherwise `${fileType}/`, or
 *   'misc/' when `fileType` is 'other'.
 */
function categorizeFile(filename, fileType) {
  const needle = filename.toLowerCase();

  const matchedFolder = Object.keys(CATEGORY_KEYWORDS).find((folder) => {
    const [folderType] = folder.split('/');
    return folderType === fileType
      && CATEGORY_KEYWORDS[folder].some((word) => needle.includes(word));
  });

  if (matchedFolder !== undefined) {
    return matchedFolder;
  }
  if (fileType === 'other') {
    return 'misc/';
  }
  return `${fileType}/`;
}
/**
 * Render a byte count as a human-readable string with one decimal place.
 *
 * The value is divided by 1024 until it drops below 1024, stepping through
 * B, KB, MB and GB; anything that is still >= 1024 GB is reported in TB.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} For example '1.5 KB'.
 */
function formatSize(bytes) {
  const labels = ['B', 'KB', 'MB', 'GB'];
  let value = bytes;
  let index = 0;

  while (index < labels.length) {
    if (value < 1024) {
      return `${value.toFixed(1)} ${labels[index]}`;
    }
    value = value / 1024;
    index += 1;
  }

  return `${value.toFixed(1)} TB`;
}
/**
 * Download a remote file to disk by invoking the system `curl` binary.
 *
 * @param {string} url - Source URL.
 * @param {string} destPath - File path the response body is written to.
 * @param {boolean} [verbose=false] - Log progress and let curl write to the
 *   terminal.
 * @returns {boolean} true when curl exits successfully; false on any failure
 *   (curl missing, network error, HTTP error status). The error is reported on
 *   stderr.
 */
function downloadFile(url, destPath, verbose = false) {
  try {
    if (verbose) console.log(`📥 Download: ${url}`);
    // curl is used instead of the https module for large downloads.
    // Arguments are passed as an array with no shell involved, so quotes, `;`
    // or `$()` inside the URL or path cannot be interpreted as shell syntax.
    // --fail makes curl exit non-zero on HTTP errors instead of saving the
    // server's error page as if it were the archive.
    execFileSync('curl', ['--fail', '-L', '-o', destPath, url], {
      stdio: verbose ? 'inherit' : 'pipe',
    });
    if (verbose) console.log(`✅ Download completato: ${destPath}`);
    return true;
  } catch (error) {
    console.error(`❌ Errore download: ${error.message}`);
    return false;
  }
}
/**
 * Extract an archive into a directory with the system `unzip`, `tar` or
 * `unrar` binary and return the entry names that tool lists for the archive.
 *
 * Supported formats: `.zip`, gzip-compressed tar (`.tar.gz`, `.tgz`), plain
 * `.tar`, and `.rar`.
 *
 * @param {string} archivePath - Path of the archive file.
 * @param {string} extractTo - Destination directory; created if missing.
 * @param {boolean} [verbose=false] - Let the extraction tool write to the
 *   terminal.
 * @returns {string[]} Entry names as listed by the tool (directory entries may
 *   be included). An empty array is returned on any failure or for an
 *   unsupported format; the reason is printed on stderr.
 */
function extractArchive(archivePath, extractTo, verbose = false) {
  const filename = path.basename(archivePath);
  const ext = path.extname(filename).toLowerCase();
  const stdio = verbose ? 'inherit' : 'pipe';
  // Tool output is buffered when stdio is 'pipe'; allow large archives.
  const maxBuffer = 64 * 1024 * 1024;

  // Tools are spawned directly (no shell) with an argument array, so special
  // characters in paths cannot be interpreted as shell syntax.
  const run = (command, args) => {
    execFileSync(command, args, { stdio, maxBuffer });
  };
  // Listing modes below print one entry name per line, so names containing
  // spaces survive and no head/tail trimming of table headers is needed.
  const list = (command, args) =>
    execFileSync(command, args, { encoding: 'utf8', maxBuffer })
      .split('\n')
      .filter((line) => line.trim());

  try {
    fs.mkdirSync(extractTo, { recursive: true });

    if (ext === '.zip') {
      run('unzip', ['-o', archivePath, '-d', extractTo]);
      // -Z1: zipinfo mode, file names only.
      return list('unzip', ['-Z1', archivePath]);
    }

    const isGzipTar = ext === '.tgz' || (ext === '.gz' && filename.includes('.tar'));
    if (isGzipTar || ext === '.tar') {
      run('tar', [isGzipTar ? '-xzf' : '-xf', archivePath, '-C', extractTo]);
      return list('tar', [isGzipTar ? '-tzf' : '-tf', archivePath]);
    }

    if (ext === '.rar') {
      // unrar documents the destination as a path ending in a separator.
      const destination = extractTo.endsWith(path.sep) ? extractTo : `${extractTo}${path.sep}`;
      try {
        run('unrar', ['x', '-o+', archivePath, destination]);
        // "lb": bare listing, one name per line.
        return list('unrar', ['lb', archivePath]);
      } catch (error) {
        // Only a missing binary means "RAR support not available"; any other
        // failure (corrupt archive, I/O error) is reported by the outer handler.
        if (error.code === 'ENOENT') {
          console.error('❌ Supporto RAR non disponibile. Installa: sudo apt-get install unrar');
          return [];
        }
        throw error;
      }
    }

    console.error(`❌ Formato ${ext} non supportato. Usa zip, tar.gz, o rar.`);
    return [];
  } catch (error) {
    console.error(`❌ Errore estrazione: ${error.message}`);
    return [];
  }
}
/**
 * Copy every extracted file into the categorised asset folder structure.
 *
 * Files are discovered by walking `tempDir` recursively. Hidden files (names
 * starting with '.') and 'Thumbs.db' are skipped, as is anything that is not a
 * regular file (for example symlinks shipped inside an archive). Each file is
 * classified with getFileType()/categorizeFile() and copied to
 * `<assetsDir>/<category>/`; on a name clash a numeric suffix (`_1`, `_2`, ...)
 * is appended before the extension.
 *
 * @param {string} tempDir - Directory holding the extracted files.
 * @param {string} assetsDir - Root of the client's assets folder.
 * @param {string} client - Client name; not used by this function, kept for
 *   call compatibility.
 * @param {boolean} [verbose=false] - Log every copied file.
 * @returns {Array<{original: string, destination: string, type: string,
 *   category: string, size: number}>} One record per copied file;
 *   `destination` is relative to `assetsDir`, `size` is in bytes.
 */
function organizeFiles(tempDir, assetsDir, client, verbose = false) {
  const organized = [];

  // Create the standard folder structure up front.
  const folders = [
    'images/logo', 'images/prodotto', 'images/team', 'images/stock',
    'videos/promo', 'videos/tutorial',
    'documents/brand', 'documents/product',
  ];
  for (const folder of folders) {
    fs.mkdirSync(path.join(assetsDir, folder), { recursive: true });
  }

  // Recursive walk returning the full paths of the files to organise.
  function walkDir(dir) {
    const files = [];
    const entries = fs.readdirSync(dir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        files.push(...walkDir(fullPath));
      } else if (
        // Regular files only: copying through a symlink would pull in content
        // from outside the extraction directory (or fail on a dangling link).
        entry.isFile()
        && !entry.name.startsWith('.')
        && entry.name !== 'Thumbs.db'
      ) {
        files.push(fullPath);
      }
    }
    return files;
  }

  for (const srcPath of walkDir(tempDir)) {
    const filename = path.basename(srcPath);
    const fileType = getFileType(filename);
    const category = categorizeFile(filename, fileType);
    const destFolder = path.join(assetsDir, category);

    // Fallback categories such as 'misc/' are not part of the pre-created
    // structure, so make sure the target folder exists before copying.
    fs.mkdirSync(destFolder, { recursive: true });

    // Avoid overwriting an existing file: append _1, _2, ... until free.
    const ext = path.extname(filename);
    const base = path.basename(filename, ext);
    let destPath = path.join(destFolder, filename);
    let counter = 1;
    while (fs.existsSync(destPath)) {
      destPath = path.join(destFolder, `${base}_${counter}${ext}`);
      counter++;
    }

    fs.copyFileSync(srcPath, destPath);
    const stats = fs.statSync(destPath);
    const destination = path.relative(assetsDir, destPath);

    organized.push({
      original: filename,
      destination,
      type: fileType,
      category,
      size: stats.size,
    });

    if (verbose) {
      console.log(`  📁 ${filename} → ${destination}`);
    }
  }

  return organized;
}
/**
 * Append a Markdown summary of an upload to the operations log.
 *
 * The entry contains a timestamp taken from `Date#toISOString()` truncated to
 * minutes, the archive name, the total number of organised files, and a table
 * with count and total size for images, videos and documents.
 *
 * @param {string} client - Client name; not used by this function.
 * @param {string} archiveName - Name of the processed archive.
 * @param {Array<{type: string, size: number}>} organizedFiles - Records
 *   describing the organised files.
 * @param {string} opsLogPath - Log file to append to.
 * @returns {void}
 */
function logOperation(client, archiveName, organizedFiles, opsLogPath) {
  const isoNow = new Date().toISOString();
  const stamp = isoNow.slice(0, 16).replace('T', ' ');

  // Count and formatted total size of the files of one type.
  const summarize = (kind) => {
    const matching = organizedFiles.filter((file) => file.type === kind);
    let totalBytes = 0;
    for (const file of matching) {
      totalBytes += file.size;
    }
    return { count: matching.length, total: formatSize(totalBytes) };
  };

  const imageSummary = summarize('images');
  const videoSummary = summarize('videos');
  const documentSummary = summarize('documents');

  // Leading and trailing empty items give the entry its surrounding newlines.
  const entryLines = [
    '',
    `## ${stamp} — Archivist Upload`,
    `- **Archivio:** \`${archiveName}\``,
    `- **File estratti:** ${organizedFiles.length}`,
    '- **Status:** ✅ Completato',
    '### Dettagli',
    '| Tipo | Count | Dimensione Totale |',
    '|------|-------|-------------------|',
    `| Immagini | ${imageSummary.count} | ${imageSummary.total} |`,
    `| Video | ${videoSummary.count} | ${videoSummary.total} |`,
    `| Documenti | ${documentSummary.count} | ${documentSummary.total} |`,
    '',
  ];

  fs.appendFileSync(opsLogPath, entryLines.join('\n'));
}
/**
 * CLI entry point: parse arguments, fetch or copy the archive into the
 * client's `assets/archive` folder, extract it into a temporary directory,
 * organise the extracted files, append an entry to the operations log and
 * print a summary.
 *
 * Arguments (read from process.argv):
 *   <path_or_url>      Local archive path or http(s)/ftp URL (required).
 *   --client <name>    Client folder name (required).
 *   --keep-archive     Keep the archive copy after extraction.
 *   --verbose          Detailed logging.
 *   --dry-run          Print what would be done and exit without changes.
 *
 * The process exits with status 1 on invalid usage or on any failed step.
 */
function main() {
  const args = process.argv.slice(2);

  // Parse arguments.
  let pathOrUrl = null;
  let client = null;
  let keepArchive = false;
  let verbose = false;
  let dryRun = false;
  for (let i = 0; i < args.length; i++) {
    if (args[i] === '--client' && args[i + 1]) {
      client = args[++i];
    } else if (args[i] === '--keep-archive') {
      keepArchive = true;
    } else if (args[i] === '--verbose') {
      verbose = true;
    } else if (args[i] === '--dry-run') {
      dryRun = true;
    } else if (!args[i].startsWith('--')) {
      pathOrUrl = args[i];
    }
  }

  if (!pathOrUrl || !client) {
    console.error('Usage: node extract_archive.js <path_or_url> --client <client_name>');
    console.error('Options: --keep-archive, --verbose, --dry-run');
    process.exit(1);
  }

  // The client name becomes a path segment: reject anything that could escape
  // the clients/ folder (path separators, '.' or '..').
  if (client !== path.basename(client) || client === '.' || client === '..') {
    console.error(`❌ Nome cliente non valido: ${client}`);
    process.exit(1);
  }

  // Paths.
  const workspace = path.join(os.homedir(), '.openclaw', 'workspace', 'agency-skills-suite');
  const clientDir = path.join(workspace, 'clients', client);
  const assetsDir = path.join(clientDir, 'assets');
  const archiveDir = path.join(assetsDir, 'archive');
  const opsLog = path.join(clientDir, 'ops', 'run_log.md');

  // The client folder must already exist.
  if (!fs.existsSync(clientDir)) {
    console.error(`❌ Cartella cliente non trovata: ${clientDir}`);
    console.error('   Crea prima il progetto con agency-orchestrator');
    process.exit(1);
  }

  // URL or local path?
  const isUrl = pathOrUrl.startsWith('http://') || pathOrUrl.startsWith('https://') || pathOrUrl.startsWith('ftp://');
  const archiveName = isUrl
    ? path.basename(pathOrUrl.split('?')[0])
    : path.basename(pathOrUrl);
  // Location of the archive copy inside the client's archive/ folder.
  const storedArchive = path.join(archiveDir, archiveName);

  if (!isUrl && !fs.existsSync(pathOrUrl)) {
    console.error(`❌ File non trovato: ${pathOrUrl}`);
    process.exit(1);
  }

  // Dry run: report the planned action before anything is created on disk.
  if (dryRun) {
    if (isUrl) {
      console.log(`🔍 [DRY-RUN] Download: ${pathOrUrl} → ${storedArchive}`);
    } else {
      console.log(`🔍 [DRY-RUN] Estrai: ${pathOrUrl} → ${assetsDir}`);
    }
    process.exit(0);
  }

  // Create working folders.
  fs.mkdirSync(archiveDir, { recursive: true });
  fs.mkdirSync(path.join(clientDir, 'ops'), { recursive: true });

  if (isUrl) {
    if (!downloadFile(pathOrUrl, storedArchive, verbose)) {
      process.exit(1);
    }
  } else {
    // Copy into archive/ so the caller's original file is never touched.
    fs.copyFileSync(pathOrUrl, storedArchive);
  }

  if (verbose) {
    console.log(`\n📦 Archivio: ${archiveName}`);
    console.log(`📁 Destinazione: ${assetsDir}`);
    console.log();
  }

  // Extract into a temporary folder, then organise its content.
  const tempDir = path.join(archiveDir, '.temp_extract');
  fs.mkdirSync(tempDir, { recursive: true });
  console.log('🔄 Estrazione in corso...');

  let organized;
  try {
    const extracted = extractArchive(storedArchive, tempDir, verbose);
    if (extracted.length === 0) {
      console.error('❌ Nessun file estratto');
      // Set the exit status and return (instead of process.exit) so the
      // finally block below still removes the temporary folder.
      process.exitCode = 1;
      return;
    }

    console.log('\n🗂  Organizzazione file...');
    organized = organizeFiles(tempDir, assetsDir, client, verbose);
  } finally {
    // Always remove the temporary folder, even when a step above throws.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }

  // Log the operation.
  logOperation(client, archiveName, organized, opsLog);

  // Delete the archive copy unless --keep-archive was given.
  if (!keepArchive) {
    fs.unlinkSync(storedArchive);
    if (verbose) console.log('\n🗑  Archivio originale eliminato');
  }

  // Summary.
  console.log('\n✅ Completato!');
  console.log(`   📦 File estratti: ${organized.length}`);
  console.log(`   📁 Cartella: ${assetsDir}`);
  console.log(`   📝 Log: ${opsLog}`);
  console.log(`\n👉 Prossimo step: node scripts/scan_resources.js --client ${client}`);
}
// Invoke the CLI entry point as soon as this module is loaded.
main();