agency-skills-suite/agency-archivist/scripts/scan_resources.py
AgentePotente b1f3ba033e STEP 2: agency-archivist - Nuova skill gestione risorse
- agency-archivist/SKILL.md: Skill per upload, estrazione, catalogazione
- scripts/extract_archive.py: Estrazione zip/URL in clients/{client}/assets/
- scripts/scan_resources.py: Scansione metadata (2 passate: base + vision)
- scripts/generate_catalog.py: Generazione catalogo.md con tag e use case
- references/resource_types.md: Tipologie risorse e use case per skill
- agency-orchestrator/SKILL.md: Integrazione archivist in Fase 1
  - Step opzionale upload risorse
  - Sezione dedicata gestione risorse
  - Comportamento proattivo (richiesta risorse mancanti)
  - Pattern per altre skill

Integrazione completa: orchestrator → archivist → visual-generator/design/web/social
2026-03-10 23:29:41 +01:00

345 lines
12 KiB
Python
Executable file

#!/usr/bin/env python3
"""
scan_resources.py — Scansiona risorse in clients/{client}/assets/ ed estrae metadata
Usage:
python scan_resources.py --client <client_name> --pass 1|2
python scan_resources.py --client demo_co_srl --pass 1
python scan_resources.py --client demo_co_srl --pass 2 --vision
Options:
--pass 1 Solo metadata base (veloce, sempre disponibile)
--pass 2 Analisi contenuto (richiede modello vision)
--vision Abilita analisi visione (opzionale, richiede API)
--output Path output JSON (default: assets/.metadata.json)
--verbose Log dettagliato
"""
import os
import sys
import argparse
import json
from pathlib import Path
from datetime import datetime
from PIL import Image
import mimetypes
def get_file_metadata(filepath):
    """Collect basic metadata for a single file.

    Args:
        filepath: Location of the file, as a ``pathlib.Path`` or a plain string.

    Returns:
        A dict with the keys ``filename``, ``path``, ``extension`` (lowercase,
        without the dot), ``size_bytes``, ``size_formatted``, ``modified``
        (ISO-8601 string in naive local time) and ``mime_type``
        (``'application/octet-stream'`` when the type cannot be guessed).
        Raster images additionally get ``width``, ``height``, ``resolution``,
        ``mode`` and ``format``; RGB/RGBA images also get ``dominant_colors``
        (up to three hex strings). Videos get ``type`` set to ``'video'``.
        When an image cannot be decoded, ``error`` holds the reason instead.

    Raises:
        OSError: If the file cannot be stat'ed (missing file, broken symlink).
    """
    # Accept plain strings too: the code below relies on Path attributes.
    filepath = Path(filepath)
    stat = os.stat(filepath)
    metadata = {
        'filename': os.path.basename(filepath),
        'path': str(filepath),
        'extension': filepath.suffix.lower().lstrip('.'),
        'size_bytes': stat.st_size,
        'size_formatted': format_size(stat.st_size),
        'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
        'mime_type': mimetypes.guess_type(str(filepath))[0] or 'application/octet-stream',
    }

    mime_type = metadata['mime_type']
    if mime_type.startswith('image/'):
        # Pillow only decodes raster formats; trying to open a vector file
        # would just record a misleading "error" for every SVG asset.
        if metadata['extension'] not in ('svg', 'svgz'):
            try:
                with Image.open(filepath) as img:
                    metadata['width'] = img.width
                    metadata['height'] = img.height
                    metadata['resolution'] = f"{img.width}x{img.height}"
                    metadata['mode'] = img.mode
                    metadata['format'] = img.format
                    # Simplified dominant colours: shrink to 50x50 so that
                    # getcolors(2500) can always enumerate every pixel value.
                    if img.mode in ('RGB', 'RGBA'):
                        thumbnail = img.resize((50, 50))
                        colors = thumbnail.getcolors(2500)
                        if colors:
                            # Entries are (count, pixel); keep the 3 most frequent.
                            top_colors = sorted(colors, reverse=True)[:3]
                            metadata['dominant_colors'] = [
                                rgb_to_hex(pixel) for _count, pixel in top_colors
                            ]
            except Exception as e:
                # Best effort: an unreadable image must not abort the scan.
                metadata['error'] = f"Errore lettura immagine: {e}"
    elif mime_type.startswith('video/'):
        # Duration and resolution would require opencv or ffprobe.
        metadata['type'] = 'video'
    return metadata
def format_size(size_bytes):
    """Render a byte count as a human-readable string with one decimal.

    The value is divided by 1024 until it drops below 1024 or the GB unit
    has been passed; anything larger is reported in TB.
    """
    units = ('B', 'KB', 'MB', 'GB')
    value = size_bytes
    index = 0
    while index < len(units):
        if value < 1024:
            return f"{value:.1f} {units[index]}"
        value = value / 1024
        index += 1
    return f"{value:.1f} TB"
def rgb_to_hex(rgb):
    """Convert an RGB(A) pixel into a ``#rrggbb`` hex string.

    Args:
        rgb: An indexable value whose first three items are the red, green
            and blue channels. Extra items (e.g. alpha) are ignored.

    Returns:
        The lowercase hex colour. Channels are clamped to 0-255 so the result
        is always a well-formed 7-character string. ``'#000000'`` is returned
        when the input cannot be interpreted as three numeric channels.
    """
    try:
        channels = [int(rgb[i]) for i in range(3)]
    except (TypeError, ValueError, IndexError, KeyError, OverflowError):
        # Malformed pixel value: fall back to black instead of failing.
        return '#000000'
    red, green, blue = (min(255, max(0, channel)) for channel in channels)
    return f'#{red:02x}{green:02x}{blue:02x}'
def categorize_file(filename, filepath):
    """Assign a category to a file from its folder and, failing that, its name.

    Folder markers are checked first, in the order listed below; the first
    match wins. If no folder matches, the lowercase file name is searched
    for keywords, again in table order.

    Args:
        filename: Base name of the file.
        filepath: Full path of the file (``str`` or ``pathlib.Path``).

    Returns:
        One of ``'logo'``, ``'prodotto'``, ``'team'``, ``'stock'``, ``'promo'``,
        ``'tutorial'``, ``'brand_guidelines'``, ``'product_docs'`` or
        ``'generic'`` when nothing matches.
    """
    # Normalise the platform separator so the '/name/' markers also match
    # on Windows, where str(Path) uses backslashes.
    path_str = str(filepath).replace(os.sep, '/').lower()
    filename_lower = filename.lower()

    folder_rules = (
        (('/logo/',), 'logo'),
        (('/prodotto/', '/product/'), 'prodotto'),
        (('/team/', '/people/'), 'team'),
        (('/stock/', '/background/'), 'stock'),
        (('/promo/', '/reel/'), 'promo'),
        (('/tutorial/', '/howto/'), 'tutorial'),
        (('/brand/', '/guideline/'), 'brand_guidelines'),
        # '/product/' is already claimed by 'prodotto' above, so only
        # '/datasheet/' can ever select this category.
        (('/datasheet/',), 'product_docs'),
    )
    for markers, category in folder_rules:
        if any(marker in path_str for marker in markers):
            return category

    # Fall back to keywords contained in the file name.
    keywords = {
        'logo': ['logo', 'marchio', 'brand'],
        'prodotto': ['prodotto', 'product', 'item'],
        'team': ['team', 'staff', 'ufficio', 'people'],
        'stock': ['sfondo', 'background', 'texture'],
        'promo': ['promo', 'reel', 'trailer'],
        'tutorial': ['tutorial', 'howto', 'demo'],
    }
    for category, words in keywords.items():
        if any(word in filename_lower for word in words):
            return category
    return 'generic'
def generate_tags(metadata, category):
"""Genera tag automatici dai metadata."""
tags = []
# Tag da categoria
tags.append(category)
# Tag da tipo file
ext = metadata.get('extension', '')
if ext in ['png']:
tags.append('trasparente' if metadata.get('mode') == 'RGBA' else 'png')
elif ext in ['jpg', 'jpeg']:
tags.append('jpg')
elif ext in ['svg']:
tags.append('vettoriale')
# Tag da dimensioni
if metadata.get('width'):
w = metadata['width']
h = metadata.get('height', 0)
if w >= 1920 and h >= 1080:
tags.append('fullhd')
if w >= 3000:
tags.append('highres')
if w == h:
tags.append('quadrato')
elif w > h:
tags.append('orizzontale')
else:
tags.append('verticale')
# Tag da colori
if 'dominant_colors' in metadata:
colors = metadata['dominant_colors']
if '#ffffff' in colors or '#f0f0f0' in colors:
tags.append('sfondochiaro')
if '#000000' in colors or '#1a1a1a' in colors:
tags.append('sfondoscuro')
return list(set(tags))
def scan_directory(assets_dir, pass_level=1, verbose=False):
    """Walk the asset folders and build one metadata record per file.

    Only the ``images``, ``videos`` and ``documents`` sub-folders of
    ``assets_dir`` are scanned (recursively); missing folders and hidden
    files (names starting with a dot) are skipped.

    Args:
        assets_dir: Root of the client's assets (``str`` or ``pathlib.Path``).
        pass_level: Accepted for interface compatibility; this function does
            not read it.
        verbose: When true, print progress for every folder and file.

    Returns:
        A list of metadata dicts, each enriched with ``category``, ``tags``,
        ``use_cases`` and ``description``. Files are visited in sorted order
        so the result is reproducible across runs and filesystems.
    """
    assets_dir = Path(assets_dir)
    resources = []

    for folder in ('images', 'videos', 'documents'):
        folder_path = assets_dir / folder
        if not folder_path.exists():
            continue
        if verbose:
            print(f"📁 Scansione {folder}/...")

        for root, dirs, files in os.walk(folder_path):
            # Sorting in place makes os.walk descend in a deterministic order.
            dirs.sort()
            for filename in sorted(files):
                if filename.startswith('.'):
                    continue
                filepath = Path(root) / filename
                if verbose:
                    print(f" 🔍 {filename}")

                try:
                    metadata = get_file_metadata(filepath)
                except OSError as exc:
                    # A single unreadable entry (e.g. a broken symlink) must
                    # not abort the whole scan.
                    print(f" ⚠️ File saltato: {filepath} ({exc})", file=sys.stderr)
                    continue

                category = categorize_file(filename, filepath)
                metadata['category'] = category
                metadata['tags'] = generate_tags(metadata, category)
                metadata['use_cases'] = suggest_use_cases(category, metadata)
                metadata['description'] = generate_base_description(filename, category, metadata)
                resources.append(metadata)
    return resources
def suggest_use_cases(category, metadata):
    """Return suggested use cases for a resource.

    The suggestions come from a per-category table; unknown categories get a
    single general-purpose entry. Resources whose ``width`` is at least 1920
    additionally get a high-quality print suggestion.
    """
    catalog = {
        'logo': ['Header sito', 'Social profile', 'Firma email', 'Biglietti da visita'],
        'prodotto': ['E-commerce', 'Social post', 'Catalogo', 'Ads'],
        'team': ['About page', 'LinkedIn', 'Presentazioni', 'Stampa'],
        'stock': ['Sfondi sito', 'Social post', 'Presentazioni', 'Blog'],
        'promo': ['Social ads', 'Homepage', 'YouTube', 'Email marketing'],
        'tutorial': ['Sito web', 'YouTube', 'Supporto clienti', 'Onboarding'],
        'brand_guidelines': ['Design system', 'Coerenza brand', 'Linee guida team'],
        'product_docs': ['Schede prodotto', 'Supporto vendite', 'FAQ'],
        'generic': ['Utilizzo generale'],
    }
    if category in catalog:
        suggestions = catalog[category]
    else:
        suggestions = ['Utilizzo generale']

    # Wide images are good enough for print.
    is_wide = metadata.get('width', 0) >= 1920
    if is_wide:
        suggestions.append('Stampa alta qualità')
    return suggestions
def generate_base_description(filename, category, metadata):
    """Build a short description from the file name and its metadata.

    The extension is dropped, underscores and hyphens become spaces and the
    result is title-cased. The resolution (in parentheses) and the formatted
    size are appended when present in ``metadata``. ``category`` is not used.
    """
    stem, _extension = os.path.splitext(filename)
    label = stem.translate(str.maketrans('_-', '  ')).title()

    pieces = [label]
    resolution = metadata.get('resolution')
    if resolution:
        pieces.append(f"({resolution})")
    size_text = metadata.get('size_formatted')
    if size_text:
        pieces.append(f"{size_text}")
    return ' '.join(pieces)
def analyze_with_vision(resources, verbose=False):
    """Placeholder for a future vision-model analysis pass.

    No analysis is performed yet: when ``verbose`` is true the planned
    integration is printed, and ``resources`` is returned unchanged.
    """
    if verbose:
        planned_steps = (
            "\n👁️ Analisi visione (placeholder)",
            " Integrazione futura con API modelli vision:",
            " - GPT-4V (OpenAI)",
            " - Claude Vision (Anthropic)",
            " - Gemini Vision (Google)",
            "\n Per ogni immagine:",
            " 1. Invia immagine a API",
            " 2. Ricevi descrizione semantica",
            " 3. Estrai: oggetti, contesto, colori, testo",
            " 4. Aggiorna metadata['description'] e metadata['tags']",
        )
        for line in planned_steps:
            print(line)
    # Placeholder: the resources are handed back untouched.
    return resources
def save_metadata(resources, output_path):
    """Write the scan result to a JSON file.

    Args:
        resources: List of metadata dicts to store.
        output_path: Destination file (``str`` or ``pathlib.Path``).

    Returns:
        ``output_path``, unchanged.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    payload = {
        'generated': datetime.now().isoformat(),
        'total_resources': len(resources),
        'resources': resources,
    }
    # ensure_ascii=False writes raw non-ASCII characters, so the encoding is
    # pinned to UTF-8 instead of depending on the platform default.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)
    return output_path
def main():
    """Command-line entry point: scan a client's assets and save the metadata.

    Parses the arguments, checks that the client and assets folders exist
    (exiting with status 1 otherwise), runs the scan, optionally runs the
    vision placeholder, writes the JSON file and prints a summary.
    """
    cli = argparse.ArgumentParser(description='Scansiona risorse ed estrae metadata')
    cli.add_argument('--client', required=True, help='Nome cliente')
    cli.add_argument(
        '--pass',
        dest='pass_level',
        type=int,
        choices=[1, 2],
        default=1,
        help='Livello analisi: 1=base, 2=vision',
    )
    cli.add_argument('--vision', action='store_true', help='Abilita analisi visione')
    cli.add_argument('--output', help='Path output JSON (default: assets/.metadata.json)')
    cli.add_argument('--verbose', action='store_true', help='Log dettagliato')
    args = cli.parse_args()

    # Locate the client's folders inside the workspace.
    workspace = Path.home() / '.openclaw' / 'workspace' / 'agency-skills-suite'
    client_dir = workspace / 'clients' / args.client
    assets_dir = client_dir / 'assets'

    if not client_dir.exists():
        print(f"❌ Cartella cliente non trovata: {client_dir}")
        sys.exit(1)
    if not assets_dir.exists():
        print(f"❌ Cartella assets non trovata: {assets_dir}")
        print(" Esegui prima: python scripts/extract_archive.py")
        sys.exit(1)

    # An explicit --output wins; otherwise store next to the assets.
    output_path = args.output or assets_dir / '.metadata.json'

    if args.verbose:
        mode_label = '(vision)' if args.vision else '(base)'
        print(f"🔍 Scansione: {assets_dir}")
        print(f"📝 Output: {output_path}")
        print(f"📊 Pass: {args.pass_level} {mode_label}")
        print()

    resources = scan_directory(assets_dir, args.pass_level, args.verbose)

    # Optional vision analysis, requested by either flag.
    wants_vision = args.pass_level == 2 or args.vision
    if wants_vision:
        resources = analyze_with_vision(resources, args.verbose)

    save_metadata(resources, output_path)

    def count(predicate):
        """Number of scanned resources satisfying ``predicate``."""
        return len([entry for entry in resources if predicate(entry)])

    def is_document(entry):
        return entry['mime_type'].startswith('application/') or entry['extension'] in ['pdf', 'doc', 'docx']

    # Summary.
    print("\n✅ Scansione completata!")
    print(f" 📊 Risorse trovate: {len(resources)}")
    print(f" 📁 Immagini: {count(lambda entry: entry['mime_type'].startswith('image/'))}")
    print(f" 🎬 Video: {count(lambda entry: entry['mime_type'].startswith('video/'))}")
    print(f" 📄 Documenti: {count(is_document)}")
    print(f" 💾 Metadata: {output_path}")
    print(f"\n👉 Prossimo step: python scripts/generate_catalog.py --client {args.client}")
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()