- agency-archivist/SKILL.md: Skill per upload, estrazione, catalogazione
- scripts/extract_archive.py: Estrazione zip/URL in clients/{client}/assets/
- scripts/scan_resources.py: Scansione metadata (2 passate: base + vision)
- scripts/generate_catalog.py: Generazione catalogo.md con tag e use case
- references/resource_types.md: Tipologie risorse e use case per skill
- agency-orchestrator/SKILL.md: Integrazione archivist in Fase 1
- Step opzionale upload risorse
- Sezione dedicata gestione risorse
- Comportamento proattivo (richiesta risorse mancanti)
- Pattern per altre skill
Integrazione completa: orchestrator → archivist → visual-generator/design/web/social
345 lines
12 KiB
Python
Executable file
345 lines
12 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
scan_resources.py — Scan resources in clients/{client}/assets/ and extract metadata

Usage:
    python scan_resources.py --client <client_name> --pass 1|2
    python scan_resources.py --client demo_co_srl --pass 1
    python scan_resources.py --client demo_co_srl --pass 2 --vision

Options:
    --pass 1    Basic metadata only (fast, always available)
    --pass 2    Content analysis (requires a vision model)
    --vision    Enable vision analysis (optional, requires API)
    --output    Output JSON path (default: assets/.metadata.json)
    --verbose   Detailed logging
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import json
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from PIL import Image
|
|
import mimetypes
|
|
|
|
def get_file_metadata(filepath):
    """Extract basic metadata for a single file.

    Args:
        filepath: Location of the file, as a ``pathlib.Path`` or anything
            ``Path()`` accepts (for example a ``str``).

    Returns:
        A dict with the keys ``filename``, ``path``, ``extension`` (lowercase,
        without the dot), ``size_bytes``, ``size_formatted``, ``modified``
        (ISO-8601 string built from the file's mtime) and ``mime_type``
        (guessed from the file name, ``application/octet-stream`` when
        unknown).

        Files whose guessed MIME type is an image additionally get ``width``,
        ``height``, ``resolution``, ``mode`` and ``format``; RGB/RGBA images
        also get ``dominant_colors`` (up to three hex strings). If the image
        cannot be read, an ``error`` key holds the message instead.

        Files whose guessed MIME type is a video get ``type='video'``.

    Raises:
        OSError: If the file cannot be stat'ed.
    """
    # Coerce so callers may pass a plain string; `.suffix` below needs a Path.
    filepath = Path(filepath)
    stat = os.stat(filepath)

    metadata = {
        'filename': os.path.basename(filepath),
        'path': str(filepath),
        'extension': filepath.suffix.lower().lstrip('.'),
        'size_bytes': stat.st_size,
        'size_formatted': format_size(stat.st_size),
        'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
        # str() keeps this working on interpreters whose mimetypes module
        # does not accept path-like objects.
        'mime_type': mimetypes.guess_type(str(filepath))[0] or 'application/octet-stream',
    }

    # Image-specific metadata
    if metadata['mime_type'].startswith('image/'):
        try:
            with Image.open(filepath) as img:
                metadata['width'] = img.width
                metadata['height'] = img.height
                metadata['resolution'] = f"{img.width}x{img.height}"
                metadata['mode'] = img.mode
                metadata['format'] = img.format

                # Dominant colours (simplified): count the pixels of a small
                # thumbnail. 50x50 = 2500 pixels, matching the getcolors limit.
                if img.mode in ('RGB', 'RGBA'):
                    colors = img.resize((50, 50)).getcolors(2500)
                    if colors:
                        # Entries are (count, colour); keep the 3 most frequent.
                        top_colors = sorted(colors, reverse=True)[:3]
                        metadata['dominant_colors'] = [
                            rgb_to_hex(color) for _count, color in top_colors
                        ]
        except Exception as e:
            # Best effort: an unreadable image must not abort the scan, so the
            # problem is recorded on the entry itself.
            metadata['error'] = f"Errore lettura immagine: {e}"

    # Video-specific metadata (simplified)
    elif metadata['mime_type'].startswith('video/'):
        metadata['type'] = 'video'
        # Note: duration and resolution would need opencv or ffprobe.

    return metadata
|
|
|
|
def format_size(size_bytes):
    """Render a byte count as a one-decimal string in B, KB, MB, GB or TB.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached; anything larger is still reported in TB.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    value = size_bytes
    index = 0
    last = len(units) - 1
    # `not (value < 1024)` mirrors the original comparison exactly.
    while index < last and not (value < 1024):
        value = value / 1024
        index += 1
    return f"{value:.1f} {units[index]}"
|
|
|
|
def rgb_to_hex(rgb):
    """Convert an RGB(A) sequence into a ``#rrggbb`` hex string.

    Only the first three components are used, so RGBA tuples are accepted and
    their alpha channel is ignored.

    Args:
        rgb: Indexable of at least three values convertible with ``int()``.

    Returns:
        The lowercase hex colour, or ``'#000000'`` when ``rgb`` cannot be
        converted (wrong type, too few components, non-numeric values).
    """
    try:
        return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]), int(rgb[1]), int(rgb[2]))
    except (TypeError, ValueError, LookupError, OverflowError):
        # Only conversion/lookup failures fall back to black; a bare `except`
        # would also swallow SystemExit and KeyboardInterrupt.
        return '#000000'
|
|
|
|
def categorize_file(filename, filepath):
    """Assign a category from the containing folders, then from the file name.

    Folder rules are checked first, in order; the first folder name found as a
    full path component (``/name/``) wins. If no folder matches, the first
    keyword found as a substring of the lowercased file name wins.

    Args:
        filename: Bare file name (used for keyword matching).
        filepath: Full path of the file, as a path object or a string.

    Returns:
        One of ``'logo'``, ``'prodotto'``, ``'team'``, ``'stock'``,
        ``'promo'``, ``'tutorial'``, ``'brand_guidelines'``,
        ``'product_docs'`` or ``'generic'``.
    """
    # Normalise to forward slashes so the '/folder/' checks also work for
    # Windows-style paths, whose str() form uses backslashes.
    if hasattr(filepath, 'as_posix'):
        path_str = filepath.as_posix().lower()
    else:
        path_str = Path(filepath).as_posix().lower()
    filename_lower = filename.lower()

    # From the folder
    folder_rules = (
        ('logo', ('logo',)),
        ('prodotto', ('prodotto', 'product')),
        ('team', ('team', 'people')),
        ('stock', ('stock', 'background')),
        ('promo', ('promo', 'reel')),
        ('tutorial', ('tutorial', 'howto')),
        ('brand_guidelines', ('brand', 'guideline')),
        # '/product/' is already claimed by 'prodotto' above, so only
        # '/datasheet/' can ever select this category.
        ('product_docs', ('datasheet',)),
    )
    for category, folders in folder_rules:
        if any(f'/{folder}/' in path_str for folder in folders):
            return category

    # From the file name
    keyword_rules = (
        ('logo', ('logo', 'marchio', 'brand')),
        ('prodotto', ('prodotto', 'product', 'item')),
        ('team', ('team', 'staff', 'ufficio', 'people')),
        ('stock', ('sfondo', 'background', 'texture')),
        ('promo', ('promo', 'reel', 'trailer')),
        ('tutorial', ('tutorial', 'howto', 'demo')),
    )
    for category, words in keyword_rules:
        if any(word in filename_lower for word in words):
            return category

    return 'generic'
|
|
|
|
def generate_tags(metadata, category):
    """Derive automatic tags from a resource's metadata.

    Args:
        metadata: Metadata dict; the keys read are ``extension``, ``mode``,
            ``width``, ``height`` and ``dominant_colors`` (all optional).
        category: Category name; always becomes the first tag.

    Returns:
        List of unique tags in a stable order: category, file type,
        size/orientation tags, then background-colour tags.
    """
    # Tag from category
    tags = [category]

    # Tag from file type
    ext = metadata.get('extension', '')
    if ext == 'png':
        tags.append('trasparente' if metadata.get('mode') == 'RGBA' else 'png')
    elif ext in ('jpg', 'jpeg'):
        tags.append('jpg')
    elif ext == 'svg':
        tags.append('vettoriale')

    # Tags from dimensions
    width = metadata.get('width')
    if width:
        # A missing or None height is treated as 0 rather than failing.
        height = metadata.get('height') or 0
        if width >= 1920 and height >= 1080:
            tags.append('fullhd')
        if width >= 3000:
            tags.append('highres')
        if width == height:
            tags.append('quadrato')
        elif width > height:
            tags.append('orizzontale')
        else:
            tags.append('verticale')

    # Tags from colours
    if 'dominant_colors' in metadata:
        colors = metadata['dominant_colors']
        if '#ffffff' in colors or '#f0f0f0' in colors:
            tags.append('sfondochiaro')
        if '#000000' in colors or '#1a1a1a' in colors:
            tags.append('sfondoscuro')

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result is identical from run to run (a set would not guarantee that).
    return list(dict.fromkeys(tags))
|
|
|
|
def scan_directory(assets_dir, pass_level=1, verbose=False):
    """Scan the assets directory and build one metadata dict per file.

    Only the ``images``, ``videos`` and ``documents`` sub-folders are walked
    (recursively); missing folders and files whose name starts with a dot are
    skipped. Files are visited in sorted order so the result is reproducible.

    Args:
        assets_dir: Root assets directory (path object or string).
        pass_level: Accepted for interface compatibility; this function does
            not currently read it.
        verbose: When true, print progress for each folder and file.

    Returns:
        List of metadata dicts, each extended with ``category``, ``tags``,
        ``use_cases`` and ``description``.
    """
    assets_dir = Path(assets_dir)
    resources = []

    # Folders to scan
    for folder in ('images', 'videos', 'documents'):
        folder_path = assets_dir / folder
        if not folder_path.exists():
            continue

        if verbose:
            print(f"📁 Scansione {folder}/...")

        # Recursive walk
        for root, dirs, files in os.walk(folder_path):
            # Sorting `dirs` in place makes os.walk descend in a stable order.
            dirs.sort()
            for filename in sorted(files):
                # Skip hidden files
                if filename.startswith('.'):
                    continue

                filepath = Path(root) / filename

                if verbose:
                    print(f" 🔍 {filename}")

                # Base metadata (Pass 1)
                try:
                    metadata = get_file_metadata(filepath)
                except OSError as exc:
                    # e.g. a broken symlink or a file removed mid-scan: report
                    # it and keep going instead of losing the whole scan.
                    print(f"⚠️ File non leggibile, saltato: {filepath} ({exc})", file=sys.stderr)
                    continue

                # Category
                category = categorize_file(filename, filepath)
                metadata['category'] = category

                # Tags
                metadata['tags'] = generate_tags(metadata, category)

                # Suggested use cases (based on category)
                metadata['use_cases'] = suggest_use_cases(category, metadata)

                # Base description (file name + details)
                metadata['description'] = generate_base_description(filename, category, metadata)

                resources.append(metadata)

    return resources
|
|
|
|
def suggest_use_cases(category, metadata):
    """Return the suggested use cases for a resource.

    The list comes from a fixed per-category table (unknown categories fall
    back to the generic entry); resources at least 1920 px wide additionally
    get a high-quality print suggestion appended.
    """
    catalog = {
        'logo': ['Header sito', 'Social profile', 'Firma email', 'Biglietti da visita'],
        'prodotto': ['E-commerce', 'Social post', 'Catalogo', 'Ads'],
        'team': ['About page', 'LinkedIn', 'Presentazioni', 'Stampa'],
        'stock': ['Sfondi sito', 'Social post', 'Presentazioni', 'Blog'],
        'promo': ['Social ads', 'Homepage', 'YouTube', 'Email marketing'],
        'tutorial': ['Sito web', 'YouTube', 'Supporto clienti', 'Onboarding'],
        'brand_guidelines': ['Design system', 'Coerenza brand', 'Linee guida team'],
        'product_docs': ['Schede prodotto', 'Supporto vendite', 'FAQ'],
        'generic': ['Utilizzo generale'],
    }

    if category in catalog:
        suggestions = catalog[category]
    else:
        suggestions = ['Utilizzo generale']

    # Wide enough for print: add the extra suggestion at the end.
    wide_enough = metadata.get('width', 0) >= 1920
    if wide_enough:
        suggestions = suggestions + ['Stampa alta qualità']

    return suggestions
|
|
|
|
def generate_base_description(filename, category, metadata):
    """Build a short human-readable description from the file name.

    The extension is dropped, underscores and hyphens become spaces and the
    result is title-cased; the resolution (in parentheses) and the formatted
    size are appended when present in ``metadata``. ``category`` is not used.
    """
    stem, _extension = os.path.splitext(filename)
    # One pass replaces both separators with spaces.
    title = stem.translate(str.maketrans('_-', '  ')).title()

    pieces = [title]

    resolution = metadata.get('resolution')
    if resolution:
        pieces.append(f"({resolution})")

    size_label = metadata.get('size_formatted')
    if size_label:
        pieces.append(f"{size_label}")

    return ' '.join(pieces)
|
|
|
|
def analyze_with_vision(resources, verbose=False):
    """Placeholder for a future vision-model analysis step.

    No analysis is performed: ``resources`` is returned unchanged. With
    ``verbose`` set, a description of the planned integration (send each image
    to a vision API, receive a semantic description, update description and
    tags) is printed.
    """
    if not verbose:
        return resources

    outline = (
        "\n👁️ Analisi visione (placeholder)",
        " Integrazione futura con API modelli vision:",
        " - GPT-4V (OpenAI)",
        " - Claude Vision (Anthropic)",
        " - Gemini Vision (Google)",
        "\n Per ogni immagine:",
        " 1. Invia immagine a API",
        " 2. Ricevi descrizione semantica",
        " 3. Estrai: oggetti, contesto, colori, testo",
        " 4. Aggiorna metadata['description'] e metadata['tags']",
    )
    for line in outline:
        print(line)

    # Placeholder: resources are handed back untouched.
    return resources
|
|
|
|
def save_metadata(resources, output_path):
    """Write the scanned resources to a JSON file.

    The document contains ``generated`` (current timestamp, ISO-8601),
    ``total_resources`` and the ``resources`` list itself.

    Args:
        resources: List of JSON-serialisable metadata dicts.
        output_path: Destination file (path object or string); overwritten if
            it exists.

    Returns:
        ``output_path``, unchanged.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    payload = {
        'generated': datetime.now().isoformat(),
        'total_resources': len(resources),
        'resources': resources,
    }
    # ensure_ascii=False writes accented characters verbatim, so the encoding
    # must be pinned to UTF-8 rather than left to the platform default.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)

    return output_path
|
|
|
|
def main():
    """Command-line entry point: scan a client's assets and save the metadata.

    Parses the arguments, locates the client's ``assets`` folder under the
    workspace in the user's home directory (exiting with status 1 if the
    client or assets folder is missing), runs the scan, optionally the vision
    placeholder, writes the JSON file and prints a summary.
    """
    cli = argparse.ArgumentParser(description='Scansiona risorse ed estrae metadata')
    cli.add_argument('--client', required=True, help='Nome cliente')
    cli.add_argument(
        '--pass',
        type=int,
        choices=[1, 2],
        default=1,
        dest='pass_level',
        help='Livello analisi: 1=base, 2=vision',
    )
    cli.add_argument('--vision', action='store_true', help='Abilita analisi visione')
    cli.add_argument('--output', help='Path output JSON (default: assets/.metadata.json)')
    cli.add_argument('--verbose', action='store_true', help='Log dettagliato')
    args = cli.parse_args()

    # Paths
    workspace = Path.home() / '.openclaw' / 'workspace' / 'agency-skills-suite'
    client_dir = workspace / 'clients' / args.client
    assets_dir = client_dir / 'assets'

    if not client_dir.exists():
        print(f"❌ Cartella cliente non trovata: {client_dir}")
        sys.exit(1)

    if not assets_dir.exists():
        print(f"❌ Cartella assets non trovata: {assets_dir}")
        print(" Esegui prima: python scripts/extract_archive.py")
        sys.exit(1)

    # Output path: an explicit --output wins over the default location.
    output_path = args.output or assets_dir / '.metadata.json'

    if args.verbose:
        mode_label = '(vision)' if args.vision else '(base)'
        print(f"🔍 Scansione: {assets_dir}")
        print(f"📝 Output: {output_path}")
        print(f"📊 Pass: {args.pass_level} {mode_label}")
        print()

    # Scan
    resources = scan_directory(assets_dir, args.pass_level, args.verbose)

    # Vision analysis (optional)
    if args.vision or args.pass_level == 2:
        resources = analyze_with_vision(resources, args.verbose)

    # Save metadata
    save_metadata(resources, output_path)

    def count(predicate):
        """Number of scanned resources for which `predicate` is true."""
        return sum(1 for entry in resources if predicate(entry))

    def is_image(entry):
        return entry['mime_type'].startswith('image/')

    def is_video(entry):
        return entry['mime_type'].startswith('video/')

    def is_document(entry):
        return (
            entry['mime_type'].startswith('application/')
            or entry['extension'] in ['pdf', 'doc', 'docx']
        )

    # Summary
    print("\n✅ Scansione completata!")
    print(f" 📊 Risorse trovate: {len(resources)}")
    print(f" 📁 Immagini: {count(is_image)}")
    print(f" 🎬 Video: {count(is_video)}")
    print(f" 📄 Documenti: {count(is_document)}")
    print(f" 💾 Metadata: {output_path}")
    print(f"\n👉 Prossimo step: python scripts/generate_catalog.py --client {args.client}")
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|