#!/usr/bin/env python3
"""
extract_archive.py - Extract archives (zip, tar, rar) and organize the
resources they contain under clients/{client}/assets/.

Usage:
    python extract_archive.py <path_or_url> --client <client>
    python extract_archive.py brand_assets.zip --client demo_co_srl
    python extract_archive.py https://example.com/assets.zip --client demo_co_srl

Options:
    --keep-archive  Keep the archive copy (default: delete it after extraction)
    --verbose       Detailed logging
    --dry-run       Simulate without extracting
"""

import os
import sys
import argparse
import zipfile
import tarfile
import shutil
import hashlib
import tempfile
import urllib.parse
import urllib.request
from pathlib import Path
from datetime import datetime

# rarfile is optional (it also requires the external `unrar` tool).
try:
    import rarfile
    HAS_RAR = True
except ImportError:
    HAS_RAR = False

# Destination folder -> keywords that select it when found in a file name.
CATEGORY_KEYWORDS = {
    'images/logo': ['logo', 'marchio', 'brand', 'logotipo'],
    'images/prodotto': ['prodotto', 'product', 'item', 'articolo'],
    'images/team': ['team', 'staff', 'ufficio', 'office', 'persone', 'people'],
    'images/stock': ['sfondo', 'background', 'texture', 'stock'],
    'videos/promo': ['promo', 'reel', 'trailer', 'advertisement'],
    'videos/tutorial': ['tutorial', 'howto', 'demo', 'dimostrazione', 'guida'],
    'documents/brand': ['brand', 'guideline', 'manual', 'linee guida'],
    'documents/product': ['scheda', 'datasheet', 'spec', 'specifiche'],
}

# Extension sets used by get_file_type (lower-case, without the dot).
_IMAGE_EXTS = frozenset(
    ['jpg', 'jpeg', 'png', 'gif', 'webp', 'svg', 'bmp', 'tiff'])
_VIDEO_EXTS = frozenset(['mp4', 'mov', 'avi', 'mkv', 'webm', 'wmv'])
_DOC_EXTS = frozenset(
    ['pdf', 'doc', 'docx', 'txt', 'md', 'ppt', 'pptx', 'xls', 'xlsx'])

# File-name suffixes handled through the tarfile module.
_TAR_SUFFIXES = ('.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2',
                 '.tar.xz', '.txz')


def get_file_type(filename):
    """Return the coarse file type for *filename* based on its extension.

    Returns one of 'images', 'videos', 'documents' or 'other'. A name
    without an extension is always 'other', even when the bare name happens
    to equal a known extension (e.g. a file called ``pdf``).
    """
    ext = os.path.splitext(filename)[1].lower().lstrip('.')

    if ext in _IMAGE_EXTS:
        return 'images'
    if ext in _VIDEO_EXTS:
        return 'videos'
    if ext in _DOC_EXTS:
        return 'documents'
    return 'other'


def categorize_file(filename, file_type):
    """Pick the destination category for a file from keywords in its name.

    Only categories whose top-level folder equals *file_type* are considered;
    the first one with a keyword contained in the lower-cased name wins.
    Without a match the result is ``'<file_type>/'``, or ``'misc/'`` when
    *file_type* is 'other'.
    """
    name = filename.lower()

    for category, keywords in CATEGORY_KEYWORDS.items():
        if category.split('/')[0] != file_type:
            continue
        if any(keyword in name for keyword in keywords):
            return category

    # Fallback: base folder for the type.
    return f"{file_type}/" if file_type != 'other' else 'misc/'


def get_file_size(path):
    """Return the size of the file at *path* in bytes."""
    return os.path.getsize(path)


def format_size(size_bytes):
    """Format a byte count with one decimal, using B/KB/MB/GB/TB units."""
    for unit in ['B', 'KB', 'MB', 'GB']:
        if size_bytes < 1024:
            return f"{size_bytes:.1f} {unit}"
        size_bytes /= 1024
    return f"{size_bytes:.1f} TB"


def download_file(url, dest_path, verbose=False):
    """Download *url* to *dest_path*.

    The data is first written to ``<dest_path>.part`` and moved into place
    only after the transfer succeeded, so a failed download never leaves a
    truncated file at *dest_path*.

    Returns True on success, False on failure (the error is printed).
    """
    if verbose:
        print(f"๐Ÿ“ฅ Download: {url}")

    part_path = f"{dest_path}.part"
    try:
        urllib.request.urlretrieve(url, part_path)
        os.replace(part_path, dest_path)
    except Exception as e:
        # Reporting boundary: any failure is printed and turned into False.
        print(f"โŒ Errore download: {e}")
        try:
            os.remove(part_path)
        except OSError:
            pass
        return False

    if verbose:
        print(f"โœ… Download completato: {dest_path}")
    return True


def _extract_tar_safely(tar_ref, extract_to):
    """Extract an open tar archive without letting members escape *extract_to*.

    Uses tarfile's 'data' extraction filter when the running Python provides
    it. Otherwise every member path is checked by hand and only regular files
    and directories are extracted.

    Raises tarfile.TarError (or a subclass) when a member points outside
    *extract_to*.
    """
    if hasattr(tarfile, 'data_filter'):
        tar_ref.extractall(extract_to, filter='data')
        return

    root = os.path.realpath(extract_to)
    safe_members = []
    for member in tar_ref.getmembers():
        target = os.path.realpath(os.path.join(root, member.name))
        if os.path.commonpath([root, target]) != root:
            raise tarfile.TarError(
                f"percorso non sicuro nell'archivio: {member.name}")
        # Links and device nodes are skipped: they can point anywhere.
        if member.isfile() or member.isdir():
            safe_members.append(member)
    tar_ref.extractall(extract_to, members=safe_members)


def extract_archive(archive_path, extract_to, verbose=False):
    """Extract *archive_path* into *extract_to*.

    Supported formats: zip, tar (plain, gzip, bzip2, xz) and, when the
    optional ``rarfile`` module is importable, rar. The format is chosen
    from the file name, case-insensitively.

    Returns the list of member names reported by the archive, or an empty
    list when the format is unsupported or extraction fails (the error is
    printed).
    """
    filename = os.path.basename(archive_path)
    lowered = filename.lower()
    ext = lowered.split('.')[-1]

    try:
        if ext == 'zip':
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
                extracted_files = zip_ref.namelist()
        elif lowered.endswith(_TAR_SUFFIXES):
            # 'r:*' lets tarfile detect the compression transparently.
            with tarfile.open(archive_path, 'r:*') as tar_ref:
                extracted_files = tar_ref.getnames()
                _extract_tar_safely(tar_ref, extract_to)
        elif ext == 'rar':
            if not HAS_RAR:
                print("โŒ Supporto RAR non disponibile. Installa: pip install rarfile unrar")
                return []
            with rarfile.RarFile(archive_path, 'r') as rar_ref:
                rar_ref.extractall(extract_to)
                extracted_files = rar_ref.namelist()
        else:
            print(f"โŒ Formato .{ext} non supportato. Usa zip, tar.gz, o rar.")
            return []
    except Exception as e:
        # Reporting boundary: callers only look at the (empty) result.
        print(f"โŒ Errore estrazione: {e}")
        return []

    if verbose:
        print(f"โœ… Estratti {len(extracted_files)} file")
    return extracted_files


def organize_files(temp_dir, assets_dir, client, verbose=False):
    """Copy the files found under *temp_dir* into category folders.

    Every category folder listed in CATEGORY_KEYWORDS is created under
    *assets_dir*. Each file (hidden files and ``Thumbs.db`` are skipped) is
    copied into the folder chosen by categorize_file; when a file with the
    same name already exists, a ``_<n>`` suffix is appended to the base name.

    *client* is accepted for interface compatibility and is not used.

    Returns a list of dicts with the keys 'original', 'destination'
    (relative to *assets_dir*), 'type', 'category' and 'size'.
    """
    organized = []

    # Create the category folder structure.
    for folder in CATEGORY_KEYWORDS:
        os.makedirs(os.path.join(assets_dir, folder), exist_ok=True)

    for root, _dirs, files in os.walk(temp_dir):
        for filename in files:
            # Skip hidden and system files.
            if filename.startswith('.') or filename == 'Thumbs.db':
                continue

            src_path = os.path.join(root, filename)
            file_type = get_file_type(filename)
            category = categorize_file(filename, file_type)

            dest_folder = os.path.join(assets_dir, category)
            # Fallback categories such as 'misc/' are not part of the
            # structure created above, so make sure the folder exists.
            os.makedirs(dest_folder, exist_ok=True)
            dest_path = os.path.join(dest_folder, filename)

            # Resolve name clashes with a numeric suffix.
            base, ext = os.path.splitext(filename)
            counter = 1
            while os.path.exists(dest_path):
                dest_path = os.path.join(dest_folder, f"{base}_{counter}{ext}")
                counter += 1

            shutil.copy2(src_path, dest_path)

            organized.append({
                'original': filename,
                'destination': os.path.relpath(dest_path, assets_dir),
                'type': file_type,
                'category': category,
                'size': get_file_size(dest_path),
            })

            if verbose:
                print(f" ๐Ÿ“ {filename} โ†’ {category}/")

    return organized


def log_operation(client, archive_name, organized_files, ops_log_path):
    """Append a Markdown summary of the operation to *ops_log_path*.

    The entry contains a timestamp, the archive name, the number of
    organized files and a per-type table (images, videos, documents) with
    count and total size. The file is written as UTF-8 because the entry
    contains non-ASCII characters.

    *client* is accepted for interface compatibility and is not used.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')

    def summarize(file_type):
        """Return (count, formatted total size) for one file type."""
        sizes = [f['size'] for f in organized_files if f['type'] == file_type]
        return len(sizes), format_size(sum(sizes))

    image_count, image_size = summarize('images')
    video_count, video_size = summarize('videos')
    doc_count, doc_size = summarize('documents')

    log_entry = f"""
## {timestamp} โ€” Archivist Upload

- **Archivio:** `{archive_name}`
- **File estratti:** {len(organized_files)}
- **Status:** โœ… Completato

### Dettagli

| Tipo | Count | Dimensione Totale |
|------|-------|-------------------|
| Immagini | {image_count} | {image_size} |
| Video | {video_count} | {video_size} |
| Documenti | {doc_count} | {doc_size} |
"""

    with open(ops_log_path, 'a', encoding='utf-8') as f:
        f.write(log_entry)


def main():
    """Command-line entry point: fetch, extract, organize and log an archive.

    Exits with status 1 when the client folder or the archive is missing,
    when the download fails or when nothing could be extracted. With
    ``--dry-run`` the planned action is printed and nothing is created.
    """
    parser = argparse.ArgumentParser(description='Estrae archivi e organizza risorse')
    parser.add_argument('path_or_url', help='Path archivio o URL')
    parser.add_argument('--client', required=True, help='Nome cliente (cartella clients/{client}/)')
    parser.add_argument('--keep-archive', action='store_true', help='Mantieni archivio originale')
    parser.add_argument('--verbose', action='store_true', help='Log dettagliato')
    parser.add_argument('--dry-run', action='store_true', help='Simula senza estrazione')

    args = parser.parse_args()

    # Workspace layout.
    workspace = Path.home() / '.openclaw' / 'workspace' / 'agency-skills-suite'
    client_dir = workspace / 'clients' / args.client
    assets_dir = client_dir / 'assets'
    archive_dir = assets_dir / 'archive'
    ops_log = client_dir / 'ops' / 'run_log.md'

    if not client_dir.exists():
        print(f"โŒ Cartella cliente non trovata: {client_dir}")
        print(f" Crea prima il progetto con agency-orchestrator")
        sys.exit(1)

    is_url = args.path_or_url.startswith(('http://', 'https://', 'ftp://'))

    if is_url:
        # Use only the path component so query strings and fragments do not
        # end up in the file name.
        archive_name = os.path.basename(
            urllib.parse.urlsplit(args.path_or_url).path)
        if not archive_name:
            print(f"Errore: nome archivio non ricavabile dall'URL: {args.path_or_url}")
            sys.exit(1)
        archive_path = archive_dir / archive_name

        if args.dry_run:
            print(f"๐Ÿ” [DRY-RUN] Download: {args.path_or_url} โ†’ {archive_path}")
            sys.exit(0)
    else:
        archive_path = Path(args.path_or_url)
        archive_name = archive_path.name

        if not archive_path.is_file():
            print(f"โŒ File non trovato: {archive_path}")
            sys.exit(1)

        if args.dry_run:
            print(f"๐Ÿ” [DRY-RUN] Estrai: {archive_path} โ†’ {assets_dir}")
            sys.exit(0)

    # Directories are created only past the dry-run exits, so that a dry run
    # leaves the file system untouched.
    os.makedirs(archive_dir, exist_ok=True)
    os.makedirs(client_dir / 'ops', exist_ok=True)
    stored_archive = archive_dir / archive_name

    if is_url:
        if not download_file(args.path_or_url, str(stored_archive), args.verbose):
            sys.exit(1)
    else:
        try:
            shutil.copy2(archive_path, stored_archive)
        except shutil.SameFileError:
            # The archive already lives in archive/: nothing to copy.
            pass

    if args.verbose:
        print(f"\n๐Ÿ“ฆ Archivio: {archive_name}")
        print(f"๐Ÿ“ Destinazione: {assets_dir}")
        print()

    # A fresh, uniquely named scratch directory avoids picking up leftovers
    # from an earlier interrupted run; it is removed on every exit path.
    temp_dir = tempfile.mkdtemp(prefix='.temp_extract_', dir=archive_dir)
    try:
        print("๐Ÿ”„ Estrazione in corso...")
        extracted = extract_archive(str(stored_archive), temp_dir, args.verbose)

        if not extracted:
            print("โŒ Nessun file estratto")
            sys.exit(1)

        print("\n๐Ÿ—‚๏ธ Organizzazione file...")
        organized = organize_files(temp_dir, assets_dir, args.client, args.verbose)
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)

    log_operation(args.client, archive_name, organized, ops_log)

    # Delete the stored archive unless --keep-archive was given.
    if not args.keep_archive:
        os.remove(stored_archive)
        if args.verbose:
            print(f"\n๐Ÿ—‘๏ธ Archivio originale eliminato")

    # Summary.
    print(f"\nโœ… Completato!")
    print(f" ๐Ÿ“ฆ File estratti: {len(organized)}")
    print(f" ๐Ÿ“ Cartella: {assets_dir}")
    print(f" ๐Ÿ“ Log: {ops_log}")
    print(f"\n๐Ÿ‘‰ Prossimo step: python scripts/scan_resources.py --client {args.client}")


if __name__ == '__main__':
    main()