From 95b43a174137950da15151f1d181e9ddeedd6f64 Mon Sep 17 00:00:00 2001 From: Nick Krzemienski Date: Tue, 14 Jan 2025 10:29:59 -0500 Subject: [PATCH] WIP: updating readme and backend to use scdl --- README.md | 90 ++++++++++++++++++++---- backend/Dockerfile | 4 ++ backend/app/tasks.py | 145 ++++++++++++++++++++++++++++++--------- backend/requirements.txt | 3 + docker-compose.yml | 17 +++++ 5 files changed, 214 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index b822a07..f52a734 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ FlacJacket is a web application that analyzes long audio files (like DJ mixes an - Track metadata display - High-quality track downloads - Real-time analysis status updates +- Task monitoring dashboard ## Tech Stack @@ -17,24 +18,48 @@ FlacJacket is a web application that analyzes long audio files (like DJ mixes an - Flask (Python web framework) - SQLAlchemy (Database ORM) - Celery (Async task processing) -- yt-dlp (YouTube/SoundCloud downloading) +- yt-dlp (YouTube downloading) +- scdl (SoundCloud downloading) - librosa (Audio processing) +- PostgreSQL (Database) +- Redis (Message broker) +- Flower (Celery monitoring) +- Structlog (Structured logging) ### Frontend - Next.js 14 - TypeScript - Tailwind CSS -- Axios for API calls +- Material-UI components +- React Hooks for state management ## Setup ### Prerequisites -- Python 3.8+ -- Node.js 18+ -- Redis (for Celery) -- PostgreSQL (recommended) or SQLite +- Docker and Docker Compose +- Git -### Backend Setup +### Quick Start with Docker + +1. Clone the repository: +```bash +git clone https://github.com/yourusername/flacjacket.git +cd flacjacket +``` + +2. Start the application: +```bash +docker compose up -d +``` + +The services will be available at: +- Frontend: http://localhost:3003 +- Backend API: http://localhost:5001 +- Task Monitor: http://localhost:5555 + +### Manual Development Setup + +#### Backend Setup 1. Create a Python virtual environment: ```bash @@ -69,7 +94,12 @@ flask run celery -A app.celery worker --loglevel=info ``` -### Frontend Setup +7. Start Flower monitoring (optional): +```bash +celery -A app.celery flower +``` + +#### Frontend Setup 1. Install dependencies: ```bash @@ -82,15 +112,47 @@ npm install npm run dev ``` -The application will be available at http://localhost:3000 - ## API Endpoints -- `POST /api/analyze` - Start a new analysis +### Analysis +- `POST /api/analysis` - Start a new analysis + ```json + { + "url": "https://soundcloud.com/example/track" + } + ``` - `GET /api/analysis/:id` - Get analysis status and results -- `GET /api/analysis` - List all analyses -- `GET /api/tracks/:id` - Get track details -- `GET /api/tracks/:id/download` - Download a track +- `GET /api/analyses` - List all analyses +- `DELETE /api/analysis/:id` - Delete an analysis + +### Health Check +- `GET /api/health` - Check API health status + +## Architecture + +The application is composed of several Docker containers: +- `frontend`: Next.js web application +- `backend`: Flask API server +- `celery_worker`: Async task processor for audio analysis +- `postgres`: PostgreSQL database +- `redis`: Message broker for Celery +- `flower`: Celery task monitoring dashboard + +### Monitoring and Logging + +The application includes comprehensive monitoring and logging: + +1. **Task Monitor Dashboard** + - Access at http://localhost:5555 + - Real-time task status and progress + - Historical task data and statistics + - Worker status and resource usage + +2. **Structured Logging** + - Detailed task execution logs + - Error tracking and debugging information + - Performance metrics + - Audit trail for all operations ## Contributing diff --git a/backend/Dockerfile b/backend/Dockerfile index 957893c..a3c4f8d 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -7,12 +7,16 @@ RUN apt-get update && apt-get install -y \ ffmpeg \ libpq-dev \ gcc \ + git \ && rm -rf /var/lib/apt/lists/* # Install Python dependencies COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +# Install scdl from source for latest version +RUN pip install git+https://github.com/flyingrub/scdl.git + # Copy application code COPY . . diff --git a/backend/app/tasks.py b/backend/app/tasks.py index 29c2c6e..e69f437 100644 --- a/backend/app/tasks.py +++ b/backend/app/tasks.py @@ -4,47 +4,120 @@ import yt_dlp import librosa import numpy as np +from urllib.parse import urlparse +import subprocess +import structlog +from celery.utils.log import get_task_logger from .extensions import celery, db from .models import Analysis, Track +# Set up structured logging +logger = structlog.wrap_logger(get_task_logger(__name__)) + +def is_soundcloud_url(url): + parsed = urlparse(url) + return 'soundcloud.com' in parsed.netloc + def download_audio(url, output_path): - ydl_opts = { - 'format': 'bestaudio/best', - 'outtmpl': output_path, - 'postprocessors': [{ - 'key': 'FFmpegExtractAudio', - 'preferredcodec': 'wav', - }], - } - with yt_dlp.YoutubeDL(ydl_opts) as ydl: - ydl.download([url]) + log = logger.bind(url=url, output_path=output_path) + log.info('starting_audio_download') + + if is_soundcloud_url(url): + log.info('using_soundcloud_downloader') + # Use scdl for SoundCloud URLs + try: + subprocess.run([ + 'scdl', + '-l', url, # URL to download + '--path', os.path.dirname(output_path), # Download directory + '--onlymp3', # Only download MP3 format + '--name-format', os.path.basename(output_path), # Output filename + '--no-playlist', # Don't download playlists + ], check=True) + log.info('soundcloud_download_complete') + + # Convert downloaded MP3 to WAV using ffmpeg + log.info('converting_to_wav') + subprocess.run([ + 'ffmpeg', + '-i', output_path + '.mp3', + '-acodec', 'pcm_s16le', + '-ar', '44100', + output_path + '.wav' + ], check=True) + log.info('wav_conversion_complete') + + # Remove the MP3 file + os.remove(output_path + '.mp3') + log.info('cleanup_complete') + + except subprocess.CalledProcessError as e: + log.error('soundcloud_download_failed', error=str(e)) + raise Exception(f"Failed to download from SoundCloud: {str(e)}") + else: + log.info('using_youtube_downloader') + # Use yt-dlp for other URLs (YouTube, etc.) + ydl_opts = { + 'format': 'bestaudio/best', + 'outtmpl': output_path, + 'postprocessors': [{ + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'wav', + }], + 'logger': logger.bind(context='yt-dlp'), + } + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + try: + ydl.download([url]) + log.info('youtube_download_complete') + except Exception as e: + log.error('youtube_download_failed', error=str(e)) + raise Exception(f"Failed to download from YouTube: {str(e)}") def analyze_audio(file_path): - # Load the audio file - y, sr = librosa.load(file_path) - - # Perform onset detection - onset_frames = librosa.onset.onset_detect(y=y, sr=sr) - onset_times = librosa.frames_to_time(onset_frames, sr=sr) - - # Use onset times to segment the audio - segments = [] - for i in range(len(onset_times) - 1): - start_time = onset_times[i] - end_time = onset_times[i + 1] - if end_time - start_time > 30: # Only consider segments longer than 30 seconds - segments.append({ - 'start_time': float(start_time), - 'end_time': float(end_time), - 'confidence': 0.8 # Placeholder confidence - }) + log = logger.bind(file_path=file_path) + log.info('starting_audio_analysis') - return segments + try: + # Load the audio file + log.info('loading_audio_file') + y, sr = librosa.load(file_path) + + # Perform onset detection + log.info('performing_onset_detection') + onset_frames = librosa.onset.onset_detect(y=y, sr=sr) + onset_times = librosa.frames_to_time(onset_frames, sr=sr) + + # Use onset times to segment the audio + log.info('segmenting_audio') + segments = [] + for i in range(len(onset_times) - 1): + start_time = onset_times[i] + end_time = onset_times[i + 1] + if end_time - start_time > 30: # Only consider segments longer than 30 seconds + segments.append({ + 'start_time': float(start_time), + 'end_time': float(end_time), + 'confidence': 0.8 # Placeholder confidence + }) + + log.info('analysis_complete', segments_count=len(segments)) + return segments + except Exception as e: + log.error('analysis_failed', error=str(e)) + raise -@celery.task -def process_audio_url(analysis_id): +@celery.task(bind=True) +def process_audio_url(self, analysis_id): + log = logger.bind( + analysis_id=analysis_id, + task_id=self.request.id, + ) + log.info('starting_audio_processing') + analysis = Analysis.query.get(analysis_id) if not analysis: + log.error('analysis_not_found') return analysis.status = 'processing' @@ -53,14 +126,22 @@ def process_audio_url(analysis_id): try: # Create temporary directory for processing with tempfile.TemporaryDirectory() as temp_dir: + log.info('created_temp_directory', path=temp_dir) + # Download the audio output_path = os.path.join(temp_dir, 'audio') + log.info('downloading_audio') download_audio(analysis.url, output_path) + # Update task state + self.update_state(state='ANALYZING') + # Analyze the audio file + log.info('analyzing_audio') segments = analyze_audio(output_path + '.wav') # Create track entries + log.info('creating_track_entries') for i, segment in enumerate(segments): track = Track( analysis_id=analysis_id, @@ -73,8 +154,10 @@ def process_audio_url(analysis_id): analysis.status = 'completed' analysis.completed_at = datetime.utcnow() + log.info('processing_completed') except Exception as e: + log.error('processing_failed', error=str(e)) analysis.status = 'failed' analysis.error_message = str(e) diff --git a/backend/requirements.txt b/backend/requirements.txt index 175b82a..ae50413 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -14,3 +14,6 @@ pytest-flask==1.3.0 celery==5.3.6 redis==5.0.1 gunicorn==21.2.0 +scdl==2.7.3 +flower==2.0.1 +structlog==23.2.0 diff --git a/docker-compose.yml b/docker-compose.yml index effa147..5fc86d6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -44,10 +44,25 @@ services: volumes: - ./backend:/app - backend_uploads:/app/uploads + - celery_logs:/var/log/celery depends_on: - postgres - redis + flower: + build: ./backend + command: celery -A app.celery flower --port=5555 + environment: + CELERY_BROKER_URL: redis://redis:6379/0 + CELERY_RESULT_BACKEND: redis://redis:6379/0 + ports: + - "5555:5555" + volumes: + - flower_data:/data + depends_on: + - redis + - celery_worker + frontend: build: ./frontend environment: @@ -60,3 +75,5 @@ services: volumes: postgres_data: backend_uploads: + celery_logs: + flower_data: