40 Commits
v1.0L ... web

Author SHA1 Message Date
26c460f3de Update tooltip implementation and add TooltipContent and TooltipTrigger components 2025-07-12 01:43:34 -03:00
46de6b755a Fix theme 2025-07-12 01:32:13 -03:00
Francisco Pessano
8179ee4a2d Updated package.json 2025-07-12 01:28:44 -03:00
Francisco Pessano
b318550a29 Add dark mode, tooltips, and fix column alignment 2025-07-12 01:28:21 -03:00
Francisco Pessano
c34044d8a1 Updated pnpm-lock.yaml 2025-07-12 01:23:44 -03:00
4dfbc0c26f Refactor VideoClassifierApp to use props for videos and update VirtualTable to utilize @tanstack/react-virtual 2025-07-12 01:21:50 -03:00
3b3dcbe1d8 Fix react rendering 2025-07-12 00:50:38 -03:00
Francisco Pessano
09f34a71cd Fix VirtualizerOptions import 2025-07-12 00:40:36 -03:00
Francisco Pessano
59c4412122 Updated package.json 2025-07-12 00:40:02 -03:00
Francisco Pessano
ed8ad48310 YouTube Video Classifier Platform 2025-07-12 00:37:07 -03:00
Francisco Pessano
5c59b2a301 Updated pnpm-lock.yaml 2025-07-12 00:27:49 -03:00
e6e2f5f9cd Init Astro + Tailwind + Shadcn-ui 2025-07-12 00:25:57 -03:00
fa6007c1f3 Revise README to reflect YouTube Video Classifier features and setup instructions 2025-07-12 00:25:57 -03:00
89314f9c74 Enhance logging and configuration for YouTube Video Classifier
- Added fallback model configuration in config.ini
- Updated requirements.txt to include rich for enhanced logging
- Refactored script.py to implement rich logging for better visibility
- Modified functions to include channel link extraction and updated CSV saving logic
- Improved error handling and user feedback throughout the script
2025-07-12 00:25:56 -03:00
8c4177dca0 Add language detection and detailed sub-tags generation for YouTube videos 2025-07-12 00:25:56 -03:00
7cf2b903a8 Update requirements and refactor script for video classification automation
- Updated pynput version in requirements.txt
- Refactored script.py to enhance video classification functionality using Ollama
- Added methods for video information extraction, classification, and CSV handling
- Improved browser initialization and error handling
2025-07-12 00:25:53 -03:00
bb86ef17f3 Remove unused image files from the project 2025-07-12 00:25:48 -03:00
5e10b5a8b6 Remove Dockerfile, docker-compose.yml, and entrypoint script for YouTube Video Classifier 2025-07-12 00:25:47 -03:00
cfc2301cb2 Update Ollama host in configuration file to use localhost 2025-07-12 00:25:47 -03:00
da8aee58f4 Add .gitignore to exclude virtual environment directory 2025-07-12 00:25:47 -03:00
f69a8e78f9 Remove devcontainer usage 2025-07-12 00:25:40 -03:00
a770350f48 Add configuration file for YouTube Video Classifier 2025-07-12 00:25:40 -03:00
15e9a5dec8 Add setup script for Qwen2.5VL model in Ollama container 2025-07-12 00:25:40 -03:00
fd4645b710 Add test script for verifying Ollama connection and Qwen2.5-VL model 2025-07-12 00:25:40 -03:00
ae4ec9b96e Docker and scripts 2025-07-12 00:25:40 -03:00
0b98aa9799 Add demo script for YouTube video classification and include sample thumbnail 2025-07-12 00:25:40 -03:00
19c25908da Devcontainer setup 2025-07-12 00:25:40 -03:00
Francisco Pessano
f94ae9c31a Added .gitignore 2025-07-12 00:23:34 -03:00
Emi
78f971ec59 new comment 2025-07-07 19:27:53 -03:00
Emi
762540437f fixing one reference error 2025-07-07 17:43:12 -03:00
Emi
62d39224ca update readme 2025-07-07 17:42:41 -03:00
Emi
8972dc6aa0 relevant changes to fix errors 2025-07-07 15:38:39 -03:00
Emi
d2570ac709 new images 2025-07-07 15:37:34 -03:00
Emi
cbe20e3800 changes on readme 2025-07-06 20:39:01 -03:00
Emi
82e147ece5 changes on readme 2025-07-05 23:37:56 -03:00
Emi
cccd556675 refactor on readme 2025-07-05 23:34:45 -03:00
Emi
72b8b39076 fix error on browser location error 2025-07-05 23:21:38 -03:00
Emi
08dc823afb readme to linux branch 2025-07-05 23:18:00 -03:00
Emi
a1b11a2265 little changes on script 2025-07-05 23:16:40 -03:00
Emi
fa5df4c16c first commit to linux 2025-07-05 22:47:37 -03:00
43 changed files with 8026 additions and 36 deletions

28
.gitignore vendored Normal file
View File

@@ -0,0 +1,28 @@
venv
temp_thumbnail.png
video_classifications.csv
# build output
dist/
# generated types
.astro/
# dependencies
node_modules/
# logs
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
# environment variables
.env
.env.production
# macOS-specific files
.DS_Store
# jetbrains setting folder
.idea/

5
.vscode/extensions.json vendored Normal file
View File

@@ -0,0 +1,5 @@
{
"recommendations": [
"astro-build.astro-vscode"
]
}

214
README.md Normal file
View File

@@ -0,0 +1,214 @@
# YouTube Video Classifier
An AI-powered tool that automatically classifies YouTube videos in your "Watch Later" playlist based on their titles and thumbnails using vision-language models through Ollama.
## Features ✨
- 🤖 **AI-Powered Classification**: Uses Ollama with Qwen2.5-VL and fallback models to analyze video titles and thumbnails
- 🔄 **Robust LLM Integration**: Automatic fallback between models with increasing timeouts for reliability
- 📊 **Comprehensive CSV Storage**: Saves detailed video information including classifications, metadata, and thumbnails
- 🌐 **Multi-language Detection**: Automatically detects video language using AI
- 🏷️ **Smart Tagging**: Generates detailed sub-tags for better content organization
- 🎯 **Smart Categories**: Uses existing classifications or creates new ones automatically
- 🖥️ **Browser Automation**: Selenium-based interaction with YouTube for reliable data extraction
- 🎨 **Beautiful Logging**: Rich console output with colors and emojis for better UX
- ⌨️ **Easy Control**: Press 'q' at any time to safely quit the process
## Quick Start
### Prerequisites
- Python 3.11.10+
- Ollama installed locally
- Chrome or Chromium browser
### Setup
1. **Install Ollama**: Download from [https://ollama.ai](https://ollama.ai)
2. **Pull Required Models**:
```bash
ollama pull qwen2.5vl:7b
ollama pull gemma2:2b
```
3. **Start Ollama Service**:
```bash
ollama serve
```
4. **Clone and Setup Project**:
```bash
git clone <repository-url>
cd youtube-video-classifier
# Create virtual environment
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Install dependencies
pip install -r requirements.txt
```
5. **Configure Settings** (optional):
Edit `config.ini` to customize your setup
6. **Run the Classifier**:
```bash
python script.py
```
## How It Works 🔄
1. **Browser Initialization**: Opens Chrome/Chromium and navigates to your YouTube "Watch Later" playlist
2. **Video Detection**: Finds and extracts information from playlist videos using Selenium
3. **Data Extraction**: Captures video title, thumbnail, channel info, duration, and upload date
4. **AI Analysis**: Uses Ollama models to:
- Classify the video into categories
- Detect the primary language
- Generate detailed sub-tags
5. **Smart Fallback**: If primary model fails/times out, automatically switches to fallback model
6. **Data Storage**: Saves all information to CSV with base64-encoded thumbnails
7. **Playlist Management**: Removes processed videos from "Watch Later" playlist
8. **Continuous Processing**: Continues until all videos are processed or user quits
## Configuration
The `config.ini` file allows you to customize various settings:
```ini
[DEFAULT]
# Ollama settings
ollama_host = http://localhost:11434
ollama_model = qwen2.5vl:7b
ollama_fallback_model = gemma2:2b
# File paths
classifications_csv = video_classifications.csv
playlist_url = https://www.youtube.com/playlist?list=WL
# LLM timeout settings (in seconds)
llm_primary_timeout = 60
llm_fallback_timeout = 60
# Processing settings
enable_delete = false
enable_playlist_creation = false
```
## CSV Output Format 📋
The script creates a comprehensive CSV file with the following columns:
- `video_title`: Title of the video
- `video_url`: YouTube URL of the video
- `thumbnail_url`: Path to the saved thumbnail
- `classification`: AI-generated category
- `language`: Detected language of the video
- `channel_name`: Name of the YouTube channel
- `channel_link`: URL to the channel
- `video_length_seconds`: Duration in seconds
- `video_date`: Upload date
- `detailed_subtags`: AI-generated specific tags
- `playlist_name`: Source playlist name
- `playlist_link`: Source playlist URL
- `image_data`: Base64-encoded thumbnail data
- `timestamp`: When the classification was made
## File Structure 📁
```
├── script.py # Main classification script
├── config.ini # Configuration settings
├── requirements.txt # Python dependencies
├── video_classifications.csv # Generated results (created on first run)
└── README.md # This file
```
## Features in Detail
### AI Classification System
- **Primary Model**: Qwen2.5-VL 7B for high-quality vision-language analysis
- **Fallback Model**: Gemma2 2B for faster processing when primary model is slow
- **Timeout Management**: Automatically increases timeout periods if models are struggling
- **Continuous Retry**: Keeps trying until successful or user cancels
### Data Extraction
- **Video Metadata**: Title, URL, duration, upload date
- **Channel Information**: Name and link to channel
- **Thumbnail Capture**: Screenshots saved as base64 in CSV
- **Playlist Context**: Source playlist name and URL
### Browser Automation
- **Multiple Chrome Paths**: Automatically finds Chrome/Chromium installation
- **WebDriver Management**: Handles chromedriver setup and fallbacks
- **Robust Selectors**: Multiple CSS selectors for reliable element finding
- **Error Recovery**: Graceful handling of UI changes and loading delays
### User Experience
- **Rich Console Output**: Colored logging with emojis and status indicators
- **Progress Tracking**: Clear indication of current processing status
- **Safe Exit**: Press 'q' at any time to cleanly stop processing
- **Error Reporting**: Detailed error messages for troubleshooting
## Testing Your Setup
Before running the main script, you can test individual components:
1. **Test Ollama Connection**:
```python
import requests
response = requests.get('http://localhost:11434/api/tags')
print(response.json())
```
2. **Test Browser Automation**:
Run the script and check if Chrome opens correctly
3. **Test Model Response**:
The script will verify model availability on startup
## Troubleshooting 🔧
### Common Issues
**Ollama Connection Error**:
- Ensure Ollama is running: `ollama serve`
- Check the host URL in config.ini
- Verify models are installed: `ollama list`
**Browser Issues**:
- Install Chrome or Chromium
- Update chromedriver if needed
- Check if browser is in PATH
**Model Timeout**:
- The script automatically handles timeouts with fallback
- Consider increasing timeout values in config.ini
- Ensure sufficient system resources
**Selenium Errors**:
- YouTube may have changed their HTML structure
- Check for browser updates
- Verify you're logged into YouTube
### Performance Tips
- **For faster processing**: Use smaller models like `gemma2:2b` as primary
- **For better accuracy**: Use larger models like `qwen2.5vl:7b` as primary
- **For stability**: Keep both models installed for automatic fallback
- **For large playlists**: Consider running in smaller batches
## Contributing
1. Fork the repository
2. Create a feature branch
3. Test your changes thoroughly
4. Submit a pull request
## License
MIT License - see LICENSE file for details
---
**Note**: This tool is for personal use and educational purposes. Please respect YouTube's Terms of Service and rate limits.

30
config.ini Normal file
View File

@@ -0,0 +1,30 @@
# Configuration file for YouTube Video Classifier
[DEFAULT]
# Ollama settings
ollama_host = http://localhost:11434
ollama_model = qwen2.5vl:7b
ollama_fallback_model = gemma2:2b
# File paths
classifications_csv = video_classifications.csv
browser_image = brave.png
# YouTube settings
playlist_url = https://www.youtube.com/playlist?list=WL
# Image recognition settings
confidence_threshold = 0.8
search_timeout = 0.5
sleep_duration = 0.2
# Processing settings
restart_tab_frequency = 90
enable_delete = false
enable_playlist_creation = false
# LLM timeout settings (in seconds)
llm_primary_timeout = 60
llm_fallback_timeout = 60

140
demo_classification.py Normal file
View File

@@ -0,0 +1,140 @@
#!/usr/bin/env python3
"""
Demo script showing how the video classification works
"""
import requests
import base64
import time
import configparser
# Load runtime settings from config.ini in the current working directory.
# ConfigParser.read() skips a missing file without raising, in which case
# the fallback value below is used.
config = configparser.ConfigParser()
config.read('config.ini')
# Base URL of the Ollama HTTP API; every request in this script is built on it.
ollama_host = config.get('DEFAULT', 'ollama_host', fallback='http://ollama:11434')
def classify_demo_video(video_obj):
    """Classify one sample video with Ollama and return the category name.

    Args:
        video_obj: Mapping with a required ``'title'`` key and an optional
            ``'thumbnail'`` key holding the path of an image file.

    Returns:
        The text returned by the model with surrounding whitespace and quotes
        stripped, or ``"Uncategorized"`` when the server answers with a
        non-200 status or any exception is raised along the way.
    """
    try:
        # If there's a thumbnail, convert the image to base64. The file must be
        # read exactly once: a second read() on the same handle returns b"",
        # which would silently drop the image from the request.
        thumbnail_path = video_obj.get('thumbnail')
        if thumbnail_path:
            with open(thumbnail_path, "rb") as image_file:
                image_data = base64.b64encode(image_file.read()).decode('utf-8')
            print(f"Encoded thumbnail: {len(image_data)} base64 characters")
        else:
            image_data = None

        existing_classifications = ["Tech Reviews", "Cooking", "Gaming", "Music"]
        prompt = f"""
Please classify this YouTube video based on its title and thumbnail.
Video Title: {video_obj['title']}
Existing Classifications: {", ".join(existing_classifications)}
Instructions:
1. If the video fits into one of the existing classifications, use that exact classification name.
2. If the video doesn't fit any existing classification, create a new appropriate classification name.
3. Classification names should be concise (1-3 words) and descriptive.
4. Examples of good classifications: "Tech Reviews", "Cooking", "Gaming", "Education", "Music", "Comedy", etc.
5. Respond with ONLY the classification name, nothing else.
"""
        print(f"Classifying: '{video_obj['title']}'")
        print(f"Using thumbnail: {video_obj.get('thumbnail', 'None')}")
        print("Sending request to Ollama...")

        # Prepare the request payload. The model comes from config.ini so the
        # demo follows the same setting as the other scripts; the previous
        # hard-coded name is kept as the fallback.
        payload = {
            'model': config.get('DEFAULT', 'ollama_model', fallback='qwen2.5vl:7b'),
            'prompt': prompt,
            'stream': False
        }
        # Only include images if image_data is available
        if image_data:
            payload['images'] = [image_data]

        response = requests.post(
            f'{ollama_host}/api/generate',
            json=payload,
            timeout=60
        )
        if response.status_code == 200:
            result = response.json()
            # Models sometimes wrap the answer in quotes; strip them.
            classification = result['response'].strip().strip('"\'')
            print(f"✅ Classification: '{classification}'")
            return classification
        else:
            print(f"❌ Error: {response.status_code}")
            return "Uncategorized"
    except Exception as e:
        # Deliberate best-effort: the demo keeps going with a placeholder.
        print(f"❌ Error: {e}")
        return "Uncategorized"
def run_demo():
    """Classify a fixed list of sample videos and print a summary table.

    Each sample is passed to ``classify_demo_video``; a one-second pause
    follows every request. Only the first sample carries a thumbnail.
    """
    divider = "=" * 40
    sample_videos = [
        {"title": "I can't believe this change!", "thumbnail": "img/iphone_thumbnail.png"},
        {"title": "iPhone 15 Pro Review - Best Camera Phone?"},
        {"title": "Easy Pasta Recipe for Beginners"},
        {"title": "Minecraft Survival Guide - Episode 1"},
        {"title": "Classical Piano Music for Studying"},
        {"title": "Machine Learning Explained Simply"},
    ]
    total = len(sample_videos)

    print("YouTube Video Classification Demo")
    print(divider)

    results = []
    position = 0
    for sample in sample_videos:
        position += 1
        print(f"\n--- Demo {position}/{total} ---")
        # Classify the video and remember (title, category) for the summary
        category = classify_demo_video(sample)
        results.append((sample['title'], category))
        time.sleep(1)  # Be nice to the API

    print("\n" + divider)
    print("DEMO RESULTS:")
    print(divider)
    for title, category in results:
        print(f"{category:15} | {title}")

    print("\nDemo complete! The script can:")
    for capability in (
        "• Use existing categories when appropriate",
        "• Create new categories for unique content",
        "• Analyze both title and thumbnail information",
    ):
        print(capability)
if __name__ == '__main__':
    print(ollama_host)
    # Check if Ollama is running before starting the demo.
    try:
        response = requests.get(f'{ollama_host}/api/tags', timeout=5)
    except requests.exceptions.RequestException:
        # Only network-level failures land here. A bare ``except:`` would also
        # swallow the SystemExit raised below and print a second, misleading
        # message.
        print("❌ Cannot connect to Ollama. Please start it with: ollama serve")
        raise SystemExit(1)
    if response.status_code != 200:
        print("❌ Ollama is not running. Please start it with: ollama serve")
        raise SystemExit(1)
    run_demo()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.3 KiB

BIN
img/iphone_thumbnail.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 456 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 348 B

View File

@@ -2,4 +2,13 @@
opencv-python==4.11.0.86
pillow==11.3.0
PyAutoGUI==0.9.54
keyboard==0.13.5
pynput==1.8.1
requests==2.31.0
pandas~=2.3.1
ollama==0.2.1
configparser==6.0.0
pyperclip==1.8.2
pytesseract==0.3.10
selenium==4.15.2
webdriver-manager==4.0.1
rich==13.8.0

1316
script.py

File diff suppressed because it is too large Load Diff

63
setup.sh Executable file
View File

@@ -0,0 +1,63 @@
#!/bin/bash
# YouTube Video Classifier Setup Script
#
# Creates a virtual environment, installs the Python requirements, makes sure
# Ollama is installed and reachable, pulls the vision model and finally runs
# test_ollama.py as a smoke test.

echo "🎬 YouTube Video Classifier Setup"
echo "=================================="

# Check that a python3 interpreter is available. Only its presence is checked,
# so report the version that was actually found instead of claiming 3.11.
if ! command -v python3 &> /dev/null; then
    echo "❌ python3 not found. Please install Python 3.11.10 or newer"
    exit 1
fi
echo "✅ Found $(python3 --version 2>&1)"

# Create virtual environment
echo "📦 Creating virtual environment..."
python3 -m venv venv

# Activate virtual environment
echo "🔧 Activating virtual environment..."
source venv/bin/activate

# Install requirements
echo "📥 Installing Python dependencies..."
pip install -r requirements.txt

# Check if Ollama is installed
if ! command -v ollama &> /dev/null; then
    echo "❌ Ollama not found. Please install Ollama from https://ollama.ai"
    echo " After installation, run:"
    echo " 1. ollama serve"
    # Same model tag as the pull further down (was "qwen2.5-vl:7b").
    echo " 2. ollama pull qwen2.5vl:7b"
    exit 1
fi
echo "✅ Ollama found"

# Check if Ollama is running; start it in the background when it is not
if ! curl -s http://localhost:11434/api/tags &> /dev/null; then
    echo "⚠️ Ollama is not running. Starting Ollama..."
    ollama serve &
    # Give the server a moment to start listening before pulling
    sleep 5
fi

# Pull Qwen2.5VL model
echo "🤖 Pulling Qwen2.5VL model..."
ollama pull qwen2.5vl:7b

# Test setup
echo "🧪 Testing setup..."
python test_ollama.py

echo "✅ Setup complete!"
echo ""
echo "Next steps:"
echo "1. Make sure your browser is pinned to the taskbar"
echo "2. Update the browser image in img/ folder if needed"
echo "3. Run: python script.py"
echo ""
echo "Optional:"
echo "- Run demo: python demo_classification.py"
echo "- Analyze results: python playlist_manager.py --analyze"

114
setup_model.py Normal file
View File

@@ -0,0 +1,114 @@
#!/usr/bin/env python3
"""
Script to ensure the Qwen2.5VL model is available in the Ollama container
"""
import configparser
import time
import sys
import requests
def load_config():
    """Read the Ollama host and model name from config.ini.

    Returns:
        A ``(ollama_host, ollama_model)`` tuple. Each value falls back to its
        built-in default when config.ini or the option is missing.
    """
    parser = configparser.ConfigParser()
    parser.read('config.ini')
    defaults = parser['DEFAULT']
    return (
        defaults.get('ollama_host', fallback='http://ollama:11434'),
        defaults.get('ollama_model', fallback='qwen2.5vl:7b'),
    )
def wait_for_ollama(host, max_attempts=30):
    """Poll ``{host}/api/tags`` until it answers with HTTP 200.

    Args:
        host: Base URL of the Ollama API.
        max_attempts: Number of polls before giving up; a two-second pause
            follows every unsuccessful poll.

    Returns:
        True as soon as one poll succeeds, False once all attempts are used up.
    """
    print(f"⏳ Waiting for Ollama container at {host}...")
    tags_url = f"{host}/api/tags"
    attempt = 0
    while attempt < max_attempts:
        attempt += 1
        try:
            is_ready = requests.get(tags_url, timeout=5).status_code == 200
        except requests.exceptions.RequestException:
            # Connection refused / timeout: treat as "not ready yet"
            is_ready = False
        if is_ready:
            print("✅ Ollama container is ready!")
            return True
        print(f" Attempt {attempt}/{max_attempts} - waiting...")
        time.sleep(2)
    print("❌ Ollama container is not responding after maximum attempts")
    return False
def check_model_exists(host, model_name):
    """Report whether ``model_name`` shows up in the server's model list.

    Args:
        host: Base URL of the Ollama API.
        model_name: Name to look for; a substring match against each listed
            model name counts as a hit.

    Returns:
        ``(found, names)`` where ``names`` is the list of model names reported
        by ``/api/tags``. ``(False, [])`` on a non-200 reply or request error.
    """
    try:
        reply = requests.get(f"{host}/api/tags", timeout=5)
        if reply.status_code != 200:
            return False, []
        listed = [entry['name'] for entry in reply.json().get('models', [])]
        found = False
        for listed_name in listed:
            if model_name in listed_name:
                found = True
                break
        return found, listed
    except requests.exceptions.RequestException as e:
        print(f"❌ Error checking models: {e}")
        return False, []
def pull_model(host, model_name):
    """Pull ``model_name`` through the Ollama ``/api/pull`` endpoint.

    The endpoint streams newline-delimited JSON progress objects. HTTP 200 is
    sent before the download starts, and a failure during the pull is
    reported as an ``{"error": ...}`` object inside the stream, so the status
    code alone cannot confirm success. The stream is therefore read line by
    line and any reported error is treated as a failed pull.

    Args:
        host: Base URL of the Ollama API.
        model_name: Model to download.

    Returns:
        True when the stream ends without an error object, False on a non-200
        reply, a reported error, or a request exception.
    """
    import json  # local import: only this function parses the progress stream

    print(f"📥 Pulling model '{model_name}' (this may take several minutes)...")
    try:
        with requests.post(
            f"{host}/api/pull",
            json={"name": model_name},
            stream=True,  # consume progress as it arrives instead of buffering it all
            timeout=600  # 10 minutes timeout
        ) as response:
            if response.status_code != 200:
                print(f"❌ Failed to pull model: HTTP {response.status_code}")
                print(f"Response: {response.text}")
                return False

            last_status = None
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue  # keep-alive blank line
                try:
                    update = json.loads(raw_line)
                except ValueError:
                    continue  # ignore anything that is not a JSON progress object
                if not isinstance(update, dict):
                    continue
                if update.get("error"):
                    print(f"❌ Failed to pull model: {update['error']}")
                    return False
                status = update.get("status")
                # Print each phase once rather than every progress tick
                if status and status != last_status:
                    print(f" {status}")
                    last_status = status

        print(f"✅ Successfully pulled model '{model_name}'")
        return True
    except requests.exceptions.RequestException as e:
        print(f"❌ Error pulling model: {e}")
        return False
def main():
    """Make sure the configured model is available, pulling it if necessary.

    Steps: load config.ini, wait for the Ollama API, look the model up and
    pull it when it is missing. Exits with status 1 when the configuration
    cannot be loaded, the API never becomes ready, or the pull fails.
    """
    print("🔧 Model Setup for YouTube Video Classifier")
    print("=" * 50)

    # Load configuration
    try:
        host, model = load_config()
        print("📋 Configuration:")
        print(f" Host: {host}")
        print(f" Model: {model}")
        print()
    except Exception as e:
        print(f"❌ Failed to load configuration: {e}")
        sys.exit(1)

    # Wait for Ollama to be ready
    ollama_ready = wait_for_ollama(host)
    if not ollama_ready:
        sys.exit(1)

    # Check if model exists, pull it otherwise
    already_present, installed_models = check_model_exists(host, model)
    if not already_present:
        print(f"📋 Available models: {installed_models}")
        print(f"❌ Model '{model}' not found")
        if not pull_model(host, model):
            print(f"❌ Failed to set up model '{model}'")
            sys.exit(1)
        print(f"🎉 Model '{model}' is now ready for use!")
    else:
        print(f"✅ Model '{model}' is already available!")

    print("\n🎬 YouTube Video Classifier is ready!")
    print("🧪 Run 'python test_ollama.py' to verify the setup")


if __name__ == "__main__":
    main()

99
test_ollama.py Normal file
View File

@@ -0,0 +1,99 @@
#!/usr/bin/env python3
"""
Test script to verify Ollama connection and Qwen2.5-VL model
"""
import requests
import configparser
def load_config():
    """Return ``(ollama_host, ollama_model)`` as configured in config.ini.

    Missing file or missing options fall back to the built-in defaults.
    """
    settings = configparser.ConfigParser()
    settings.read('config.ini')
    option_defaults = (
        ('ollama_host', 'http://ollama:11434'),
        ('ollama_model', 'qwen2.5vl:7b'),
    )
    host, model = (
        settings.get('DEFAULT', option, fallback=default)
        for option, default in option_defaults
    )
    return host, model
def test_ollama_connection(host, model_name):
    """Check that the Ollama API answers and list the models it reports.

    Args:
        host: Base URL of the Ollama API.
        model_name: Configured model; its presence is reported but does not
            affect the return value.

    Returns:
        True when ``/api/tags`` answers with HTTP 200, False on any other
        status or on an exception.
    """
    try:
        response = requests.get(f'{host}/api/tags', timeout=5)
        if response.status_code != 200:
            print(f"❌ Ollama responded with status code: {response.status_code}")
            return False

        listing = response.json()
        print("✅ Ollama is running!")
        model_names = [entry['name'] for entry in listing.get('models', [])]
        print(f"Available models: {model_names}")

        # Check if the configured model is available (substring match)
        matches = [name for name in model_names if model_name in name]
        if not matches:
            print(f"{model_name} model not found. Available models: {model_names}")
            print(f"Model may still be downloading. Check with: curl {host}/api/tags")
        else:
            print(f"{model_name} model is available!")
        return True
    except requests.exceptions.ConnectionError:
        print("❌ Cannot connect to Ollama container. Is the ollama service running?")
        print("💡 Try: docker-compose up -d ollama")
        return False
    except Exception as e:
        print(f"❌ Error checking Ollama: {e}")
        return False
def test_classification(host, model_name):
    """Send one text-only classification prompt to verify the model responds.

    Args:
        host: Base URL of the Ollama API.
        model_name: Model to query via ``/api/generate``.

    Returns:
        True when the server answers with HTTP 200 and a ``response`` field,
        False on any other status or on an exception.
    """
    request_body = {
        'model': model_name,
        'prompt': 'Classify this video title into a category: "How to Cook Pasta - Italian Recipe Tutorial". Respond with only the category name.',
        'stream': False
    }
    try:
        response = requests.post(f'{host}/api/generate', json=request_body, timeout=30)
        if response.status_code != 200:
            print(f"❌ Classification test failed: {response.status_code}")
            print(f"Response: {response.text}")
            return False

        answer = response.json()['response'].strip()
        print(f"✅ Test classification successful: '{answer}'")
        return True
    except Exception as e:
        print(f"❌ Error testing classification: {e}")
        return False
if __name__ == '__main__':
    # Entry point: load the configuration, run both checks, then print
    # follow-up instructions regardless of the outcome.
    print("Testing Ollama setup for YouTube Video Classifier...")
    print("-" * 50)

    # Load configuration
    try:
        ollama_host, ollama_model = load_config()
        print("📋 Configuration:")
        print(f" Host: {ollama_host}")
        print(f" Model: {ollama_model}")
        print()
    except Exception as e:
        print(f"❌ Failed to load configuration: {e}")
        exit(1)

    # The classification test only makes sense when the API is reachable
    api_reachable = test_ollama_connection(ollama_host, ollama_model)
    if api_reachable:
        print("\nTesting classification...")
        test_classification(ollama_host, ollama_model)

    closing_lines = (
        "\nSetup verification complete!",
        "\nIf all tests passed, you can run the main script with: python script.py",
        "If any tests failed, please:",
        "1. Make sure the ollama container is running: docker-compose up -d ollama",
        f"2. Wait for the model to download: curl {ollama_host}/api/tags",
        "3. Check container logs: docker-compose logs ollama",
    )
    for closing_line in closing_lines:
        print(closing_line)

13
web/README.md Normal file
View File

@@ -0,0 +1,13 @@
# Astro with Tailwind
```sh
pnpm create astro@latest -- --template with-tailwindcss
```
[![Open in StackBlitz](https://developer.stackblitz.com/img/open_in_stackblitz.svg)](https://stackblitz.com/github/withastro/astro/tree/latest/examples/with-tailwindcss)
[![Open with CodeSandbox](https://assets.codesandbox.io/github/button-edit-lime.svg)](https://codesandbox.io/p/sandbox/github/withastro/astro/tree/latest/examples/with-tailwindcss)
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/withastro/astro?devcontainer_path=.devcontainer/with-tailwindcss/devcontainer.json)
Astro comes with [Tailwind](https://tailwindcss.com) support out of the box. This example showcases how to style your Astro project with Tailwind.
For complete setup instructions, please see our [Tailwind Integration Guide](https://docs.astro.build/en/guides/integrations-guide/tailwind).

16
web/astro.config.mjs Normal file
View File

@@ -0,0 +1,16 @@
// @ts-check
import { defineConfig } from 'astro/config';
import tailwindcss from '@tailwindcss/vite';
import react from '@astrojs/react';
// https://astro.build/config
// Astro project configuration: Tailwind is wired in as a Vite plugin and the
// React integration is enabled for the .tsx components.
export default defineConfig({
vite: {
// Tailwind CSS runs through its Vite plugin
plugins: [tailwindcss()]
},
integrations: [react({
// NOTE(review): presumably turns off streamed rendering of React components —
// confirm against the @astrojs/react documentation.
experimentalDisableStreaming: true,
})]
});

21
web/components.json Normal file
View File

@@ -0,0 +1,21 @@
{
"$schema": "https://ui.shadcn.com/schema.json",
"style": "new-york",
"rsc": false,
"tsx": true,
"tailwind": {
"config": "",
"css": "src/styles/global.css",
"baseColor": "neutral",
"cssVariables": true,
"prefix": ""
},
"aliases": {
"components": "@/components",
"utils": "@/lib/utils",
"ui": "@/components/ui",
"lib": "@/lib",
"hooks": "@/hooks"
},
"iconLibrary": "lucide"
}

37
web/package.json Normal file
View File

@@ -0,0 +1,37 @@
{
"name": "web",
"type": "module",
"version": "0.0.1",
"scripts": {
"dev": "astro dev",
"build": "astro build",
"preview": "astro preview",
"astro": "astro"
},
"dependencies": {
"@astrojs/mdx": "^4.3.0",
"@astrojs/react": "^4.3.0",
"@base-ui-components/react": "1.0.0-beta.1",
"@radix-ui/react-slot": "^1.2.3",
"@tailwindcss/vite": "^4.1.3",
"@tanstack/react-virtual": "^3.13.12",
"@types/canvas-confetti": "^1.9.0",
"@types/react": "^19.1.8",
"@types/react-dom": "^19.1.6",
"astro": "^5.11.0",
"canvas-confetti": "^1.9.3",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"date-fns": "^4.1.0",
"lucide-react": "^0.525.0",
"next-themes": "^0.4.6",
"papaparse": "^5.5.3",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"tailwind-merge": "^3.3.1",
"tailwindcss": "^4.1.3"
},
"devDependencies": {
"tw-animate-css": "^1.3.5"
}
}

4642
web/pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load Diff

9
web/public/favicon.svg Normal file
View File

@@ -0,0 +1,9 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 128 128">
<path d="M50.4 78.5a75.1 75.1 0 0 0-28.5 6.9l24.2-65.7c.7-2 1.9-3.2 3.4-3.2h29c1.5 0 2.7 1.2 3.4 3.2l24.2 65.7s-11.6-7-28.5-7L67 45.5c-.4-1.7-1.6-2.8-2.9-2.8-1.3 0-2.5 1.1-2.9 2.7L50.4 78.5Zm-1.1 28.2Zm-4.2-20.2c-2 6.6-.6 15.8 4.2 20.2a17.5 17.5 0 0 1 .2-.7 5.5 5.5 0 0 1 5.7-4.5c2.8.1 4.3 1.5 4.7 4.7.2 1.1.2 2.3.2 3.5v.4c0 2.7.7 5.2 2.2 7.4a13 13 0 0 0 5.7 4.9v-.3l-.2-.3c-1.8-5.6-.5-9.5 4.4-12.8l1.5-1a73 73 0 0 0 3.2-2.2 16 16 0 0 0 6.8-11.4c.3-2 .1-4-.6-6l-.8.6-1.6 1a37 37 0 0 1-22.4 2.7c-5-.7-9.7-2-13.2-6.2Z" />
<style>
path { fill: #000; }
@media (prefers-color-scheme: dark) {
path { fill: #FFF; }
}
</style>
</svg>

After

Width:  |  Height:  |  Size: 749 B

View File

@@ -0,0 +1,19 @@
---
// Confetti button: renders its slot inside a Tailwind-styled <button>.
// The click behaviour is attached by the client-side script below.
---
<button
  class="appearance-none py-2 px-4 bg-purple-500 text-white font-semibold rounded-lg shadow-md hover:bg-purple-700 focus:outline-none focus:ring-2 focus:ring-purple-400 focus:ring-opacity-75"
>
  <slot />
</button>

<script>
  import confetti from 'canvas-confetti';

  // Attach the handler to the first <button> found in the document body, if any.
  document.body.querySelector('button')?.addEventListener('click', () => {
    confetti();
  });
</script>

View File

@@ -0,0 +1,129 @@
import React from 'react';
import { Input } from './ui/input';
import { Select } from './ui/select';
import { Button } from './ui/button';
import { Badge } from './ui/badge';
import type { VideoData } from '../types/video';
import { getUniqueValues } from '../utils/search';
/** Props accepted by the SearchAndFilter panel. */
interface SearchAndFilterProps {
  /** Rows from which the dropdown options are derived. */
  data: VideoData[];
  /** Current value of the free-text search box. */
  searchTerm: string;
  /** Called with the new text whenever the search box changes. */
  onSearchChange: (term: string) => void;
  /** Currently selected dropdown values; `'all'` means "no restriction". */
  filters: {
    classification: string;
    language: string;
    playlist_name: string;
  };
  /**
   * Called with the complete next filter state whenever a dropdown changes
   * or the filters are cleared. Typed with the `filters` shape (previously
   * `any`) so callers and the component are checked against each other.
   */
  onFilterChange: (filters: SearchAndFilterProps['filters']) => void;
  /** Number of rows before filtering. */
  totalResults: number;
  /** Number of rows left after search and filters are applied. */
  filteredResults: number;
}
/**
 * Search box plus three dropdown filters (classification, language, playlist)
 * with a result counter and a "Clear All" button.
 *
 * The component is fully controlled: it renders `searchTerm` / `filters` and
 * reports every change through `onSearchChange` / `onFilterChange`.
 */
export function SearchAndFilter({
  data,
  searchTerm,
  onSearchChange,
  filters,
  onFilterChange,
  totalResults,
  filteredResults,
}: SearchAndFilterProps) {
  // One entry per dropdown; options are the distinct values found in `data`.
  const dropdowns = [
    {
      field: 'classification',
      label: 'Classification',
      allLabel: 'All Classifications',
      options: getUniqueValues(data, 'classification'),
    },
    {
      field: 'language',
      label: 'Language',
      allLabel: 'All Languages',
      options: getUniqueValues(data, 'language'),
    },
    {
      field: 'playlist_name',
      label: 'Playlist',
      allLabel: 'All Playlists',
      options: getUniqueValues(data, 'playlist_name'),
    },
  ] as const;

  // Reset the search text and put every dropdown back on 'all'.
  const resetEverything = () => {
    onSearchChange('');
    onFilterChange({
      classification: 'all',
      language: 'all',
      playlist_name: 'all',
    });
  };

  // A dropdown counts as active when it is not 'all'; a non-empty search adds one.
  const narrowedDropdowns = Object.values(filters).filter((value) => value !== 'all').length;
  const activeFiltersCount = searchTerm ? narrowedDropdowns + 1 : narrowedDropdowns;
  const hasActiveFilters = activeFiltersCount > 0;

  return (
    <div className="space-y-4 p-6 bg-card border rounded-lg">
      <div className="flex items-center justify-between">
        <div className="space-y-1">
          <h2 className="text-lg font-semibold">Search & Filter</h2>
          <p className="text-sm text-muted-foreground">
            {filteredResults} of {totalResults} videos
            {hasActiveFilters && (
              <Badge variant="secondary" className="ml-2">
                {activeFiltersCount} filter{activeFiltersCount !== 1 ? 's' : ''} active
              </Badge>
            )}
          </p>
        </div>
        {hasActiveFilters && (
          <Button variant="outline" size="sm" onClick={resetEverything}>
            Clear All
          </Button>
        )}
      </div>
      <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
        {/* Free-text search */}
        <div className="space-y-2">
          <label className="text-sm font-medium">Search</label>
          <Input
            placeholder="Search videos, channels, tags..."
            value={searchTerm}
            onChange={(e) => onSearchChange(e.target.value)}
          />
        </div>
        {/* Classification, language and playlist dropdowns */}
        {dropdowns.map(({ field, label, allLabel, options }) => (
          <div key={field} className="space-y-2">
            <label className="text-sm font-medium">{label}</label>
            <Select
              value={filters[field]}
              onChange={(e) => onFilterChange({ ...filters, [field]: e.target.value })}
            >
              <option value="all">{allLabel}</option>
              {options.map((option) => (
                <option key={option} value={option}>
                  {option}
                </option>
              ))}
            </Select>
          </div>
        ))}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,41 @@
import React from 'react';
import { Badge } from './ui/badge';
import type { VideoData } from '../types/video';
import { getUniqueValues } from '../utils/search';
/** Props for StatsOverview. */
interface StatsOverviewProps {
/** Video rows the summary figures are computed from. */
data: VideoData[];
}
/**
 * Row of summary tiles: video count, summed duration, and the number of
 * distinct channels, classifications and languages in `data`.
 */
export function StatsOverview({ data }: StatsOverviewProps) {
  // Sum of every video's length in seconds
  let totalSeconds = 0;
  for (const video of data) {
    totalSeconds = totalSeconds + video.video_length_seconds;
  }

  const channelCount = getUniqueValues(data, 'channel_name').length;
  const classificationCount = getUniqueValues(data, 'classification').length;
  const languageCount = getUniqueValues(data, 'language').length;

  // Duration is shown as whole hours plus the remaining whole minutes
  const wholeHours = Math.floor(totalSeconds / 3600);
  const leftoverMinutes = Math.floor((totalSeconds % 3600) / 60);

  const stats = [
    { label: 'Total Videos', value: data.length.toLocaleString() },
    { label: 'Total Duration', value: `${wholeHours}h ${leftoverMinutes}m` },
    { label: 'Channels', value: channelCount },
    { label: 'Categories', value: classificationCount },
    { label: 'Languages', value: languageCount },
  ];

  return (
    <div className="grid grid-cols-2 md:grid-cols-5 gap-4">
      {stats.map(({ label, value }) => (
        <div key={label} className="text-center p-4 bg-card border rounded-lg">
          <div className="text-2xl font-bold text-primary">{value}</div>
          <div className="text-sm text-muted-foreground">{label}</div>
        </div>
      ))}
    </div>
  );
}

View File

@@ -0,0 +1,75 @@
import React, { createContext, useContext, useEffect, useState } from 'react'
/** Selectable themes; 'system' is resolved against the OS colour-scheme preference by ThemeProvider. */
type Theme = 'dark' | 'light' | 'system'
/** Props accepted by ThemeProvider. */
type ThemeProviderProps = {
children: React.ReactNode
/** Theme used when nothing is stored under `storageKey`. */
defaultTheme?: Theme
/** localStorage key the chosen theme is read from and written to. */
storageKey?: string
}
/** Value exposed through the theme context. */
type ThemeProviderState = {
theme: Theme
setTheme: (theme: Theme) => void
}
// Context default, seen only by consumers rendered outside a ThemeProvider:
// reports 'system' and its setter does nothing.
const initialState: ThemeProviderState = {
theme: 'system',
setTheme: () => null,
}
// Context carrying the active theme and its setter.
const ThemeProviderContext = createContext<ThemeProviderState>(initialState)
/**
 * Provides the current theme to descendants and mirrors it onto the
 * `<html>` element as a `light` or `dark` class.
 *
 * The choice is persisted in localStorage under `storageKey`. When the theme
 * is 'system' the class follows the OS `prefers-color-scheme` setting,
 * including changes made while the page is open.
 *
 * @param children - Subtree that can read the theme through `useTheme`.
 * @param defaultTheme - Theme used when no valid value is stored.
 * @param storageKey - localStorage key used for persistence.
 */
export function ThemeProvider({
  children,
  defaultTheme = 'system',
  storageKey = 'vite-ui-theme',
  ...props
}: ThemeProviderProps) {
  const [theme, setTheme] = useState<Theme>(() => {
    // Reading storage can throw (no `localStorage` global, or access blocked
    // by the browser), so any failure falls back to the default theme.
    try {
      const stored = localStorage.getItem(storageKey)
      // Only accept known values; anything else would end up as a stray
      // class name on the root element.
      if (stored === 'dark' || stored === 'light' || stored === 'system') {
        return stored
      }
    } catch {
      // Storage unavailable: use the default below.
    }
    return defaultTheme
  })

  useEffect(() => {
    const root = window.document.documentElement
    const systemQuery = window.matchMedia('(prefers-color-scheme: dark)')

    const applyTheme = () => {
      const resolved =
        theme === 'system' ? (systemQuery.matches ? 'dark' : 'light') : theme
      root.classList.remove('light', 'dark')
      root.classList.add(resolved)
    }

    applyTheme()
    // Re-apply when the OS preference flips; for an explicit theme this just
    // re-adds the same class.
    systemQuery.addEventListener('change', applyTheme)
    return () => systemQuery.removeEventListener('change', applyTheme)
  }, [theme])

  const value: ThemeProviderState = {
    theme,
    setTheme: (nextTheme: Theme) => {
      try {
        localStorage.setItem(storageKey, nextTheme)
      } catch {
        // Persistence is best-effort; the in-memory theme still updates.
      }
      setTheme(nextTheme)
    },
  }

  return (
    <ThemeProviderContext.Provider {...props} value={value}>
      {children}
    </ThemeProviderContext.Provider>
  )
}
/**
 * Hook returning the current theme and its setter from the theme context.
 *
 * NOTE(review): the context is created with a non-undefined default value,
 * so the guard below does not fire when the hook is used outside a
 * ThemeProvider; such callers receive the default state instead.
 */
export const useTheme = () => {
  const themeState = useContext(ThemeProviderContext)

  if (themeState === undefined) {
    throw new Error('useTheme must be used within a ThemeProvider')
  }

  return themeState
}

View File

@@ -0,0 +1,21 @@
import React from 'react'
import { Moon, Sun } from 'lucide-react'
import { Button } from './ui/button'
import { useTheme } from './ThemeProvider'
/**
 * Icon button that switches the UI between light and dark mode.
 *
 * The sun/moon icons swap through the `dark:` variants, so they track the
 * class applied to the document rather than local state.
 */
export function ThemeToggle() {
  const { theme, setTheme } = useTheme()

  const toggleTheme = () => {
    // 'system' is neither light nor dark by itself; resolve it against the OS
    // preference so the first click always switches away from what is shown.
    const showingDark =
      theme === 'dark' ||
      (theme === 'system' &&
        window.matchMedia('(prefers-color-scheme: dark)').matches)
    setTheme(showingDark ? 'light' : 'dark')
  }

  return (
    <Button
      variant="outline"
      size="icon"
      onClick={toggleTheme}
      className="relative"
    >
      <Sun className="h-[1.2rem] w-[1.2rem] rotate-0 scale-100 transition-all dark:-rotate-90 dark:scale-0" />
      <Moon className="absolute h-[1.2rem] w-[1.2rem] rotate-90 scale-0 transition-all dark:rotate-0 dark:scale-100" />
      <span className="sr-only">Toggle theme</span>
    </Button>
  )
}

View File

@@ -0,0 +1,136 @@
import React, { useState, useEffect, useMemo } from 'react';
import { VirtualTable } from './VirtualTable';
import { SearchAndFilter } from './SearchAndFilter';
import { StatsOverview } from './StatsOverview';
import { Button } from './ui/button';
import { ThemeProvider } from './ThemeProvider';
import { ThemeToggle } from './ThemeToggle';
import type { VideoData } from '../types/video';
import { searchVideos, filterVideos } from '../utils/search';
/**
 * Shape of a JSON payload carrying either a list of videos or an error.
 * NOTE(review): not referenced by the component below, which receives its
 * videos through props — confirm whether this is still needed.
 */
interface ApiResponse {
videos?: VideoData[];
error?: string;
message?: string;
}
/**
 * Root component of the video classifier UI.
 *
 * Holds the search term and the three dropdown filters, derives the visible
 * rows from them, and renders the header, stats, filter bar, results table
 * and footer. When no video list is supplied at all it renders a
 * "CSV File Not Found" help screen instead.
 *
 * @param videos - Parsed video records to display.
 */
export default function VideoClassifierApp({ videos }: { videos: VideoData[] }) {
  const [searchTerm, setSearchTerm] = useState('');
  const [filters, setFilters] = useState({
    classification: 'all',
    language: 'all',
    playlist_name: 'all'
  });

  // Rows left after the dropdown filters and then the free-text search.
  const filteredData = useMemo(() => {
    // Hooks must run before the missing-data guard below, so tolerate an
    // absent list here instead of dereferencing it.
    if (!videos) return [];
    return searchVideos(filterVideos(videos, filters), searchTerm);
  }, [videos, searchTerm, filters]);

  if (!videos) {
    return (
      <div className="min-h-screen flex items-center justify-center p-6">
        <div className="max-w-md text-center space-y-4">
          <div className="text-6xl">📁</div>
          <h1 className="text-2xl font-bold text-destructive">CSV File Not Found</h1>
          <p className="text-muted-foreground">
            It seems the <code className="bg-background px-1 rounded">video_classifications.csv</code> file is missing or not properly configured.
          </p>
          <div className="bg-muted p-4 rounded-lg text-sm text-left">
            <p className="font-medium mb-2">To fix this:</p>
            <ol className="list-decimal list-inside space-y-1 text-muted-foreground">
              <li>Make sure <code className="bg-background px-1 rounded">video_classifications.csv</code> exists in your project root</li>
              <li>Run your YouTube classifier script to generate the CSV</li>
              <li>Refresh this page</li>
            </ol>
          </div>
        </div>
      </div>
    );
  }

  const hasVideos = videos.length > 0;

  return (
    <ThemeProvider defaultTheme="system" storageKey="video-classifier-theme">
      <div className="min-h-screen bg-background">
        <div className="container mx-auto px-6 py-8 space-y-8">
          {/* Header */}
          <div className="text-center space-y-4">
            <div className="flex items-center justify-center gap-4">
              <h1 className="text-4xl font-bold bg-gradient-to-r from-primary to-primary/60 bg-clip-text text-transparent">
                YouTube Video Classifier
              </h1>
              <ThemeToggle />
            </div>
            <p className="text-muted-foreground max-w-2xl mx-auto">
              AI-powered video classification and management platform. Search, filter, and organize your YouTube video collection with intelligent categorization.
            </p>
          </div>
          {/* Stats Overview */}
          {hasVideos && (
            <StatsOverview data={videos} />
          )}
          {/* Search and Filter */}
          {hasVideos && (
            <SearchAndFilter
              data={videos}
              searchTerm={searchTerm}
              onSearchChange={setSearchTerm}
              filters={filters}
              onFilterChange={setFilters}
              totalResults={videos.length}
              filteredResults={filteredData.length}
            />
          )}
          {/* Results */}
          {hasVideos ? (
            filteredData.length > 0 ? (
              <VirtualTable data={filteredData} />
            ) : (
              <div className="text-center py-12">
                <div className="text-6xl mb-4">🔍</div>
                <h3 className="text-lg font-medium mb-2">No results found</h3>
                <p className="text-muted-foreground">
                  Try adjusting your search terms or filters
                </p>
              </div>
            )
          ) : (
            <div className="text-center py-12">
              <div className="text-6xl mb-4">📹</div>
              <h3 className="text-lg font-medium mb-2">No videos found</h3>
              <p className="text-muted-foreground">
                Your CSV file appears to be empty
              </p>
            </div>
          )}
          {/* Footer */}
          <div className="text-center text-sm text-muted-foreground border-t pt-8">
            <p>
              {videos.length} videos indexed
            </p>
            <Button
              variant="ghost"
              size="sm"
              // Call reload(); referencing the method without invoking it does nothing.
              onClick={() => window.location.reload()}
              className="mt-2"
            >
              Refresh Data
            </Button>
          </div>
        </div>
      </div>
    </ThemeProvider>
  );
}

View File

@@ -0,0 +1,172 @@
import React, { useRef } from 'react';
import { useVirtualizer } from '@tanstack/react-virtual';
import type { VideoData } from '../types/video';
import { formatDuration, formatDate } from '../utils/csvParser';
import { Badge } from './ui/badge';
import { Button } from './ui/button';
import { Tooltip,TooltipContent, TooltipTrigger } from './ui/tooltip';
/** Props accepted by the VirtualTable component. */
interface VirtualTableProps {
/** Rows to display, one per video, in display order. */
data: VideoData[];
}
/**
 * Scrollable table of videos that only mounts the rows near the viewport.
 *
 * Rows are absolutely positioned inside a spacer whose height is the
 * virtualizer's total size, and each row is translated to its computed
 * offset. Long titles, channel/playlist names and overflow tags expose their
 * full text through tooltips.
 *
 * @param data - Rows to display, one per video.
 */
export function VirtualTable({ data }: VirtualTableProps) {
  const parentRef = useRef<HTMLDivElement>(null);

  const rowVirtualizer = useVirtualizer({
    count: data.length,
    getScrollElement: () => parentRef.current,
    estimateSize: () => 120,
    overscan: 5,
  });

  const virtualItems = rowVirtualizer.getVirtualItems();

  return (
    <div className="border rounded-lg overflow-hidden">
      {/* Table Header */}
      <div className="bg-muted/50 border-b">
        <div className="grid grid-cols-12 gap-3 px-4 py-3 text-sm font-medium text-muted-foreground">
          <div className="col-span-3 flex items-center">Video</div>
          <div className="col-span-2 flex items-center">Channel</div>
          <div className="col-span-2 flex items-center">Classification</div>
          <div className="col-span-1 flex items-center">Language</div>
          <div className="col-span-1 flex items-center">Duration</div>
          <div className="col-span-2 flex items-center">Date</div>
          <div className="col-span-1 flex items-center">Actions</div>
        </div>
      </div>
      {/* Virtual Container */}
      <div
        ref={parentRef}
        className="h-[600px] overflow-auto"
        style={{
          contain: 'strict',
        }}
      >
        <div
          style={{
            height: `${rowVirtualizer.getTotalSize()}px`,
            width: '100%',
            position: 'relative',
          }}
        >
          {virtualItems.map((virtualItem) => {
            const video = data[virtualItem.index];
            if (!video) return null;

            // Split the tag list once per row and drop blanks, so an empty
            // column or a trailing comma does not render an empty badge.
            const subtags = (video.detailed_subtags ?? '')
              .split(',')
              .map((tag) => tag.trim())
              .filter(Boolean);
            const visibleTags = subtags.slice(0, 2);
            const hiddenTags = subtags.slice(2);

            return (
              <div
                key={virtualItem.key}
                data-index={virtualItem.index}
                style={{
                  position: 'absolute',
                  top: 0,
                  left: 0,
                  width: '100%',
                  transform: `translateY(${virtualItem.start}px)`,
                }}
              >
                <div className="border-b hover:bg-muted/50 transition-colors">
                  <div className="grid grid-cols-12 gap-3 px-4 py-3 items-center min-h-[80px]">
                    {/* Video Info */}
                    <div className="col-span-3 space-y-2 min-w-0">
                      <Tooltip>
                        <TooltipContent>
                          {video.video_title}
                        </TooltipContent>
                        <TooltipTrigger>
                          <h3 className="text-start font-medium text-sm leading-tight truncate sm:w-[150px] lg:w-[200px] xl:w-[250px] 2xl:w-[300px]">
                            {video.video_title}
                          </h3>
                        </TooltipTrigger>
                      </Tooltip>
                      <div className="flex flex-wrap gap-1">
                        {visibleTags.map((tag, i) => (
                          <Badge key={`${tag}-${i}`} variant="secondary" className="text-xs max-w-20 truncate">
                            {tag}
                          </Badge>
                        ))}
                        {hiddenTags.length > 0 && (
                          <Tooltip>
                            <TooltipContent>
                              {hiddenTags.join(', ')}
                            </TooltipContent>
                            <TooltipTrigger>
                              <Badge variant="outline" className="text-xs">
                                +{hiddenTags.length}
                              </Badge>
                            </TooltipTrigger>
                          </Tooltip>
                        )}
                      </div>
                    </div>
                    {/* Channel */}
                    <div className="col-span-2 min-w-0">
                      {/* Tooltip takes its text from a TooltipContent child, not a `content` prop. */}
                      <Tooltip>
                        <TooltipContent>
                          {video.channel_name}
                        </TooltipContent>
                        <TooltipTrigger className="block max-w-full text-start">
                          <div className="text-sm font-medium truncate">{video.channel_name}</div>
                        </TooltipTrigger>
                      </Tooltip>
                      <Tooltip>
                        <TooltipContent>
                          {video.playlist_name}
                        </TooltipContent>
                        <TooltipTrigger className="block max-w-full text-start">
                          <div className="text-xs text-muted-foreground truncate">{video.playlist_name}</div>
                        </TooltipTrigger>
                      </Tooltip>
                    </div>
                    {/* Classification */}
                    <div className="col-span-2 min-w-0">
                      <Badge variant="default" className="text-xs truncate max-w-full">
                        {video.classification}
                      </Badge>
                    </div>
                    {/* Language */}
                    <div className="col-span-1 min-w-0">
                      <Badge variant="outline" className="text-xs truncate max-w-full">
                        {video.language}
                      </Badge>
                    </div>
                    {/* Duration */}
                    <div className="col-span-1 text-sm font-mono">
                      {formatDuration(video.video_length_seconds)}
                    </div>
                    {/* Date */}
                    <div className="col-span-2 space-y-1 min-w-0">
                      <div className="text-sm">{formatDate(video.video_date)}</div>
                      <div className="text-xs text-muted-foreground truncate">
                        Added: {formatDate(video.timestamp)}
                      </div>
                    </div>
                    {/* Actions */}
                    <div className="col-span-1">
                      <Button
                        size="sm"
                        variant="outline"
                        // noopener/noreferrer: the opened page must not get a handle on this window.
                        onClick={() => window.open(video.video_url, '_blank', 'noopener,noreferrer')}
                        className="text-xs"
                      >
                        Watch
                      </Button>
                    </div>
                  </div>
                </div>
              </div>
            );
          })}
        </div>
      </div>
      {/* Footer */}
      <div className="bg-muted/50 border-t px-4 py-3">
        <div className="text-sm text-muted-foreground text-center">
          Showing {data.length} videos
        </div>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,36 @@
import * as React from "react"
import { cva, type VariantProps } from "class-variance-authority"
import { cn } from "@/lib/utils"
// Class-name builder for Badge: shared base classes plus one set per
// `variant`, with `default` used when no variant is given.
const badgeVariants = cva(
"inline-flex items-center rounded-md border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2",
{
variants: {
variant: {
default:
"border-transparent bg-primary text-primary-foreground shadow hover:bg-primary/80",
secondary:
"border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80",
destructive:
"border-transparent bg-destructive text-destructive-foreground shadow hover:bg-destructive/80",
outline: "text-foreground",
},
},
defaultVariants: {
variant: "default",
},
}
)
/** Props for Badge: every standard `<div>` attribute plus the `variant` option of badgeVariants. */
export interface BadgeProps
extends React.HTMLAttributes<HTMLDivElement>,
VariantProps<typeof badgeVariants> {}
/**
 * Small pill-shaped label rendered as a `<div>`.
 *
 * The variant's classes are merged with any caller-supplied `className`;
 * all remaining props are forwarded to the element.
 */
function Badge(props: BadgeProps) {
  const { className, variant, ...rest } = props
  const classes = cn(badgeVariants({ variant }), className)

  return <div className={classes} {...rest} />
}
export { Badge, badgeVariants }

View File

@@ -0,0 +1,59 @@
import * as React from "react"
import { Slot } from "@radix-ui/react-slot"
import { cva, type VariantProps } from "class-variance-authority"
import { cn } from "@/lib/utils"
// Class-name builder for Button: shared base classes plus one set per
// `variant` and per `size`; both fall back to "default" when omitted.
const buttonVariants = cva(
"inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-all disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg:not([class*='size-'])]:size-4 shrink-0 [&_svg]:shrink-0 outline-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive",
{
variants: {
// Visual treatment.
variant: {
default:
"bg-primary text-primary-foreground shadow-xs hover:bg-primary/90",
destructive:
"bg-destructive text-white shadow-xs hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
outline:
"border bg-background shadow-xs hover:bg-accent hover:text-accent-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50",
secondary:
"bg-secondary text-secondary-foreground shadow-xs hover:bg-secondary/80",
ghost:
"hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50",
link: "text-primary underline-offset-4 hover:underline",
},
// Height and padding presets; `icon` is a square button.
size: {
default: "h-9 px-4 py-2 has-[>svg]:px-3",
sm: "h-8 rounded-md gap-1.5 px-3 has-[>svg]:px-2.5",
lg: "h-10 rounded-md px-6 has-[>svg]:px-4",
icon: "size-9",
},
},
defaultVariants: {
variant: "default",
size: "default",
},
}
)
/**
 * Styled button.
 *
 * Renders a native `<button>` by default; with `asChild` it renders a Radix
 * `Slot` instead, so the styling and props are applied to the child element.
 * `variant`, `size` and `className` are combined through buttonVariants, and
 * every other prop is forwarded.
 */
function Button(
  props: React.ComponentProps<"button"> &
    VariantProps<typeof buttonVariants> & {
      asChild?: boolean
    }
) {
  const { className, variant, size, asChild = false, ...rest } = props
  const Element = asChild ? Slot : "button"
  const classes = cn(buttonVariants({ variant, size, className }))

  return <Element data-slot="button" className={classes} {...rest} />
}
export { Button, buttonVariants }

View File

@@ -0,0 +1,25 @@
import * as React from "react"
import { cn } from "@/lib/utils"
/** Props for Input: exactly the standard `<input>` attributes. */
export interface InputProps
extends React.InputHTMLAttributes<HTMLInputElement> {}
/**
 * Styled text input that forwards its ref to the underlying `<input>`.
 * Caller-supplied `className` is merged after the base classes; all other
 * props are passed straight through.
 */
const Input = React.forwardRef<HTMLInputElement, InputProps>((props, ref) => {
  const { className, type, ...rest } = props
  const classes = cn(
    "flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-sm shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50",
    className
  )

  return <input type={type} className={classes} ref={ref} {...rest} />
})
Input.displayName = "Input"
export { Input }

View File

@@ -0,0 +1,23 @@
import * as React from "react"
import { cn } from "@/lib/utils"
/**
 * Styled native `<select>` that forwards its ref to the element.
 * Options are supplied as children; caller-supplied `className` is merged
 * after the base classes and remaining props are passed through.
 */
const Select = React.forwardRef<
  HTMLSelectElement,
  React.SelectHTMLAttributes<HTMLSelectElement>
>((props, ref) => {
  const { className, children, ...rest } = props
  const classes = cn(
    "flex h-9 w-full rounded-md border border-input bg-background px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50",
    className
  )

  return (
    <select className={classes} ref={ref} {...rest}>
      {children}
    </select>
  )
})
Select.displayName = "Select"
export { Select }

View File

@@ -0,0 +1,103 @@
import * as React from "react"
import { Tooltip as BaseTooltip } from "@base-ui-components/react/tooltip"
import { cn } from "@/lib/utils"
/**
 * Wrapper around the Base UI tooltip provider whose open and close delays
 * default to 0 unless the caller overrides them.
 */
function TooltipProvider(
  props: React.ComponentProps<typeof BaseTooltip.Provider>
) {
  const { delay = 0, closeDelay = 0, ...rest } = props

  return (
    <BaseTooltip.Provider
      data-slot="tooltip-provider"
      delay={delay}
      closeDelay={closeDelay}
      {...rest}
    />
  )
}
/**
 * Tooltip root. Each instance is wrapped in its own TooltipProvider, so it
 * can be used without a provider higher up the tree.
 */
function Tooltip(props: React.ComponentProps<typeof BaseTooltip.Root>) {
  const root = <BaseTooltip.Root data-slot="tooltip" {...props} />

  return <TooltipProvider>{root}</TooltipProvider>
}
/** Element the tooltip is attached to; forwards every prop to the Base UI trigger. */
function TooltipTrigger(
  props: React.ComponentProps<typeof BaseTooltip.Trigger>
) {
  return <BaseTooltip.Trigger data-slot="tooltip-trigger" {...props} />
}
/** Thin wrapper that forwards every prop to the Base UI tooltip portal. */
function TooltipPortal(
  props: React.ComponentProps<typeof BaseTooltip.Portal>
) {
  return <BaseTooltip.Portal data-slot="tooltip-portal" {...props} />
}
/** Thin wrapper that forwards every prop to the Base UI tooltip positioner. */
function TooltipPositioner(
  props: React.ComponentProps<typeof BaseTooltip.Positioner>
) {
  return <BaseTooltip.Positioner data-slot="tooltip-positioner" {...props} />
}
/** Thin wrapper that forwards every prop to the Base UI tooltip arrow. */
function TooltipArrow(
  props: React.ComponentProps<typeof BaseTooltip.Arrow>
) {
  return <BaseTooltip.Arrow data-slot="tooltip-arrow" {...props} />
}
/**
 * Tooltip body: a portal containing a positioner, the styled popup with the
 * given children, and an SVG arrow.
 *
 * `align`, `side` and `sideOffset` are handed to the positioner (defaults:
 * "center", "top", 8); `className` is merged after the base popup classes;
 * all remaining props go to the popup element.
 */
function TooltipContent({
className,
align = "center",
sideOffset = 8,
side = "top",
children,
...props
}: React.ComponentProps<typeof BaseTooltip.Popup> & {
align?: BaseTooltip.Positioner.Props["align"]
side?: BaseTooltip.Positioner.Props["side"]
sideOffset?: BaseTooltip.Positioner.Props["sideOffset"]
}) {
return (
<TooltipPortal>
<TooltipPositioner sideOffset={sideOffset} align={align} side={side}>
<BaseTooltip.Popup
data-slot="tooltip-content"
className={cn(
"bg-popover text-popover-foreground outline-border z-50 w-fit origin-[var(--transform-origin)] rounded-md px-3 py-1.5 text-xs text-balance shadow-sm outline -outline-offset-1 transition-[transform,scale,opacity] data-[ending-style]:scale-95 data-[ending-style]:opacity-0 data-[starting-style]:scale-95 data-[starting-style]:opacity-0",
className
)}
{...props}
>
{children}
{/* Arrow: offset and rotated per side via the data-[side=…] classes. */}
<TooltipArrow className="data-[side=bottom]:top-[-8px] data-[side=left]:right-[-13px] data-[side=left]:rotate-90 data-[side=right]:left-[-13px] data-[side=right]:-rotate-90 data-[side=top]:bottom-[-8px] data-[side=top]:rotate-180">
<svg width="20" height="10" viewBox="0 0 20 10" fill="none">
{/* First path takes the popover fill colour, second the border colour. */}
<path
d="M9.66437 2.60207L4.80758 6.97318C4.07308 7.63423 3.11989 8 2.13172 8H0V9H20V8H18.5349C17.5468 8 16.5936 7.63423 15.8591 6.97318L11.0023 2.60207C10.622 2.2598 10.0447 2.25979 9.66437 2.60207Z"
className="fill-popover"
/>
<path
d="M10.3333 3.34539L5.47654 7.71648C4.55842 8.54279 3.36693 9 2.13172 9H0V8H2.13172C3.11989 8 4.07308 7.63423 4.80758 6.97318L9.66437 2.60207C10.0447 2.25979 10.622 2.2598 11.0023 2.60207L15.8591 6.97318C16.5936 7.63423 17.5468 8 18.5349 8H20V9H18.5349C17.2998 9 16.1083 8.54278 15.1901 7.71648L10.3333 3.34539Z"
className="fill-border"
/>
</svg>
</TooltipArrow>
</BaseTooltip.Popup>
</TooltipPositioner>
</TooltipPortal>
)
}
export {
Tooltip,
TooltipTrigger,
TooltipContent,
TooltipPortal,
TooltipPositioner,
TooltipArrow,
TooltipProvider,
}

2
web/src/env.d.ts vendored Normal file
View File

@@ -0,0 +1,2 @@
/// <reference path="../.astro/types.d.ts" />
/// <reference types="astro/client" />

View File

@@ -0,0 +1,17 @@
---
// Page layout: loads the global stylesheet and wraps the page body in a
// minimal HTML document. Presumably the layout referenced by Markdown pages
// via their `layout` frontmatter — confirm against the file's path.
// NOTE(review): `Button` is imported but not used anywhere in this layout.
import { Button } from '@/components/ui/button';
import '../styles/global.css';
// Only `content.title` is read below, as the document title.
const { content } = Astro.props;
---
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width" />
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
<title>{content.title}</title>
</head>
<body>
<slot />
</body>
</html>

6
web/src/lib/utils.ts Normal file
View File

@@ -0,0 +1,6 @@
import { clsx, type ClassValue } from "clsx"
import { twMerge } from "tailwind-merge"
/**
 * Builds a single class string from any mix of class values: `clsx`
 * flattens the inputs, then `twMerge` processes the result.
 */
export function cn(...inputs: ClassValue[]) {
  const joined = clsx(inputs)
  return twMerge(joined)
}

View File

@@ -0,0 +1,51 @@
import type { APIRoute } from 'astro';
import { readFileSync, existsSync } from 'fs';
import { join } from 'path';
import { parseCSV } from '../../utils/csvParser';
/** Serialises `payload` as a JSON response with the given HTTP status. */
const jsonResponse = (payload: unknown, status: number): Response =>
  new Response(JSON.stringify(payload), {
    status,
    headers: {
      'Content-Type': 'application/json',
    },
  });

/**
 * GET handler returning the parsed contents of `video_classifications.csv`.
 *
 * Responds with 200 and `{ videos }` on success, 404 when the file does not
 * exist, and 500 with the error message when reading or parsing throws.
 */
export const GET: APIRoute = async () => {
  try {
    // The CSV lives one directory above the working directory (outside the web folder).
    const csvPath = join(process.cwd(), '..', 'video_classifications.csv');

    if (!existsSync(csvPath)) {
      return jsonResponse(
        {
          error: 'CSV file not found',
          message: 'video_classifications.csv not found in project root',
          path: csvPath
        },
        404
      );
    }

    const videos = parseCSV(readFileSync(csvPath, 'utf-8'));
    return jsonResponse({ videos }, 200);
  } catch (error) {
    console.error('Error reading CSV file:', error);
    const message = error instanceof Error ? error.message : 'Unknown error';
    return jsonResponse({ error: 'Failed to read CSV file', message }, 500);
  }
};

27
web/src/pages/index.astro Normal file
View File

@@ -0,0 +1,27 @@
---
import VideoClassifierApp from '@/components/VideoClassifierApp';
import '../styles/global.css';
import { join } from 'path';
import { readFileSync } from 'fs';
import { parseCSV } from '@/utils/csvParser';
import type { VideoData } from '@/types/video';

// The CSV lives one directory above the working directory (outside the web folder).
const csvPath = join(process.cwd(), '..', 'video_classifications.csv');

// A missing or unreadable CSV should not take the whole page down: log the
// cause and render the app with an empty list instead.
let videos: VideoData[] = [];
try {
  videos = parseCSV(readFileSync(csvPath, 'utf-8'));
} catch (error) {
  console.error(`Could not load ${csvPath}:`, error);
}
---
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="icon" type="image/svg+xml" href="/favicon.svg" />
    <meta name="generator" content={Astro.generator} />
    <title>YouTube Video Classifier</title>
    <meta name="description" content="AI-powered YouTube video classification and management platform" />
  </head>
  <body class="min-h-screen bg-background">
    <VideoClassifierApp client:only="react" videos={videos} />
  </body>
</html>

View File

@@ -0,0 +1,16 @@
---
title: 'Markdown + Tailwind'
layout: ../layouts/main.astro
---
<div class="grid place-items-center h-screen content-center">
<div class="py-2 px-4 bg-purple-500 text-white font-semibold rounded-lg shadow-md">
Tailwind classes also work in Markdown!
</div>
<a
href="/"
class="p-4 underline hover:text-purple-500 transition-colors ease-in-out duration-200"
>
Go home
</a>
</div>

194
web/src/styles/global.css Normal file
View File

@@ -0,0 +1,194 @@
@import 'tailwindcss';
@import "tw-animate-css";
@custom-variant dark (&:is(.dark *));
@theme inline {
--radius-sm: calc(var(--radius) - 4px);
--radius-md: calc(var(--radius) - 2px);
--radius-lg: var(--radius);
--radius-xl: calc(var(--radius) + 4px);
--color-background: var(--background);
--color-foreground: var(--foreground);
--color-card: var(--card);
--color-card-foreground: var(--card-foreground);
--color-popover: var(--popover);
--color-popover-foreground: var(--popover-foreground);
--color-primary: var(--primary);
--color-primary-foreground: var(--primary-foreground);
--color-secondary: var(--secondary);
--color-secondary-foreground: var(--secondary-foreground);
--color-muted: var(--muted);
--color-muted-foreground: var(--muted-foreground);
--color-accent: var(--accent);
--color-accent-foreground: var(--accent-foreground);
--color-destructive: var(--destructive);
--color-border: var(--border);
--color-input: var(--input);
--color-ring: var(--ring);
--color-chart-1: var(--chart-1);
--color-chart-2: var(--chart-2);
--color-chart-3: var(--chart-3);
--color-chart-4: var(--chart-4);
--color-chart-5: var(--chart-5);
--color-sidebar: var(--sidebar);
--color-sidebar-foreground: var(--sidebar-foreground);
--color-sidebar-primary: var(--sidebar-primary);
--color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
--color-sidebar-accent: var(--sidebar-accent);
--color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
--color-sidebar-border: var(--sidebar-border);
--color-sidebar-ring: var(--sidebar-ring);
--font-sans: Poppins, sans-serif;
--font-mono: Fira Code, monospace;
--font-serif: Lora, serif;
--radius: 0.4rem;
--tracking-tighter: calc(var(--tracking-normal) - 0.05em);
--tracking-tight: calc(var(--tracking-normal) - 0.025em);
--tracking-wide: calc(var(--tracking-normal) + 0.025em);
--tracking-wider: calc(var(--tracking-normal) + 0.05em);
--tracking-widest: calc(var(--tracking-normal) + 0.1em);
--tracking-normal: var(--tracking-normal);
--shadow-2xl: var(--shadow-2xl);
--shadow-xl: var(--shadow-xl);
--shadow-lg: var(--shadow-lg);
--shadow-md: var(--shadow-md);
--shadow: var(--shadow);
--shadow-sm: var(--shadow-sm);
--shadow-xs: var(--shadow-xs);
--shadow-2xs: var(--shadow-2xs);
--spacing: var(--spacing);
--letter-spacing: var(--letter-spacing);
--shadow-offset-y: var(--shadow-offset-y);
--shadow-offset-x: var(--shadow-offset-x);
--shadow-spread: var(--shadow-spread);
--shadow-blur: var(--shadow-blur);
--shadow-opacity: var(--shadow-opacity);
--color-shadow-color: var(--shadow-color);
--color-destructive-foreground: var(--destructive-foreground);
}
:root {
--radius: 0.625rem;
--background: oklch(1 0 0);
--foreground: oklch(0.145 0 0);
--card: oklch(1 0 0);
--card-foreground: oklch(0.145 0 0);
--popover: oklch(1 0 0);
--popover-foreground: oklch(0.145 0 0);
--primary: oklch(0.205 0 0);
--primary-foreground: oklch(0.985 0 0);
--secondary: oklch(0.97 0 0);
--secondary-foreground: oklch(0.205 0 0);
--muted: oklch(0.97 0 0);
--muted-foreground: oklch(0.556 0 0);
--accent: oklch(0.97 0 0);
--accent-foreground: oklch(0.205 0 0);
--destructive: oklch(0.577 0.245 27.325);
--border: oklch(0.922 0 0);
--input: oklch(0.922 0 0);
--ring: oklch(0.708 0 0);
--chart-1: oklch(0.646 0.222 41.116);
--chart-2: oklch(0.6 0.118 184.704);
--chart-3: oklch(0.398 0.07 227.392);
--chart-4: oklch(0.828 0.189 84.429);
--chart-5: oklch(0.769 0.188 70.08);
--sidebar: oklch(0.985 0 0);
--sidebar-foreground: oklch(0.145 0 0);
--sidebar-primary: oklch(0.205 0 0);
--sidebar-primary-foreground: oklch(0.985 0 0);
--sidebar-accent: oklch(0.97 0 0);
--sidebar-accent-foreground: oklch(0.205 0 0);
--sidebar-border: oklch(0.922 0 0);
--sidebar-ring: oklch(0.708 0 0);
--destructive-foreground: oklch(1.0000 0 0);
--font-sans: Poppins, sans-serif;
--font-serif: Lora, serif;
--font-mono: Fira Code, monospace;
--shadow-color: hsl(325.78 58.18% 56.86% / 0.5);
--shadow-opacity: 1.0;
--shadow-blur: 0px;
--shadow-spread: 0px;
--shadow-offset-x: 3px;
--shadow-offset-y: 3px;
--letter-spacing: 0em;
--spacing: 0.25rem;
--shadow-2xs: 3px 3px 0px 0px hsl(325.7800 58.1800% 56.8600% / 0.50);
--shadow-xs: 3px 3px 0px 0px hsl(325.7800 58.1800% 56.8600% / 0.50);
--shadow-sm: 3px 3px 0px 0px hsl(325.7800 58.1800% 56.8600% / 1.00), 3px 1px 2px -1px hsl(325.7800 58.1800% 56.8600% / 1.00);
--shadow: 3px 3px 0px 0px hsl(325.7800 58.1800% 56.8600% / 1.00), 3px 1px 2px -1px hsl(325.7800 58.1800% 56.8600% / 1.00);
--shadow-md: 3px 3px 0px 0px hsl(325.7800 58.1800% 56.8600% / 1.00), 3px 2px 4px -1px hsl(325.7800 58.1800% 56.8600% / 1.00);
--shadow-lg: 3px 3px 0px 0px hsl(325.7800 58.1800% 56.8600% / 1.00), 3px 4px 6px -1px hsl(325.7800 58.1800% 56.8600% / 1.00);
--shadow-xl: 3px 3px 0px 0px hsl(325.7800 58.1800% 56.8600% / 1.00), 3px 8px 10px -1px hsl(325.7800 58.1800% 56.8600% / 1.00);
--shadow-2xl: 3px 3px 0px 0px hsl(325.7800 58.1800% 56.8600% / 2.50);
--tracking-normal: 0em;
}
.dark {
--background: oklch(0.145 0 0);
--foreground: oklch(0.985 0 0);
--card: oklch(0.205 0 0);
--card-foreground: oklch(0.985 0 0);
--popover: oklch(0.205 0 0);
--popover-foreground: oklch(0.985 0 0);
--primary: oklch(0.922 0 0);
--primary-foreground: oklch(0.205 0 0);
--secondary: oklch(0.269 0 0);
--secondary-foreground: oklch(0.985 0 0);
--muted: oklch(0.269 0 0);
--muted-foreground: oklch(0.708 0 0);
--accent: oklch(0.269 0 0);
--accent-foreground: oklch(0.985 0 0);
--destructive: oklch(0.704 0.191 22.216);
--border: oklch(1 0 0 / 10%);
--input: oklch(1 0 0 / 15%);
--ring: oklch(0.556 0 0);
--chart-1: oklch(0.488 0.243 264.376);
--chart-2: oklch(0.696 0.17 162.48);
--chart-3: oklch(0.769 0.188 70.08);
--chart-4: oklch(0.627 0.265 303.9);
--chart-5: oklch(0.645 0.246 16.439);
--sidebar: oklch(0.205 0 0);
--sidebar-foreground: oklch(0.985 0 0);
--sidebar-primary: oklch(0.488 0.243 264.376);
--sidebar-primary-foreground: oklch(0.985 0 0);
--sidebar-accent: oklch(0.269 0 0);
--sidebar-accent-foreground: oklch(0.985 0 0);
--sidebar-border: oklch(1 0 0 / 10%);
--sidebar-ring: oklch(0.556 0 0);
--destructive-foreground: oklch(0.2497 0.0305 234.1628);
--radius: 0.4rem;
--font-sans: Poppins, sans-serif;
--font-serif: Lora, serif;
--font-mono: Fira Code, monospace;
--shadow-color: #324859;
--shadow-opacity: 1.0;
--shadow-blur: 0px;
--shadow-spread: 0px;
--shadow-offset-x: 3px;
--shadow-offset-y: 3px;
--letter-spacing: 0em;
--spacing: 0.25rem;
--shadow-2xs: 3px 3px 0px 0px hsl(206.1538 28.0576% 27.2549% / 0.50);
--shadow-xs: 3px 3px 0px 0px hsl(206.1538 28.0576% 27.2549% / 0.50);
--shadow-sm: 3px 3px 0px 0px hsl(206.1538 28.0576% 27.2549% / 1.00), 3px 1px 2px -1px hsl(206.1538 28.0576% 27.2549% / 1.00);
--shadow: 3px 3px 0px 0px hsl(206.1538 28.0576% 27.2549% / 1.00), 3px 1px 2px -1px hsl(206.1538 28.0576% 27.2549% / 1.00);
--shadow-md: 3px 3px 0px 0px hsl(206.1538 28.0576% 27.2549% / 1.00), 3px 2px 4px -1px hsl(206.1538 28.0576% 27.2549% / 1.00);
--shadow-lg: 3px 3px 0px 0px hsl(206.1538 28.0576% 27.2549% / 1.00), 3px 4px 6px -1px hsl(206.1538 28.0576% 27.2549% / 1.00);
--shadow-xl: 3px 3px 0px 0px hsl(206.1538 28.0576% 27.2549% / 1.00), 3px 8px 10px -1px hsl(206.1538 28.0576% 27.2549% / 1.00);
--shadow-2xl: 3px 3px 0px 0px hsl(206.1538 28.0576% 27.2549% / 2.50);
}
/* Base-layer defaults for every element and for the page body. */
@layer base {
* {
/* Theme border colour and a half-opacity ring-coloured outline everywhere. */
@apply border-border outline-ring/50;
}
body {
/* Page background/text colours and letter spacing come from the theme variables. */
@apply bg-background text-foreground;
letter-spacing: var(--tracking-normal);
}
}
/* Show the pointer cursor on all buttons. */
button {
cursor: pointer;
}

22
web/src/types/video.ts Normal file
View File

@@ -0,0 +1,22 @@
/** One video record; parseCSV produces these from the rows of the classification CSV. */
export interface VideoData {
video_title: string;
video_url: string;
thumbnail_url: string;
classification: string;
language: string;
channel_name: string;
channel_link: string;
/** Length in seconds; the only field parseCSV converts to a number. */
video_length_seconds: number;
video_date: string;
/** Comma-separated list of tags. */
detailed_subtags: string;
playlist_name: string;
playlist_link: string;
image_data: string;
/** Shown in the table as the "Added" date. */
timestamp: string;
}
/**
 * Sets of values per filterable field.
 * NOTE(review): filterVideos in utils/search takes a single string per field
 * rather than arrays — confirm where this shape is actually used.
 */
export interface FilterOptions {
classification: string[];
language: string[];
playlist_name: string[];
}

View File

@@ -0,0 +1,41 @@
import Papa from 'papaparse';
import type { VideoData } from '../types/video';
/**
 * Parses CSV text with a header row into video records.
 *
 * Empty lines are skipped and `video_length_seconds` is converted to an
 * integer (0 when it cannot be parsed); every other field stays a string.
 * Parse errors are logged as a warning and do not stop the rows that were
 * read from being returned.
 *
 * @param csvText - Raw CSV file contents.
 * @returns The parsed rows.
 */
export function parseCSV(csvText: string): VideoData[] {
  const { data, errors } = Papa.parse<VideoData>(csvText, {
    header: true,
    skipEmptyLines: true,
    // Convert numeric fields
    transform: (value, field) =>
      field === 'video_length_seconds' ? parseInt(value) || 0 : value,
  });

  if (errors.length > 0) {
    console.warn('CSV parsing errors:', errors);
  }

  return data;
}
/**
 * Formats a length in seconds as `m:ss` (minutes are not capped at 59, so
 * an hour is shown as `60:00`).
 *
 * Fractional seconds are rounded down. Missing, non-finite or negative input
 * is shown as `0:00` rather than leaking `NaN` or minus signs into the UI.
 *
 * @param seconds - Length in seconds.
 * @returns The formatted duration.
 */
export function formatDuration(seconds: number): string {
  const totalSeconds =
    Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;
  const minutes = Math.floor(totalSeconds / 60);
  const remainingSeconds = totalSeconds % 60;
  return `${minutes}:${remainingSeconds.toString().padStart(2, '0')}`;
}
/**
 * Formats a date string for display as e.g. `Jul 12, 2025` (en-US).
 *
 * `new Date()` does not throw on unparseable input — it yields an invalid
 * date — so validity is checked explicitly and the original string is
 * returned unchanged when it cannot be parsed.
 *
 * @param dateString - Date in any format `Date` can parse.
 * @returns The formatted date, or `dateString` itself when unparseable.
 */
export function formatDate(dateString: string): string {
  const date = new Date(dateString);
  if (Number.isNaN(date.getTime())) {
    return dateString;
  }
  return date.toLocaleDateString('en-US', {
    year: 'numeric',
    month: 'short',
    day: 'numeric'
  });
}

57
web/src/utils/search.ts Normal file
View File

@@ -0,0 +1,57 @@
import type { VideoData } from '../types/video';
/**
 * Case-insensitive substring search across a video's title, channel, tags,
 * classification, language, playlist, length and dates.
 *
 * Surrounding whitespace in the search term is ignored; a blank term returns
 * the input list unchanged.
 *
 * @param videos - Records to search.
 * @param searchTerm - Text typed by the user.
 * @returns The records with at least one matching field.
 */
export function searchVideos(videos: VideoData[], searchTerm: string): VideoData[] {
  // Match on the trimmed term, the same one the blank check uses, so stray
  // leading/trailing spaces do not hide otherwise matching rows.
  const term = searchTerm.trim().toLowerCase();
  if (!term) return videos;

  return videos.filter(video => {
    const searchableFields = [
      video.video_title,
      video.channel_name,
      video.detailed_subtags,
      video.classification,
      video.language,
      video.playlist_name,
      // String() instead of .toString(): a row without a length must not throw.
      String(video.video_length_seconds ?? ''),
      formatDateForSearch(video.video_date),
      formatDateForSearch(video.timestamp)
    ];
    return searchableFields.some(field =>
      field && field.toLowerCase().includes(term)
    );
  });
}
/**
 * Renders a date string in en-US numeric form (e.g. `7/12/2025`) so it can
 * be matched by text search.
 *
 * `new Date()` never throws on bad input, so validity is checked explicitly;
 * unparseable values are returned as-is instead of becoming the searchable
 * text "Invalid Date".
 *
 * @param dateString - Date in any format `Date` can parse.
 * @returns The numeric date, or `dateString` itself when unparseable.
 */
function formatDateForSearch(dateString: string): string {
  const date = new Date(dateString);
  return Number.isNaN(date.getTime())
    ? dateString
    : date.toLocaleDateString('en-US');
}
/**
 * Keeps the videos that match every active filter.
 *
 * A filter is inactive when it is missing, empty or `'all'`; an active
 * filter requires the video's field to equal it exactly.
 *
 * @param videos - Records to filter.
 * @param filters - Wanted classification, language and playlist name.
 * @returns The matching records, in their original order.
 */
export function filterVideos(
  videos: VideoData[],
  filters: { classification?: string; language?: string; playlist_name?: string }
): VideoData[] {
  const filterKeys = ['classification', 'language', 'playlist_name'] as const;

  return videos.filter(video =>
    filterKeys.every(key => {
      const wanted = filters[key];
      return !wanted || wanted === 'all' || video[key] === wanted;
    })
  );
}
/**
 * Collects the distinct, non-empty values of one field across all videos,
 * sorted with the default array sort order.
 *
 * @param videos - Records to scan.
 * @param field - Name of the field to collect.
 * @returns Sorted distinct values.
 */
export function getUniqueValues(videos: VideoData[], field: keyof VideoData): string[] {
  const distinct = new Set<string>();
  for (const video of videos) {
    const value = video[field] as string;
    // Skip empty or missing values.
    if (value) {
      distinct.add(value);
    }
  }
  return Array.from(distinct).sort();
}

20
web/tsconfig.json Normal file
View File

@@ -0,0 +1,20 @@
{
"extends": "astro/tsconfigs/strict",
"include": [
".astro/types.d.ts",
"**/*"
],
"exclude": [
"dist"
],
"compilerOptions": {
"jsx": "react-jsx",
"jsxImportSource": "react",
"baseUrl": ".",
"paths": {
"@/*": [
"./src/*"
]
}
}
}