Add database schema initialization

This commit is contained in:
2025-07-14 00:18:50 -05:00
parent 920736b6f7
commit 6f000315e5

View File

@ -0,0 +1,154 @@
-- Voice Translator Database Schema
-- Initialize database tables and indexes
-- Enable UUID extension
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Connections table - tracks Discord voice channel sessions
CREATE TABLE connections (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE,
guild_id BIGINT NOT NULL,
guild_name VARCHAR(255),
channel_id BIGINT NOT NULL,
channel_name VARCHAR(255),
inviter_id BIGINT NOT NULL,
inviter_name VARCHAR(255),
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
ended_at TIMESTAMP WITH TIME ZONE,
participant_count INTEGER DEFAULT 0,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Recordings table - individual user voice recordings
CREATE TABLE recordings (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE,
connection_id INTEGER REFERENCES connections(id) ON DELETE CASCADE,
speaker_id BIGINT NOT NULL,
speaker_nickname VARCHAR(255),
speaker_username VARCHAR(255),
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
ended_at TIMESTAMP WITH TIME ZONE,
duration_seconds DECIMAL(10,3),
file_name VARCHAR(500),
file_path VARCHAR(1000),
file_size_bytes BIGINT,
status VARCHAR(50) DEFAULT 'recording', -- recording, queued, processing, completed, failed
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Transcriptions table - speech-to-text results
CREATE TABLE transcriptions (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE,
recording_id INTEGER REFERENCES recordings(id) ON DELETE CASCADE,
detected_language VARCHAR(10),
language_confidence DECIMAL(5,4),
transcription_text TEXT,
word_count INTEGER,
processing_time_ms INTEGER,
whisper_model VARCHAR(50),
status VARCHAR(50) DEFAULT 'pending', -- pending, processing, completed, failed
error_message TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Translations table - multi-language translations
CREATE TABLE translations (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE,
transcription_id INTEGER REFERENCES transcriptions(id) ON DELETE CASCADE,
target_language VARCHAR(10) NOT NULL,
translated_text TEXT,
translation_service VARCHAR(50), -- google, deepl, azure
confidence_score DECIMAL(5,4),
processing_time_ms INTEGER,
status VARCHAR(50) DEFAULT 'pending', -- pending, processing, completed, failed
error_message TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Processing metrics table - performance tracking
CREATE TABLE processing_metrics (
id SERIAL PRIMARY KEY,
service_name VARCHAR(50) NOT NULL,
operation VARCHAR(100) NOT NULL,
duration_ms INTEGER,
success BOOLEAN,
error_message TEXT,
metadata JSONB,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- User activity summary table - aggregated stats
CREATE TABLE user_activity (
id SERIAL PRIMARY KEY,
user_id BIGINT NOT NULL,
guild_id BIGINT NOT NULL,
date DATE NOT NULL,
total_speaking_time_seconds INTEGER DEFAULT 0,
total_words_spoken INTEGER DEFAULT 0,
total_recordings INTEGER DEFAULT 0,
most_used_language VARCHAR(10),
languages_detected JSONB DEFAULT '[]',
session_count INTEGER DEFAULT 0,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
UNIQUE(user_id, guild_id, date)
);
-- Indexes for performance
CREATE INDEX idx_connections_guild_channel ON connections(guild_id, channel_id);
CREATE INDEX idx_connections_started_at ON connections(started_at);
CREATE INDEX idx_recordings_connection_id ON recordings(connection_id);
CREATE INDEX idx_recordings_speaker_id ON recordings(speaker_id);
CREATE INDEX idx_recordings_status ON recordings(status);
CREATE INDEX idx_recordings_started_at ON recordings(started_at);
CREATE INDEX idx_transcriptions_recording_id ON transcriptions(recording_id);
CREATE INDEX idx_transcriptions_language ON transcriptions(detected_language);
CREATE INDEX idx_transcriptions_status ON transcriptions(status);
CREATE INDEX idx_translations_transcription_id ON translations(transcription_id);
CREATE INDEX idx_translations_target_language ON translations(target_language);
CREATE INDEX idx_translations_status ON translations(status);
CREATE INDEX idx_metrics_service_operation ON processing_metrics(service_name, operation);
CREATE INDEX idx_metrics_created_at ON processing_metrics(created_at);
CREATE INDEX idx_user_activity_user_date ON user_activity(user_id, date);
CREATE INDEX idx_user_activity_guild_date ON user_activity(guild_id, date);
-- Updated timestamp triggers
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ language 'plpgsql';
CREATE TRIGGER update_connections_updated_at BEFORE UPDATE ON connections
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_recordings_updated_at BEFORE UPDATE ON recordings
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_transcriptions_updated_at BEFORE UPDATE ON transcriptions
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_translations_updated_at BEFORE UPDATE ON translations
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_user_activity_updated_at BEFORE UPDATE ON user_activity
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- Insert some initial data for testing
INSERT INTO connections (guild_id, guild_name, channel_id, channel_name, inviter_id, inviter_name)
VALUES (123456789, 'Test Guild', 987654321, 'General', 111222333, 'TestUser');