From 6f000315e538e1b10e796035898ea1456c78cb04 Mon Sep 17 00:00:00 2001 From: MAHaines Date: Mon, 14 Jul 2025 00:18:50 -0500 Subject: [PATCH] Add database schema initialization --- infrastructure/database/init.sql | 154 +++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 infrastructure/database/init.sql diff --git a/infrastructure/database/init.sql b/infrastructure/database/init.sql new file mode 100644 index 0000000..fd93e4a --- /dev/null +++ b/infrastructure/database/init.sql @@ -0,0 +1,154 @@ +-- Voice Translator Database Schema +-- Initialize database tables and indexes + +-- Enable UUID extension +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; + +-- Connections table - tracks Discord voice channel sessions +CREATE TABLE connections ( + id SERIAL PRIMARY KEY, + uuid UUID DEFAULT uuid_generate_v4() UNIQUE, + guild_id BIGINT NOT NULL, + guild_name VARCHAR(255), + channel_id BIGINT NOT NULL, + channel_name VARCHAR(255), + inviter_id BIGINT NOT NULL, + inviter_name VARCHAR(255), + started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + ended_at TIMESTAMP WITH TIME ZONE, + participant_count INTEGER DEFAULT 0, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Recordings table - individual user voice recordings +CREATE TABLE recordings ( + id SERIAL PRIMARY KEY, + uuid UUID DEFAULT uuid_generate_v4() UNIQUE, + connection_id INTEGER REFERENCES connections(id) ON DELETE CASCADE, + speaker_id BIGINT NOT NULL, + speaker_nickname VARCHAR(255), + speaker_username VARCHAR(255), + started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + ended_at TIMESTAMP WITH TIME ZONE, + duration_seconds DECIMAL(10,3), + file_name VARCHAR(500), + file_path VARCHAR(1000), + file_size_bytes BIGINT, + status VARCHAR(50) DEFAULT 'recording', -- recording, queued, processing, completed, failed + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Transcriptions table - speech-to-text results +CREATE TABLE transcriptions ( + id SERIAL PRIMARY KEY, + uuid UUID DEFAULT uuid_generate_v4() UNIQUE, + recording_id INTEGER REFERENCES recordings(id) ON DELETE CASCADE, + detected_language VARCHAR(10), + language_confidence DECIMAL(5,4), + transcription_text TEXT, + word_count INTEGER, + processing_time_ms INTEGER, + whisper_model VARCHAR(50), + status VARCHAR(50) DEFAULT 'pending', -- pending, processing, completed, failed + error_message TEXT, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Translations table - multi-language translations +CREATE TABLE translations ( + id SERIAL PRIMARY KEY, + uuid UUID DEFAULT uuid_generate_v4() UNIQUE, + transcription_id INTEGER REFERENCES transcriptions(id) ON DELETE CASCADE, + target_language VARCHAR(10) NOT NULL, + translated_text TEXT, + translation_service VARCHAR(50), -- google, deepl, azure + confidence_score DECIMAL(5,4), + processing_time_ms INTEGER, + status VARCHAR(50) DEFAULT 'pending', -- pending, processing, completed, failed + error_message TEXT, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Processing metrics table - performance tracking +CREATE TABLE processing_metrics ( + id SERIAL PRIMARY KEY, + service_name VARCHAR(50) NOT NULL, + operation VARCHAR(100) NOT NULL, + duration_ms INTEGER, + success BOOLEAN, + error_message TEXT, + metadata JSONB, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- User activity summary table - aggregated stats +CREATE TABLE user_activity ( + id SERIAL PRIMARY KEY, + user_id BIGINT NOT NULL, + guild_id BIGINT NOT NULL, + date DATE NOT NULL, + total_speaking_time_seconds INTEGER DEFAULT 0, + total_words_spoken INTEGER DEFAULT 0, + total_recordings INTEGER DEFAULT 0, + most_used_language VARCHAR(10), + languages_detected JSONB DEFAULT '[]', + session_count INTEGER DEFAULT 0, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, guild_id, date) +); + +-- Indexes for performance +CREATE INDEX idx_connections_guild_channel ON connections(guild_id, channel_id); +CREATE INDEX idx_connections_started_at ON connections(started_at); + +CREATE INDEX idx_recordings_connection_id ON recordings(connection_id); +CREATE INDEX idx_recordings_speaker_id ON recordings(speaker_id); +CREATE INDEX idx_recordings_status ON recordings(status); +CREATE INDEX idx_recordings_started_at ON recordings(started_at); + +CREATE INDEX idx_transcriptions_recording_id ON transcriptions(recording_id); +CREATE INDEX idx_transcriptions_language ON transcriptions(detected_language); +CREATE INDEX idx_transcriptions_status ON transcriptions(status); + +CREATE INDEX idx_translations_transcription_id ON translations(transcription_id); +CREATE INDEX idx_translations_target_language ON translations(target_language); +CREATE INDEX idx_translations_status ON translations(status); + +CREATE INDEX idx_metrics_service_operation ON processing_metrics(service_name, operation); +CREATE INDEX idx_metrics_created_at ON processing_metrics(created_at); + +CREATE INDEX idx_user_activity_user_date ON user_activity(user_id, date); +CREATE INDEX idx_user_activity_guild_date ON user_activity(guild_id, date); + +-- Updated timestamp triggers +CREATE OR REPLACE FUNCTION update_updated_at_column() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ language 'plpgsql'; + +CREATE TRIGGER update_connections_updated_at BEFORE UPDATE ON connections + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER update_recordings_updated_at BEFORE UPDATE ON recordings + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER update_transcriptions_updated_at BEFORE UPDATE ON transcriptions + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER update_translations_updated_at BEFORE UPDATE ON translations + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER update_user_activity_updated_at BEFORE UPDATE ON user_activity + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +-- Insert some initial data for testing +INSERT INTO connections (guild_id, guild_name, channel_id, channel_name, inviter_id, inviter_name) +VALUES (123456789, 'Test Guild', 987654321, 'General', 111222333, 'TestUser');