import { useEffect, useMemo, useRef, useState } from 'react';

/**
 * Optional callbacks accepted by `useAudioStreamToText`.
 */
interface AudioStreamToText {
    /** Invoked with the full, trimmed transcript each time a recognition result event is handled. */
    onTranscriptChange?: (text: string) => void;
    /** Invoked when `stopRecognition` is called, unless that call passes a truthy `skipCallback`. */
    onStop?: () => void;
}

/** Minimal shape of a speech-recognition result event, limited to what this hook reads. */
interface RecognitionResultEventLike {
    results: ArrayLike<ArrayLike<{ transcript: string }>>;
}

/** Minimal shape of the browser speech-recognition object this hook drives. */
interface RecognitionLike {
    continuous: boolean;
    interimResults: boolean;
    onstart: (() => void) | null;
    onend: (() => void) | null;
    onaudiostart: (() => void) | null;
    onaudioend: (() => void) | null;
    onresult: ((event: RecognitionResultEventLike) => void) | null;
    start(): void;
    stop(): void;
}

/**
 * Looks up the speech-recognition constructor, preferring the standard name
 * and falling back to the WebKit-prefixed one.
 *
 * @returns The constructor, or `undefined` when there is no `window` (e.g.
 * server-side rendering) or the browser does not expose the API.
 */
const resolveRecognitionConstructor = (): (new () => RecognitionLike) | undefined => {
    if (typeof window === 'undefined') {
        return undefined;
    }
    const speechWindow = window as unknown as {
        SpeechRecognition?: new () => RecognitionLike;
        webkitSpeechRecognition?: new () => RecognitionLike;
    };
    return speechWindow.SpeechRecognition || speechWindow.webkitSpeechRecognition;
};

/**
 * React hook that streams microphone audio through the browser speech
 * recognition API and exposes the running transcript.
 *
 * The recogniser is created lazily on `startRecognition` (never during render),
 * so the hook is safe to call where the API is unavailable; starting then logs
 * a warning and does nothing. Each start uses a fresh recogniser; the previous
 * one has its handlers detached before it is stopped so that late events from
 * it cannot change this hook's state.
 *
 * @param onTranscriptChange Called with the full, trimmed transcript on every
 * result event. The latest callback passed to the hook is always the one used.
 * @param onStop Called from `stopRecognition` unless `skipCallback` is truthy.
 * @returns `isRecognizing` plus `startRecognition`, `stopRecognition`,
 * `toggleRecognition` and `getTranscribe` (returns the current transcript).
 */
const useAudioStreamToText = ({ onTranscriptChange, onStop }: AudioStreamToText) => {
    const [isRecognizing, setIsRecognizing] = useState(false);
    const recognitionRef = useRef<RecognitionLike | null>(null);
    const fullTranscriptRef = useRef<string>("");
    // Holds the latest callbacks so long-lived recogniser handlers never call stale ones.
    const callbacksRef = useRef<AudioStreamToText>({ onTranscriptChange, onStop });

    useEffect(() => {
        callbacksRef.current = { onTranscriptChange, onStop };
    });

    const handleRecognitionResult = (event: RecognitionResultEventLike) => {
        // Every event carries the whole result list, so the transcript is rebuilt from scratch.
        const fullTranscript = Array.from(event.results, (result) => result[0].transcript)
            .join(' ')
            .trim();
        fullTranscriptRef.current = fullTranscript;

        const notify = callbacksRef.current.onTranscriptChange;
        if (notify) {
            notify(fullTranscript);
        }
    };

    /** Detaches every handler from the current recogniser, stops it and forgets it. */
    const disposeRecognition = () => {
        const instance = recognitionRef.current;
        if (!instance) {
            return;
        }
        recognitionRef.current = null;
        // Detach first: stop() triggers asynchronous end events that must not reach this hook.
        instance.onstart = null;
        instance.onend = null;
        instance.onaudiostart = null;
        instance.onaudioend = null;
        instance.onresult = null;
        instance.stop();
    };

    // Release the recogniser (and the microphone) when the component unmounts.
    useEffect(() => () => {
        disposeRecognition();
    }, []);

    const startRecognition = () => {
        disposeRecognition();
        fullTranscriptRef.current = "";

        const RecognitionConstructor = resolveRecognitionConstructor();
        if (!RecognitionConstructor) {
            console.warn('Speech recognition is not supported in this environment.');
            setIsRecognizing(false);
            return;
        }

        const instance = new RecognitionConstructor();
        instance.continuous = true;
        instance.interimResults = true;
        instance.onstart = () => setIsRecognizing(true);
        instance.onend = () => setIsRecognizing(false);
        instance.onaudiostart = () => setIsRecognizing(true);
        instance.onaudioend = () => setIsRecognizing(false);
        instance.onresult = handleRecognitionResult;

        recognitionRef.current = instance;
        instance.start();
    };

    const stopRecognition = (skipCallback?: boolean) => {
        const instance = recognitionRef.current;
        if (instance) {
            instance.stop();
        }
        if (!skipCallback) {
            const notifyStop = callbacksRef.current.onStop;
            if (notifyStop) {
                notifyStop();
            }
        }
        setIsRecognizing(false);
        // Cleared after onStop so the callback can still read the transcript via getTranscribe().
        fullTranscriptRef.current = "";
    };

    const toggleRecognition = () => {
        if (isRecognizing) {
            stopRecognition();
        } else {
            startRecognition();
        }
    };

    const getTranscribe = () => {
        return fullTranscriptRef.current;
    };

    return {
        isRecognizing,
        startRecognition,
        stopRecognition,
        toggleRecognition,
        getTranscribe,
    };
};

export default useAudioStreamToText;
