// taken from https://github.com/kdavis-mozilla/vad.js/tree/master

/**
 * Configuration for `VoiceAudioDetector`.
 *
 * The detector merges a partial object of this shape over its built-in
 * defaults, so callers only need to supply the fields they want to change
 * (plus the mandatory `source`).
 */
interface IVoiceAudioDetectorOptions {
    /** FFT size assigned to the analyser; the sample rate divided by it gives the width of one bin in hertz. */
    fftSize: number;

    /** Buffer length handed to the script processor; the sample rate divided by it gives the iteration frequency. */
    bufferLen: number;

    /** Called when the detector switches from "voice" to "no voice". */
    voiceStopCallback: () => void;

    /** Called when the detector switches from "no voice" to "voice". */
    voiceStartCallback: () => void;

    /** Copied to the analyser's `smoothingTimeConstant`. */
    smoothingTimeConstant: number;

    /** The initial energy offset; the detector keeps adapting its own copy afterwards. */
    energy_offset: number;

    /** Positive threshold = current offset * this ratio (with 2, the signal must be twice the offset). */
    energy_threshold_ratio_pos: number;

    /** Negative threshold = current offset * this ratio (with 0.5, the signal must be half the offset). */
    energy_threshold_ratio_neg: number;

    /** Size of the integration change compared to the signal per second. */
    energy_integration: number;

    /**
     * Frequency weighting applied when summing energy. A bin takes the `v`
     * of the first entry whose `f` (in hertz) lies above the bin's
     * frequency; bins not below any `f` are weighted 0.
     */
    filter: { f: number; v: number }[];

    /** Audio source to analyse. Required: the detector's constructor throws when it is missing. */
    source: MediaStreamAudioSourceNode | null;

    /** Audio context in use; the detector's constructor overwrites this with the context of `source`. */
    context: BaseAudioContext | null;
}

/**
 * Energy-based voice activity detector for a Web Audio source node.
 *
 * The constructor connects the source to an analyser and to a script
 * processor. On every `audioprocess` event the analyser's frequency data is
 * read, converted from decibels to linear values, reduced to one filtered
 * energy figure and compared against an adaptive offset. A bounded trend
 * counter smooths those comparisons; crossing `voiceTrendStart` /
 * `voiceTrendEnd` fires `voiceStartCallback` / `voiceStopCallback`.
 *
 * Call `destroy()` when the detector is no longer needed, otherwise the
 * nodes created here stay connected to the audio graph.
 */
export class VoiceAudioDetector {
    /** When true, the constructor prints the resolved options and timing values. */
    DEBUG: boolean = false;

    /** Effective options: the defaults below overlaid with the constructor argument. */
    options: IVoiceAudioDetectorOptions = {
        fftSize: 512,
        bufferLen: 512,
        voiceStopCallback: () => { },
        voiceStartCallback: () => { },
        smoothingTimeConstant: 0.99,
        energy_offset: 1e-8, // The initial offset.
        energy_threshold_ratio_pos: 2, // Signal must be twice the offset
        energy_threshold_ratio_neg: 0.5, // Signal must be half the offset
        energy_integration: 1, // Size of integration change compared to the signal per second.
        filter: [
            { f: 200, v: 0 }, // 0 -> 200 is 0
            { f: 2000, v: 1 } // 200 -> 2k is 1
        ],
        source: null,
        context: null
    };

    /** Width of one FFT bin in hertz (sample rate / fftSize). */
    hertzPerBin: number;
    /** Number of processing iterations per second (sample rate / bufferLen). */
    iterationFrequency: number;
    /** Seconds per processing iteration (1 / iterationFrequency). */
    iterationPeriod: number;
    /** Per-bin weights built by `setFilter`. */
    filter: number[] = [];

    /** Cache flags for values derived from the current frame; reset by `update`. */
    ready: { energy?: boolean } = {};
    /** True when Voice Activity Detected. */
    vadState = false;

    // Energy detector props
    energy_offset: number = this.options.energy_offset;
    energy_threshold_pos: number = this.energy_offset * this.options.energy_threshold_ratio_pos;
    energy_threshold_neg: number = this.energy_offset * this.options.energy_threshold_ratio_neg;

    // Trend counter and its bounds / trigger levels
    voiceTrend = 0;
    voiceTrendMax = 10;
    voiceTrendMin = -10;
    voiceTrendStart = 5;
    voiceTrendEnd = -5;

    /** Analyser node created from the audio context. */
    analyser: any;
    /** Latest frequency data as delivered by the analyser (decibels). */
    floatFrequencyData: Float32Array;
    /** Script processor node whose `audioprocess` events drive the detector. */
    scriptProcessorNode: any;
    /** Linear-scale copy of `floatFrequencyData`, refreshed by `update`. */
    floatFrequencyDataLinear: Float32Array;

    // Bounded console logging, see `triggerLog` / `log`
    logging: boolean;
    log_i: number;
    log_limit: number;

    /** Energy computed by the most recent `getEnergy` call. */
    energy: number = 0;

    /** Set once `destroy` has run, so repeated calls are no-ops. */
    private destroyed = false;

    /**
     * Builds the detector and attaches it to the audio graph.
     *
     * @param options Overrides for the default options; `source` is mandatory.
     * @throws Error when no source is given, or when the source has no audio context.
     */
    constructor(options: Partial<IVoiceAudioDetectorOptions>) {
        this.options = { ...this.options, ...options };

        if (this.DEBUG) console.log(this.options);

        // Require source
        const source = this.options.source;
        if (!source)
            throw new Error("The options must specify a MediaStreamAudioSourceNode.");

        // The context is always taken from the source; any `context` passed in is replaced.
        const context = source.context || null;
        this.options.context = context;

        if (context === null)
            throw new Error("The options must specify a base audio context.");

        // Calculate time relationships
        this.hertzPerBin = context.sampleRate / this.options.fftSize;
        this.iterationFrequency = context.sampleRate / this.options.bufferLen;
        this.iterationPeriod = 1 / this.iterationFrequency;

        if (this.DEBUG) console.log(
            'Vad' +
            ' | sampleRate: ' + context.sampleRate +
            ' | hertzPerBin: ' + this.hertzPerBin +
            ' | iterationFrequency: ' + this.iterationFrequency +
            ' | iterationPeriod: ' + this.iterationPeriod
        );

        // Needs hertzPerBin, so it must run after the time relationships are known.
        this.setFilter(this.options.filter);

        this.energy_offset = this.options.energy_offset;
        this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
        this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

        // Create analyser
        this.analyser = context.createAnalyser();
        this.analyser.smoothingTimeConstant = this.options.smoothingTimeConstant; // 0.99;
        this.analyser.fftSize = this.options.fftSize;

        this.floatFrequencyData = new Float32Array(this.analyser.frequencyBinCount);

        // Setup local storage of the Linear FFT data
        this.floatFrequencyDataLinear = new Float32Array(this.floatFrequencyData.length);

        // Connect this.analyser
        source.connect(this.analyser);

        // Create ScriptProcessorNode
        this.scriptProcessorNode = context.createScriptProcessor(this.options.bufferLen, 1, 1);

        // Connect scriptProcessorNode to the destination (theoretically not required)
        this.scriptProcessorNode.connect(context.destination);

        this.scriptProcessorNode.onaudioprocess = () => {
            this.analyser.getFloatFrequencyData(this.floatFrequencyData);
            this.update();
            this.monitor();
        };

        // Connect scriptProcessorNode
        source.connect(this.scriptProcessorNode);

        // log stuff
        this.logging = false;
        this.log_i = 0;
        this.log_limit = 100;
    }

    /**
     * Detaches the detector from the audio graph: clears the `audioprocess`
     * handler and removes exactly the connections the constructor created.
     * Other connections of the source are left alone. Safe to call repeatedly.
     */
    destroy(): void {
        if (this.destroyed) {
            return;
        }
        this.destroyed = true;

        this.scriptProcessorNode.onaudioprocess = null;
        this.options.source?.disconnect(this.analyser);
        this.options.source?.disconnect(this.scriptProcessorNode);
        this.scriptProcessorNode.disconnect();
    }

    /**
     * Rebuilds the per-bin weight table (`fftSize / 2` entries).
     *
     * Each bin takes the `v` of the first shape entry whose `f` lies above
     * the bin's frequency; bins not below any `f` get weight 0.
     *
     * @param shape Entries of frequency limit `f` (hertz) and weight `v`, in the order they should be tried.
     */
    setFilter(shape: ReadonlyArray<{ f: number; v: number }>): void {
        const binCount = this.options.fftSize / 2;
        const weights: number[] = [];

        for (let bin = 0; bin < binCount; bin++) {
            const frequency = bin * this.hertzPerBin;
            let weight = 0;
            for (const band of shape) {
                if (frequency < band.f) {
                    weight = band.v;
                    break; // First matching band wins
                }
            }
            weights[bin] = weight;
        }

        this.filter = weights;
    }

    /**
     * Turns logging on and restarts the message counter.
     *
     * @param limit Maximum number of messages to print; when omitted (or not a number) the current limit is kept.
     */
    triggerLog(limit?: number): void {
        this.logging = true;
        this.log_i = 0;
        this.log_limit = typeof limit === 'number' ? limit : this.log_limit;
    }

    /**
     * Prints `msg` while logging is active and the limit is not reached;
     * otherwise switches logging off.
     *
     * @param msg Value passed to `console.log`.
     */
    log(msg: unknown): void {
        if (this.logging && this.log_i < this.log_limit) {
            this.log_i++;
            console.log(msg);
        } else {
            this.logging = false;
        }
    }

    /**
     * Refreshes the linear copy of the frequency data (10^(dB / 10) per bin)
     * and invalidates values cached for the previous frame.
     */
    update(): void {
        // Update the local version of the Linear FFT
        const decibels = this.floatFrequencyData;
        for (let i = 0, iLen = decibels.length; i < iLen; i++) {
            this.floatFrequencyDataLinear[i] = Math.pow(10, decibels[i] / 10);
        }
        this.ready = {};
    }

    /**
     * Returns the filtered energy of the current frame: the sum over all
     * bins of weight * linear value squared. The result is cached until the
     * next `update`.
     *
     * @returns The energy of the current frame.
     */
    getEnergy(): number {
        if (this.ready.energy) {
            return this.energy;
        }

        const linear = this.floatFrequencyDataLinear;
        let total = 0;

        for (let i = 0, iLen = linear.length; i < iLen; i++) {
            total += this.filter[i] * linear[i] * linear[i];
        }

        this.energy = total;
        this.ready.energy = true;

        return total;
    }

    /**
     * Runs one detection step on the current frame: moves the trend counter,
     * adapts the energy offset and thresholds, and fires the start/stop
     * callbacks when the voice state flips.
     *
     * @returns The signal, i.e. the frame energy minus the offset in effect before this step.
     */
    monitor(): number {
        const energy = this.getEnergy();
        const signal = energy - this.energy_offset;

        if (signal > this.energy_threshold_pos) {
            this.voiceTrend = Math.min(this.voiceTrend + 1, this.voiceTrendMax);
        } else if (signal < -this.energy_threshold_neg) {
            this.voiceTrend = Math.max(this.voiceTrend - 1, this.voiceTrendMin);
        } else {
            // Neither threshold crossed: voiceTrend decays towards zero
            if (this.voiceTrend > 0) {
                this.voiceTrend--;
            } else if (this.voiceTrend < 0) {
                this.voiceTrend++;
            }
        }

        // Start of speech detected / end of speech detected
        const start = this.voiceTrend > this.voiceTrendStart;
        const end = !start && this.voiceTrend < this.voiceTrendEnd;

        // Integration brings in the real-time aspect through the relationship with the frequency this function is called.
        const integration = signal * this.iterationPeriod * this.options.energy_integration;

        // Idea?: The integration is affected by the voiceTrend magnitude? - Not sure. Not doing atm.

        // The !end limits the offset delta boost till after the end is detected.
        if (integration > 0 || !end) {
            this.energy_offset += integration;
        } else {
            this.energy_offset += integration * 10;
        }
        this.energy_offset = this.energy_offset < 0 ? 0 : this.energy_offset;
        this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
        this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

        // Broadcast the messages
        if (start && !this.vadState) {
            this.vadState = true;
            this.options.voiceStartCallback();
        }
        if (end && this.vadState) {
            this.vadState = false;
            this.options.voiceStopCallback();
        }

        // Only build the message when logging is on; this runs on every audio frame.
        if (this.logging) {
            this.log(
                'e: ' + energy +
                ' | e_of: ' + this.energy_offset +
                ' | e+_th: ' + this.energy_threshold_pos +
                ' | e-_th: ' + this.energy_threshold_neg +
                ' | signal: ' + signal +
                ' | int: ' + integration +
                ' | voiceTrend: ' + this.voiceTrend +
                ' | start: ' + start +
                ' | end: ' + end
            );
        }

        return signal;
    }
}