/*
 * Taken from /e/OS Assistant
 *
 * Copyright (C) 2024 MURENA SAS
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.stypox.dicio.io.input.vosk

import android.util.Log
import org.json.JSONException
import org.json.JSONObject
import org.stypox.dicio.io.input.InputEvent
import org.vosk.android.RecognitionListener
import org.vosk.android.SpeechService


/**
 * This class is an implementation of [RecognitionListener] that listens to events generated by a
 * Vosk [SpeechService], pushes them to [eventListener], and handles the listening state via
 * [VoskInputDevice].
 *
 * @param voskInputDevice the device whose `stopListening` is invoked when a result, an error or a
 * timeout is received
 * @param eventListener callback receiving the [InputEvent]s built from the Vosk callbacks
 * @param speechService the [SpeechService] that is passed along to `stopListening`
 */
internal class VoskListener(
    private val voskInputDevice: VoskInputDevice,
    private val eventListener: (InputEvent) -> Unit,
    private val speechService: SpeechService,
) : RecognitionListener {

    /**
     * Called when partial recognition result is available. Emits an [InputEvent.Partial] if the
     * json contains a non-blank `"partial"` string; malformed json is only logged.
     *
     * @param s the json string produced by Vosk
     */
    override fun onPartialResult(s: String) {
        Log.d(TAG, "onPartialResult called with s = $s")

        // Try to obtain the partial result from the json object. Unlike in `onResult`, where more
        // than one alternative may be generated, in this case there will be only the alternative
        // with most confidence.
        val partialInput = try {
            JSONObject(s).getString("partial")
        } catch (e: JSONException) {
            // a broken partial result is not fatal: log it and wait for the next callback
            Log.e(TAG, "Can't obtain partial result from $s", e)
            return
        }

        // emit a partial event, but only if there is something to show
        if (partialInput.isNotBlank()) {
            eventListener(InputEvent.Partial(partialInput))
        }
    }

    /**
     * Called after silence occurred. Stops listening and then emits exactly one event:
     * [InputEvent.Final] with the recognized utterances, [InputEvent.None] if there are none, or
     * [InputEvent.Error] if the json could not be parsed.
     *
     * @param s the json string produced by Vosk
     */
    @Suppress("ktlint:Style:NestedBlockDepth")
    override fun onResult(s: String) {
        Log.d(TAG, "onResult called with s = $s")

        // we only want to listen one sentence at a time
        // NOTE(review): the `false` flag presumably tells the device not to emit an event on its
        // own, since one is emitted below; confirm against VoskInputDevice.stopListening
        voskInputDevice.stopListening(speechService, eventListener, false)

        // collect all alternative user inputs generated by the STT model
        val inputs = try {
            val jsonResult = JSONObject(s)
            utterancesFromJson(jsonResult)
        } catch (e: JSONException) {
            Log.e(TAG, "Can't obtain result from $s", e)
            eventListener(InputEvent.Error(e))
            return
        }

        // emit the final event
        if (inputs.isEmpty()) {
            eventListener(InputEvent.None)
        } else {
            eventListener(InputEvent.Final(inputs))
        }
    }

    /**
     * Extracts the recognized utterances from a Vosk result.
     *
     * If [jsonResult] has an `"alternatives"` array whose first item is a json object, every
     * object in the array is turned into a `(text, confidence)` pair, where the confidence is
     * divided by the confidence of the first alternative. Otherwise the top-level `"text"` field
     * is used, with confidence `1.0`.
     *
     * @param jsonResult the parsed json produced by Vosk
     * @return the non-blank utterances paired with their confidence, possibly empty
     * @throws JSONException if an alternative lacks a `"text"` string or a numeric `"confidence"`
     */
    private fun utterancesFromJson(jsonResult: JSONObject): List<Pair<String, Float>> {
        val alternatives = jsonResult.optJSONArray("alternatives")
        // `optJSONObject(0)` is null both for an empty array and for a non-object first item
        val firstAlternative = alternatives?.optJSONObject(0)
        if (alternatives == null || firstAlternative == null) {
            return utterancesFromText(jsonResult)
        }

        // the values for confidence provided by Vosk are not from 0.0 to 1.0, so they are scaled
        // by the confidence of the first alternative
        val maxConfidence = firstAlternative.optDouble("confidence").toFloat()
        // dividing by zero (or by a non-finite value) would yield NaN/Infinity confidences; in
        // that case every alternative gets 1.0, like in the single "text" case
        val canNormalize = maxConfidence.isFinite() && maxConfidence != 0.0f

        return (0 until alternatives.length())
            .mapNotNull { i -> alternatives.optJSONObject(i) }
            .map { alternative ->
                // for some reason the texts returned by Vosk start with " "
                val text = alternative.getString("text").removePrefix(" ")
                val confidence = alternative.getDouble("confidence").toFloat()
                Pair(text, if (canNormalize) confidence / maxConfidence else 1.0f)
            }
            // empty results are not real results, so filter them out so that the listener
            // will be sent an "InputEvent.None" instead of a ".Final" with empty utterances
            .filter { it.first.isNotBlank() }
    }

    /**
     * Builds the utterance list from the top-level `"text"` field of [jsonResult], used when no
     * usable `"alternatives"` array is present.
     *
     * @return a single utterance with confidence `1.0`, or an empty list if the text is blank
     */
    private fun utterancesFromText(jsonResult: JSONObject): List<Pair<String, Float>> {
        val text = jsonResult.optString("text")
        return if (text.isBlank()) {
            emptyList()
        } else {
            listOf(Pair(text.removePrefix(" "), 1.0f))
        }
    }

    /**
     * Called after stream end. In our case, always called after silence occurred, since we call
     * stopListening in onResult. Only logs the received json.
     */
    override fun onFinalResult(s: String) {
        Log.d(TAG, "onFinalResult called with s = $s")
    }

    /**
     * Called when an error occurs. Stops listening and forwards the exception to [eventListener]
     * as an [InputEvent.Error].
     */
    override fun onError(e: Exception) {
        // TODO set error state in VoskInputDevice
        Log.e(TAG, "onError called", e)
        voskInputDevice.stopListening(speechService, eventListener, false)
        eventListener(InputEvent.Error(e))
    }

    /**
     * Called after timeout expired. Stops listening without emitting any event from here.
     */
    override fun onTimeout() {
        Log.d(TAG, "onTimeout called")
        // NOTE(review): the `true` flag presumably makes the device emit the "no input" event
        // itself; confirm against VoskInputDevice.stopListening
        voskInputDevice.stopListening(speechService, eventListener, true)
    }

    companion object {
        private val TAG = VoskListener::class.simpleName
    }
}
