package com.alibaba.cloud.ai.dashscope.audio;

import com.alibaba.cloud.ai.dashscope.api.DashScopeAudioTranscriptionApi;
import com.alibaba.cloud.ai.dashscope.audio.transcription.AudioTranscriptionModel;
import com.alibaba.cloud.ai.dashscope.common.DashScopeException;
import com.alibaba.cloud.ai.dashscope.protocol.DashScopeWebSocketClient;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.time.Duration;
import java.util.List;
import java.util.Objects;
import java.util.UUID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.audio.transcription.AudioTranscription;
import org.springframework.ai.audio.transcription.AudioTranscriptionOptions;
import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt;
import org.springframework.ai.audio.transcription.AudioTranscriptionResponse;
import org.springframework.ai.audio.transcription.AudioTranscriptionResponseMetadata;
import org.springframework.ai.model.ModelOptionsUtils;
import org.springframework.ai.retry.RetryUtils;
import org.springframework.core.io.Resource;
import org.springframework.core.io.buffer.DataBufferUtils;
import org.springframework.core.io.buffer.DefaultDataBufferFactory;
import org.springframework.http.ResponseEntity;
import org.springframework.retry.support.RetryTemplate;
import reactor.core.publisher.Flux;
import reactor.core.scheduler.Schedulers;

/* loaded from: input_file:com/alibaba/cloud/ai/dashscope/audio/DashScopeAudioTranscriptionModel.class */
/**
 * {@link AudioTranscriptionModel} implementation backed by {@link DashScopeAudioTranscriptionApi}.
 *
 * <p>Three usage styles are offered:
 * <ul>
 *   <li>{@link #asyncCall} submits a transcription task and returns the submit response as-is;</li>
 *   <li>{@link #fetch} queries an existing task by id;</li>
 *   <li>{@link #call} submits a task and polls it until it reaches a terminal status;</li>
 *   <li>{@link #stream} pushes the audio resource in chunks over the realtime channel.</li>
 * </ul>
 *
 * <p>The {@link RetryTemplate} handed to the constructor is stored but is not used by any
 * method of this class.
 */
public class DashScopeAudioTranscriptionModel implements AudioTranscriptionModel {
    private static final Logger logger = LoggerFactory.getLogger(DashScopeAudioTranscriptionModel.class);

    /** Metadata keys under which response fields are published. */
    public static final String REQUEST_ID = "request_id";
    public static final String TASK_ID = "task_id";
    public static final String STATUS_CODE = "status_code";
    public static final String CODE = "code";
    public static final String USAGE = "usage";
    public static final String OUTPUT = "output";
    public static final String MESSAGE = "message";

    /** Pause before re-querying the task status at the start of polling, in milliseconds. */
    private static final int INITIAL_POLL_INTERVAL_MS = 1000;

    /** Upper bound for the pause between two status queries, in milliseconds. */
    private static final int MAX_POLL_INTERVAL_MS = 5000;

    /** The pause is doubled every time this many non-terminal polls have been observed. */
    private static final int POLLS_PER_BACKOFF_STEP = 3;

    /** Size of each audio chunk read from the resource for realtime streaming, in bytes. */
    private static final int AUDIO_CHUNK_SIZE_BYTES = 16384;

    /** Delay inserted between two streamed audio chunks, in milliseconds. */
    private static final long AUDIO_CHUNK_DELAY_MS = 100L;

    private final DashScopeAudioTranscriptionApi api;
    private final DashScopeAudioTranscriptionOptions options;
    private final RetryTemplate retryTemplate;

    /**
     * Creates a model with options produced by an empty
     * {@code DashScopeAudioTranscriptionOptions.builder()} and the default retry template.
     *
     * @param dashScopeAudioTranscriptionApi the API client used for every request
     */
    public DashScopeAudioTranscriptionModel(DashScopeAudioTranscriptionApi dashScopeAudioTranscriptionApi) {
        this(dashScopeAudioTranscriptionApi, DashScopeAudioTranscriptionOptions.builder().build());
    }

    /**
     * Creates a model with the given options and {@link RetryUtils#DEFAULT_RETRY_TEMPLATE}.
     *
     * @param dashScopeAudioTranscriptionApi the API client used for every request
     * @param dashScopeAudioTranscriptionOptions model-level options merged into every request
     */
    public DashScopeAudioTranscriptionModel(DashScopeAudioTranscriptionApi dashScopeAudioTranscriptionApi, DashScopeAudioTranscriptionOptions dashScopeAudioTranscriptionOptions) {
        this(dashScopeAudioTranscriptionApi, dashScopeAudioTranscriptionOptions, RetryUtils.DEFAULT_RETRY_TEMPLATE);
    }

    /**
     * Creates a model from explicitly supplied collaborators.
     *
     * @param dashScopeAudioTranscriptionApi the API client used for every request
     * @param dashScopeAudioTranscriptionOptions model-level options merged into every request
     * @param retryTemplate retry template; stored only, not consulted by this class
     */
    public DashScopeAudioTranscriptionModel(DashScopeAudioTranscriptionApi dashScopeAudioTranscriptionApi, DashScopeAudioTranscriptionOptions dashScopeAudioTranscriptionOptions, RetryTemplate retryTemplate) {
        this.api = dashScopeAudioTranscriptionApi;
        this.options = dashScopeAudioTranscriptionOptions;
        this.retryTemplate = retryTemplate;
    }

    /**
     * Submits a transcription task and converts the submit response without waiting for
     * the task to finish.
     *
     * @param audioTranscriptionPrompt prompt carrying the audio resource and optional options
     * @return the converted submit response, or {@code null} when the API returned no
     *         response entity or an entity without a body (a warning is logged in that case)
     */
    @Override
    public AudioTranscriptionResponse asyncCall(AudioTranscriptionPrompt audioTranscriptionPrompt) {
        DashScopeAudioTranscriptionApi.Request request = createRequest(audioTranscriptionPrompt);
        ResponseEntity<DashScopeAudioTranscriptionApi.Response> entity = this.api.call(request);
        if (entity == null || entity.getBody() == null) {
            logger.warn("app call error: request: {}", request);
            return null;
        }
        return toResponse(entity.getBody());
    }

    /**
     * Queries the task identified by {@code str} once and converts the answer.
     *
     * @param str the task id to look up
     * @return the converted task response
     * @throws NullPointerException if the API answer carries no body
     */
    @Override
    public AudioTranscriptionResponse fetch(String str) {
        // The request is built from a prompt without a resource, so it carries no file URLs.
        DashScopeAudioTranscriptionApi.Request request = createRequest(new AudioTranscriptionPrompt((Resource) null));
        ResponseEntity<DashScopeAudioTranscriptionApi.Response> entity = this.api.callWithTaskId(request, str);
        return toResponse(Objects.requireNonNull(entity.getBody(), "task query returned no body"));
    }

    /**
     * Submits a transcription task and blocks, polling the task status, until the task is
     * SUCCEEDED, FAILED, CANCELED or UNKNOWN.
     *
     * <p>The pause between polls starts at {@value #INITIAL_POLL_INTERVAL_MS} ms and is doubled
     * after every {@value #POLLS_PER_BACKOFF_STEP} non-terminal polls, capped at
     * {@value #MAX_POLL_INTERVAL_MS} ms. There is no overall timeout.
     *
     * @param audioTranscriptionPrompt prompt carrying the audio resource and optional options
     * @return the converted response of the last status query
     * @throws NullPointerException if the submit or a status answer carries no body
     * @throws DashScopeException if the calling thread is interrupted while waiting; the
     *         thread's interrupt flag is set again before the exception is thrown
     */
    @Override
    public AudioTranscriptionResponse call(AudioTranscriptionPrompt audioTranscriptionPrompt) {
        DashScopeAudioTranscriptionApi.Request request = createRequest(audioTranscriptionPrompt);
        ResponseEntity<DashScopeAudioTranscriptionApi.Response> submitted = this.api.call(request);
        String taskId = Objects.requireNonNull(submitted.getBody(), "task submission returned no body").output().taskId();

        int pollIntervalMs = INITIAL_POLL_INTERVAL_MS;
        int nonTerminalPolls = 0;
        while (true) {
            ResponseEntity<DashScopeAudioTranscriptionApi.Response> polled = this.api.callWithTaskId(request, taskId);
            DashScopeAudioTranscriptionApi.Response response = Objects.requireNonNull(polled.getBody(), "task query returned no body");
            DashScopeAudioTranscriptionApi.TaskStatus status = response.output().taskStatus();

            if (status == DashScopeAudioTranscriptionApi.TaskStatus.SUCCEEDED) {
                logger.info("task succeeded");
                return toResponse(response);
            }
            if (status == DashScopeAudioTranscriptionApi.TaskStatus.FAILED
                    || status == DashScopeAudioTranscriptionApi.TaskStatus.CANCELED
                    || status == DashScopeAudioTranscriptionApi.TaskStatus.UNKNOWN) {
                logger.error("task failed");
                return toResponse(response);
            }

            nonTerminalPolls++;
            if (pollIntervalMs < MAX_POLL_INTERVAL_MS && nonTerminalPolls % POLLS_PER_BACKOFF_STEP == 0) {
                pollIntervalMs = Math.min(pollIntervalMs * 2, MAX_POLL_INTERVAL_MS);
            }
            try {
                Thread.sleep(pollIntervalMs);
            } catch (InterruptedException e) {
                // Ignoring the interrupt would keep this loop alive and hide the cancellation
                // request from callers further up the stack, so restore the flag and stop.
                Thread.currentThread().interrupt();
                throw new DashScopeException("interrupted while waiting for transcription task " + taskId, e);
            }
        }
    }

    /**
     * Transcribes the prompt's audio resource over the realtime channel.
     *
     * <p>A run-task control message is sent immediately when this method is invoked. The
     * resource is then read in {@value #AUDIO_CHUNK_SIZE_BYTES}-byte chunks that are emitted
     * {@value #AUDIO_CHUNK_DELAY_MS} ms apart; once the last chunk has been emitted a
     * finish-task control message is sent.
     *
     * @param audioTranscriptionPrompt prompt carrying the audio resource and optional options
     * @return the realtime responses converted to {@link AudioTranscriptionResponse}
     */
    @Override
    public Flux<AudioTranscriptionResponse> stream(AudioTranscriptionPrompt audioTranscriptionPrompt) {
        DashScopeAudioTranscriptionApi.RealtimeRequest runTask = createRealtimeRequest(audioTranscriptionPrompt, DashScopeWebSocketClient.EventType.RUN_TASK);
        logger.info("send run-task");
        this.api.realtimeControl(runTask);

        Flux<ByteBuffer> audioChunks = DataBufferUtils
            .read(audioTranscriptionPrompt.getInstructions(), new DefaultDataBufferFactory(), AUDIO_CHUNK_SIZE_BYTES)
            .map(dataBuffer -> {
                try {
                    byte[] chunk = new byte[dataBuffer.readableByteCount()];
                    dataBuffer.read(chunk);
                    return ByteBuffer.wrap(chunk);
                } finally {
                    // Release even if copying fails so the buffer is not leaked.
                    DataBufferUtils.release(dataBuffer);
                }
            })
            .delayElements(Duration.ofMillis(AUDIO_CHUNK_DELAY_MS), Schedulers.boundedElastic())
            .doOnComplete(() -> {
                DashScopeAudioTranscriptionApi.RealtimeRequest finishTask = createRealtimeRequest(audioTranscriptionPrompt, DashScopeWebSocketClient.EventType.FINISH_TASK);
                logger.info("send finish-task");
                this.api.realtimeControl(finishTask);
            });

        return this.api.realtimeStream(audioChunks).map(this::toResponse);
    }

    /**
     * Builds the file-based transcription request for a prompt.
     *
     * @param prompt prompt whose resource URL (if a resource is present) becomes the single
     *        entry of the request's file URL list; without a resource the list is empty
     * @return the request populated from the merged options
     * @throws DashScopeException if the resource cannot provide a URL
     */
    private DashScopeAudioTranscriptionApi.Request createRequest(AudioTranscriptionPrompt prompt) {
        DashScopeAudioTranscriptionOptions merged = mergeOptions(prompt);

        List<String> fileUrls = List.of();
        Resource audio = prompt.getInstructions();
        if (audio != null) {
            try {
                fileUrls = List.of(audio.getURL().toString());
            } catch (IOException e) {
                throw new DashScopeException("failed to get file urls", e);
            }
        }

        DashScopeAudioTranscriptionApi.Request.Parameters parameters = new DashScopeAudioTranscriptionApi.Request.Parameters(
            merged.getChannelId(),
            merged.getVocabularyId(),
            merged.getPhraseId(),
            merged.getDisfluencyRemovalEnabled(),
            merged.getLanguageHints());
        return new DashScopeAudioTranscriptionApi.Request(
            merged.getModel(),
            new DashScopeAudioTranscriptionApi.Request.Input(fileUrls),
            parameters);
    }

    /**
     * Builds a realtime control request of the given event type.
     *
     * <p>Every invocation generates a fresh random UUID for the header.
     *
     * @param prompt prompt supplying per-request options
     * @param eventType the control event to send (for example run-task or finish-task)
     * @return the realtime request populated from the merged options
     */
    private DashScopeAudioTranscriptionApi.RealtimeRequest createRealtimeRequest(AudioTranscriptionPrompt prompt, DashScopeWebSocketClient.EventType eventType) {
        DashScopeAudioTranscriptionOptions merged = mergeOptions(prompt);

        DashScopeAudioTranscriptionApi.RealtimeRequest.Header header =
            new DashScopeAudioTranscriptionApi.RealtimeRequest.Header(eventType, UUID.randomUUID().toString(), "duplex");
        DashScopeAudioTranscriptionApi.RealtimeRequest.Payload.Parameters parameters =
            new DashScopeAudioTranscriptionApi.RealtimeRequest.Payload.Parameters(
                merged.getSampleRate(),
                merged.getFormat(),
                merged.getDisfluencyRemovalEnabled());
        DashScopeAudioTranscriptionApi.RealtimeRequest.Payload payload =
            new DashScopeAudioTranscriptionApi.RealtimeRequest.Payload(merged.getModel(), "audio", "asr", "recognition", null, parameters);

        return new DashScopeAudioTranscriptionApi.RealtimeRequest(header, payload);
    }

    /**
     * Combines freshly built options, the prompt's options (when present) and this model's
     * options into one options object.
     *
     * <p>NOTE(review): assuming {@code ModelOptionsUtils.merge(source, target, ...)} lets
     * non-null source properties win over the target, any non-null value produced by an empty
     * builder would take precedence over this model's configured options - confirm that this
     * ordering is intended.
     *
     * @param prompt prompt whose options, if any, take part in the merge
     * @return the merged options
     */
    private DashScopeAudioTranscriptionOptions mergeOptions(AudioTranscriptionPrompt prompt) {
        DashScopeAudioTranscriptionOptions merged = DashScopeAudioTranscriptionOptions.builder().build();
        if (prompt.getOptions() != null) {
            DashScopeAudioTranscriptionOptions fromPrompt = ModelOptionsUtils.copyToTarget(
                prompt.getOptions(), AudioTranscriptionOptions.class, DashScopeAudioTranscriptionOptions.class);
            merged = ModelOptionsUtils.merge(fromPrompt, merged, DashScopeAudioTranscriptionOptions.class);
        }
        return ModelOptionsUtils.merge(merged, this.options, DashScopeAudioTranscriptionOptions.class);
    }

    /**
     * Converts a task response into an {@link AudioTranscriptionResponse}.
     *
     * <p>The transcription text is taken from the first transcript of the outcome referenced
     * by the first result; it is {@code null} when no such transcript is available. Status
     * code, request id, code, message and usage are copied into the metadata when non-null;
     * the output object is always stored under {@link #OUTPUT}.
     *
     * @param response the task response; its output must not be {@code null}
     * @return the converted response
     */
    private AudioTranscriptionResponse toResponse(DashScopeAudioTranscriptionApi.Response response) {
        DashScopeAudioTranscriptionApi.Response.Output output = response.output();
        AudioTranscription transcription = new AudioTranscription(firstTranscriptText(output.results()));

        AudioTranscriptionResponseMetadata metadata = new AudioTranscriptionResponseMetadata();
        if (response.statusCode() != null) {
            metadata.put(STATUS_CODE, response.statusCode());
        }
        if (response.requestId() != null) {
            metadata.put(REQUEST_ID, response.requestId());
        }
        if (response.code() != null) {
            metadata.put(CODE, response.code());
        }
        if (response.message() != null) {
            metadata.put(MESSAGE, response.message());
        }
        if (response.usage() != null) {
            metadata.put(USAGE, response.usage());
        }
        metadata.put(OUTPUT, output);
        return new AudioTranscriptionResponse(transcription, metadata);
    }

    /**
     * Downloads the outcome referenced by the first result and returns its first transcript text.
     *
     * @param results the task results, possibly {@code null} or empty
     * @return the text of the first transcript, or {@code null} when there is no result, the
     *         first result has no transcription URL, or the outcome holds no transcript
     */
    private String firstTranscriptText(List<DashScopeAudioTranscriptionApi.Response.Output.Result> results) {
        if (results == null || results.isEmpty()) {
            return null;
        }
        DashScopeAudioTranscriptionApi.Response.Output.Result first = results.get(0);
        // This conversion also runs for tasks that did not succeed; presumably such results
        // may lack a transcription URL, so do not try to download an outcome without one.
        if (first == null || first.transcriptionUrl() == null) {
            return null;
        }
        DashScopeAudioTranscriptionApi.Outcome outcome = this.api.getOutcome(first.transcriptionUrl());
        if (outcome == null || outcome.transcripts() == null || outcome.transcripts().isEmpty()) {
            return null;
        }
        return outcome.transcripts().get(0).text();
    }

    /**
     * Converts a realtime response into an {@link AudioTranscriptionResponse}.
     *
     * <p>The transcription text is the sentence text of the payload output, or {@code null}
     * when the output or its sentence is absent. Task id, output and usage are copied into
     * the metadata when non-null.
     *
     * @param realtimeResponse the realtime response; header and payload must not be {@code null}
     * @return the converted response
     */
    private AudioTranscriptionResponse toResponse(DashScopeAudioTranscriptionApi.RealtimeResponse realtimeResponse) {
        DashScopeAudioTranscriptionApi.RealtimeResponse.Payload payload = realtimeResponse.payload();
        DashScopeAudioTranscriptionApi.RealtimeResponse.Payload.Output output = payload.output();
        String taskId = realtimeResponse.header().taskId();

        String text = null;
        if (output != null) {
            DashScopeAudioTranscriptionApi.RealtimeResponse.Payload.Output.Sentence sentence = output.sentence();
            if (sentence != null) {
                text = sentence.text();
            }
        }

        AudioTranscriptionResponseMetadata metadata = new AudioTranscriptionResponseMetadata();
        // Absent values are skipped, mirroring the null checks of the task-response conversion.
        if (taskId != null) {
            metadata.put(TASK_ID, taskId);
        }
        if (output != null) {
            metadata.put(OUTPUT, output);
        }
        if (payload.usage() != null) {
            metadata.put(USAGE, payload.usage());
        }
        return new AudioTranscriptionResponse(new AudioTranscription(text), metadata);
    }
}
