generated from Basemash_UE/UE_Template
163 lines
6.7 KiB
C++
163 lines
6.7 KiB
C++
// Copyright (c) Basemash. All Rights Reserved.
|
|
|
|
#pragma once
|
|
|
|
#include "CoreMinimal.h"
#include "AudioCaptureCore.h"
#include "Components/ActorComponent.h"
#include "HAL/ThreadSafeBool.h"
#include "UObject/ObjectPtr.h"

#include "VoiceCaptureComponent.generated.h"
|
|
|
|
class UPSVoiceAudioConsumer;
|
|
class IPixelStreaming2AudioSink;
|
|
class UVoiceCommandSubsystem;
|
|
|
|
/**
|
|
* Audio source selection for UVoiceCaptureComponent.
|
|
* LocalMic - Capture from the default OS microphone via Audio::FAudioCapture.
|
|
* PixelStreaming - Capture from a remote browser peer microphone via PS2 audio consumer.
|
|
*/
|
|
UENUM(BlueprintType)
|
|
enum class EVoiceAudioSource : uint8
|
|
{
|
|
LocalMic UMETA(DisplayName = "Local Microphone"),
|
|
PixelStreaming UMETA(DisplayName = "Pixel Streaming (Browser Mic)")
|
|
};
|
|
|
|
/** Broadcast after successful transcription. Fired only on the component that initiated the request. */
|
|
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnVoiceCaptureTranscription, const FString&, Text);
|
|
/** Broadcast after LLM has parsed a command. Fired only on the component that initiated the request. */
|
|
DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(
|
|
FOnVoiceCaptureCommand, const FString&, Action, const FString&, ParamsJson);
|
|
/** Broadcast on any error during the voice pipeline. Fired only on the component that initiated the request. */
|
|
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnVoiceCaptureError, const FString&, ErrorMessage);
|
|
|
|
/**
|
|
* UVoiceCaptureComponent
|
|
*
|
|
* Drop on any actor to give it voice-capture abilities. Handles:
|
|
* - Local mic or Pixel Streaming browser mic audio capture
|
|
* - Start/stop recording lifecycle with per-component event isolation
|
|
* - Stereo-to-mono mixdown, silence detection, WAV conversion
|
|
* - Round-trip to UVoiceCommandSubsystem (Groq STT + LLM)
|
|
*
|
|
* Transcription, parsed-command, and error events are delivered ONLY to the component
|
|
* that initiated the request (via FVoiceRequestCallbacks) — multiple components can
|
|
* coexist without cross-talk.
|
|
*/
|
|
UCLASS(ClassGroup = ("VoiceCommander"), meta = (BlueprintSpawnableComponent, DisplayName = "Voice Capture"))
|
|
class BASEMASHVOICECOMMANDER_API UVoiceCaptureComponent : public UActorComponent
|
|
{
|
|
GENERATED_BODY()
|
|
|
|
public:
|
|
UVoiceCaptureComponent();
|
|
|
|
/** Which audio source to use at runtime. Change before BeginPlay for initial setup. */
|
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Voice Commander")
|
|
EVoiceAudioSource AudioSource = EVoiceAudioSource::LocalMic;
|
|
|
|
/**
|
|
* Runtime-only per-component API key. Set via SetApiKey() or Blueprint at runtime.
|
|
* NOT serialized to asset — use Project Settings for a persistent default.
|
|
* If non-empty, takes priority over Subsystem runtime key and Settings default.
|
|
*/
|
|
UPROPERTY(Transient, BlueprintReadWrite, Category = "Voice Commander", meta = (PasswordField = true))
|
|
FString ApiKeyOverride;
|
|
|
|
/** Blueprint helper to set the per-component API key at runtime. */
|
|
UFUNCTION(BlueprintCallable, Category = "Voice Commander")
|
|
void SetApiKey(const FString& Key) { ApiKeyOverride = Key; }
|
|
|
|
/** Silence threshold (max absolute float sample value). Below this, request is aborted with an error event. */
|
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Voice Commander|Audio")
|
|
float SilenceThreshold = 0.001f;
|
|
|
|
/** If true, saves debug_recording.wav to ProjectSavedDir on every send. */
|
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Voice Commander|Debug")
|
|
bool bSaveDebugRecording = false;
|
|
|
|
/** Begin capturing audio into the internal buffer. No-op if already recording. */
|
|
UFUNCTION(BlueprintCallable, Category = "Voice Commander")
|
|
void StartRecording();
|
|
|
|
/** Stop capturing, perform silence check + WAV encode, dispatch to the subsystem. */
|
|
UFUNCTION(BlueprintCallable, Category = "Voice Commander")
|
|
void StopRecordingAndSend();
|
|
|
|
/** True between StartRecording and StopRecordingAndSend. */
|
|
[[nodiscard]] UFUNCTION(BlueprintPure, Category = "Voice Commander")
|
|
bool IsRecording() const { return bIsRecording; }
|
|
|
|
/**
|
|
* True if the configured audio source is ready to capture.
|
|
* - LocalMic: underlying Audio::FAudioCapture stream is open.
|
|
* - PixelStreaming: PS2 consumer is registered on an audio sink.
|
|
*/
|
|
[[nodiscard]] UFUNCTION(BlueprintPure, Category = "Voice Commander")
|
|
bool IsAudioSourceReady() const;
|
|
|
|
/** Events fired after Groq round-trip — scoped to THIS component only. */
|
|
UPROPERTY(BlueprintAssignable, Category = "Voice Commander")
|
|
FOnVoiceCaptureTranscription OnTranscription;
|
|
|
|
UPROPERTY(BlueprintAssignable, Category = "Voice Commander")
|
|
FOnVoiceCaptureCommand OnCommandParsed;
|
|
|
|
UPROPERTY(BlueprintAssignable, Category = "Voice Commander")
|
|
FOnVoiceCaptureError OnError;
|
|
|
|
protected:
|
|
virtual void BeginPlay() override;
|
|
virtual void EndPlay(const EEndPlayReason::Type EndPlayReason) override;
|
|
|
|
// --- Local mic state ---
|
|
Audio::FAudioCapture AudioCapture;
|
|
int32 CaptureSampleRate = 0;
|
|
int32 CaptureNumChannels = 0;
|
|
TArray<float> RecordedPCMData;
|
|
FCriticalSection RecordingCriticalSection;
|
|
|
|
// --- Pixel Streaming state ---
|
|
UPROPERTY()
|
|
UPSVoiceAudioConsumer* PSAudioConsumer = nullptr;
|
|
|
|
TWeakPtr<IPixelStreaming2AudioSink> PSAudioSink;
|
|
|
|
// Game-thread dominant, but read from the stale OnRemovedDelegate lambda path too.
|
|
FThreadSafeBool bPSAudioRegistered = false;
|
|
FTimerHandle PSRetryTimerHandle;
|
|
FString CurrentAudioPlayerId;
|
|
|
|
/** Handle for OnAudioTrackOpenNative so we can cleanly unsubscribe in EndPlay. */
|
|
FDelegateHandle PSAudioTrackOpenHandle;
|
|
|
|
// --- Recording state ---
|
|
// Written on the game thread (Start/Stop), read on the audio thread (LocalMic capture callback).
|
|
// FThreadSafeBool keeps the cross-thread read/write formally race-free without expanding the critical section.
|
|
FThreadSafeBool bIsRecording = false;
|
|
|
|
// --- Lifecycle ---
|
|
void InitLocalMic();
|
|
void CleanupLocalMic();
|
|
void InitPixelStreamingAudio();
|
|
void PSRetryRegistration();
|
|
void CleanupPixelStreamingAudio();
|
|
void SwitchAudioToPlayer(const FString& StreamerId, const FString& PlayerId);
|
|
|
|
// --- Utility ---
|
|
TArray<uint8> ConvertPCMToWav(const TArray<float>& PCMData, int32 SampleRate, int32 NumChannels) const;
|
|
|
|
// --- Subsystem callback glue ---
|
|
// Subsystem invokes these via FVoiceRequestCallbacks when this component is the request origin.
|
|
// Each one simply re-broadcasts the corresponding multicast so listeners on this component only see events for their own requests.
|
|
UFUNCTION()
|
|
void HandleTranscription(const FString& Text);
|
|
|
|
UFUNCTION()
|
|
void HandleCommandParsed(const FString& Action, const FString& ParamsJson);
|
|
|
|
UFUNCTION()
|
|
void HandleError(const FString& ErrorMessage);
|
|
};
|