PSC_SharkTable/Plugins/BasemashVoiceCommander/Source/BasemashVoiceCommander/Public/VoiceCaptureComponent.h

163 lines
6.7 KiB
C++

// Copyright (c) Basemash. All Rights Reserved.
#pragma once
#include "AudioCaptureCore.h"
#include "Components/ActorComponent.h"
#include "CoreMinimal.h"
#include "HAL/ThreadSafeBool.h"
#include "VoiceCaptureComponent.generated.h"
class UPSVoiceAudioConsumer;
class IPixelStreaming2AudioSink;
class UVoiceCommandSubsystem;
/**
* Audio source selection for UVoiceCaptureComponent.
* LocalMic - Capture from the default OS microphone via Audio::FAudioCapture.
* PixelStreaming - Capture from a remote browser peer microphone via PS2 audio consumer.
*/
UENUM(BlueprintType)
enum class EVoiceAudioSource : uint8
{
/**
 * Capture from the default OS microphone via Audio::FAudioCapture.
 * First enumerator, and the in-class default of UVoiceCaptureComponent::AudioSource.
 */
LocalMic UMETA(DisplayName = "Local Microphone"),
/** Capture from a remote browser peer microphone via the Pixel Streaming 2 audio consumer. */
PixelStreaming UMETA(DisplayName = "Pixel Streaming (Browser Mic)")
};
/**
 * Broadcast after successful transcription. Fired only on the component that initiated the request.
 * Dynamic multicast delegate type of UVoiceCaptureComponent::OnTranscription (BlueprintAssignable).
 * @param Text  Presumably the transcribed text returned by the speech-to-text step — confirm against the subsystem.
 */
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnVoiceCaptureTranscription, const FString&, Text);
/**
 * Broadcast after LLM has parsed a command. Fired only on the component that initiated the request.
 * Dynamic multicast delegate type of UVoiceCaptureComponent::OnCommandParsed (BlueprintAssignable).
 * @param Action      Presumably the name of the parsed command/action — confirm against the subsystem.
 * @param ParamsJson  Presumably the command parameters serialized as a JSON string — confirm against the subsystem.
 */
DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(
FOnVoiceCaptureCommand, const FString&, Action, const FString&, ParamsJson);
/**
 * Broadcast on any error during the voice pipeline. Fired only on the component that initiated the request.
 * Dynamic multicast delegate type of UVoiceCaptureComponent::OnError (BlueprintAssignable).
 * @param ErrorMessage  Human-readable description of the failure.
 */
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnVoiceCaptureError, const FString&, ErrorMessage);
/**
* UVoiceCaptureComponent
*
* Drop on any actor to give it voice-capture abilities. Handles:
* - Local mic or Pixel Streaming browser mic audio capture
* - Start/stop recording lifecycle with per-component event isolation
* - Stereo-to-mono mixdown, silence detection, WAV conversion
* - Round-trip to UVoiceCommandSubsystem (Groq STT + LLM)
*
* Transcription, parsed-command, and error events are delivered ONLY to the component
* that initiated the request (via FVoiceRequestCallbacks) — multiple components can
* coexist without cross-talk.
*/
UCLASS(ClassGroup = ("VoiceCommander"), meta = (BlueprintSpawnableComponent, DisplayName = "Voice Capture"))
class BASEMASHVOICECOMMANDER_API UVoiceCaptureComponent : public UActorComponent
{
	GENERATED_BODY()

public:
	/** Default constructor; defined out of line. */
	UVoiceCaptureComponent();

	// --- Configuration ---

	/** Which audio source to use at runtime. Change before BeginPlay for initial setup. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Voice Commander")
	EVoiceAudioSource AudioSource = EVoiceAudioSource::LocalMic;

	/**
	 * Runtime-only per-component API key. Set via SetApiKey() or Blueprint at runtime.
	 * NOT serialized to asset (Transient) — use Project Settings for a persistent default.
	 * If non-empty, takes priority over Subsystem runtime key and Settings default.
	 */
	UPROPERTY(Transient, BlueprintReadWrite, Category = "Voice Commander", meta = (PasswordField = true))
	FString ApiKeyOverride;

	/**
	 * Blueprint helper to set the per-component API key at runtime.
	 * @param Key  Value stored verbatim in ApiKeyOverride; no validation is performed here.
	 */
	UFUNCTION(BlueprintCallable, Category = "Voice Commander")
	void SetApiKey(const FString& Key) { ApiKeyOverride = Key; }

	/** Silence threshold (max absolute float sample value). Below this, request is aborted with an error event. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Voice Commander|Audio")
	float SilenceThreshold = 0.001f;

	/** If true, saves debug_recording.wav to ProjectSavedDir on every send. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Voice Commander|Debug")
	bool bSaveDebugRecording = false;

	// --- Recording API ---

	/** Begin capturing audio into the internal buffer. No-op if already recording. */
	UFUNCTION(BlueprintCallable, Category = "Voice Commander")
	void StartRecording();

	/** Stop capturing, perform silence check + WAV encode, dispatch to the subsystem. */
	UFUNCTION(BlueprintCallable, Category = "Voice Commander")
	void StopRecordingAndSend();

	/**
	 * True between StartRecording and StopRecordingAndSend.
	 *
	 * The UFUNCTION macro must lead the declaration so the reflection tooling sees it; the macro expands
	 * to nothing for the C++ compiler, so [[nodiscard]] placed after it still applies to this function.
	 */
	UFUNCTION(BlueprintPure, Category = "Voice Commander")
	[[nodiscard]] bool IsRecording() const { return bIsRecording; }

	/**
	 * True if the configured audio source is ready to capture.
	 * - LocalMic: underlying Audio::FAudioCapture stream is open.
	 * - PixelStreaming: PS2 consumer is registered on an audio sink.
	 *
	 * As with IsRecording(), [[nodiscard]] follows the UFUNCTION macro rather than preceding it.
	 */
	UFUNCTION(BlueprintPure, Category = "Voice Commander")
	[[nodiscard]] bool IsAudioSourceReady() const;

	// --- Events ---
	// Fired after the Groq round-trip — scoped to THIS component only.

	/** Fired with the transcription text for a request started by this component. */
	UPROPERTY(BlueprintAssignable, Category = "Voice Commander")
	FOnVoiceCaptureTranscription OnTranscription;

	/** Fired with the parsed action and its JSON parameters for a request started by this component. */
	UPROPERTY(BlueprintAssignable, Category = "Voice Commander")
	FOnVoiceCaptureCommand OnCommandParsed;

	/** Fired with an error message when a request started by this component fails. */
	UPROPERTY(BlueprintAssignable, Category = "Voice Commander")
	FOnVoiceCaptureError OnError;

protected:
	// --- UActorComponent overrides ---
	virtual void BeginPlay() override;
	virtual void EndPlay(const EEndPlayReason::Type EndPlayReason) override;

	// --- Local mic state ---

	/** Local microphone capture stream. */
	Audio::FAudioCapture AudioCapture;

	/** Sample rate of the capture stream; 0 until assigned (presumably in InitLocalMic — confirm in the .cpp). */
	int32 CaptureSampleRate = 0;

	/** Channel count of the capture stream; 0 until assigned (presumably in InitLocalMic — confirm in the .cpp). */
	int32 CaptureNumChannels = 0;

	/** Float PCM samples accumulated for the current recording. */
	TArray<float> RecordedPCMData;

	/** Presumably guards RecordedPCMData against concurrent audio-thread/game-thread access — confirm in the .cpp. */
	FCriticalSection RecordingCriticalSection;

	// --- Pixel Streaming state ---

	/** PS2 audio consumer object; UPROPERTY so the garbage collector tracks the reference. */
	UPROPERTY()
	UPSVoiceAudioConsumer* PSAudioConsumer = nullptr;

	/** Non-owning reference to the audio sink the consumer is attached to. */
	TWeakPtr<IPixelStreaming2AudioSink> PSAudioSink;

	// Game-thread dominant, but read from the stale OnRemovedDelegate lambda path too.
	FThreadSafeBool bPSAudioRegistered = false;

	/** Timer handle, presumably driving PSRetryRegistration — confirm in the .cpp. */
	FTimerHandle PSRetryTimerHandle;

	/** Id of the player whose audio is currently selected (see SwitchAudioToPlayer) — TODO confirm. */
	FString CurrentAudioPlayerId;

	/** Handle for OnAudioTrackOpenNative so we can cleanly unsubscribe in EndPlay. */
	FDelegateHandle PSAudioTrackOpenHandle;

	// --- Recording state ---

	// Written on the game thread (Start/Stop), read on the audio thread (LocalMic capture callback).
	// FThreadSafeBool keeps the cross-thread read/write formally race-free without expanding the critical section.
	FThreadSafeBool bIsRecording = false;

	// --- Lifecycle ---
	// Setup/teardown helpers for each audio source; implemented in the .cpp.
	void InitLocalMic();
	void CleanupLocalMic();
	void InitPixelStreamingAudio();
	void PSRetryRegistration();
	void CleanupPixelStreamingAudio();
	void SwitchAudioToPlayer(const FString& StreamerId, const FString& PlayerId);

	// --- Utility ---

	/**
	 * Encode float PCM samples as an in-memory WAV byte buffer.
	 * @param PCMData      Float samples to encode.
	 * @param SampleRate   Sample rate of PCMData.
	 * @param NumChannels  Channel count of PCMData.
	 * @return The encoded bytes (exact WAV sample format is defined in the .cpp).
	 */
	TArray<uint8> ConvertPCMToWav(const TArray<float>& PCMData, int32 SampleRate, int32 NumChannels) const;

	// --- Subsystem callback glue ---
	// Subsystem invokes these via FVoiceRequestCallbacks when this component is the request origin.
	// Each one simply re-broadcasts the corresponding multicast so listeners on this component only see events for their own requests.
	// They are UFUNCTIONs so they can be bound to dynamic delegates by name.
	UFUNCTION()
	void HandleTranscription(const FString& Text);

	UFUNCTION()
	void HandleCommandParsed(const FString& Action, const FString& ParamsJson);

	UFUNCTION()
	void HandleError(const FString& ErrorMessage);
};