UE4之Game、Render、RHI多线程架构
游戏线程(GameThread)
GameThread是引擎运行的心脏,承载游戏逻辑、运行流程的工作,也是其它线程的数据发起者。在FEngineLoop::Tick函数执行每帧逻辑的更新。
在引擎启动时会把GameThread的线程id存储到全局变量GGameThreadId中,且稍后会设置到TaskGraph系统中。
int32 FEngineLoop::PreInitPreStartupScreen(const TCHAR* CmdLine) { // ... ... // 创建线程自己的TLS数据FPerThreadFreeBlockLists 注:Binned2、Binned3内存分配器需要 FMemory::SetupTLSCachesOnCurrentThread(); // remember thread id of the main thread GGameThreadId = FPlatformTLS::GetCurrentThreadId();// 游戏线程id GIsGameThreadIdInitialized = true; // 游戏线程id是否被初始化 FPlatformProcess::SetThreadAffinityMask(FPlatformAffinity::GetMainGameMask()); // 设置当前线程的cpu核的亲和性 注:防止在多个cpu核心上跳来跳去,引发性能问题 FPlatformProcess::SetupGameThread(); // 设置游戏线程数据(但很多平台都是空的实现体) // ... ... FTaskGraphInterface::Startup(FPlatformMisc::NumberOfCores()); // TaskGraph初始化,并根据当前机器cpu的核数来创建工作线程 FTaskGraphInterface::Get().AttachToThread(ENamedThreads::GameThread); // 附加到TaskGraph的GameThread命名插槽中. 这样游戏线程便和TaskGraph联动了起来. if (GUseThreadedRendering) // 如果使用渲染线程 { if (GRHISupportsRHIThread) // 当前平台如果支持RHI线程 { const bool DefaultUseRHIThread = true; GUseRHIThread_InternalUseOnly = DefaultUseRHIThread; if (FParse::Param(FCommandLine::Get(), TEXT("rhithread"))) { GUseRHIThread_InternalUseOnly = true; // 创建独立的RHIThread,并加入到TaskGraph中,RHI会跑在TaskGraph的RHIThread上 } else if (FParse::Param(FCommandLine::Get(), TEXT("norhithread"))) { GUseRHIThread_InternalUseOnly = false; } } SCOPED_BOOT_TIMING("StartRenderingThread"); StartRenderingThread(); // 创建并启动渲染线程 } // ... ... }
游戏线程和TaskGraph系统的ENamedThreads::GameThread其实是一回事,都是同一个线程!
经过上面的初始化和设置后,其它地方就可以通过TaskGraph系统并行地处理任务了,也可以访问全局变量,以便判断游戏线程是否初始化完,当前线程是否游戏线程:
bool IsInGameThread() { return GIsGameThreadIdInitialized && FPlatformTLS::GetCurrentThreadId() == GGameThreadId; }
渲染线程(RenderThread)
RenderThread在TaskGraph系统中有一个任务队列,其他线程(主要是GameThread)通过宏ENQUEUE_RENDER_COMMAND向该队列中填充任务
RenderThread则不断从这个队列中取出任务来执行,从而生成与平台无关的Command List(渲染指令列表)。注:整个过程是异步的
RenderThread是其他线程(主要是GameThread)的奴隶,只是简单地作为工作线程不断执行它们赋予的工作。
RenderingThread.h声明了全部对外的接口,部分如下:
// Engine\Source\Runtime\RenderCore\Public\RenderingThread.h // 是否启用了独立的渲染线程, 如果为false, 则所有渲染命令会被立即执行, 而不是放入渲染命令队列. extern RENDERCORE_API bool GIsThreadedRendering; // 渲染线程是否应该被创建. 通常被命令行参数或ToggleRenderingThread控制台参数设置. extern RENDERCORE_API bool GUseThreadedRendering; // 设置是否开启RHI线程 extern RENDERCORE_API void SetRHIThreadEnabled(bool bEnableDedicatedThread, bool bEnableRHIOnTaskThreads); (......) // 开启渲染线程. extern RENDERCORE_API void StartRenderingThread(); // 停止渲染线程. extern RENDERCORE_API void StopRenderingThread(); // 检查渲染线程是否健康(是否Crash), 如果crash, 则会用UE_LOG输出日志. extern RENDERCORE_API void CheckRenderingThreadHealth(); // 检查渲染线程是否健康(是否Crash) extern RENDERCORE_API bool IsRenderingThreadHealthy(); // 增加一个必须在下一个场景绘制前或flush渲染命令前完成的任务. extern RENDERCORE_API void AddFrameRenderPrerequisite(const FGraphEventRef& TaskToAdd); // 收集帧渲染前序任务, 保证所有渲染命令被入队. extern RENDERCORE_API void AdvanceFrameRenderPrerequisite(); // 等待所有渲染线程的渲染命令被执行完毕. 会卡住游戏线程, 只能被游戏线程调用. extern RENDERCORE_API void FlushRenderingCommands(bool bFlushDeferredDeletes = false); extern RENDERCORE_API void FlushPendingDeleteRHIResources_GameThread(); extern RENDERCORE_API void FlushPendingDeleteRHIResources_RenderThread(); extern RENDERCORE_API void TickRenderingTickables(); extern RENDERCORE_API void StartRenderCommandFenceBundler(); extern RENDERCORE_API void StopRenderCommandFenceBundler(); (......)
RenderingThread.h还有一个非常重要的宏ENQUEUE_RENDER_COMMAND,它的作用是向渲染线程入队渲染指令。下面是它的声明和实现:
// 向渲染线程入队渲染指令, Type指明了渲染操作的名字. #define ENQUEUE_RENDER_COMMAND(Type) \ struct Type##Name \ { \ static const char* CStr() { return #Type; } \ static const TCHAR* TStr() { return TEXT(#Type); } \ }; \ EnqueueUniqueRenderCommand<Type##Name>
上面最后一句使用了EnqueueUniqueRenderCommand函数模板,继续追踪之:
/* UnrealEngine\Engine\Source\Runtime\RenderCore\Public\RenderingThread.h */ /** The parent class of commands stored in the rendering command queue. */ class RENDERCORE_API FRenderCommand { public: // All render commands run on the render thread static ENamedThreads::Type GetDesiredThread() // 所有渲染指令都必须在渲染线程执行. { check(!GIsThreadedRendering || ENamedThreads::GetRenderThread() != ENamedThreads::GameThread); return ENamedThreads::GetRenderThread(); // 开启渲染多线程时,返回渲染线程。不开启渲染多线程时,返回GameThread } static ESubsequentsMode::Type GetSubsequentsMode() { // Don't support tasks having dependencies on us, reduces task graph overhead tracking and dealing with subsequents return ESubsequentsMode::FireAndForget; } }; template<typename TSTR, typename LAMBDA> class TEnqueueUniqueRenderCommandType : public FRenderCommand { public: TEnqueueUniqueRenderCommandType(LAMBDA&& InLambda) : Lambda(Forward<LAMBDA>(InLambda)) {} void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent) { TRACE_CPUPROFILER_EVENT_SCOPE_ON_CHANNEL_STR(TSTR::TStr(), RenderCommandsChannel); FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand(); Lambda(RHICmdList); } FORCEINLINE_DEBUGGABLE TStatId GetStatId() const { #if STATS static struct FThreadSafeStaticStat<FStat_EnqueueUniqueRenderCommandType> StatPtr_EnqueueUniqueRenderCommandType; return StatPtr_EnqueueUniqueRenderCommandType.GetStatId(); #else return TStatId(); #endif } private: LAMBDA Lambda; // 缓存渲染回调函数.
}; /*************************************************************************************************************/ template<typename TSTR, typename LAMBDA> // 传入的TSTR为结构体类型,里面包含CStr和TStr的静态方法,为渲染命令名字。 LAMBDA是回调函数 FORCEINLINE_DEBUGGABLE void EnqueueUniqueRenderCommand(LAMBDA&& Lambda) { QUICK_SCOPE_CYCLE_COUNTER(STAT_EnqueueUniqueRenderCommand); typedef TEnqueueUniqueRenderCommandType<TSTR, LAMBDA> EURCType; // EURCType类型即为TEnqueueUniqueRenderCommandType<TSTR, LAMBDA>类型 #if 0 // UE_SERVER && UE_BUILD_DEBUG UE_LOG(LogRHI, Warning, TEXT("Render command '%s' is being executed on a dedicated server."), TSTR::TStr()) #endif if (IsInRenderingThread()) // 如果在渲染线程内直接执行回调而不入队渲染命令. { FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand(); Lambda(RHICmdList); // 在渲染线程中,直接执行传入的lambda匿名函数 } else { if (ShouldExecuteOnRenderThread()) // if ((GIsThreadedRendering || !IsInGameThread())) // 使用渲染线程 or 当前不为GameThread // 需要在独立的渲染线程执行 { CheckNotBlockedOnRenderThread(); TGraphTask<EURCType>::CreateTask().ConstructAndDispatchWhenReady(Forward<LAMBDA>(Lambda)); // 向渲染线程的TaskGraph队列里面投递类型名为EURCType类型的任务,并将lambda匿名函数作为参数传入该任务的构造函数 } else { // 不在独立的渲染线程执行,则构建EURCType类型的对象,然后直接执行 EURCType TempCommand(Forward<LAMBDA>(Lambda)); FScopeCycleCounter EURCMacro_Scope(TempCommand.GetStatId()); TempCommand.DoTask(ENamedThreads::GameThread, FGraphEventRef()); } } }
为了更好理解入队渲染命令操作,举2个具体的例子:
例1:在GameThread执行LoadMap切地图,在卸载掉Old World之后,会在TrimMemory()函数中使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个FlushCommand任务
ENQUEUE_RENDER_COMMAND(FlushCommand)( /* ---------------------------------------------- ENQUEUE_RENDER_COMMAND宏展开后 ------------------------------------------------ struct FlushCommandName { static const char* CStr() { return "FlushCommand"; } static const TCHAR* TStr() { return L"FlushCommand"; } }; EnqueueUniqueRenderCommand<FlushCommandName>( */ [](FRHICommandList& RHICmdList) { GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources); RHIFlushResources(); GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources); });
例2:在GameThread中执行控制台变量命令,会使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个OnCVarChange1任务,以便将新的数值传递到RenderThread的逻辑中使用
virtual void OnCVarChange(int32& Dest, int32 NewValue) { int32* DestPtr = &Dest; ENQUEUE_RENDER_COMMAND(OnCVarChange1)( /* ---------------------------------------------- ENQUEUE_RENDER_COMMAND宏展开后 ------------------------------------------------ struct OnCVarChange1Name { static const char* CStr() { return "OnCVarChange1"; } static const TCHAR* TStr() { return L"OnCVarChange1"; } }; EnqueueUniqueRenderCommand<OnCVarChange1Name>( */ [DestPtr, NewValue](FRHICommandListImmediate& RHICmdList) { *DestPtr = NewValue; }); }
FRenderingThread承载了渲染线程的主要工作,它的部分接口和实现代码如下:
// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp class FRenderingThread : public FRunnable { private: bool bAcquiredThreadOwnership; // 记录渲染线程是否已获取RHI的线程所有权(没有独立的RHI线程时, 在Init中获取). public: FEvent* TaskGraphBoundSyncEvent; // TaskGraph同步事件, 以便在主线程使用渲染线程之前就将渲染线程绑定到TaskGraph体系中. FRenderingThread() { bAcquiredThreadOwnership = false; // 获取同步事件. TaskGraphBoundSyncEvent = FPlatformProcess::GetSynchEventFromPool(true); RHIFlushResources(); } // FRunnable interface. virtual bool Init(void) override { // 获取当前线程ID到全局变量GRenderThreadId, 以便其它地方引用. GRenderThreadId = FPlatformTLS::GetCurrentThreadId(); // 处理线程捕获关系. if (!IsRunningRHIInSeparateThread()) { bAcquiredThreadOwnership = true; RHIAcquireThreadOwnership(); } return true; } (......) virtual uint32 Run(void) override { // 设置TLS. FMemory::SetupTLSCachesOnCurrentThread(); // 设置渲染线程平台相关的数据. FPlatformProcess::SetupRenderThread(); (......) { // 进入渲染线程主循环. RenderingThreadMain( TaskGraphBoundSyncEvent ); } FMemory::ClearAndDisableTLSCachesOnCurrentThread(); return 0; } };
可见它在运行之后会进入渲染线程逻辑,这里再进入RenderingThreadMain代码一探究竟:
void RenderingThreadMain( FEvent* TaskGraphBoundSyncEvent ) { LLM_SCOPE(ELLMTag::RenderingThreadMemory); // 将渲染线程及其局部(Local)队列的命名线程插槽设置成ActualRenderingThread和ActualRenderingThread_Local. ENamedThreads::Type RenderThread = ENamedThreads::Type(ENamedThreads::ActualRenderingThread); ENamedThreads::SetRenderThread(RenderThread); ENamedThreads::SetRenderThread_Local(ENamedThreads::Type(ENamedThreads::ActualRenderingThread_Local)); // 将当前线程附加到TaskGraph的RenderThread插槽中. FTaskGraphInterface::Get().AttachToThread(RenderThread); FPlatformMisc::MemoryBarrier(); // 触发同步事件, 通知主线程渲染线程已经附加到TaskGraph, 已经准备好接收任务. if( TaskGraphBoundSyncEvent != NULL ) { TaskGraphBoundSyncEvent->Trigger(); } (......) // 渲染线程不同阶段的处理. FCoreDelegates::PostRenderingThreadCreated.Broadcast(); check(GIsThreadedRendering); FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(RenderThread); FPlatformMisc::MemoryBarrier(); check(!GIsThreadedRendering); FCoreDelegates::PreRenderingThreadDestroyed.Broadcast(); (......) // 将渲染线程插槽恢复为游戏线程. ENamedThreads::SetRenderThread(ENamedThreads::GameThread); ENamedThreads::SetRenderThread_Local(ENamedThreads::GameThread_Local); FPlatformMisc::MemoryBarrier(); }
不过这里还留有一个很大的疑问,那就是FRenderingThread只是获取当前线程作为渲染线程并附加到TaskGraph中,并没有创建线程。
那么是哪里创建的渲染线程呢?继续追踪,结果发现是在StartRenderingThread()接口中创建了FRenderingThread实例,它的实现代码如下(节选):
// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp void StartRenderingThread() { (......) // Turn on the threaded rendering flag. GIsThreadedRendering = true; // 创建FRenderingThread实例. GRenderingThreadRunnable = new FRenderingThread(); // 创建渲染线程!! GRenderingThread = FRunnableThread::Create(GRenderingThreadRunnable, *BuildRenderingThreadName(ThreadCount), 0, FPlatformAffinity::GetRenderingThreadPriority(), FPlatformAffinity::GetRenderingThreadMask(), FPlatformAffinity::GetRenderingThreadFlags()); (......) // 开启渲染命令的栅栏. FRenderCommandFence Fence; Fence.BeginFence(); Fence.Wait(); (......) }
如果继续追踪,会发现StartRenderingThread()是在FEngineLoop::PreInitPreStartupScreen中调用的(即前文GameThread一节所列的初始化代码)。
至此,渲染线程的创建、初始化以及主要接口的实现都剖析完了。
RHI线程(RHIThread)
RenderThread作为前端(frontend)产生的Command List是平台无关的,是抽象的图形API调用;
而RHIThread作为后端(backend)会执行和转换渲染线程的Command List成为指定图形API的调用(称为Graphical Command),并提交到GPU执行。
RHI线程的工作是转换渲染指令到指定图形API,创建、上传渲染资源到GPU。实现代码如下:
// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp class FRHIThread : public FRunnable { public: FRunnableThread* Thread; // 所在的RHI线程. FRHIThread() : Thread(nullptr) { check(IsInGameThread()); } void Start() { // 开始时创建RHI线程. Thread = FRunnableThread::Create(this, TEXT("RHIThread"), 512 * 1024, FPlatformAffinity::GetRHIThreadPriority(), FPlatformAffinity::GetRHIThreadMask(), FPlatformAffinity::GetRHIThreadFlags() ); check(Thread); } virtual uint32 Run() override { LLM_SCOPE(ELLMTag::RHIMisc); // 初始化TLS FMemory::SetupTLSCachesOnCurrentThread(); // 将FRHIThread所在的RHI线程附加到TaskGraph体系中,并指定到ENamedThreads::RHIThread。 FTaskGraphInterface::Get().AttachToThread(ENamedThreads::RHIThread); // 启动RHI线程,直到线程返回。 FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(ENamedThreads::RHIThread); // 清理TLS. FMemory::ClearAndDisableTLSCachesOnCurrentThread(); return 0; } // 单例接口。 static FRHIThread& Get() { static FRHIThread Singleton; // 使用了局部静态变量,可以保证线程安全。 return Singleton; } };
可见RHI线程不同于渲染线程,是直接在FRHIThread对象内创建实际的线程。而FRHIThread的创建也是在StartRenderingThread()中:
void StartRenderingThread() { (......) if (GUseRHIThread_InternalUseOnly) { FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread); if (!FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::RHIThread)) { // 创建FRHIThread实例并启动它. FRHIThread::Get().Start(); } DECLARE_CYCLE_STAT(TEXT("Wait For RHIThread"), STAT_WaitForRHIThread, STATGROUP_TaskGraphTasks); // 创建RHI线程拥有者捕获任务, 让游戏线程等待. FGraphEventRef CompletionEvent = TGraphTask<FOwnershipOfRHIThreadTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(true, GET_STATID(STAT_WaitForRHIThread)); QUICK_SCOPE_CYCLE_COUNTER(STAT_StartRenderingThread); // 让游戏线程或局部线程等待RHI线程处理(捕获了线程拥有者, 大多数图形API为空)完毕. FTaskGraphInterface::Get().WaitUntilTaskCompletes(CompletionEvent, ENamedThreads::GameThread_Local); // 存储RHI线程id. GRHIThread_InternalUseOnly = FRHIThread::Get().Thread; check(GRHIThread_InternalUseOnly); GIsRunningRHIInDedicatedThread_InternalUseOnly = true; GIsRunningRHIInSeparateThread_InternalUseOnly = true; GRHIThreadId = GRHIThread_InternalUseOnly->GetThreadID(); GRHICommandList.LatchBypass(); } (......) }
以Fortnite(堡垒之夜)移动端为例,在开启RHI线程之前,渲染线程急剧地上下波动,而加了RHI线程之后,波动平缓许多,和游戏线程基本保持一致,帧率也提升不少:
GameThread、RenderThread、RHIThread之间的同步机制
这3个线程处理的数据通常是不同帧的,譬如GameThread处理N帧数据,RenderThread和RHIThread处理N-1帧数据。
但也存在例外,比如RenderThread和RHIThread运行很快,几乎不存在延迟,这种情况下,GameThread处理N帧,而RenderThread可能处理N或N-1帧,RHIThread也可能在转换N或N-1帧。
但是,RenderThread不能落后游戏线程超过一帧,否则GameThread会卡住,直到RenderThread处理完所有指令。
游戏线程和渲染线程的同步
游戏线程不可能领先于渲染线程超过一帧(最多快一帧),否则游戏线程会等待渲染线程处理完。它们的同步机制涉及两个关键的概念:
// Engine\Source\Runtime\RenderCore\Public\RenderCommandFence.h // 渲染命令栅栏 class RENDERCORE_API FRenderCommandFence { public: // 向渲染命令队列增加一个栅栏. bSyncToRHIAndGPU是否同步RHI和GPU交换Buffer, 否则只等待渲染线程. void BeginFence(bool bSyncToRHIAndGPU = false); // 等待栅栏被执行. bProcessGameThreadTasks会传给GameThreadWaitForTask, 表示等待期间是否可以处理游戏线程任务. void Wait(bool bProcessGameThreadTasks = false) const; // 是否完成了栅栏. bool IsFenceComplete() const; private: mutable FGraphEventRef CompletionEvent; // 处理完成同步的事件 ENamedThreads::Type TriggerThreadIndex; // 处理完之后需要触发的线程类型. }; // Engine\Source\Runtime\Engine\Public\UnrealEngine.h class FFrameEndSync { FRenderCommandFence Fence[2]; // 渲染栅栏对. int32 EventIndex; // 当前事件索引 public: // 同步游戏线程和渲染线程. bAllowOneFrameThreadLag是否允许渲染线程一帧的延迟. void Sync( bool bAllowOneFrameThreadLag ) { Fence[EventIndex].BeginFence(true); // 开启栅栏, 强制同步RHI和GPU交换链. bool bEmptyGameThreadTasks = !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread); // 保证游戏线程至少跑过一次任务. if (bEmptyGameThreadTasks) { FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread); } // 如果允许延迟, 交换事件索引. if( bAllowOneFrameThreadLag ) { EventIndex = (EventIndex + 1) % 2; } (......) // 开启栅栏等待. Fence[EventIndex].Wait(bEmptyGameThreadTasks); } };
在FRenderCommandFence的BeginFence函数中
当GameThread与RHI线程及GPU同步时,GameThread会使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个FSyncFrameCommand任务,以便将Command List同步投递到RHI线程
当GameThread与RenderThread同步时,GameThread会创建一个FNullGraphTask空任务,放到RenderThread的TaskGraph队列中让其执行
在FRenderCommandFence的Wait函数中,会检查投递给RenderThread的CompletionEvent是否被执行,如果没有执行则调用GameThreadWaitForTask函数来阻塞等待(通过Event实现)
void FRenderCommandFence::BeginFence(bool bSyncToRHIAndGPU) { if (!GIsThreadedRendering) { return; } else { // Render thread is a default trigger for the CompletionEvent TriggerThreadIndex = ENamedThreads::ActualRenderingThread; if (BundledCompletionEvent.GetReference() && IsInGameThread()) { CompletionEvent = BundledCompletionEvent; return; } int32 GTSyncType = CVarGTSyncType.GetValueOnAnyThread(); if (bSyncToRHIAndGPU) { // Don't sync to the RHI and GPU if GtSyncType is disabled, or we're not vsyncing //@TODO: do this logic in the caller? static auto CVarVsync = IConsoleManager::Get().FindConsoleVariable(TEXT("r.VSync")); // 是否开了VSync check(CVarVsync != nullptr); if ( GTSyncType == 0 || CVarVsync->GetInt() == 0 ) // r.GTSyncType为0或r.VSync为0时,GameThread不与RHI线程及GPU同步 { bSyncToRHIAndGPU = false; } } if (bSyncToRHIAndGPU) // GameThread与RHI线程及GPU同步时 { if (IsRHIThreadRunning()) { // Change trigger thread to RHI TriggerThreadIndex = ENamedThreads::RHIThread; } // Create a task graph event which we can pass to the render or RHI threads. 
CompletionEvent = FGraphEvent::CreateGraphEvent(); FGraphEventRef InCompletionEvent = CompletionEvent; /* ---------------------------------------------- ENQUEUE_RENDER_COMMAND宏展开后 ------------------------------------------------ struct FSyncFrameCommandName { static const char* CStr() { return "FSyncFrameCommand"; } static const TCHAR* TStr() { return L"FSyncFrameCommand"; } }; EnqueueUniqueRenderCommand<FSyncFrameCommandName>( */ ENQUEUE_RENDER_COMMAND(FSyncFrameCommand)( [InCompletionEvent, GTSyncType](FRHICommandListImmediate& RHICmdList) { if (IsRHIThreadRunning()) // 如果开启了RHI线程 { ALLOC_COMMAND_CL(RHICmdList, FRHISyncFrameCommand)(InCompletionEvent, GTSyncType); // 将创建的CompletionEvent投递到RHI线程的TaskGraph的任务队列中 RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread); } else // 渲染线程直接执行 { FRHISyncFrameCommand Command(InCompletionEvent, GTSyncType); Command.Execute(RHICmdList); } }); } else // GameThread与RenderThread同步 { // Sync Game Thread with Render Thread only DECLARE_CYCLE_STAT(TEXT("FNullGraphTask.FenceRenderCommand"), STAT_FNullGraphTask_FenceRenderCommand, STATGROUP_TaskGraphTasks); CompletionEvent = TGraphTask<FNullGraphTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady( GET_STATID(STAT_FNullGraphTask_FenceRenderCommand), ENamedThreads::GetRenderThread()); } } } /** * Waits for pending fence commands to retire. */ void FRenderCommandFence::Wait(bool bProcessGameThreadTasks) const { if (!IsFenceComplete()) { StopRenderCommandFenceBundler(); GameThreadWaitForTask(CompletionEvent, TriggerThreadIndex, bProcessGameThreadTasks); } } bool FRenderCommandFence::IsFenceComplete() const { if (!GIsThreadedRendering) { return true; } check(IsInGameThread() || IsInAsyncLoadingThread()); CheckRenderingThreadHealth(); if (!CompletionEvent.GetReference() || CompletionEvent->IsComplete()) { CompletionEvent = NULL; // this frees the handle for other uses, the NULL state is considered completed return true; } return false; }
而FFrameEndSync的使用是在FEngineLoop::Tick中:
// Engine\Source\Runtime\Launch\Private\LaunchEngineLoop.cpp void FEngineLoop::Tick() { (......) // 在引擎循环的帧末尾添加游戏线程和渲染线程的同步事件. { static FFrameEndSync FrameEndSync; // 局部静态变量, 线程安全. static auto CVarAllowOneFrameThreadLag = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.OneFrameThreadLag")); // 同步游戏和渲染线程, 是否允许一帧的延迟可由控制台命令控制. 默认是开启的. FrameEndSync.Sync( CVarAllowOneFrameThreadLag->GetValueOnGameThread() != 0 ); } (......) }
FlushRenderingCommands
在游戏线程中调用,会阻塞游戏线程,强行等待所有的渲染线程pending render command以及RHI线程中的指令执行完,相当于一次完整地对渲染线程的同步
/** Waits for the rendering thread to finish executing all pending rendering commands. Should only be used from the game thread. */ void FlushRenderingCommands(bool bFlushDeferredDeletes) { if (!GIsRHIInitialized) { return; } FSuspendRenderingTickables SuspendRenderingTickables; // Need to flush GT because render commands from threads other than GT are sent to // the main queue of GT when RT is disabled if (!GIsThreadedRendering && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread) && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread_Local)) { FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread); FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread_Local); } ENQUEUE_RENDER_COMMAND(FlushPendingDeleteRHIResourcesCmd)( [bFlushDeferredDeletes](FRHICommandListImmediate& RHICmdList) { RHICmdList.ImmediateFlush( bFlushDeferredDeletes ? EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes : EImmediateFlushType::FlushRHIThreadFlushResources); }); // Find the objects which may be cleaned up once the rendering thread command queue has been flushed. FPendingCleanupObjects* PendingCleanupObjects = GetPendingCleanupObjects(); // Issue a fence command to the rendering thread and wait for it to complete. FRenderCommandFence Fence; Fence.BeginFence(); // 创建一个FNullGraphTask空任务,放到RenderThread的TaskGraph队列中让其执行 Fence.Wait(); // 检查投递给RenderThread的CompletionEvent是否被执行,如果没有执行就会阻塞等待 // Delete the objects which were enqueued for deferred cleanup before the command queue flush. delete PendingCleanupObjects; }
渲染线程和RHI线程的同步
RenderThread每次在调用RenderViewFamily_RenderThread的起始处,会阻塞等待所有RHI指令处理完成,然后才开始当前帧的渲染逻辑。
FMobileSceneRenderer渲染管线下,RenderThread每一帧都会执行ImmediateFlush,阻塞等待RHI处理完FGraphEventRef RHIThreadTask任务,代码如下:
FDeferredShadingSceneRenderer渲染管线下,RenderThread每一帧都会执行ImmediateFlush,阻塞等待RHI处理完FGraphEventRef RHIThreadTask任务,代码如下:
阻塞时的stats栈(移动端)
参考
UE4主线程与渲染线程同步