Skip to content

Commit

Permalink
[VP] Integrate L0 FC
Browse files Browse the repository at this point in the history
l0 fc enable and tool enable
  • Loading branch information
peiyigu-intel authored and intel-mediadev committed Jul 4, 2024
1 parent 63da90a commit 4b9ed1d
Show file tree
Hide file tree
Showing 26 changed files with 243 additions and 1,627 deletions.
2 changes: 2 additions & 0 deletions media_common/agnostic/common/hw/mhw_render.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,8 @@ typedef struct _MHW_GPGPU_WALKER_PARAMS

bool isGenerateLocalID;
MHW_EMIT_LOCAL_MODE emitLocal;

bool hasBarrier;

} MHW_GPGPU_WALKER_PARAMS, *PMHW_GPGPU_WALKER_PARAMS;

Expand Down
1 change: 1 addition & 0 deletions media_common/agnostic/common/hw/mhw_state_heap.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ typedef struct _MHW_ID_ENTRY_PARAMS
int32_t iCrsThdConDataRdLn; //!
PMHW_STATE_HEAP pGeneralStateHeap; //! General state heap in use
MemoryBlock *memoryBlock; //! Memory block associated with the state heap
uint32_t preferredSlmAllocationSize; //! SLM Allocation Size for per SubSlice
} MHW_ID_ENTRY_PARAMS, *PMHW_ID_ENTRY_PARAMS;

typedef struct _MHW_PLANE_SETTING
Expand Down
9 changes: 8 additions & 1 deletion media_common/agnostic/common/renderhal/renderhal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,8 @@ typedef struct _RENDERHAL_SURFACE_STATE_PARAMS
uint32_t bVmeUse : 1; // Flag for VME use
uint32_t bBufferUse : 1; // Flags for 1D buffer use
uint32_t bSurfaceTypeDefined : 1;
uint32_t : 2;
uint32_t : 1;
uint32_t forceCommonSurfaceMessage : 1;
uint32_t surfaceType : 11;
MOS_COMPONENT Component : 4;
RENDERHAL_MEMORY_OBJECT_CONTROL MemObjCtl; // Caching attributes
Expand Down Expand Up @@ -1378,6 +1379,12 @@ typedef struct _RENDERHAL_INTERFACE
PRENDERHAL_INTERFACE pRenderHal,
PRENDERHAL_SURFACE_STATE_PARAMS pParams);

MOS_STATUS (*pfnGetPlaneDefinitionForCommonMessage) (
PRENDERHAL_INTERFACE pRenderHal,
MOS_FORMAT format,
bool isRenderTarget,
RENDERHAL_PLANE_DEFINITION &planeDefinition);

//---------------------------
// State Setup - HW + OS Specific
//---------------------------
Expand Down
2 changes: 1 addition & 1 deletion media_common/agnostic/common/vp/hal/vp_common_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ enum VpKernelID
// mediacopy-render copy
kernelRenderCopy,

kernelFcDScale444,
kernelL0FcCommon,

baseKernelMaxNumID
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ class Impl : public render::Impl<mhw::render::xe2_hpg_next::Cmd>
cmd.InterfaceDescriptor.DW5.NumberOfThreadsInGpgpuThreadGroup = params.dwNumberofThreadsInGPGPUGroup;
cmd.InterfaceDescriptor.DW5.SharedLocalMemorySize = params.dwSharedLocalMemorySize;

cmd.InterfaceDescriptor.DW7.PreferredSlmAllocationSizePerSubslice = params.preferredSlmAllocationSize;

// when Barriers is not 0, the EU fusion will close.
// Assigns barrier count.
if (params.bBarrierEnable)
Expand Down
1 change: 1 addition & 0 deletions media_softlet/agnostic/common/hw/mhw_render_cmdpar.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ struct _MHW_PAR_T(COMPUTE_WALKER)
uint8_t *inlineData = nullptr;
bool isGenerateLocalId = false;
MHW_EMIT_LOCAL_MODE emitLocal = MHW_EMIT_LOCAL_NONE;
uint32_t preferredSlmAllocationSize = 0;

};

Expand Down
102 changes: 100 additions & 2 deletions media_softlet/agnostic/common/renderhal/renderhal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4016,6 +4016,12 @@ MOS_STATUS RenderHal_GetSurfaceStateEntries(
}
}

if (pParams->forceCommonSurfaceMessage)
{
MHW_RENDERHAL_CHK_NULL_RETURN(pRenderHal->pfnGetPlaneDefinitionForCommonMessage);
MHW_RENDERHAL_CHK_STATUS_RETURN(pRenderHal->pfnGetPlaneDefinitionForCommonMessage(pRenderHal, pSurface->Format, pRenderHalSurface->SurfType == RENDERHAL_SURF_OUT_RENDERTARGET, PlaneDefinition));
}

// Get plane definitions
MHW_RENDERHAL_ASSERT(PlaneDefinition < RENDERHAL_PLANES_DEFINITION_COUNT);
*piNumEntries = pRenderHal->pPlaneDefinitions[PlaneDefinition].dwNumPlanes;
Expand Down Expand Up @@ -4061,7 +4067,14 @@ MOS_STATUS RenderHal_GetSurfaceStateEntries(
// Adjust the width
if (bWidthInDword)
{
if (PlaneDefinition == RENDERHAL_PLANES_R32G32B32A32F)
if (pParams->forceCommonSurfaceMessage &&
(PlaneDefinition == RENDERHAL_PLANES_R8 ||
PlaneDefinition == RENDERHAL_PLANES_R16_UNORM))
{
//For packed 422 formats, a single channel format is used for writing, so the width needs to be doubled.
dwSurfaceWidth = dwSurfaceWidth << 1;
}
else if (PlaneDefinition == RENDERHAL_PLANES_R32G32B32A32F)
{
dwSurfaceWidth = dwSurfaceWidth << 2;
}
Expand Down Expand Up @@ -4135,7 +4148,7 @@ MOS_STATUS RenderHal_GetSurfaceStateEntries(

pSurfaceEntry->YUVPlane = pPlane->ui8PlaneID;
pSurfaceEntry->bAVS = pPlane->bAdvanced;
pSurfaceEntry->isOutput = pParams->isOutput;
pSurfaceEntry->isOutput = pParams->isOutput;
pSurfaceEntry->bVertStride = pParams->bVertStride;
pSurfaceEntry->bVertStrideOffs = pParams->bVertStrideOffs;
pSurfaceEntry->bTiledSurface = (pSurface->TileType != MOS_TILE_LINEAR)
Expand All @@ -4161,6 +4174,89 @@ MOS_STATUS RenderHal_GetSurfaceStateEntries(
return eStatus;
}

//!
//! \brief    Get Plane Definition For L0 FC
//! \details  Overrides the plane definition for the formats that need a specific
//!           layout for L0 FC usage. Formats that the normal non-adv plane definition
//!           lookup already resolves correctly leave planeDefinition untouched.
//! \param    PRENDERHAL_INTERFACE pRenderHal
//!           [in] Pointer to Hardware Interface Structure (not referenced by this routine)
//! \param    MOS_FORMAT format
//!           [in] Surface format
//! \param    bool isRenderTarget
//!           [in] True when the surface type is RENDERHAL_SURF_OUT_RENDERTARGET
//! \param    RENDERHAL_PLANE_DEFINITION &planeDefinition
//!           [in/out] Plane Definition, written only for the formats overridden here
//! \return   MOS_STATUS
//!           MOS_STATUS_INVALID_PARAMETER for an unsupported format, MOS_STATUS_SUCCESS otherwise
//!
MOS_STATUS RenderHal_GetPlaneDefinitionForCommonMessage(
    PRENDERHAL_INTERFACE        pRenderHal,
    MOS_FORMAT                  format,
    bool                        isRenderTarget,
    RENDERHAL_PLANE_DEFINITION& planeDefinition)
{
    switch (format)
    {
    // Formats with a dedicated plane definition regardless of direction
    case Format_400P:
        planeDefinition = RENDERHAL_PLANES_R8;
        break;

    case Format_Y416:
        planeDefinition = RENDERHAL_PLANES_A16B16G16R16;
        break;

    // Packed 422, 8-bit flavours:
    //   writing - each channel is written separately through R8
    //   reading - R8G8 is used for Y and A8R8G8B8 for UV
    case Format_YUY2:
    case Format_YUYV:
    case Format_YVYU:
    case Format_UYVY:
    case Format_VYUY:
        planeDefinition = isRenderTarget ? RENDERHAL_PLANES_R8 : RENDERHAL_PLANES_YUY2_2PLANES;
        break;

    // Packed 422, 16-bit container flavours:
    //   writing - each channel is written separately through R16
    //   reading - RG16 is used for Y and ARGB16 for UV
    case Format_Y210:
    case Format_Y216:
        planeDefinition = isRenderTarget ? RENDERHAL_PLANES_R16_UNORM : RENDERHAL_PLANES_Y210;
        break;

    // Nothing to override: the normal non-adv GetPlaneDefinition result is already right
    case Format_A8R8G8B8:
    case Format_X8R8G8B8:
    case Format_A8B8G8R8:
    case Format_X8B8G8R8:
    case Format_R10G10B10A2:
    case Format_B10G10R10A2:
    case Format_A16R16G16B16:
    case Format_A16B16G16R16:
    case Format_A16R16G16B16F:
    case Format_A16B16G16R16F:
    case Format_AYUV:
    case Format_Y410:
    case Format_NV12:
    case Format_P010:
    case Format_P016:
    case Format_P210:
    case Format_P216:
        break;

    // Any other format is not supported on this path
    default:
        return MOS_STATUS_INVALID_PARAMETER;
    }

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief Enable Palette
//! \details Enable HW palette - reuse previous palette data
Expand Down Expand Up @@ -7074,6 +7170,8 @@ MOS_STATUS RenderHal_InitInterface(
pRenderHal->pfnSetSurfaceStateBuffer = RenderHal_SetSurfaceStateBuffer;
pRenderHal->pfnCalculateYOffset = RenderHal_CalculateYOffset;

pRenderHal->pfnGetPlaneDefinitionForCommonMessage = RenderHal_GetPlaneDefinitionForCommonMessage;

// Media states management functions
pRenderHal->pfnAllocateBB = RenderHal_AllocateBB;
pRenderHal->pfnFreeBB = RenderHal_FreeBB;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -917,9 +917,74 @@ bool XRenderHal_Platform_Interface_Next::PerThreadScratchSpaceStart64Byte(RENDER
return true;
}

//!
//! \brief    Calculate Preferred Slm Allocation Size for Interface Descriptor
//! \details  Rounds slmSize up to units of 1024, multiplies it by the number of thread
//!           groups that fit on one sub-slice (threads per sub-slice divided by
//!           numberOfThreadsPerThreadGroup, rounded up) and maps the product to a
//!           value in the range 0..10.
//! \param    RENDERHAL_INTERFACE *renderHal
//!           [in] Pointer to RenderHal interface, used to query the GT system info
//! \param    uint32_t slmSize
//!           [in] SLM size requested by one thread group
//! \param    uint32_t numberOfThreadsPerThreadGroup
//!           [in] Number of threads in one thread group, must not be 0
//! \return   uint32_t
//!           Preferred Slm Allocation Size value; 0 when slmSize is 0, when an input is
//!           invalid or when the per sub-slice size is above 384
//!
uint32_t XRenderHal_Platform_Interface_Next::CalculatePreferredSlmAllocationSizeFromSlmSize(
    RENDERHAL_INTERFACE *renderHal,
    uint32_t             slmSize,
    uint32_t             numberOfThreadsPerThreadGroup)
{
    const bool canQueryGtInfo =
        renderHal && renderHal->pOsInterface && renderHal->pOsInterface->pfnGetGtSystemInfo;
    if (!canQueryGtInfo)
    {
        MHW_RENDERHAL_ASSERTMESSAGE("renderhal or osInterface or pfnGetGtSystemInfo is nullptr");
        return 0;
    }
    if (0 == numberOfThreadsPerThreadGroup)
    {
        MHW_RENDERHAL_ASSERTMESSAGE("numberOfThreadsPerThreadGroup is 0");
        return 0;
    }

    MEDIA_SYSTEM_INFO *systemInfo = renderHal->pOsInterface->pfnGetGtSystemInfo(renderHal->pOsInterface);
    if (nullptr == systemInfo)
    {
        MHW_RENDERHAL_ASSERTMESSAGE("GtSystemInfo is nullptr");
        return 0;
    }
    if (0 == systemInfo->SubSliceCount)
    {
        MHW_RENDERHAL_ASSERTMESSAGE("SubSliceCount is 0");
        return 0;
    }

    // Requested size rounded up to whole units of 1024
    const uint32_t slmUnits = slmSize / 1024 + ((slmSize % 1024) ? 1 : 0);
    if (0 == slmUnits)
    {
        // No SLM requested
        return 0;
    }

    // Thread groups per sub-slice (rounded up), each needing slmUnits
    const uint32_t threadsPerSubSlice = systemInfo->ThreadCount / systemInfo->SubSliceCount;
    const uint32_t groupsPerSubSlice  = (threadsPerSubSlice + numberOfThreadsPerThreadGroup - 1) / numberOfThreadsPerThreadGroup;
    const uint32_t unitsPerSubSlice   = slmUnits * groupsPerSubSlice;

    if (unitsPerSubSlice <= 16)
    {
        return 1;
    }
    if (unitsPerSubSlice <= 32)
    {
        return 2;
    }
    if (unitsPerSubSlice <= 256)
    {
        // 33..256: one step per started block of 32, offset by one
        return (unitsPerSubSlice + 31) / 32 + 1;
    }
    if (unitsPerSubSlice <= 384)
    {
        return 10;
    }

    MHW_RENDERHAL_ASSERTMESSAGE("slmSizePerSubSlice %d is bigger than max size", unitsPerSubSlice);
    return 0;
}

uint32_t XRenderHal_Platform_Interface_Next::EncodeSLMSize(uint32_t SLMSize)
{
uint32_t EncodedValue;
SLMSize = SLMSize / 1024 + (SLMSize % 1024 != 0);

if (SLMSize <= 2)
{
EncodedValue = SLMSize;
Expand All @@ -929,6 +994,10 @@ uint32_t XRenderHal_Platform_Interface_Next::EncodeSLMSize(uint32_t SLMSize)
EncodedValue = 0;
do
{
if (SLMSize != 1 && (SLMSize & 0x1) != 0)
{
++SLMSize;
}
SLMSize >>= 1;
EncodedValue++;
} while (SLMSize);
Expand Down Expand Up @@ -1154,7 +1223,7 @@ MOS_STATUS XRenderHal_Platform_Interface_Next::SendComputeWalker(
pRenderHal->pStateHeap->dwOffsetSampler +
pGpGpuWalkerParams->InterfaceDescriptorOffset * pRenderHal->pStateHeap->dwSizeSamplers;
mhwIdEntryParams.dwBindingTableOffset = pGpGpuWalkerParams->BindingTableID * pRenderHal->pStateHeap->iBindingTableSize;
mhwIdEntryParams.dwSharedLocalMemorySize = pGpGpuWalkerParams->SLMSize;
mhwIdEntryParams.dwSharedLocalMemorySize = m_renderHal->pfnEncodeSLMSize(m_renderHal, pGpGpuWalkerParams->SLMSize);
if (pGpGpuWalkerParams->isGenerateLocalID && pGpGpuWalkerParams->emitLocal != MHW_EMIT_LOCAL_NONE)
{
//When COMPUTE_WALKER Emit Local ID is enabled, thread group number need to divide MHW_RENDER_ENGINE_NUMBER_OF_THREAD_UNIT
Expand All @@ -1169,9 +1238,11 @@ MOS_STATUS XRenderHal_Platform_Interface_Next::SendComputeWalker(
{
mhwIdEntryParams.dwNumberofThreadsInGPGPUGroup = pGpGpuWalkerParams->ThreadWidth * pGpGpuWalkerParams->ThreadHeight;
}
mhwIdEntryParams.preferredSlmAllocationSize = CalculatePreferredSlmAllocationSizeFromSlmSize(m_renderHal, pGpGpuWalkerParams->SLMSize, mhwIdEntryParams.dwNumberofThreadsInGPGPUGroup);
//This is only a WA to disable EU fusion for multi-layer blending cases or for a single layer doing colorfill and rotation together.
//It needs to be removed once the kernel or compiler fixes it.
mhwIdEntryParams.bBarrierEnable = pRenderHal->eufusionBypass ? 1 : 0;
mhwIdEntryParams.bBarrierEnable |= pGpGpuWalkerParams->hasBarrier;
pGpGpuWalkerParams->IndirectDataStartAddress = pGpGpuWalkerParams->IndirectDataStartAddress + pRenderHal->pStateHeap->pCurMediaState->dwOffset;

MHW_RENDERHAL_CHK_NULL_RETURN(m_renderItf);
Expand Down Expand Up @@ -1472,28 +1543,29 @@ MHW_SETPAR_DECL_SRC(COMPUTE_WALKER, XRenderHal_Platform_Interface_Next)
MHW_RENDERHAL_CHK_NULL_RETURN(m_gpgpuWalkerParams);
MHW_RENDERHAL_CHK_NULL_RETURN(m_interfaceDescriptorParams);

params.IndirectDataLength = m_gpgpuWalkerParams->IndirectDataLength;
params.IndirectDataLength = m_gpgpuWalkerParams->IndirectDataLength;
params.IndirectDataStartAddress = m_gpgpuWalkerParams->IndirectDataStartAddress;
params.ThreadWidth = m_gpgpuWalkerParams->ThreadWidth;
params.ThreadHeight = m_gpgpuWalkerParams->ThreadHeight;
params.ThreadDepth = m_gpgpuWalkerParams->ThreadDepth;
params.ThreadWidth = m_gpgpuWalkerParams->ThreadWidth;
params.ThreadHeight = m_gpgpuWalkerParams->ThreadHeight;
params.ThreadDepth = m_gpgpuWalkerParams->ThreadDepth;

params.GroupWidth = m_gpgpuWalkerParams->GroupWidth;
params.GroupHeight = m_gpgpuWalkerParams->GroupHeight;
params.GroupDepth = m_gpgpuWalkerParams->GroupDepth;
params.GroupWidth = m_gpgpuWalkerParams->GroupWidth;
params.GroupHeight = m_gpgpuWalkerParams->GroupHeight;
params.GroupDepth = m_gpgpuWalkerParams->GroupDepth;
params.GroupStartingX = m_gpgpuWalkerParams->GroupStartingX;
params.GroupStartingY = m_gpgpuWalkerParams->GroupStartingY;
params.GroupStartingZ = m_gpgpuWalkerParams->GroupStartingZ;

params.dwKernelOffset = m_interfaceDescriptorParams->dwKernelOffset;
params.dwSamplerCount = m_interfaceDescriptorParams->dwSamplerCount;
params.dwSamplerOffset = m_interfaceDescriptorParams->dwSamplerOffset;
params.dwBindingTableOffset = m_interfaceDescriptorParams->dwBindingTableOffset;;
params.bBarrierEnable = m_interfaceDescriptorParams->bBarrierEnable;
params.dwKernelOffset = m_interfaceDescriptorParams->dwKernelOffset;
params.dwSamplerCount = m_interfaceDescriptorParams->dwSamplerCount;
params.dwSamplerOffset = m_interfaceDescriptorParams->dwSamplerOffset;
params.dwBindingTableOffset = m_interfaceDescriptorParams->dwBindingTableOffset;
params.bBarrierEnable = m_interfaceDescriptorParams->bBarrierEnable;
params.dwNumberofThreadsInGPGPUGroup = m_interfaceDescriptorParams->dwNumberofThreadsInGPGPUGroup;
params.dwSharedLocalMemorySize = m_interfaceDescriptorParams->dwSharedLocalMemorySize;
params.IndirectDataStartAddress = m_gpgpuWalkerParams->IndirectDataStartAddress;
params.forcePreferredSLMZero = m_gpgpuWalkerParams->ForcePreferredSLMZero;
params.dwSharedLocalMemorySize = m_interfaceDescriptorParams->dwSharedLocalMemorySize;
params.preferredSlmAllocationSize = m_interfaceDescriptorParams->preferredSlmAllocationSize;
params.IndirectDataStartAddress = m_gpgpuWalkerParams->IndirectDataStartAddress;
params.forcePreferredSLMZero = m_gpgpuWalkerParams->ForcePreferredSLMZero;

if (m_gpgpuWalkerParams->ThreadDepth == 0)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,11 +285,25 @@ class XRenderHal_Platform_Interface_Next : public XRenderHal_Platform_Interface,
//! \brief Encode SLM Size for Interface Descriptor
//! \details Setup SLM size
//! \param uint32_t SLMSize
//! [in] SLM size in 1K
//! [in] SLM size
//! \return encoded output
//!
uint32_t EncodeSLMSize(uint32_t SLMSize) override;

//!
//! \brief Calculate Preferred Slm Allocation Size for Interface Descriptor
//! \details Derive the Preferred Slm Allocation Size from the SLM size of one
//! thread group and the number of threads in a thread group
//! \param RENDERHAL_INTERFACE *renderHal
//! [in] Pointer to RenderHal interface
//! \param uint32_t slmSize
//! [in] SLM size
//! \param uint32_t numberOfThreadsPerThreadGroup
//! [in] Number of threads in one thread group
//! \return Preferred Slm Allocation Size
//!
virtual uint32_t CalculatePreferredSlmAllocationSizeFromSlmSize(
RENDERHAL_INTERFACE *renderHal,
uint32_t slmSize,
uint32_t numberOfThreadsPerThreadGroup);

//!
//! \brief Set Chroma Direction
//! \details Setup Chroma Direction for hpg_base
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,9 @@ MOS_STATUS RenderCmdPacket::PrepareComputeWalkerParams(KERNEL_WALKER_PARAMS para

gpgpuWalker.isGenerateLocalID = params.isGenerateLocalID;
gpgpuWalker.emitLocal = params.emitLocal;

gpgpuWalker.SLMSize = params.slmSize;
gpgpuWalker.hasBarrier = params.hasBarrier;

return MOS_STATUS_SUCCESS;
}
Expand Down
Loading

0 comments on commit 4b9ed1d

Please sign in to comment.