mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-06 01:50:38 +00:00
Update HWCPipe to support performance counters of Dimensity 9000 SoC
Base HWCPipe commit: https://github.com/ARM-software/HWCPipe/commit/8cc02065b4ef249127aa0164dc0d62d65c0d4203
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Arm Software
|
||||
Copyright (c) 2019-2022 Arm Limited
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
+52
-16
@@ -42,17 +42,41 @@ enum class CpuCounter
|
||||
BranchInstructions,
|
||||
BranchMisses,
|
||||
|
||||
L1Accesses,
|
||||
InstrRetired,
|
||||
L2Accesses,
|
||||
L3Accesses,
|
||||
BusReads,
|
||||
BusWrites,
|
||||
MemReads,
|
||||
MemWrites,
|
||||
ASESpec,
|
||||
VFPSpec,
|
||||
CryptoSpec,
|
||||
|
||||
MaxValue
|
||||
};
|
||||
|
||||
// Mapping from CPU counter names to enum values. Used for JSON initialization.
|
||||
const std::unordered_map<std::string, CpuCounter> cpu_counter_names{
|
||||
{"Cycles", CpuCounter::Cycles},
|
||||
{"Instructions", CpuCounter::Instructions},
|
||||
{"CacheReferences", CpuCounter::CacheReferences},
|
||||
{"CacheMisses", CpuCounter::CacheMisses},
|
||||
{"BranchInstructions", CpuCounter::BranchInstructions},
|
||||
{"BranchMisses", CpuCounter::BranchMisses},
|
||||
const std::unordered_map<std::string, CpuCounter> cpu_counter_names {
|
||||
{"Cycles", CpuCounter::Cycles},
|
||||
{"Instructions", CpuCounter::Instructions},
|
||||
{"CacheReferences", CpuCounter::CacheReferences},
|
||||
{"CacheMisses", CpuCounter::CacheMisses},
|
||||
{"BranchInstructions", CpuCounter::BranchInstructions},
|
||||
{"BranchMisses", CpuCounter::BranchMisses},
|
||||
|
||||
{"L1Accesses", CpuCounter::L1Accesses},
|
||||
{"InstrRetired", CpuCounter::InstrRetired},
|
||||
{"L2Accesses", CpuCounter::L2Accesses},
|
||||
{"L3Accesses", CpuCounter::L3Accesses},
|
||||
{"BusReads", CpuCounter::BusReads},
|
||||
{"BusWrites", CpuCounter::BusWrites},
|
||||
{"MemReads", CpuCounter::MemReads},
|
||||
{"MemWrites", CpuCounter::MemWrites},
|
||||
{"ASESpec", CpuCounter::ASESpec},
|
||||
{"VFPSpec", CpuCounter::VFPSpec},
|
||||
{"CryptoSpec", CpuCounter::CryptoSpec},
|
||||
};
|
||||
|
||||
// A hash function for CpuCounter values
|
||||
@@ -72,23 +96,35 @@ struct CpuCounterInfo
|
||||
};
|
||||
|
||||
// Mapping from each counter to its corresponding information (description and unit)
|
||||
const std::unordered_map<CpuCounter, CpuCounterInfo, CpuCounterHash> cpu_counter_info{
|
||||
{CpuCounter::Cycles, {"Number of CPU cycles", "cycles"}},
|
||||
{CpuCounter::Instructions, {"Number of CPU instructions", "instructions"}},
|
||||
{CpuCounter::CacheReferences, {"Number of cache references", "references"}},
|
||||
{CpuCounter::CacheMisses, {"Number of cache misses", "misses"}},
|
||||
{CpuCounter::BranchInstructions, {"Number of branch instructions", "instructions"}},
|
||||
{CpuCounter::BranchMisses, {"Number of branch misses", "misses"}},
|
||||
const std::unordered_map<CpuCounter, CpuCounterInfo, CpuCounterHash> cpu_counter_info {
|
||||
{CpuCounter::Cycles, {"Number of CPU cycles", "cycles"}},
|
||||
{CpuCounter::Instructions, {"Number of CPU instructions", "instructions"}},
|
||||
{CpuCounter::CacheReferences, {"Number of cache references", "references"}},
|
||||
{CpuCounter::CacheMisses, {"Number of cache misses", "misses"}},
|
||||
{CpuCounter::BranchInstructions, {"Number of branch instructions", "instructions"}},
|
||||
{CpuCounter::BranchMisses, {"Number of branch misses", "misses"}},
|
||||
|
||||
{CpuCounter::L1Accesses, {"L1 data cache accesses", "accesses"}},
|
||||
{CpuCounter::InstrRetired, {"All retired instructions", "instructions"}},
|
||||
{CpuCounter::L2Accesses, {"L2 data cache accesses", "accesses"}},
|
||||
{CpuCounter::L3Accesses, {"L3 data cache accesses", "accesses"}},
|
||||
{CpuCounter::BusReads, {"Bus access reads", "beats"}},
|
||||
{CpuCounter::BusWrites, {"Bus access writes", "beats"}},
|
||||
{CpuCounter::MemReads, {"Data memory access, load instructions", "instructions"}},
|
||||
{CpuCounter::MemWrites, {"Data memory access, store instructions", "instructions"}},
|
||||
{CpuCounter::ASESpec, {"Speculatively executed SIMD operations", "operations"}},
|
||||
{CpuCounter::VFPSpec, {"Speculatively executed floating point operations", "operations"}},
|
||||
{CpuCounter::CryptoSpec, {"Speculatively executed cryptographic operations", "operations"}},
|
||||
};
|
||||
|
||||
typedef std::unordered_set<CpuCounter, CpuCounterHash> CpuCounterSet;
|
||||
typedef std::unordered_map<CpuCounter, Value, CpuCounterHash>
|
||||
CpuMeasurements;
|
||||
CpuMeasurements;
|
||||
|
||||
/** An interface for classes that collect CPU performance data. */
|
||||
class CpuProfiler
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~CpuProfiler() = default;
|
||||
|
||||
// Returns the enabled counters
|
||||
|
||||
+97
-58
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019 ARM Limited.
|
||||
* Copyright (c) 2019-2022 ARM Limited.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -36,16 +36,25 @@ namespace hwcpipe
|
||||
enum class GpuCounter
|
||||
{
|
||||
GpuCycles,
|
||||
ComputeCycles,
|
||||
VertexCycles,
|
||||
VertexComputeCycles,
|
||||
FragmentCycles,
|
||||
TilerCycles,
|
||||
|
||||
ComputeJobs,
|
||||
VertexJobs,
|
||||
VertexComputeJobs,
|
||||
FragmentJobs,
|
||||
Pixels,
|
||||
|
||||
CulledPrimitives,
|
||||
VisiblePrimitives,
|
||||
InputPrimitives,
|
||||
|
||||
Tiles,
|
||||
TransactionEliminations,
|
||||
|
||||
EarlyZTests,
|
||||
EarlyZKilled,
|
||||
LateZTests,
|
||||
@@ -54,13 +63,17 @@ enum class GpuCounter
|
||||
Instructions,
|
||||
DivergedInstructions,
|
||||
|
||||
ShaderComputeCycles,
|
||||
ShaderFragmentCycles,
|
||||
ShaderCycles,
|
||||
ShaderArithmeticCycles,
|
||||
ShaderInterpolatorCycles,
|
||||
ShaderLoadStoreCycles,
|
||||
ShaderTextureCycles,
|
||||
|
||||
CacheReadLookups,
|
||||
CacheWriteLookups,
|
||||
|
||||
ExternalMemoryReadAccesses,
|
||||
ExternalMemoryWriteAccesses,
|
||||
ExternalMemoryReadStalls,
|
||||
@@ -72,39 +85,52 @@ enum class GpuCounter
|
||||
};
|
||||
|
||||
// Mapping from GPU counter names to enum values. Used for JSON initialization.
|
||||
const std::unordered_map<std::string, GpuCounter> gpu_counter_names{
|
||||
{"GpuCycles", GpuCounter::GpuCycles},
|
||||
{"VertexComputeCycles", GpuCounter::VertexComputeCycles},
|
||||
{"FragmentCycles", GpuCounter::FragmentCycles},
|
||||
{"TilerCycles", GpuCounter::TilerCycles},
|
||||
const std::unordered_map<std::string, GpuCounter> gpu_counter_names {
|
||||
{"GpuCycles", GpuCounter::GpuCycles},
|
||||
{"ComputeCycles", GpuCounter::ComputeCycles},
|
||||
{"VertexCycles", GpuCounter::VertexCycles},
|
||||
{"VertexComputeCycles", GpuCounter::VertexComputeCycles},
|
||||
{"FragmentCycles", GpuCounter::FragmentCycles},
|
||||
{"TilerCycles", GpuCounter::TilerCycles},
|
||||
|
||||
{"VertexComputeJobs", GpuCounter::VertexComputeJobs},
|
||||
{"Tiles", GpuCounter::Tiles},
|
||||
{"TransactionEliminations", GpuCounter::TransactionEliminations},
|
||||
{"FragmentJobs", GpuCounter::FragmentJobs},
|
||||
{"Pixels", GpuCounter::Pixels},
|
||||
{"ComputeJobs", GpuCounter::VertexComputeJobs},
|
||||
{"VertexJobs", GpuCounter::VertexJobs},
|
||||
{"VertexComputeJobs", GpuCounter::VertexComputeJobs},
|
||||
{"FragmentJobs", GpuCounter::FragmentJobs},
|
||||
{"Pixels", GpuCounter::Pixels},
|
||||
|
||||
{"EarlyZTests", GpuCounter::EarlyZTests},
|
||||
{"EarlyZKilled", GpuCounter::EarlyZKilled},
|
||||
{"LateZTests", GpuCounter::LateZTests},
|
||||
{"LateZKilled", GpuCounter::LateZKilled},
|
||||
{"CulledPrimitives", GpuCounter::CulledPrimitives},
|
||||
{"VisiblePrimitives", GpuCounter::VisiblePrimitives},
|
||||
{"InputPrimitives", GpuCounter::InputPrimitives},
|
||||
|
||||
{"Instructions", GpuCounter::Instructions},
|
||||
{"DivergedInstructions", GpuCounter::DivergedInstructions},
|
||||
{"Tiles", GpuCounter::Tiles},
|
||||
{"TransactionEliminations", GpuCounter::TransactionEliminations},
|
||||
|
||||
{"ShaderCycles", GpuCounter::ShaderCycles},
|
||||
{"ShaderArithmeticCycles", GpuCounter::ShaderArithmeticCycles},
|
||||
{"ShaderLoadStoreCycles", GpuCounter::ShaderLoadStoreCycles},
|
||||
{"ShaderTextureCycles", GpuCounter::ShaderTextureCycles},
|
||||
{"EarlyZTests", GpuCounter::EarlyZTests},
|
||||
{"EarlyZKilled", GpuCounter::EarlyZKilled},
|
||||
{"LateZTests", GpuCounter::LateZTests},
|
||||
{"LateZKilled", GpuCounter::LateZKilled},
|
||||
|
||||
{"CacheReadLookups", GpuCounter::CacheReadLookups},
|
||||
{"CacheWriteLookups", GpuCounter::CacheWriteLookups},
|
||||
{"ExternalMemoryReadAccesses", GpuCounter::ExternalMemoryReadAccesses},
|
||||
{"ExternalMemoryWriteAccesses", GpuCounter::ExternalMemoryWriteAccesses},
|
||||
{"ExternalMemoryReadStalls", GpuCounter::ExternalMemoryReadStalls},
|
||||
{"ExternalMemoryWriteStalls", GpuCounter::ExternalMemoryWriteStalls},
|
||||
{"ExternalMemoryReadBytes", GpuCounter::ExternalMemoryReadBytes},
|
||||
{"ExternalMemoryWriteBytes", GpuCounter::ExternalMemoryWriteBytes},
|
||||
{"Instructions", GpuCounter::Instructions},
|
||||
{"DivergedInstructions", GpuCounter::DivergedInstructions},
|
||||
|
||||
{"ShaderComputeCycles", GpuCounter::ShaderComputeCycles},
|
||||
{"ShaderFragmentCycles", GpuCounter::ShaderFragmentCycles},
|
||||
{"ShaderCycles", GpuCounter::ShaderCycles},
|
||||
{"ShaderArithmeticCycles", GpuCounter::ShaderArithmeticCycles},
|
||||
{"ShaderInterpolatorCycles", GpuCounter::ShaderInterpolatorCycles},
|
||||
{"ShaderLoadStoreCycles", GpuCounter::ShaderLoadStoreCycles},
|
||||
{"ShaderTextureCycles", GpuCounter::ShaderTextureCycles},
|
||||
|
||||
{"CacheReadLookups", GpuCounter::CacheReadLookups},
|
||||
{"CacheWriteLookups", GpuCounter::CacheWriteLookups},
|
||||
|
||||
{"ExternalMemoryReadAccesses", GpuCounter::ExternalMemoryReadAccesses},
|
||||
{"ExternalMemoryWriteAccesses", GpuCounter::ExternalMemoryWriteAccesses},
|
||||
{"ExternalMemoryReadStalls", GpuCounter::ExternalMemoryReadStalls},
|
||||
{"ExternalMemoryWriteStalls", GpuCounter::ExternalMemoryWriteStalls},
|
||||
{"ExternalMemoryReadBytes", GpuCounter::ExternalMemoryReadBytes},
|
||||
{"ExternalMemoryWriteBytes", GpuCounter::ExternalMemoryWriteBytes},
|
||||
};
|
||||
|
||||
// A hash function for GpuCounter values
|
||||
@@ -124,39 +150,52 @@ struct GpuCounterInfo
|
||||
};
|
||||
|
||||
// Mapping from each counter to its corresponding information (description and unit)
|
||||
const std::unordered_map<GpuCounter, GpuCounterInfo, GpuCounterHash> gpu_counter_info{
|
||||
{GpuCounter::GpuCycles, {"Number of GPU cycles", "cycles"}},
|
||||
{GpuCounter::VertexComputeCycles, {"Number of vertex/compute cycles", "cycles"}},
|
||||
{GpuCounter::FragmentCycles, {"Number of fragment cycles", "cycles"}},
|
||||
{GpuCounter::TilerCycles, {"Number of tiler cycles", "cycles"}},
|
||||
const std::unordered_map<GpuCounter, GpuCounterInfo, GpuCounterHash> gpu_counter_info {
|
||||
{GpuCounter::GpuCycles, {"Number of GPU cycles", "cycles"}},
|
||||
{GpuCounter::ComputeCycles, {"Number of compute cycles", "cycles"}},
|
||||
{GpuCounter::VertexCycles, {"Number of vertex cycles", "cycles"}},
|
||||
{GpuCounter::VertexComputeCycles, {"Number of vertex/compute cycles", "cycles"}},
|
||||
{GpuCounter::FragmentCycles, {"Number of fragment cycles", "cycles"}},
|
||||
{GpuCounter::TilerCycles, {"Number of tiler cycles", "cycles"}},
|
||||
|
||||
{GpuCounter::VertexComputeJobs, {"Number of vertex/compute jobs", "jobs"}},
|
||||
{GpuCounter::Tiles, {"Number of physical tiles written", "tiles"}},
|
||||
{GpuCounter::TransactionEliminations, {"Number of transaction eliminations", "tiles"}},
|
||||
{GpuCounter::FragmentJobs, {"Number of fragment jobs", "jobs"}},
|
||||
{GpuCounter::Pixels, {"Number of pixels shaded", "cycles"}},
|
||||
{GpuCounter::ComputeJobs, {"Number of compute jobs", "jobs"}},
|
||||
{GpuCounter::VertexJobs, {"Number of vertex jobs", "jobs"}},
|
||||
{GpuCounter::VertexComputeJobs, {"Number of vertex/compute jobs", "jobs"}},
|
||||
{GpuCounter::FragmentJobs, {"Number of fragment jobs", "jobs"}},
|
||||
{GpuCounter::Pixels, {"Number of pixels shaded", "cycles"}},
|
||||
|
||||
{GpuCounter::EarlyZTests, {"Early-Z tests performed", "tests"}},
|
||||
{GpuCounter::EarlyZKilled, {"Early-Z tests resulting in a kill", "tests"}},
|
||||
{GpuCounter::LateZTests, {"Late-Z tests performed", "tests"}},
|
||||
{GpuCounter::LateZKilled, {"Late-Z tests resulting in a kill", "tests"}},
|
||||
{GpuCounter::CulledPrimitives, {"Number of culled primitives", "triangles"}},
|
||||
{GpuCounter::VisiblePrimitives, {"Number of visible primitives", "triangles"}},
|
||||
{GpuCounter::InputPrimitives, {"Number of input primitives", "triangles"}},
|
||||
|
||||
{GpuCounter::Instructions, {"Number of shader instructions", "instructions"}},
|
||||
{GpuCounter::DivergedInstructions, {"Number of diverged shader instructions", "instructions"}},
|
||||
{GpuCounter::Tiles, {"Number of physical tiles written", "tiles"}},
|
||||
{GpuCounter::TransactionEliminations, {"Number of transaction eliminations", "tiles"}},
|
||||
|
||||
{GpuCounter::ShaderCycles, {"Shader total cycles", "cycles"}},
|
||||
{GpuCounter::ShaderArithmeticCycles, {"Shader arithmetic cycles", "cycles"}},
|
||||
{GpuCounter::ShaderLoadStoreCycles, {"Shader load/store cycles", "cycles"}},
|
||||
{GpuCounter::ShaderTextureCycles, {"Shader texture cycles", "cycles"}},
|
||||
{GpuCounter::EarlyZTests, {"Number of early-Z tests performed", "tests"}},
|
||||
{GpuCounter::EarlyZKilled, {"Number of early-Z tests resulting in a kill", "tests"}},
|
||||
{GpuCounter::LateZTests, {"Number of late-Z tests performed", "tests"}},
|
||||
{GpuCounter::LateZKilled, {"Number of late-Z tests resulting in a kill", "tests"}},
|
||||
|
||||
{GpuCounter::CacheReadLookups, {"Cache read lookups", "lookups"}},
|
||||
{GpuCounter::CacheWriteLookups, {"Cache write lookups", "lookups"}},
|
||||
{GpuCounter::ExternalMemoryReadAccesses, {"Reads from external memory", "accesses"}},
|
||||
{GpuCounter::ExternalMemoryWriteAccesses, {"Writes to external memory", "accesses"}},
|
||||
{GpuCounter::ExternalMemoryReadStalls, {"Stalls when reading from external memory", "stalls"}},
|
||||
{GpuCounter::ExternalMemoryWriteStalls, {"Stalls when writing to external memory", "stalls"}},
|
||||
{GpuCounter::ExternalMemoryReadBytes, {"Bytes read to external memory", "B"}},
|
||||
{GpuCounter::ExternalMemoryWriteBytes, {"Bytes written to external memory", "B"}},
|
||||
{GpuCounter::Instructions, {"Number of shader instructions", "instructions"}},
|
||||
{GpuCounter::DivergedInstructions, {"Number of diverged shader instructions", "instructions"}},
|
||||
|
||||
{GpuCounter::ShaderComputeCycles, {"Number of shader vertex/compute cycles", "cycles"}},
|
||||
{GpuCounter::ShaderFragmentCycles, {"Number of shader fragment cycles", "cycles"}},
|
||||
{GpuCounter::ShaderCycles, {"Number of shader core cycles", "cycles"}},
|
||||
{GpuCounter::ShaderArithmeticCycles, {"Number of shader arithmetic cycles", "cycles"}},
|
||||
{GpuCounter::ShaderInterpolatorCycles, {"Number of shader interpolator cycles", "cycles"}},
|
||||
{GpuCounter::ShaderLoadStoreCycles, {"Number of shader load/store cycles", "cycles"}},
|
||||
{GpuCounter::ShaderTextureCycles, {"Number of shader texture cycles", "cycles"}},
|
||||
|
||||
{GpuCounter::CacheReadLookups, {"Number of cache read lookups", "lookups"}},
|
||||
{GpuCounter::CacheWriteLookups, {"Number of cache write lookups", "lookups"}},
|
||||
|
||||
{GpuCounter::ExternalMemoryReadAccesses, {"Number of reads from external memory", "accesses"}},
|
||||
{GpuCounter::ExternalMemoryWriteAccesses, {"Number of writes to external memory", "accesses"}},
|
||||
{GpuCounter::ExternalMemoryReadStalls, {"Number of stall cycles when reading from external memory", "cycles"}},
|
||||
{GpuCounter::ExternalMemoryWriteStalls, {"Number of stall cycles when writing to external memory", "cycles"}},
|
||||
{GpuCounter::ExternalMemoryReadBytes, {"Number of bytes read to external memory", "bytes"}},
|
||||
{GpuCounter::ExternalMemoryWriteBytes, {"Number of bytes written to external memory", "bytes"}},
|
||||
};
|
||||
|
||||
typedef std::unordered_set<GpuCounter, GpuCounterHash> GpuCounterSet;
|
||||
@@ -165,7 +204,7 @@ typedef std::unordered_map<GpuCounter, Value, GpuCounterHash> GpuMeasurements;
|
||||
/** An interface for classes that collect GPU performance data. */
|
||||
class GpuProfiler
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual ~GpuProfiler() = default;
|
||||
|
||||
// Returns the enabled counters
|
||||
|
||||
@@ -26,12 +26,12 @@
|
||||
#include "hwcpipe_log.h"
|
||||
|
||||
#ifdef __linux__
|
||||
# include "vendor/arm/pmu/pmu_profiler.h"
|
||||
# include "vendor/arm/mali/mali_profiler.h"
|
||||
#include "vendor/arm/pmu/pmu_profiler.h"
|
||||
#include "vendor/arm/mali/mali_profiler.h"
|
||||
#endif
|
||||
|
||||
#ifndef HWCPIPE_NO_JSON
|
||||
#include <json.hpp>
|
||||
#include <json.hpp>
|
||||
using json = nlohmann::json;
|
||||
#endif
|
||||
|
||||
@@ -44,7 +44,7 @@ HWCPipe::HWCPipe(const char *json_string)
|
||||
{
|
||||
auto json = json::parse(json_string);
|
||||
|
||||
CpuCounterSet enabled_cpu_counters{};
|
||||
CpuCounterSet enabled_cpu_counters {};
|
||||
auto cpu = json.find("cpu");
|
||||
if (cpu != json.end())
|
||||
{
|
||||
@@ -62,7 +62,7 @@ HWCPipe::HWCPipe(const char *json_string)
|
||||
}
|
||||
}
|
||||
|
||||
GpuCounterSet enabled_gpu_counters{};
|
||||
GpuCounterSet enabled_gpu_counters {};
|
||||
auto gpu = json.find("gpu");
|
||||
if (gpu != json.end())
|
||||
{
|
||||
@@ -91,25 +91,29 @@ HWCPipe::HWCPipe(CpuCounterSet enabled_cpu_counters, GpuCounterSet enabled_gpu_c
|
||||
|
||||
HWCPipe::HWCPipe()
|
||||
{
|
||||
CpuCounterSet enabled_cpu_counters{CpuCounter::Cycles,
|
||||
CpuCounter::Instructions,
|
||||
CpuCounter::CacheReferences,
|
||||
CpuCounter::CacheMisses,
|
||||
CpuCounter::BranchInstructions,
|
||||
CpuCounter::BranchMisses};
|
||||
CpuCounterSet enabled_cpu_counters {
|
||||
CpuCounter::Cycles,
|
||||
CpuCounter::Instructions,
|
||||
CpuCounter::CacheReferences,
|
||||
CpuCounter::CacheMisses,
|
||||
CpuCounter::BranchInstructions,
|
||||
CpuCounter::BranchMisses,
|
||||
};
|
||||
|
||||
GpuCounterSet enabled_gpu_counters{GpuCounter::GpuCycles,
|
||||
GpuCounter::VertexComputeCycles,
|
||||
GpuCounter::FragmentCycles,
|
||||
GpuCounter::TilerCycles,
|
||||
GpuCounter::CacheReadLookups,
|
||||
GpuCounter::CacheWriteLookups,
|
||||
GpuCounter::ExternalMemoryReadAccesses,
|
||||
GpuCounter::ExternalMemoryWriteAccesses,
|
||||
GpuCounter::ExternalMemoryReadStalls,
|
||||
GpuCounter::ExternalMemoryWriteStalls,
|
||||
GpuCounter::ExternalMemoryReadBytes,
|
||||
GpuCounter::ExternalMemoryWriteBytes};
|
||||
GpuCounterSet enabled_gpu_counters {
|
||||
GpuCounter::GpuCycles,
|
||||
GpuCounter::VertexComputeCycles,
|
||||
GpuCounter::FragmentCycles,
|
||||
GpuCounter::TilerCycles,
|
||||
GpuCounter::CacheReadLookups,
|
||||
GpuCounter::CacheWriteLookups,
|
||||
GpuCounter::ExternalMemoryReadAccesses,
|
||||
GpuCounter::ExternalMemoryWriteAccesses,
|
||||
GpuCounter::ExternalMemoryReadStalls,
|
||||
GpuCounter::ExternalMemoryWriteStalls,
|
||||
GpuCounter::ExternalMemoryReadBytes,
|
||||
GpuCounter::ExternalMemoryWriteBytes,
|
||||
};
|
||||
|
||||
create_profilers(std::move(enabled_cpu_counters), std::move(enabled_gpu_counters));
|
||||
}
|
||||
@@ -174,7 +178,10 @@ void HWCPipe::create_profilers(CpuCounterSet enabled_cpu_counters, GpuCounterSet
|
||||
#ifdef __linux__
|
||||
try
|
||||
{
|
||||
cpu_profiler_ = std::unique_ptr<PmuProfiler>(new PmuProfiler(enabled_cpu_counters));
|
||||
if (enabled_cpu_counters.size() != 0)
|
||||
{
|
||||
cpu_profiler_ = std::unique_ptr<PmuProfiler>(new PmuProfiler(enabled_cpu_counters));
|
||||
}
|
||||
}
|
||||
catch (const std::runtime_error &e)
|
||||
{
|
||||
@@ -183,7 +190,10 @@ void HWCPipe::create_profilers(CpuCounterSet enabled_cpu_counters, GpuCounterSet
|
||||
|
||||
try
|
||||
{
|
||||
gpu_profiler_ = std::unique_ptr<MaliProfiler>(new MaliProfiler(enabled_gpu_counters));
|
||||
if (enabled_gpu_counters.size() != 0)
|
||||
{
|
||||
gpu_profiler_ = std::unique_ptr<MaliProfiler>(new MaliProfiler(enabled_gpu_counters));
|
||||
}
|
||||
}
|
||||
catch (const std::runtime_error &e)
|
||||
{
|
||||
|
||||
@@ -37,14 +37,14 @@ namespace hwcpipe
|
||||
{
|
||||
struct Measurements
|
||||
{
|
||||
const CpuMeasurements *cpu{nullptr};
|
||||
const GpuMeasurements *gpu{nullptr};
|
||||
const CpuMeasurements *cpu {nullptr};
|
||||
const GpuMeasurements *gpu {nullptr};
|
||||
};
|
||||
|
||||
/** A class that collects CPU/GPU performance data. */
|
||||
class HWCPipe
|
||||
{
|
||||
public:
|
||||
public:
|
||||
#ifndef HWCPIPE_NO_JSON
|
||||
// Initializes HWCPipe via a JSON configuration string
|
||||
explicit HWCPipe(const char *json_string);
|
||||
@@ -83,9 +83,9 @@ class HWCPipe
|
||||
return gpu_profiler_.get();
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<CpuProfiler> cpu_profiler_{};
|
||||
std::unique_ptr<GpuProfiler> gpu_profiler_{};
|
||||
private:
|
||||
std::unique_ptr<CpuProfiler> cpu_profiler_ {};
|
||||
std::unique_ptr<GpuProfiler> gpu_profiler_ {};
|
||||
|
||||
void create_profilers(CpuCounterSet enabled_cpu_counters, GpuCounterSet enabled_gpu_counters);
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019 ARM Limited.
|
||||
* Copyright (c) 2019-2022 ARM Limited.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -27,11 +27,11 @@
|
||||
#define HWCPIPE_TAG "HWCPipe"
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
# include <android/log.h>
|
||||
#include <android/log.h>
|
||||
|
||||
# define HWCPIPE_LOG(...) //__android_log_print(ANDROID_LOG_VERBOSE, HWCPIPE_TAG, __VA_ARGS__)
|
||||
#define HWCPIPE_LOG(...) __android_log_print(ANDROID_LOG_VERBOSE, HWCPIPE_TAG, __VA_ARGS__)
|
||||
#else
|
||||
# define HWCPIPE_LOG(...) \
|
||||
#define HWCPIPE_LOG(...) \
|
||||
{ \
|
||||
fprintf(stdout, "%s [INFO] : ", HWCPIPE_TAG); \
|
||||
fprintf(stdout, __VA_ARGS__); \
|
||||
|
||||
@@ -28,19 +28,19 @@ namespace hwcpipe
|
||||
{
|
||||
class Value
|
||||
{
|
||||
public:
|
||||
public:
|
||||
Value() :
|
||||
is_int_(true),
|
||||
int_(0),
|
||||
double_(0.0f)
|
||||
is_int_(true),
|
||||
int_(0),
|
||||
double_(0.0f)
|
||||
{}
|
||||
Value(long long value) :
|
||||
is_int_(true),
|
||||
int_(value)
|
||||
is_int_(true),
|
||||
int_(value)
|
||||
{}
|
||||
Value(double value) :
|
||||
is_int_(false),
|
||||
double_(value)
|
||||
is_int_(false),
|
||||
double_(value)
|
||||
{}
|
||||
|
||||
template <typename T>
|
||||
@@ -61,9 +61,9 @@ class Value
|
||||
is_int_ = false;
|
||||
}
|
||||
|
||||
private:
|
||||
private:
|
||||
bool is_int_;
|
||||
long long int_{0};
|
||||
double double_{0.0};
|
||||
long long int_ {0};
|
||||
double double_ {0.0};
|
||||
};
|
||||
} // namespace hwcpipe
|
||||
|
||||
+77
-77
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019 ARM Limited.
|
||||
* Copyright (c) 2017-2022 ARM Limited.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -37,16 +37,14 @@
|
||||
|
||||
#include "hwc_names.hpp"
|
||||
|
||||
#ifndef DOXYGEN_SKIP_THIS
|
||||
|
||||
# if defined(ANDROID) || defined(__ANDROID__)
|
||||
/* We use _IOR_BAD/_IOW_BAD rather than _IOR/_IOW; otherwise it fails to compile with NDK-BUILD because _IOC_TYPECHECK is defined, not because the parameter is invalid */
|
||||
# define MALI_IOR(a, b, c) _IOR_BAD(a, b, c)
|
||||
# define MALI_IOW(a, b, c) _IOW_BAD(a, b, c)
|
||||
# else
|
||||
# define MALI_IOR(a, b, c) _IOR(a, b, c)
|
||||
# define MALI_IOW(a, b, c) _IOW(a, b, c)
|
||||
# endif
|
||||
#if defined(ANDROID) || defined(__ANDROID__)
|
||||
/* We use _IOR_BAD/_IOW_BAD rather than _IOR/_IOW; otherwise it fails to compile with NDK-BUILD because _IOC_TYPECHECK is defined, not because the parameter is invalid */
|
||||
#define MALI_IOR(a, b, c) _IOR_BAD(a, b, c)
|
||||
#define MALI_IOW(a, b, c) _IOW_BAD(a, b, c)
|
||||
#else
|
||||
#define MALI_IOR(a, b, c) _IOR(a, b, c)
|
||||
#define MALI_IOW(a, b, c) _IOW(a, b, c)
|
||||
#endif
|
||||
|
||||
namespace mali_userspace
|
||||
{
|
||||
@@ -57,8 +55,8 @@ union uk_header
|
||||
uint64_t sizer;
|
||||
};
|
||||
|
||||
# define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
|
||||
# define BASE_MAX_COHERENT_GROUPS 16
|
||||
#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
|
||||
#define BASE_MAX_COHERENT_GROUPS 16
|
||||
|
||||
struct mali_base_gpu_core_props
|
||||
{
|
||||
@@ -117,7 +115,7 @@ struct mali_base_gpu_coherent_group_info
|
||||
mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
|
||||
};
|
||||
|
||||
# define GPU_MAX_JOB_SLOTS 16
|
||||
#define GPU_MAX_JOB_SLOTS 16
|
||||
struct gpu_raw_gpu_props
|
||||
{
|
||||
uint64_t shader_present;
|
||||
@@ -164,35 +162,35 @@ struct kbase_uk_gpuprops
|
||||
mali_base_gpu_props props;
|
||||
};
|
||||
|
||||
# define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
|
||||
# define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
|
||||
# define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
|
||||
# define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
|
||||
#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
|
||||
#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
|
||||
#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
|
||||
#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
|
||||
|
||||
# define KBASE_GPUPROP_PRODUCT_ID 1
|
||||
# define KBASE_GPUPROP_MINOR_REVISION 3
|
||||
# define KBASE_GPUPROP_MAJOR_REVISION 4
|
||||
#define KBASE_GPUPROP_PRODUCT_ID 1
|
||||
#define KBASE_GPUPROP_MINOR_REVISION 3
|
||||
#define KBASE_GPUPROP_MAJOR_REVISION 4
|
||||
|
||||
# define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
|
||||
# define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_0 64
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_1 65
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_2 66
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_3 67
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_4 68
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_5 69
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_6 70
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_7 71
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_8 72
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_9 73
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_10 74
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_11 75
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_12 76
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_13 77
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_14 78
|
||||
# define KBASE_GPUPROP_COHERENCY_GROUP_15 79
|
||||
#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
|
||||
#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
|
||||
#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
|
||||
|
||||
# define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
|
||||
#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
|
||||
|
||||
struct gpu_props
|
||||
{
|
||||
@@ -212,36 +210,36 @@ static const struct
|
||||
size_t offset;
|
||||
int size;
|
||||
} gpu_property_mapping[] = {
|
||||
# define PROP(name, member) \
|
||||
{ \
|
||||
KBASE_GPUPROP_##name, offsetof(struct gpu_props, member), \
|
||||
sizeof(((struct gpu_props *) 0)->member) \
|
||||
}
|
||||
PROP(PRODUCT_ID, product_id),
|
||||
PROP(MINOR_REVISION, minor_revision),
|
||||
PROP(MAJOR_REVISION, major_revision),
|
||||
PROP(COHERENCY_NUM_GROUPS, num_groups),
|
||||
PROP(COHERENCY_NUM_CORE_GROUPS, num_core_groups),
|
||||
PROP(COHERENCY_GROUP_0, core_mask[0]),
|
||||
PROP(COHERENCY_GROUP_1, core_mask[1]),
|
||||
PROP(COHERENCY_GROUP_2, core_mask[2]),
|
||||
PROP(COHERENCY_GROUP_3, core_mask[3]),
|
||||
PROP(COHERENCY_GROUP_4, core_mask[4]),
|
||||
PROP(COHERENCY_GROUP_5, core_mask[5]),
|
||||
PROP(COHERENCY_GROUP_6, core_mask[6]),
|
||||
PROP(COHERENCY_GROUP_7, core_mask[7]),
|
||||
PROP(COHERENCY_GROUP_8, core_mask[8]),
|
||||
PROP(COHERENCY_GROUP_9, core_mask[9]),
|
||||
PROP(COHERENCY_GROUP_10, core_mask[10]),
|
||||
PROP(COHERENCY_GROUP_11, core_mask[11]),
|
||||
PROP(COHERENCY_GROUP_12, core_mask[12]),
|
||||
PROP(COHERENCY_GROUP_13, core_mask[13]),
|
||||
PROP(COHERENCY_GROUP_14, core_mask[14]),
|
||||
PROP(COHERENCY_GROUP_15, core_mask[15]),
|
||||
#define PROP(name, member) \
|
||||
{ \
|
||||
KBASE_GPUPROP_##name, offsetof(struct gpu_props, member), \
|
||||
sizeof(((struct gpu_props *) 0)->member) \
|
||||
}
|
||||
PROP(PRODUCT_ID, product_id),
|
||||
PROP(MINOR_REVISION, minor_revision),
|
||||
PROP(MAJOR_REVISION, major_revision),
|
||||
PROP(COHERENCY_NUM_GROUPS, num_groups),
|
||||
PROP(COHERENCY_NUM_CORE_GROUPS, num_core_groups),
|
||||
PROP(COHERENCY_GROUP_0, core_mask[0]),
|
||||
PROP(COHERENCY_GROUP_1, core_mask[1]),
|
||||
PROP(COHERENCY_GROUP_2, core_mask[2]),
|
||||
PROP(COHERENCY_GROUP_3, core_mask[3]),
|
||||
PROP(COHERENCY_GROUP_4, core_mask[4]),
|
||||
PROP(COHERENCY_GROUP_5, core_mask[5]),
|
||||
PROP(COHERENCY_GROUP_6, core_mask[6]),
|
||||
PROP(COHERENCY_GROUP_7, core_mask[7]),
|
||||
PROP(COHERENCY_GROUP_8, core_mask[8]),
|
||||
PROP(COHERENCY_GROUP_9, core_mask[9]),
|
||||
PROP(COHERENCY_GROUP_10, core_mask[10]),
|
||||
PROP(COHERENCY_GROUP_11, core_mask[11]),
|
||||
PROP(COHERENCY_GROUP_12, core_mask[12]),
|
||||
PROP(COHERENCY_GROUP_13, core_mask[13]),
|
||||
PROP(COHERENCY_GROUP_14, core_mask[14]),
|
||||
PROP(COHERENCY_GROUP_15, core_mask[15]),
|
||||
|
||||
PROP(L2_NUM_L2_SLICES, l2_slices),
|
||||
# undef PROP
|
||||
{0, 0, 0}};
|
||||
PROP(L2_NUM_L2_SLICES, l2_slices),
|
||||
#undef PROP
|
||||
{0, 0, 0}};
|
||||
|
||||
struct kbase_hwcnt_reader_metadata
|
||||
{
|
||||
@@ -307,11 +305,12 @@ struct kbase_ioctl_hwcnt_reader_setup
|
||||
uint32_t mmu_l2_bm;
|
||||
};
|
||||
|
||||
# define KBASE_IOCTL_TYPE 0x80
|
||||
# define KBASE_IOCTL_GET_GPUPROPS MALI_IOW(KBASE_IOCTL_TYPE, 3, struct mali_userspace::kbase_ioctl_get_gpuprops)
|
||||
# define KBASE_IOCTL_VERSION_CHECK _IOWR(KBASE_IOCTL_TYPE, 0, struct mali_userspace::kbase_ioctl_version_check)
|
||||
# define KBASE_IOCTL_SET_FLAGS _IOW(KBASE_IOCTL_TYPE, 1, struct mali_userspace::kbase_ioctl_set_flags)
|
||||
# define KBASE_IOCTL_HWCNT_READER_SETUP _IOW(KBASE_IOCTL_TYPE, 8, struct mali_userspace::kbase_ioctl_hwcnt_reader_setup)
|
||||
#define KBASE_IOCTL_TYPE 0x80
|
||||
#define KBASE_IOCTL_GET_GPUPROPS MALI_IOW(KBASE_IOCTL_TYPE, 3, struct mali_userspace::kbase_ioctl_get_gpuprops)
|
||||
#define KBASE_IOCTL_VERSION_CHECK_JM _IOWR(KBASE_IOCTL_TYPE, 0, struct mali_userspace::kbase_ioctl_version_check)
|
||||
#define KBASE_IOCTL_VERSION_CHECK_CSF _IOWR(KBASE_IOCTL_TYPE, 52, struct mali_userspace::kbase_ioctl_version_check)
|
||||
#define KBASE_IOCTL_SET_FLAGS _IOW(KBASE_IOCTL_TYPE, 1, struct mali_userspace::kbase_ioctl_set_flags)
|
||||
#define KBASE_IOCTL_HWCNT_READER_SETUP _IOW(KBASE_IOCTL_TYPE, 8, struct mali_userspace::kbase_ioctl_hwcnt_reader_setup)
|
||||
|
||||
/** IOCTL parameters to set flags */
|
||||
struct kbase_uk_hwcnt_reader_set_flags
|
||||
@@ -350,7 +349,9 @@ struct uku_version_check_args
|
||||
|
||||
enum
|
||||
{
|
||||
UKP_FUNC_ID_CHECK_VERSION = 0,
|
||||
UKP_FUNC_ID_CHECK_VERSION_JM = 0,
|
||||
UKP_FUNC_ID_CHECK_VERSION_CSF = 52,
|
||||
|
||||
/* Related to mali0 ioctl interface */
|
||||
LINUX_UK_BASE_MAGIC = 0x80,
|
||||
BASE_CONTEXT_CREATE_KERNEL_FLAGS = 0x2,
|
||||
@@ -408,6 +409,5 @@ static inline int mali_ioctl(int fd, T &arg)
|
||||
|
||||
return 0;
|
||||
}
|
||||
} // namespace mali_userspace
|
||||
|
||||
#endif /* DOXYGEN_SKIP_THIS */
|
||||
} // namespace mali_userspace
|
||||
|
||||
+5963
-3217
File diff suppressed because it is too large
Load Diff
Vendored
+226
-63
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019 ARM Limited.
|
||||
* Copyright (c) 2017-2022 ARM Limited.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "hwcpipe_log.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
|
||||
using mali_userspace::MALI_NAME_BLOCK_JM;
|
||||
using mali_userspace::MALI_NAME_BLOCK_MMU;
|
||||
@@ -56,18 +57,38 @@ MaliHWInfo get_mali_hw_info(const char *path)
|
||||
}
|
||||
|
||||
{
|
||||
// Try matching Job Manager version IOCTL
|
||||
bool checked_version = true;
|
||||
mali_userspace::kbase_uk_hwcnt_reader_version_check_args version_check_args;
|
||||
version_check_args.header.id = mali_userspace::UKP_FUNC_ID_CHECK_VERSION; // NOLINT
|
||||
version_check_args.header.id = mali_userspace::UKP_FUNC_ID_CHECK_VERSION_JM;
|
||||
version_check_args.major = 10;
|
||||
version_check_args.minor = 2;
|
||||
|
||||
if (mali_userspace::mali_ioctl(fd, version_check_args) != 0)
|
||||
{
|
||||
mali_userspace::kbase_ioctl_version_check _version_check_args = {0, 0};
|
||||
if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &_version_check_args) < 0)
|
||||
if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK_JM, &_version_check_args) < 0)
|
||||
{
|
||||
close(fd);
|
||||
throw std::runtime_error("Failed to check version.");
|
||||
checked_version = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Try matching CSF version IOCTL
|
||||
if (!checked_version)
|
||||
{
|
||||
mali_userspace::kbase_uk_hwcnt_reader_version_check_args version_check_args;
|
||||
version_check_args.header.id = mali_userspace::UKP_FUNC_ID_CHECK_VERSION_CSF;
|
||||
version_check_args.major = 1;
|
||||
version_check_args.minor = 4;
|
||||
|
||||
if (mali_userspace::mali_ioctl(fd, version_check_args) != 0)
|
||||
{
|
||||
mali_userspace::kbase_ioctl_version_check _version_check_args = {0, 0};
|
||||
if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK_CSF, &_version_check_args) < 0)
|
||||
{
|
||||
close(fd);
|
||||
throw std::runtime_error("Failed to check version.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -216,70 +237,176 @@ MaliHWInfo get_mali_hw_info(const char *path)
|
||||
typedef std::function<uint64_t(void)> MaliValueGetter;
|
||||
|
||||
MaliProfiler::MaliProfiler(const GpuCounterSet &enabled_counters) :
|
||||
enabled_counters_(enabled_counters)
|
||||
enabled_counters_(enabled_counters)
|
||||
{
|
||||
// Throws if setup fails
|
||||
init();
|
||||
|
||||
const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> valhall_csf_mappings = {
|
||||
{GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
|
||||
{GpuCounter::ComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "ITER_COMP_ACTIVE"); }},
|
||||
{GpuCounter::VertexCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "ITER_TILER_ACTIVE"); }},
|
||||
{GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "ITER_FRAGMENT_ACTIVE"); }},
|
||||
{GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
|
||||
|
||||
{GpuCounter::ComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "ITER_COMP_JOB_COMPLETED"); }},
|
||||
{GpuCounter::VertexJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "ITER_TILER_JOB_COMPLETED"); }},
|
||||
{GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "ITER_FRAG_JOB_COMPLETED"); }},
|
||||
{GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "ITER_FRAG_TASK_COMPLETED") * 1024; }},
|
||||
|
||||
{GpuCounter::CulledPrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_CULLED") + get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_CLIPPED") + get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_SAT_CULLED"); }},
|
||||
{GpuCounter::VisiblePrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_VISIBLE"); }},
|
||||
{GpuCounter::InputPrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TRIANGLES") + get_counter_value(MALI_NAME_BLOCK_TILER, "LINES") + get_counter_value(MALI_NAME_BLOCK_TILER, "POINTS"); }},
|
||||
|
||||
{GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
|
||||
{GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
|
||||
{GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
|
||||
{GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
|
||||
{GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
|
||||
{GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
|
||||
|
||||
{GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_MSG"); }},
|
||||
{GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
|
||||
|
||||
{GpuCounter::ShaderComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "COMPUTE_ACTIVE"); }},
|
||||
{GpuCounter::ShaderFragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_ACTIVE"); }},
|
||||
{GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
|
||||
// The three units run in parallel so we can approximate cycles by taking the largest value. SFU instructions use 4 cycles per warp.
|
||||
{GpuCounter::ShaderArithmeticCycles, [this] { return std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA"), std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT"), 4 * get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU"))); }},
|
||||
{GpuCounter::ShaderInterpolatorCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "VARY_SLOT_16") + get_counter_value(MALI_NAME_BLOCK_SHADER, "VARY_SLOT_32"); }},
|
||||
{GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
|
||||
{GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
|
||||
|
||||
{GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
|
||||
{GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
|
||||
{GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
|
||||
{GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
|
||||
{GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
|
||||
{GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
|
||||
};
|
||||
|
||||
const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> valhall_mappings = {
|
||||
{GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
|
||||
{GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
|
||||
{GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
|
||||
{GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
|
||||
|
||||
{GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
|
||||
{GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
|
||||
{GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
|
||||
|
||||
{GpuCounter::CulledPrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_CULLED") + get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_CLIPPED") + get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_SAT_CULLED"); }},
|
||||
{GpuCounter::VisiblePrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_VISIBLE"); }},
|
||||
{GpuCounter::InputPrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TRIANGLES") + get_counter_value(MALI_NAME_BLOCK_TILER, "LINES") + get_counter_value(MALI_NAME_BLOCK_TILER, "POINTS"); }},
|
||||
|
||||
{GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
|
||||
{GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
|
||||
{GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
|
||||
{GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
|
||||
{GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
|
||||
{GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
|
||||
|
||||
{GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_MSG"); }},
|
||||
{GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
|
||||
|
||||
{GpuCounter::ShaderComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "COMPUTE_ACTIVE"); }},
|
||||
{GpuCounter::ShaderFragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_ACTIVE"); }},
|
||||
{GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
|
||||
// The three units run in parallel so we can approximate cycles by taking the largest value. SFU instructions use 4 cycles per warp.
|
||||
{GpuCounter::ShaderArithmeticCycles, [this] { return std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA"), std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT"), 4 * get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU"))); }},
|
||||
{GpuCounter::ShaderInterpolatorCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "VARY_SLOT_16") + get_counter_value(MALI_NAME_BLOCK_SHADER, "VARY_SLOT_32"); }},
|
||||
{GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
|
||||
{GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
|
||||
|
||||
{GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
|
||||
{GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
|
||||
{GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
|
||||
{GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
|
||||
{GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
|
||||
{GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
|
||||
};
|
||||
|
||||
const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> bifrost_mappings = {
|
||||
{GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
|
||||
{GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
|
||||
{GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
|
||||
{GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
|
||||
{GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
|
||||
{GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
|
||||
{GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
|
||||
{GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
|
||||
|
||||
{GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
|
||||
{GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
|
||||
{GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
|
||||
{GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
|
||||
{GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
|
||||
{GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
|
||||
|
||||
{GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
|
||||
{GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
|
||||
{GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
|
||||
{GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
|
||||
{GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
|
||||
{GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
|
||||
{GpuCounter::CulledPrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_CULLED") + get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_CLIPPED") + get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_SAT_CULLED"); }},
|
||||
{GpuCounter::VisiblePrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_VISIBLE"); }},
|
||||
{GpuCounter::InputPrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TRIANGLES") + get_counter_value(MALI_NAME_BLOCK_TILER, "LINES") + get_counter_value(MALI_NAME_BLOCK_TILER, "POINTS"); }},
|
||||
|
||||
{GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
|
||||
{GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
|
||||
{GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
|
||||
{GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
|
||||
{GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
|
||||
{GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
|
||||
{GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
|
||||
{GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
|
||||
|
||||
{GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
|
||||
{GpuCounter::ShaderArithmeticCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
|
||||
{GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
|
||||
{GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
|
||||
{GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
|
||||
{GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
|
||||
|
||||
{GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
|
||||
{GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
|
||||
{GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
|
||||
{GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
|
||||
{GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
|
||||
{GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
|
||||
{GpuCounter::ShaderComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "COMPUTE_ACTIVE"); }},
|
||||
{GpuCounter::ShaderFragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_ACTIVE"); }},
|
||||
{GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
|
||||
{GpuCounter::ShaderArithmeticCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
|
||||
{GpuCounter::ShaderInterpolatorCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "VARY_SLOT_16") + get_counter_value(MALI_NAME_BLOCK_SHADER, "VARY_SLOT_32"); }},
|
||||
{GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
|
||||
{GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
|
||||
|
||||
{GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
|
||||
{GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
|
||||
{GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
|
||||
{GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
|
||||
{GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
|
||||
{GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
|
||||
};
|
||||
|
||||
const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> midgard_mappings = {
|
||||
{GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
|
||||
{GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
|
||||
{GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
|
||||
{GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
|
||||
{GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
|
||||
{GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
|
||||
|
||||
{GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
|
||||
{GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
|
||||
{GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
|
||||
{GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
|
||||
{GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
|
||||
{GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
|
||||
|
||||
{GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
|
||||
{GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
|
||||
{GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
|
||||
{GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILLED"); }},
|
||||
{GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_TEST"); }},
|
||||
{GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_KILLED"); }},
|
||||
{GpuCounter::CulledPrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_CULLED") + get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_CLIPPED"); }},
|
||||
{GpuCounter::VisiblePrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "PRIM_VISIBLE"); }},
|
||||
{GpuCounter::InputPrimitives, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TRIANGLES") + get_counter_value(MALI_NAME_BLOCK_TILER, "LINES") + get_counter_value(MALI_NAME_BLOCK_TILER, "POINTS"); }},
|
||||
|
||||
{GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
|
||||
{GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
|
||||
{GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
|
||||
{GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
|
||||
{GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
|
||||
{GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
|
||||
{GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
|
||||
{GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
|
||||
{GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
|
||||
{GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILLED"); }},
|
||||
{GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_TEST"); }},
|
||||
{GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_KILLED"); }},
|
||||
|
||||
{GpuCounter::ShaderComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "COMPUTE_ACTIVE"); }},
|
||||
{GpuCounter::ShaderFragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_ACTIVE"); }},
|
||||
{GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TRIPIPE_ACTIVE"); }},
|
||||
{GpuCounter::ShaderArithmeticCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "ARITH_WORDS"); }},
|
||||
{GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_ISSUES"); }},
|
||||
{GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_ISSUES"); }},
|
||||
|
||||
{GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
|
||||
{GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
|
||||
{GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
|
||||
{GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
|
||||
{GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
|
||||
{GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
|
||||
{GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
|
||||
};
|
||||
|
||||
auto product = std::find_if(std::begin(mali_userspace::products), std::end(mali_userspace::products), [&](const mali_userspace::CounterMapping &cm) {
|
||||
@@ -310,9 +437,25 @@ MaliProfiler::MaliProfiler(const GpuCounterSet &enabled_counters) :
|
||||
break;
|
||||
case mali_userspace::PRODUCT_ID_TSIX:
|
||||
case mali_userspace::PRODUCT_ID_TNOX:
|
||||
default:
|
||||
case mali_userspace::PRODUCT_ID_TGOX:
|
||||
case mali_userspace::PRODUCT_ID_TDVX:
|
||||
mappings_ = bifrost_mappings;
|
||||
break;
|
||||
case mali_userspace::PRODUCT_ID_TNAXa:
|
||||
case mali_userspace::PRODUCT_ID_TNAXb:
|
||||
case mali_userspace::PRODUCT_ID_TTRX:
|
||||
case mali_userspace::PRODUCT_ID_TOTX:
|
||||
case mali_userspace::PRODUCT_ID_TBOX:
|
||||
case mali_userspace::PRODUCT_ID_TBOXAE:
|
||||
mappings_ = valhall_mappings;
|
||||
break;
|
||||
case mali_userspace::PRODUCT_ID_TODX:
|
||||
case mali_userspace::PRODUCT_ID_TVIX:
|
||||
case mali_userspace::PRODUCT_ID_TGRX:
|
||||
case mali_userspace::PRODUCT_ID_TVAX:
|
||||
default:
|
||||
mappings_ = valhall_csf_mappings;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -336,24 +479,44 @@ void MaliProfiler::init()
|
||||
throw std::runtime_error("Failed to open /dev/mali0.");
|
||||
}
|
||||
|
||||
// Set API version
|
||||
{
|
||||
mali_userspace::kbase_uk_hwcnt_reader_version_check_args check; // NOLINT
|
||||
memset(&check, 0, sizeof(check));
|
||||
// Try matching Job Manager version IOCTL
|
||||
bool checked_version = true;
|
||||
mali_userspace::kbase_uk_hwcnt_reader_version_check_args version_check_args;
|
||||
version_check_args.header.id = mali_userspace::UKP_FUNC_ID_CHECK_VERSION_JM;
|
||||
version_check_args.major = 10;
|
||||
version_check_args.minor = 2;
|
||||
|
||||
if (mali_userspace::mali_ioctl(fd_, check) != 0)
|
||||
if (mali_userspace::mali_ioctl(fd_, version_check_args) != 0)
|
||||
{
|
||||
mali_userspace::kbase_ioctl_version_check _check = {0, 0};
|
||||
if (ioctl(fd_, KBASE_IOCTL_VERSION_CHECK, &_check) < 0)
|
||||
mali_userspace::kbase_ioctl_version_check _version_check_args = {0, 0};
|
||||
if (ioctl(fd_, KBASE_IOCTL_VERSION_CHECK_JM, &_version_check_args) < 0)
|
||||
{
|
||||
throw std::runtime_error("Failed to get ABI version.");
|
||||
checked_version = false;
|
||||
}
|
||||
}
|
||||
else if (check.major < 10)
|
||||
|
||||
// Try matching CSF version IOCTL
|
||||
if (!checked_version)
|
||||
{
|
||||
throw std::runtime_error("Unsupported ABI version 10.");
|
||||
mali_userspace::kbase_uk_hwcnt_reader_version_check_args version_check_args;
|
||||
version_check_args.header.id = mali_userspace::UKP_FUNC_ID_CHECK_VERSION_CSF;
|
||||
version_check_args.major = 1;
|
||||
version_check_args.minor = 4;
|
||||
|
||||
if (mali_userspace::mali_ioctl(fd_, version_check_args) != 0)
|
||||
{
|
||||
mali_userspace::kbase_ioctl_version_check _version_check_args = {0, 0};
|
||||
if (ioctl(fd_, KBASE_IOCTL_VERSION_CHECK_CSF, &_version_check_args) < 0)
|
||||
{
|
||||
close(fd_);
|
||||
throw std::runtime_error("Failed to check version.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
mali_userspace::kbase_uk_hwcnt_reader_set_flags flags; // NOLINT
|
||||
memset(&flags, 0, sizeof(flags));
|
||||
|
||||
Vendored
+64
-49
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019 ARM Limited.
|
||||
* Copyright (c) 2019-2022 ARM Limited.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -36,7 +36,7 @@ namespace hwcpipe
|
||||
/** A Gpu profiler that uses Mali counter data. */
|
||||
class MaliProfiler : public GpuProfiler
|
||||
{
|
||||
public:
|
||||
public:
|
||||
explicit MaliProfiler(const GpuCounterSet &enabled_counters);
|
||||
virtual ~MaliProfiler() = default;
|
||||
|
||||
@@ -59,59 +59,74 @@ class MaliProfiler : public GpuProfiler
|
||||
virtual const GpuMeasurements &sample() override;
|
||||
virtual void stop() override;
|
||||
|
||||
private:
|
||||
GpuCounterSet enabled_counters_{};
|
||||
private:
|
||||
GpuCounterSet enabled_counters_ {};
|
||||
|
||||
const GpuCounterSet supported_counters_{
|
||||
GpuCounter::GpuCycles,
|
||||
GpuCounter::VertexComputeCycles,
|
||||
GpuCounter::FragmentCycles,
|
||||
GpuCounter::TilerCycles,
|
||||
GpuCounter::VertexComputeJobs,
|
||||
GpuCounter::Tiles,
|
||||
GpuCounter::TransactionEliminations,
|
||||
GpuCounter::FragmentJobs,
|
||||
GpuCounter::Pixels,
|
||||
GpuCounter::EarlyZTests,
|
||||
GpuCounter::EarlyZKilled,
|
||||
GpuCounter::LateZTests,
|
||||
GpuCounter::LateZKilled,
|
||||
GpuCounter::Instructions,
|
||||
GpuCounter::DivergedInstructions,
|
||||
GpuCounter::ShaderCycles,
|
||||
GpuCounter::ShaderArithmeticCycles,
|
||||
GpuCounter::ShaderLoadStoreCycles,
|
||||
GpuCounter::ShaderTextureCycles,
|
||||
GpuCounter::CacheReadLookups,
|
||||
GpuCounter::CacheWriteLookups,
|
||||
GpuCounter::ExternalMemoryReadAccesses,
|
||||
GpuCounter::ExternalMemoryWriteAccesses,
|
||||
GpuCounter::ExternalMemoryReadStalls,
|
||||
GpuCounter::ExternalMemoryWriteStalls,
|
||||
GpuCounter::ExternalMemoryReadBytes,
|
||||
GpuCounter::ExternalMemoryWriteBytes,
|
||||
const GpuCounterSet supported_counters_ {
|
||||
GpuCounter::GpuCycles,
|
||||
GpuCounter::VertexCycles,
|
||||
GpuCounter::ComputeCycles,
|
||||
GpuCounter::VertexComputeCycles,
|
||||
GpuCounter::FragmentCycles,
|
||||
GpuCounter::TilerCycles,
|
||||
GpuCounter::VertexJobs,
|
||||
GpuCounter::ComputeJobs,
|
||||
GpuCounter::VertexComputeJobs,
|
||||
GpuCounter::FragmentJobs,
|
||||
GpuCounter::Pixels,
|
||||
|
||||
GpuCounter::CulledPrimitives,
|
||||
GpuCounter::VisiblePrimitives,
|
||||
GpuCounter::InputPrimitives,
|
||||
|
||||
GpuCounter::Tiles,
|
||||
GpuCounter::TransactionEliminations,
|
||||
|
||||
GpuCounter::EarlyZTests,
|
||||
GpuCounter::EarlyZKilled,
|
||||
GpuCounter::LateZTests,
|
||||
GpuCounter::LateZKilled,
|
||||
|
||||
GpuCounter::Instructions,
|
||||
GpuCounter::DivergedInstructions,
|
||||
|
||||
GpuCounter::ShaderFragmentCycles,
|
||||
GpuCounter::ShaderComputeCycles,
|
||||
GpuCounter::ShaderCycles,
|
||||
GpuCounter::ShaderArithmeticCycles,
|
||||
GpuCounter::ShaderInterpolatorCycles,
|
||||
GpuCounter::ShaderLoadStoreCycles,
|
||||
GpuCounter::ShaderTextureCycles,
|
||||
|
||||
GpuCounter::CacheReadLookups,
|
||||
GpuCounter::CacheWriteLookups,
|
||||
GpuCounter::ExternalMemoryReadAccesses,
|
||||
GpuCounter::ExternalMemoryWriteAccesses,
|
||||
GpuCounter::ExternalMemoryReadStalls,
|
||||
GpuCounter::ExternalMemoryWriteStalls,
|
||||
GpuCounter::ExternalMemoryReadBytes,
|
||||
GpuCounter::ExternalMemoryWriteBytes,
|
||||
};
|
||||
|
||||
typedef std::function<double(void)> MaliValueGetter;
|
||||
std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> mappings_{};
|
||||
std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> mappings_ {};
|
||||
|
||||
const char *const device_{"/dev/mali0"};
|
||||
int num_cores_{0};
|
||||
int num_l2_slices_{0};
|
||||
int gpu_id_{0};
|
||||
uint32_t hw_ver_{0};
|
||||
int buffer_count_{16};
|
||||
size_t buffer_size_{0};
|
||||
uint8_t * sample_data_{nullptr};
|
||||
uint64_t timestamp_{0};
|
||||
const char *const *names_lut_{
|
||||
nullptr};
|
||||
std::vector<uint32_t> raw_counter_buffer_{};
|
||||
std::vector<unsigned int> core_index_remap_{};
|
||||
int fd_{-1};
|
||||
int hwc_fd_{-1};
|
||||
const char *const device_ {"/dev/mali0"};
|
||||
int num_cores_ {0};
|
||||
int num_l2_slices_ {0};
|
||||
int gpu_id_ {0};
|
||||
uint32_t hw_ver_ {0};
|
||||
int buffer_count_ {16};
|
||||
size_t buffer_size_ {0};
|
||||
uint8_t * sample_data_ {nullptr};
|
||||
uint64_t timestamp_ {0};
|
||||
const char *const * names_lut_ {nullptr};
|
||||
std::vector<uint32_t> raw_counter_buffer_ {};
|
||||
std::vector<unsigned int> core_index_remap_ {};
|
||||
int fd_ {-1};
|
||||
int hwc_fd_ {-1};
|
||||
|
||||
GpuMeasurements measurements_{};
|
||||
GpuMeasurements measurements_ {};
|
||||
|
||||
void init();
|
||||
void sample_counters();
|
||||
|
||||
+46
-20
@@ -26,26 +26,13 @@
|
||||
|
||||
#include <asm/unistd.h>
|
||||
#include <cstring>
|
||||
#include <linux/version.h>
|
||||
#include <stdexcept>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
/* Add std_to_string implementation as it is possible that Android does not provide it */
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
/**
 * Convert a value to its textual form by streaming it into a std::ostringstream.
 *
 * Local stand-in for std::to_string, provided because the Android toolchain
 * may not supply that function.
 *
 * @param[in] value Any value that supports operator<< on a std::ostream.
 *
 * @return The characters produced by inserting @p value into the stream.
 */
template <typename T>
std::string std_to_string(T value)
{
    std::ostringstream os;
    os << value;
    return os.str();
}
|
||||
|
||||
|
||||
PmuCounter::PmuCounter() :
|
||||
_perf_config()
|
||||
_perf_config()
|
||||
{
|
||||
_perf_config.type = PERF_TYPE_HARDWARE;
|
||||
_perf_config.size = sizeof(perf_event_attr);
|
||||
|
||||
// Start disabled
|
||||
@@ -57,8 +44,8 @@ PmuCounter::PmuCounter() :
|
||||
_perf_config.inherit_stat = 1;
|
||||
}
|
||||
|
||||
PmuCounter::PmuCounter(uint64_t config) :
|
||||
PmuCounter()
|
||||
PmuCounter::PmuCounter(PmuEventInfo config) :
|
||||
PmuCounter()
|
||||
{
|
||||
open(config);
|
||||
}
|
||||
@@ -68,9 +55,10 @@ PmuCounter::~PmuCounter()
|
||||
close();
|
||||
}
|
||||
|
||||
void PmuCounter::open(uint64_t config)
|
||||
void PmuCounter::open(PmuEventInfo config)
|
||||
{
|
||||
_perf_config.config = config;
|
||||
_perf_config.config = config.event;
|
||||
_perf_config.type = config.type;
|
||||
open(_perf_config);
|
||||
}
|
||||
|
||||
@@ -133,12 +121,16 @@ std::string PmuCounter::config_to_str(const perf_event_attr &perf_config)
|
||||
return "PERF_COUNT_HW_BRANCH_MISSES";
|
||||
case PERF_COUNT_HW_BUS_CYCLES:
|
||||
return "PERF_COUNT_HW_BUS_CYCLES";
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
|
||||
case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
|
||||
return "PERF_COUNT_HW_STALLED_CYCLES_FRONTEND";
|
||||
case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
|
||||
return "PERF_COUNT_HW_STALLED_CYCLES_BACKEND";
|
||||
#endif
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
|
||||
case PERF_COUNT_HW_REF_CPU_CYCLES:
|
||||
return "PERF_COUNT_HW_REF_CPU_CYCLES";
|
||||
#endif
|
||||
default:
|
||||
return "UNKNOWN HARDWARE COUNTER";
|
||||
}
|
||||
@@ -160,16 +152,50 @@ std::string PmuCounter::config_to_str(const perf_event_attr &perf_config)
|
||||
return "PERF_COUNT_SW_PAGE_FAULTS_MIN";
|
||||
case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
|
||||
return "PERF_COUNT_SW_PAGE_FAULTS_MAJ";
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33)
|
||||
case PERF_COUNT_SW_ALIGNMENT_FAULTS:
|
||||
return "PERF_COUNT_SW_ALIGNMENT_FAULTS";
|
||||
case PERF_COUNT_SW_EMULATION_FAULTS:
|
||||
return "PERF_COUNT_SW_EMULATION_FAULTS";
|
||||
#endif
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
|
||||
case PERF_COUNT_SW_DUMMY:
|
||||
return "PERF_COUNT_SW_DUMMY";
|
||||
#endif
|
||||
default:
|
||||
return "UNKNOWN SOFTWARE COUNTER";
|
||||
}
|
||||
|
||||
case PERF_TYPE_RAW:
|
||||
switch (static_cast<PmuImplDefined>(perf_config.config))
|
||||
{
|
||||
case PmuImplDefined::L1_ACCESSES:
|
||||
return "L1_ACCESSES";
|
||||
case PmuImplDefined::INSTR_RETIRED:
|
||||
return "INSTR_RETIRED";
|
||||
case PmuImplDefined::L2_ACCESSES:
|
||||
return "L2_ACCESSES";
|
||||
case PmuImplDefined::L3_ACCESSES:
|
||||
return "L3_ACCESSES";
|
||||
case PmuImplDefined::BUS_READS:
|
||||
return "BUS_READS";
|
||||
case PmuImplDefined::BUS_WRITES:
|
||||
return "BUS_WRITES";
|
||||
case PmuImplDefined::MEM_READS:
|
||||
return "MEM_READS";
|
||||
case PmuImplDefined::MEM_WRITES:
|
||||
return "MEM_WRITES";
|
||||
case PmuImplDefined::ASE_SPEC:
|
||||
return "ASE_SPEC";
|
||||
case PmuImplDefined::VFP_SPEC:
|
||||
return "VFP_SPEC";
|
||||
case PmuImplDefined::CRYPTO_SPEC:
|
||||
return "CRYPTO_SPEC";
|
||||
default:
|
||||
return "UNKNOWN RAW COUNTER";
|
||||
}
|
||||
|
||||
default:
|
||||
return std_to_string(perf_config.config);
|
||||
return std::to_string(perf_config.config);
|
||||
}
|
||||
}
|
||||
|
||||
+51
-21
@@ -35,41 +35,71 @@
|
||||
|
||||
#include "hwcpipe_log.h"
|
||||
|
||||
/**
 * Raw CPU PMU event numbers.
 *
 * Each enumerator is the value written to perf_event_attr::config when the
 * event is opened with the PERF_TYPE_RAW event type.
 *
 * NOTE(review): the numbers look like the Arm architectural PMU event codes
 * (e.g. 0x08 for retired instructions) - confirm against the Arm
 * Architecture Reference Manual before adding new entries.
 */
enum class PmuImplDefined : uint64_t
{
    L1_ACCESSES   = 0x4,
    INSTR_RETIRED = 0x8,
    L2_ACCESSES   = 0x16,
    L3_ACCESSES   = 0x2b,
    BUS_READS     = 0x60,
    BUS_WRITES    = 0x61,
    MEM_READS     = 0x66,
    MEM_WRITES    = 0x67,
    ASE_SPEC      = 0x74,
    VFP_SPEC      = 0x75,
    CRYPTO_SPEC   = 0x77,
};
|
||||
|
||||
struct PmuEventInfo
|
||||
{
|
||||
uint64_t type;
|
||||
uint64_t event;
|
||||
|
||||
PmuEventInfo(uint64_t type, uint64_t event) :
|
||||
type(type),
|
||||
event(event)
|
||||
{}
|
||||
|
||||
PmuEventInfo(uint64_t type, PmuImplDefined event) :
|
||||
PmuEventInfo(type, static_cast<uint64_t>(event))
|
||||
{}
|
||||
};
|
||||
|
||||
/** Class provides access to CPU hardware counters. */
|
||||
class PmuCounter
|
||||
{
|
||||
public:
|
||||
public:
|
||||
/** Default constructor. */
|
||||
PmuCounter();
|
||||
|
||||
/** Create PMU counter with specified config.
|
||||
*
|
||||
* This constructor automatically calls @ref open with the default
|
||||
* configuration.
|
||||
*
|
||||
* @param[in] config Counter identifier.
|
||||
*/
|
||||
PmuCounter(uint64_t config);
|
||||
*
|
||||
* This constructor automatically calls @ref open with the default
|
||||
* configuration.
|
||||
*
|
||||
* @param[in] config Counter info.
|
||||
*/
|
||||
PmuCounter(PmuEventInfo config);
|
||||
|
||||
/** Default destructor. */
|
||||
~PmuCounter();
|
||||
|
||||
/** Get the counter value.
|
||||
*
|
||||
* @return Counter value casted to the specified type. */
|
||||
*
|
||||
* @return Counter value casted to the specified type. */
|
||||
template <typename T>
|
||||
T get_value() const;
|
||||
|
||||
/** Open the specified counter based on the default configuration.
|
||||
*
|
||||
* @param[in] config The default configuration.
|
||||
*/
|
||||
void open(uint64_t config);
|
||||
/** Open the specified counter based on the given configuration.
|
||||
*
|
||||
* @param[in] config The configuration.
|
||||
*/
|
||||
void open(PmuEventInfo config);
|
||||
|
||||
/** Open the specified configuration.
|
||||
*
|
||||
* @param[in] perf_config The specified configuration.
|
||||
*/
|
||||
*
|
||||
* @param[in] perf_config The specified configuration.
|
||||
*/
|
||||
void open(const perf_event_attr &perf_config);
|
||||
|
||||
/** Close the currently open counter. */
|
||||
@@ -83,15 +113,15 @@ class PmuCounter
|
||||
/** Print counter config ID. */
|
||||
std::string config_to_str(const perf_event_attr &perf_config);
|
||||
|
||||
private:
|
||||
private:
|
||||
perf_event_attr _perf_config;
|
||||
long _fd{-1};
|
||||
long _fd {-1};
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T PmuCounter::get_value() const
|
||||
{
|
||||
long long value{};
|
||||
long long value {};
|
||||
const ssize_t result = read(_fd, &value, sizeof(long long));
|
||||
|
||||
if (result == -1)
|
||||
|
||||
Vendored
+21
-9
@@ -28,17 +28,29 @@
|
||||
|
||||
namespace hwcpipe
|
||||
{
|
||||
const std::unordered_map<CpuCounter, uint64_t, CpuCounterHash> pmu_mappings{
|
||||
{CpuCounter::Cycles, PERF_COUNT_HW_CPU_CYCLES},
|
||||
{CpuCounter::Instructions, PERF_COUNT_HW_INSTRUCTIONS},
|
||||
{CpuCounter::CacheReferences, PERF_COUNT_HW_CACHE_REFERENCES},
|
||||
{CpuCounter::CacheMisses, PERF_COUNT_HW_CACHE_MISSES},
|
||||
{CpuCounter::BranchInstructions, PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
|
||||
{CpuCounter::BranchMisses, PERF_COUNT_HW_BRANCH_MISSES},
|
||||
const std::unordered_map<CpuCounter, PmuEventInfo, CpuCounterHash> pmu_mappings {
|
||||
{CpuCounter::Cycles, {PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES}},
|
||||
{CpuCounter::Instructions, {PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS}},
|
||||
{CpuCounter::CacheReferences, {PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES}},
|
||||
{CpuCounter::CacheMisses, {PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES}},
|
||||
{CpuCounter::BranchInstructions, {PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS}},
|
||||
{CpuCounter::BranchMisses, {PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES}},
|
||||
|
||||
{CpuCounter::L1Accesses, {PERF_TYPE_RAW, PmuImplDefined::L1_ACCESSES}},
|
||||
{CpuCounter::InstrRetired, {PERF_TYPE_RAW, PmuImplDefined::INSTR_RETIRED}},
|
||||
{CpuCounter::L2Accesses, {PERF_TYPE_RAW, PmuImplDefined::L2_ACCESSES}},
|
||||
{CpuCounter::L3Accesses, {PERF_TYPE_RAW, PmuImplDefined::L3_ACCESSES}},
|
||||
{CpuCounter::BusReads, {PERF_TYPE_RAW, PmuImplDefined::BUS_READS}},
|
||||
{CpuCounter::BusWrites, {PERF_TYPE_RAW, PmuImplDefined::BUS_WRITES}},
|
||||
{CpuCounter::MemReads, {PERF_TYPE_RAW, PmuImplDefined::MEM_READS}},
|
||||
{CpuCounter::MemWrites, {PERF_TYPE_RAW, PmuImplDefined::MEM_WRITES}},
|
||||
{CpuCounter::ASESpec, {PERF_TYPE_RAW, PmuImplDefined::ASE_SPEC}},
|
||||
{CpuCounter::VFPSpec, {PERF_TYPE_RAW, PmuImplDefined::VFP_SPEC}},
|
||||
{CpuCounter::CryptoSpec, {PERF_TYPE_RAW, PmuImplDefined::CRYPTO_SPEC}},
|
||||
};
|
||||
|
||||
PmuProfiler::PmuProfiler(const CpuCounterSet &enabled_counters) :
|
||||
enabled_counters_(enabled_counters)
|
||||
enabled_counters_(enabled_counters)
|
||||
{
|
||||
// Set up PMU counters
|
||||
for (const auto &counter : enabled_counters)
|
||||
@@ -77,7 +89,7 @@ void PmuProfiler::run()
|
||||
for (auto &pmu_counter : pmu_counters_)
|
||||
{
|
||||
pmu_counter.second.reset();
|
||||
prev_measurements_[pmu_counter.first] = Value{};
|
||||
prev_measurements_[pmu_counter.first] = Value {};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+27
-14
@@ -33,7 +33,7 @@ namespace hwcpipe
|
||||
/** A CPU profiler that uses PMU counter data. */
|
||||
class PmuProfiler : public CpuProfiler
|
||||
{
|
||||
public:
|
||||
public:
|
||||
explicit PmuProfiler(const CpuCounterSet &enabled_counters);
|
||||
virtual ~PmuProfiler() = default;
|
||||
|
||||
@@ -56,22 +56,35 @@ class PmuProfiler : public CpuProfiler
|
||||
virtual const CpuMeasurements &sample() override;
|
||||
virtual void stop() override;
|
||||
|
||||
private:
|
||||
CpuCounterSet enabled_counters_{};
|
||||
CpuCounterSet available_counters_{};
|
||||
private:
|
||||
CpuCounterSet enabled_counters_ {};
|
||||
CpuCounterSet available_counters_ {};
|
||||
|
||||
const CpuCounterSet supported_counters_{
|
||||
CpuCounter::Cycles,
|
||||
CpuCounter::Instructions,
|
||||
CpuCounter::CacheReferences,
|
||||
CpuCounter::CacheMisses,
|
||||
CpuCounter::BranchInstructions,
|
||||
CpuCounter::BranchMisses};
|
||||
const CpuCounterSet supported_counters_ {
|
||||
CpuCounter::Cycles,
|
||||
CpuCounter::Instructions,
|
||||
CpuCounter::CacheReferences,
|
||||
CpuCounter::CacheMisses,
|
||||
CpuCounter::BranchInstructions,
|
||||
CpuCounter::BranchMisses,
|
||||
|
||||
CpuMeasurements measurements_{};
|
||||
CpuMeasurements prev_measurements_{};
|
||||
CpuCounter::L1Accesses,
|
||||
CpuCounter::InstrRetired,
|
||||
CpuCounter::L2Accesses,
|
||||
CpuCounter::L3Accesses,
|
||||
CpuCounter::BusReads,
|
||||
CpuCounter::BusWrites,
|
||||
CpuCounter::MemReads,
|
||||
CpuCounter::MemWrites,
|
||||
CpuCounter::ASESpec,
|
||||
CpuCounter::VFPSpec,
|
||||
CpuCounter::CryptoSpec,
|
||||
};
|
||||
|
||||
std::unordered_map<CpuCounter, PmuCounter, CpuCounterHash> pmu_counters_{};
|
||||
CpuMeasurements measurements_ {};
|
||||
CpuMeasurements prev_measurements_ {};
|
||||
|
||||
std::unordered_map<CpuCounter, PmuCounter, CpuCounterHash> pmu_counters_ {};
|
||||
};
|
||||
|
||||
} // namespace hwcpipe
|
||||
|
||||
Reference in New Issue
Block a user