mirror of
https://github.com/aristocratos/btop.git
synced 2024-09-29 22:51:35 +02:00
Add DebugTimer class and change some Logger::error calls to Logger::debug
This commit is contained in:
parent
2e68c0b916
commit
1fee2bc08b
@ -541,6 +541,74 @@ namespace Tools {
|
|||||||
return (user != NULL ? user : "");
|
return (user != NULL ? user : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DebugTimer::DebugTimer(const string name, bool start, bool delayed_report) : name(name), delayed_report(delayed_report) {
|
||||||
|
if (start)
|
||||||
|
this->start();
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugTimer::~DebugTimer() {
|
||||||
|
if (running)
|
||||||
|
this->stop(true);
|
||||||
|
this->force_report();
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugTimer::start() {
|
||||||
|
if (running) return;
|
||||||
|
running = true;
|
||||||
|
start_time = time_micros();
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugTimer::stop(bool report) {
|
||||||
|
if (not running) return;
|
||||||
|
running = false;
|
||||||
|
elapsed_time = time_micros() - start_time;
|
||||||
|
if (report) this->report();
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugTimer::reset(bool restart) {
|
||||||
|
running = false;
|
||||||
|
start_time = 0;
|
||||||
|
elapsed_time = 0;
|
||||||
|
if (restart) this->start();
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugTimer::stop_rename_reset(const string &new_name, bool report, bool restart) {
|
||||||
|
this->stop(report);
|
||||||
|
name = new_name;
|
||||||
|
this->reset(restart);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugTimer::report() {
|
||||||
|
string report_line;
|
||||||
|
if (start_time == 0 and elapsed_time == 0)
|
||||||
|
report_line = format("DebugTimer::report() warning -> Timer [{}] has not been started!", name);
|
||||||
|
else if (running)
|
||||||
|
report_line = format(custom_locale, "Timer [{}] (running) currently at {:L} μs", name, time_micros() - start_time);
|
||||||
|
else
|
||||||
|
report_line = format(custom_locale, "Timer [{}] took {:L} μs", name, elapsed_time);
|
||||||
|
|
||||||
|
if (delayed_report)
|
||||||
|
report_buffer.emplace_back(report_line);
|
||||||
|
else
|
||||||
|
Logger::log_write(log_level, report_line);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugTimer::force_report() {
|
||||||
|
if (report_buffer.empty()) return;
|
||||||
|
for (const auto& line : report_buffer)
|
||||||
|
Logger::log_write(log_level, line);
|
||||||
|
report_buffer.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t DebugTimer::elapsed() {
|
||||||
|
if (running)
|
||||||
|
return time_micros() - start_time;
|
||||||
|
return elapsed_time;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DebugTimer::is_running() {
|
||||||
|
return running;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace Logger {
|
namespace Logger {
|
||||||
@ -572,7 +640,7 @@ namespace Logger {
|
|||||||
loglevel = v_index(log_levels, level);
|
loglevel = v_index(log_levels, level);
|
||||||
}
|
}
|
||||||
|
|
||||||
void log_write(const size_t level, const string& msg) {
|
void log_write(const Level level, const string& msg) {
|
||||||
if (loglevel < level or logfile.empty()) return;
|
if (loglevel < level or logfile.empty()) return;
|
||||||
atomic_lock lck(busy, true);
|
atomic_lock lck(busy, true);
|
||||||
lose_priv neutered{};
|
lose_priv neutered{};
|
||||||
|
@ -152,6 +152,12 @@ namespace Term {
|
|||||||
namespace Tools {
|
namespace Tools {
|
||||||
constexpr auto SSmax = std::numeric_limits<std::streamsize>::max();
|
constexpr auto SSmax = std::numeric_limits<std::streamsize>::max();
|
||||||
|
|
||||||
|
class MyNumPunct : public std::numpunct<char> {
|
||||||
|
protected:
|
||||||
|
virtual char do_thousands_sep() const { return '\''; }
|
||||||
|
virtual std::string do_grouping() const { return "\03"; }
|
||||||
|
};
|
||||||
|
|
||||||
size_t wide_ulen(const string& str);
|
size_t wide_ulen(const string& str);
|
||||||
size_t wide_ulen(const std::wstring& w_str);
|
size_t wide_ulen(const std::wstring& w_str);
|
||||||
|
|
||||||
@ -335,7 +341,6 @@ namespace Tools {
|
|||||||
|
|
||||||
//* Convert a celsius value to celsius, fahrenheit, kelvin or rankin and return tuple with new value and unit.
|
//* Convert a celsius value to celsius, fahrenheit, kelvin or rankin and return tuple with new value and unit.
|
||||||
auto celsius_to(const long long& celsius, const string& scale) -> tuple<long long, string>;
|
auto celsius_to(const long long& celsius, const string& scale) -> tuple<long long, string>;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//* Simple logging implementation
|
//* Simple logging implementation
|
||||||
@ -349,13 +354,56 @@ namespace Logger {
|
|||||||
};
|
};
|
||||||
extern std::filesystem::path logfile;
|
extern std::filesystem::path logfile;
|
||||||
|
|
||||||
|
enum Level : size_t {
|
||||||
|
DISABLED = 0,
|
||||||
|
ERROR = 1,
|
||||||
|
WARNING = 2,
|
||||||
|
INFO = 3,
|
||||||
|
DEBUG = 4,
|
||||||
|
};
|
||||||
|
|
||||||
//* Set log level, valid arguments: "DISABLED", "ERROR", "WARNING", "INFO" and "DEBUG"
|
//* Set log level, valid arguments: "DISABLED", "ERROR", "WARNING", "INFO" and "DEBUG"
|
||||||
void set(const string& level);
|
void set(const string& level);
|
||||||
|
|
||||||
void log_write(const size_t level, const string& msg);
|
void log_write(const Level level, const string& msg);
|
||||||
inline void error(const string msg) { log_write(1, msg); }
|
inline void error(const string msg) { log_write(ERROR, msg); }
|
||||||
inline void warning(const string msg) { log_write(2, msg); }
|
inline void warning(const string msg) { log_write(WARNING, msg); }
|
||||||
inline void info(const string msg) { log_write(3, msg); }
|
inline void info(const string msg) { log_write(INFO, msg); }
|
||||||
inline void debug(const string msg) { log_write(4, msg); }
|
inline void debug(const string msg) { log_write(DEBUG, msg); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace Tools {
|
||||||
|
//* Creates a named timer that is started on construct (by default) and reports elapsed time in microseconds to Logger::debug() on destruct if running
|
||||||
|
//* Unless delayed_report is set to false, all reporting is buffered and delayed until DebugTimer is destructed or .force_report() is called
|
||||||
|
//* Usage example: Tools::DebugTimer timer(name:"myTimer", [start:true], [delayed_report:true]) // Create timer and start
|
||||||
|
//* timer.stop(); // Stop timer and report elapsed time
|
||||||
|
//* timer.stop_rename_reset("myTimer2"); // Stop timer, report elapsed time, rename timer, reset and restart
|
||||||
|
class DebugTimer {
|
||||||
|
uint64_t start_time{};
|
||||||
|
uint64_t elapsed_time{};
|
||||||
|
bool running{};
|
||||||
|
std::locale custom_locale = std::locale(std::locale::classic(), new Tools::MyNumPunct);
|
||||||
|
vector<string> report_buffer{};
|
||||||
|
public:
|
||||||
|
string name{};
|
||||||
|
bool delayed_report{};
|
||||||
|
Logger::Level log_level = Logger::DEBUG;
|
||||||
|
DebugTimer() = default;
|
||||||
|
DebugTimer(const string name, bool start = true, bool delayed_report = true);
|
||||||
|
~DebugTimer();
|
||||||
|
|
||||||
|
void start();
|
||||||
|
void stop(bool report = true);
|
||||||
|
void reset(bool restart = true);
|
||||||
|
//* Stops and reports (default), renames timer then resets and restarts (default)
|
||||||
|
void stop_rename_reset(const string& new_name, bool report = true, bool restart = true);
|
||||||
|
void report();
|
||||||
|
void force_report();
|
||||||
|
uint64_t elapsed();
|
||||||
|
bool is_running();
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -861,14 +861,14 @@ namespace Gpu {
|
|||||||
#if defined(GPU_NVIDIA)
|
#if defined(GPU_NVIDIA)
|
||||||
nvmlReturn_t result = nvmlInit();
|
nvmlReturn_t result = nvmlInit();
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::warning(std::string("Failed to initialize NVML, NVIDIA GPUs will not be detected: ") + nvmlErrorString(result));
|
Logger::debug(std::string("Failed to initialize NVML, NVIDIA GPUs will not be detected: ") + nvmlErrorString(result));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
//? Device count
|
//? Device count
|
||||||
result = nvmlDeviceGetCount(&device_count);
|
result = nvmlDeviceGetCount(&device_count);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::error(std::string("NVML: Failed to get device count: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get device count: ") + nvmlErrorString(result));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -881,7 +881,7 @@ namespace Gpu {
|
|||||||
//? Device Handle
|
//? Device Handle
|
||||||
result = nvmlDeviceGetHandleByIndex(i, devices.data() + i*sizeof(nvmlDevice_t));
|
result = nvmlDeviceGetHandleByIndex(i, devices.data() + i*sizeof(nvmlDevice_t));
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::error(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -889,7 +889,7 @@ namespace Gpu {
|
|||||||
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
|
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
|
||||||
result = nvmlDeviceGetName(devices[i], name, NVML_DEVICE_NAME_BUFFER_SIZE);
|
result = nvmlDeviceGetName(devices[i], name, NVML_DEVICE_NAME_BUFFER_SIZE);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::error(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result));
|
||||||
else {
|
else {
|
||||||
gpu_names[i] = string(name);
|
gpu_names[i] = string(name);
|
||||||
for (const auto& brand : {"NVIDIA", "Nvidia", "AMD", "Amd", "Intel", "(R)", "(TM)"}) {
|
for (const auto& brand : {"NVIDIA", "Nvidia", "AMD", "Amd", "Intel", "(R)", "(TM)"}) {
|
||||||
@ -902,14 +902,14 @@ namespace Gpu {
|
|||||||
unsigned int max_power;
|
unsigned int max_power;
|
||||||
result = nvmlDeviceGetPowerManagementLimit(devices[i], &max_power);
|
result = nvmlDeviceGetPowerManagementLimit(devices[i], &max_power);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::error(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result));
|
||||||
else gpus[i].pwr_max_usage = max_power;
|
else gpus[i].pwr_max_usage = max_power;
|
||||||
|
|
||||||
//? Get temp_max
|
//? Get temp_max
|
||||||
unsigned int temp_max;
|
unsigned int temp_max;
|
||||||
result = nvmlDeviceGetTemperatureThreshold(devices[i], NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, &temp_max);
|
result = nvmlDeviceGetTemperatureThreshold(devices[i], NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, &temp_max);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::error(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110: ") + nvmlErrorString(result));
|
||||||
else gpus[i].temp_max = (long long)temp_max;
|
else gpus[i].temp_max = (long long)temp_max;
|
||||||
}
|
}
|
||||||
initialized = true;
|
initialized = true;
|
||||||
@ -939,35 +939,39 @@ namespace Gpu {
|
|||||||
if (!initialized) return false;
|
if (!initialized) return false;
|
||||||
#if defined(GPU_NVIDIA)
|
#if defined(GPU_NVIDIA)
|
||||||
nvmlReturn_t result;
|
nvmlReturn_t result;
|
||||||
|
// DebugTimer gpu_nvidia("Nvidia Total");
|
||||||
for (unsigned int i = 0; i < device_count; ++i) {
|
for (unsigned int i = 0; i < device_count; ++i) {
|
||||||
//? GPU & memory utilization
|
//? GPU & memory utilization
|
||||||
|
// DebugTimer nvTimer("Nv utilization");
|
||||||
nvmlUtilization_t utilization;
|
nvmlUtilization_t utilization;
|
||||||
result = nvmlDeviceGetUtilizationRates(devices[i], &utilization);
|
result = nvmlDeviceGetUtilizationRates(devices[i], &utilization);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::error(std::string("NVML: Failed to get GPU utilization: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get GPU utilization: ") + nvmlErrorString(result));
|
||||||
} else {
|
} else {
|
||||||
gpus_slice[i].gpu_percent.push_back((long long)utilization.gpu);
|
gpus_slice[i].gpu_percent.push_back((long long)utilization.gpu);
|
||||||
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization.memory);
|
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization.memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nvTimer.stop_rename_reset("Nv clock");
|
||||||
//? Clock speeds
|
//? Clock speeds
|
||||||
unsigned int gpu_clock, mem_clock;
|
unsigned int gpu_clock, mem_clock;
|
||||||
|
|
||||||
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_GRAPHICS, &gpu_clock);
|
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_GRAPHICS, &gpu_clock);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::error(std::string("NVML: Failed to get GPU clock speed: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get GPU clock speed: ") + nvmlErrorString(result));
|
||||||
else gpus_slice[i].gpu_clock_speed = (long long)gpu_clock;
|
else gpus_slice[i].gpu_clock_speed = (long long)gpu_clock;
|
||||||
|
|
||||||
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_MEM, &mem_clock);
|
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_MEM, &mem_clock);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::error(std::string("NVML: Failed to get VRAM clock speed: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get VRAM clock speed: ") + nvmlErrorString(result));
|
||||||
else gpus_slice[i].mem_clock_speed = (long long)mem_clock;
|
else gpus_slice[i].mem_clock_speed = (long long)mem_clock;
|
||||||
|
|
||||||
|
// nvTimer.stop_rename_reset("Nv power");
|
||||||
//? Power usage & state
|
//? Power usage & state
|
||||||
unsigned int power;
|
unsigned int power;
|
||||||
result = nvmlDeviceGetPowerUsage(devices[i], &power);
|
result = nvmlDeviceGetPowerUsage(devices[i], &power);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::error(std::string("NVML: Failed to get GPU power usage: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get GPU power usage: ") + nvmlErrorString(result));
|
||||||
} else {
|
} else {
|
||||||
gpus_slice[i].pwr_usage = (long long)power;
|
gpus_slice[i].pwr_usage = (long long)power;
|
||||||
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
|
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
|
||||||
@ -976,23 +980,25 @@ namespace Gpu {
|
|||||||
nvmlPstates_t pState;
|
nvmlPstates_t pState;
|
||||||
result = nvmlDeviceGetPowerState(devices[i], &pState);
|
result = nvmlDeviceGetPowerState(devices[i], &pState);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::error(std::string("NVML: Failed to get GPU power state: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get GPU power state: ") + nvmlErrorString(result));
|
||||||
else gpus_slice[i].pwr_state = static_cast<int>(pState);
|
else gpus_slice[i].pwr_state = static_cast<int>(pState);
|
||||||
|
|
||||||
|
// nvTimer.stop_rename_reset("Nv temp");
|
||||||
//? GPU temperature
|
//? GPU temperature
|
||||||
if (Config::getB("check_temp")) {
|
if (Config::getB("check_temp")) {
|
||||||
unsigned int temp;
|
unsigned int temp;
|
||||||
nvmlReturn_t result = nvmlDeviceGetTemperature(devices[i], NVML_TEMPERATURE_GPU, &temp);
|
nvmlReturn_t result = nvmlDeviceGetTemperature(devices[i], NVML_TEMPERATURE_GPU, &temp);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::error(std::string("NVML: Failed to get GPU temperature: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get GPU temperature: ") + nvmlErrorString(result));
|
||||||
else gpus_slice[i].temp.push_back((long long)temp);
|
else gpus_slice[i].temp.push_back((long long)temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nvTimer.stop_rename_reset("Nv mem");
|
||||||
//? Memory info
|
//? Memory info
|
||||||
nvmlMemory_t memory;
|
nvmlMemory_t memory;
|
||||||
result = nvmlDeviceGetMemoryInfo(devices[i], &memory);
|
result = nvmlDeviceGetMemoryInfo(devices[i], &memory);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::error(std::string("NVML: Failed to get VRAM info: ") + nvmlErrorString(result));
|
Logger::debug(std::string("NVML: Failed to get VRAM info: ") + nvmlErrorString(result));
|
||||||
} else {
|
} else {
|
||||||
gpus_slice[i].mem_total = memory.total;
|
gpus_slice[i].mem_total = memory.total;
|
||||||
gpus_slice[i].mem_used = memory.used;
|
gpus_slice[i].mem_used = memory.used;
|
||||||
@ -1002,6 +1008,7 @@ namespace Gpu {
|
|||||||
gpus_slice[i].mem_used_percent.push_back(used_percent);
|
gpus_slice[i].mem_used_percent.push_back(used_percent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//nvTimer.stop_rename_reset("Nv pcie");
|
||||||
//? PCIe link speeds
|
//? PCIe link speeds
|
||||||
unsigned int tx,rx;
|
unsigned int tx,rx;
|
||||||
result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_TX_BYTES, &tx);
|
result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_TX_BYTES, &tx);
|
||||||
@ -1042,13 +1049,17 @@ namespace Gpu {
|
|||||||
rsmi_status_t result;
|
rsmi_status_t result;
|
||||||
|
|
||||||
result = rsmi_init(0);
|
result = rsmi_init(0);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
Logger::error("Failed to initialize ROCm SMI, AMD GPUs will not be detected");
|
Logger::debug("Failed to initialize ROCm SMI, AMD GPUs will not be detected");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
//? Device count
|
//? Device count
|
||||||
result = rsmi_num_monitor_devices(&device_count);
|
result = rsmi_num_monitor_devices(&device_count);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
Logger::error("ROCm SMI: Failed to fetch number of devices");
|
Logger::debug("ROCm SMI: Failed to fetch number of devices");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (device_count > 0) {
|
if (device_count > 0) {
|
||||||
gpus.resize(gpus.size() + device_count);
|
gpus.resize(gpus.size() + device_count);
|
||||||
@ -1061,21 +1072,21 @@ namespace Gpu {
|
|||||||
char name[NVML_DEVICE_NAME_BUFFER_SIZE]; // ROCm SMI does not provide a constant for this as far as I can tell, this should be good enough
|
char name[NVML_DEVICE_NAME_BUFFER_SIZE]; // ROCm SMI does not provide a constant for this as far as I can tell, this should be good enough
|
||||||
result = rsmi_dev_name_get(i, name, NVML_DEVICE_NAME_BUFFER_SIZE);
|
result = rsmi_dev_name_get(i, name, NVML_DEVICE_NAME_BUFFER_SIZE);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::error("ROCm SMI: Failed to get device name");
|
Logger::debug("ROCm SMI: Failed to get device name");
|
||||||
else gpu_names[offset] = string(name);
|
else gpu_names[offset] = string(name);
|
||||||
|
|
||||||
//? Power usage
|
//? Power usage
|
||||||
uint64_t max_power;
|
uint64_t max_power;
|
||||||
result = rsmi_dev_power_cap_get(i, 0, &max_power);
|
result = rsmi_dev_power_cap_get(i, 0, &max_power);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::error("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W");
|
Logger::debug("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W");
|
||||||
else gpus[offset].pwr_max_usage = (long long)(max_power/1000); // RSMI reports power in microWatts
|
else gpus[offset].pwr_max_usage = (long long)(max_power/1000); // RSMI reports power in microWatts
|
||||||
|
|
||||||
//? Get temp_max
|
//? Get temp_max
|
||||||
int64_t temp_max;
|
int64_t temp_max;
|
||||||
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temp_max);
|
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temp_max);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::error("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110");
|
Logger::debug("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110");
|
||||||
else gpus[offset].temp_max = (long long)temp_max;
|
else gpus[offset].temp_max = (long long)temp_max;
|
||||||
}
|
}
|
||||||
initialized = true;
|
initialized = true;
|
||||||
@ -1110,14 +1121,14 @@ namespace Gpu {
|
|||||||
uint32_t utilization;
|
uint32_t utilization;
|
||||||
result = rsmi_dev_busy_percent_get(i, &utilization);
|
result = rsmi_dev_busy_percent_get(i, &utilization);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::error("ROCm SMI: Failed to get GPU utilization");
|
Logger::debug("ROCm SMI: Failed to get GPU utilization");
|
||||||
else
|
else
|
||||||
gpus_slice[i].gpu_percent.push_back((long long)utilization);
|
gpus_slice[i].gpu_percent.push_back((long long)utilization);
|
||||||
|
|
||||||
//? Memory utilization
|
//? Memory utilization
|
||||||
result = rsmi_dev_memory_busy_percent_get(i, &utilization);
|
result = rsmi_dev_memory_busy_percent_get(i, &utilization);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::error("ROCm SMI: Failed to get VRAM utilization");
|
Logger::debug("ROCm SMI: Failed to get VRAM utilization");
|
||||||
else
|
else
|
||||||
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization);
|
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization);
|
||||||
|
|
||||||
@ -1125,19 +1136,19 @@ namespace Gpu {
|
|||||||
rsmi_frequencies_t frequencies;
|
rsmi_frequencies_t frequencies;
|
||||||
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &frequencies);
|
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &frequencies);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::error("ROCm SMI: Failed to get GPU clock speed: ");
|
Logger::debug("ROCm SMI: Failed to get GPU clock speed: ");
|
||||||
else gpus_slice[i].gpu_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
|
else gpus_slice[i].gpu_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
|
||||||
|
|
||||||
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &frequencies);
|
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &frequencies);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::error("ROCm SMI: Failed to get VRAM clock speed: ");
|
Logger::debug("ROCm SMI: Failed to get VRAM clock speed: ");
|
||||||
else gpus_slice[i].mem_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
|
else gpus_slice[i].mem_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
|
||||||
|
|
||||||
//? Power usage & state
|
//? Power usage & state
|
||||||
uint64_t power;
|
uint64_t power;
|
||||||
result = rsmi_dev_power_ave_get(i, 0, &power);
|
result = rsmi_dev_power_ave_get(i, 0, &power);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::error("ROCm SMI: Failed to get GPU power usage");
|
Logger::debug("ROCm SMI: Failed to get GPU power usage");
|
||||||
else
|
else
|
||||||
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
|
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
|
||||||
gpus_slice[i].pwr_state = 32; // NVML_PSTATE_UNKNOWN; won't display in GUI
|
gpus_slice[i].pwr_state = 32; // NVML_PSTATE_UNKNOWN; won't display in GUI
|
||||||
@ -1147,7 +1158,7 @@ namespace Gpu {
|
|||||||
int64_t temp;
|
int64_t temp;
|
||||||
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temp);
|
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temp);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::error("ROCm SMI: Failed to get GPU temperature");
|
Logger::debug("ROCm SMI: Failed to get GPU temperature");
|
||||||
else gpus_slice[i].temp.push_back((long long)temp/1000);
|
else gpus_slice[i].temp.push_back((long long)temp/1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1155,13 +1166,13 @@ namespace Gpu {
|
|||||||
uint64_t total, used;
|
uint64_t total, used;
|
||||||
result = rsmi_dev_memory_total_get(i, RSMI_MEM_TYPE_VRAM, &total);
|
result = rsmi_dev_memory_total_get(i, RSMI_MEM_TYPE_VRAM, &total);
|
||||||
if (result != RSMI_STATUS_SUCCESS) {
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
Logger::error("ROCm SMI: Failed to get total VRAM");
|
Logger::debug("ROCm SMI: Failed to get total VRAM");
|
||||||
} else {
|
} else {
|
||||||
gpus_slice[i].mem_total = total;
|
gpus_slice[i].mem_total = total;
|
||||||
|
|
||||||
result = rsmi_dev_memory_usage_get(i, RSMI_MEM_TYPE_VRAM, &used);
|
result = rsmi_dev_memory_usage_get(i, RSMI_MEM_TYPE_VRAM, &used);
|
||||||
if (result != RSMI_STATUS_SUCCESS) {
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
Logger::error("ROCm SMI: Failed to get VRAM usage");
|
Logger::debug("ROCm SMI: Failed to get VRAM usage");
|
||||||
} else {
|
} else {
|
||||||
gpus_slice[i].mem_used = used;
|
gpus_slice[i].mem_used = used;
|
||||||
|
|
||||||
@ -1174,7 +1185,7 @@ namespace Gpu {
|
|||||||
uint64_t tx, rx;
|
uint64_t tx, rx;
|
||||||
result = rsmi_dev_pci_throughput_get(i, &tx, &rx, 0);
|
result = rsmi_dev_pci_throughput_get(i, &tx, &rx, 0);
|
||||||
if (result != RSMI_STATUS_SUCCESS) {
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
Logger::error("ROCm SMI: Failed to get PCIe throughput");
|
Logger::debug("ROCm SMI: Failed to get PCIe throughput");
|
||||||
} else {
|
} else {
|
||||||
gpus_slice[i].pcie_tx = (long long)tx;
|
gpus_slice[i].pcie_tx = (long long)tx;
|
||||||
gpus_slice[i].pcie_rx = (long long)rx;
|
gpus_slice[i].pcie_rx = (long long)rx;
|
||||||
@ -1195,6 +1206,8 @@ namespace Gpu {
|
|||||||
auto collect(bool no_update) -> vector<gpu_info>& {
|
auto collect(bool no_update) -> vector<gpu_info>& {
|
||||||
if (Runner::stopping or (no_update and not gpus.empty())) return gpus;
|
if (Runner::stopping or (no_update and not gpus.empty())) return gpus;
|
||||||
|
|
||||||
|
// DebugTimer gpu_timer("GPU Total");
|
||||||
|
|
||||||
//* Collect data
|
//* Collect data
|
||||||
Nvml::collect(gpus.data()); // raw pointer to vector data, size == Nvml::device_count
|
Nvml::collect(gpus.data()); // raw pointer to vector data, size == Nvml::device_count
|
||||||
Rsmi::collect(gpus.data() + Nvml::device_count); // size = Rsmi::device_count
|
Rsmi::collect(gpus.data() + Nvml::device_count); // size = Rsmi::device_count
|
||||||
|
Loading…
Reference in New Issue
Block a user