// etabench // Copyright (C) 2022 xaizek <xaizek@posteo.net> // // This file is part of etabench. // // etabench is free software: you can redistribute it and/or modify // it under the terms of version 3 of the GNU General Public License // as published by the Free Software Foundation. // // etabench is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with etabench. If not, see <https://www.gnu.org/licenses/>. #include "Report.hpp" #include <cmath> #include <algorithm> #include <filesystem> #include <optional> #include <ranges> #include <string> #include <utility> #include <vector> #include <fmt/color.h> #include <fmt/core.h> #include <GnuPlotScripting/GnuPlotScripting.hpp> #include "utils/float.hpp" #include "utils/os.hpp" #include "utils/predicate.hpp" #include "core.hpp" namespace fs = std::filesystem; // Similarity (or rather "accuracy"?) measurement // // Similarity of an algorithm is an average of similarities of running it on // different speed profiles. // // A speed profile similarity is measured against an ideal ETA for the speed // profile via cosine similarity metric combined with magnitude correction. // // If we want to have similarity as a percentage, profile similarity should: // 1. Be symmetric? We control order, so this might not be required. // 2. Not ignore magnitude (cosine similarity alone does). // 3. Not ignore shifts. // // A non-normalized similarity will work too if we just sum similarities of // profiles, but they have to be monotonic and have common lower bound greater // or equal to 0. // Different enough colors for plot lines. static const int Colors[] = { 0xff2020, 0x20ff20, 0x2020ff, 0xffa500, 0xa0a0a0, 0xff1493, 0x42d4f4, 0xf032e6, 0x469990, 0x9A6324, 0x911eb4, 0x800000, 0x7fdfb4, 0x000075, }; Report::Report() { GnuPlotScripting::global_config().set_logger(); } void Report::addResult(const EtaAlg &alg, const Profile &profile, const std::vector<EtaPoint> &points) { AlgReport &ar = getAlg(alg); ProfileReport &pr = ar.profiles.emplace_back(profile.getName()); const int fullEta = points.back().time + 1; // XXX: can be empty? long long expectedInt = 0, actualInt = 0; double top = 0; double b1 = 0, b2 = 0; pr.points.reserve(points.size()); for (const EtaPoint &p : points) { EtaPointReport &r = pr.points.emplace_back(); r.time = p.time; r.estimate = p.estimate; r.speed = p.speed; int correctEta = fullEta - r.time; r.error = r.estimate - correctEta; top += 1.0*correctEta*r.estimate; expectedInt += correctEta; actualInt += r.estimate; b1 += 1.0*correctEta*correctEta; b2 += 1.0*r.estimate*r.estimate; } // Cosine similarity. if (!floatEq(b1, 0) && !floatEq(b2, 0)) { pr.similarity = top/(std::sqrt(b1)*std::sqrt(b2)); } else { pr.similarity = 0; } // Try to account for magnitude differences and still stay in [0; 1] range. auto [minInt, maxInt] = std::minmax(expectedInt, actualInt); pr.similarity *= 1.0f*minInt/maxInt; // Incremental average. ar.similarity += (pr.similarity - ar.similarity)/ar.profiles.size(); } AlgReport & Report::getAlg(const EtaAlg &alg) { const std::string &name = alg.getName(); for (AlgReport &r : algs) { if (r.name == name) { return r; } } return algs.emplace_back(name); } void Report::plot(const std::string &outDir, const std::string &montageFile) { using namespace GnuPlotScripting; if (algs.empty()) { return; } std::optional<TempDir> tempDir; std::string plotDir = outDir; if (!plotDir.empty()) { if (fs::exists(plotDir)) { throw std::runtime_error( fmt::format("Plotting directory '{}' already exists", plotDir) ); } fs::create_directory(plotDir); } else if (!montageFile.empty()) { tempDir.emplace("etabench-montage"); plotDir = tempDir->getPath(); } std::ranges::sort(algs, makeRPredicate(&AlgReport::similarity)); for (int p = 0; auto &pr : algs[0].profiles) { std::string root = fmt::format("{}/{}", plotDir, pr.name); fs::create_directory(root); std::vector<int> time; std::vector<int> speed; std::vector<int> points; const float fullEta = pr.points.back().time + 1; // XXX: can be empty? for (unsigned int i = 0; i < pr.points.size(); ++i) { time.push_back(pr.points[i].time); speed.push_back(pr.points[i].speed); float correctEta = fullEta - pr.points[i].time; points.push_back(correctEta); } Data_Vector ideal(time, points); Data_Vector profile(time, speed); Script_File script(fmt::format("{}/eta.gp", root), Script_File_Mode_Enum::Silent); script.free_form(fmt::runtime("set xlabel 'Time (s)'")); script.free_form(fmt::runtime("set grid")); script.free_form(fmt::runtime("set terminal pngcairo size 1024,768")); script.free_form(fmt::runtime("set output '{}/eta.png'"), root); script.free_form(fmt::runtime("set multiplot layout 2,1")); std::vector<Data_Vector> actuals; for (auto &alg : algs) { ProfileReport &pr = alg.profiles[p]; std::vector<float> actual; for (unsigned int i = 0; i < pr.points.size(); ++i) { actual.push_back(pr.points[i].estimate); } actuals.emplace_back(time, actual); } script.free_form(fmt::runtime("set title '{} Profile'"), pr.name); script.free_form(fmt::runtime("set ylabel 'Speed (b)'")); script.free_form(fmt::runtime("set style fill solid 0.5")); script.free_form(fmt::runtime("plot {} smooth freq w boxes " "lc rgb\"green\" notitle"), profile); script.cache(ideal); for (unsigned int i = 0; i < algs.size(); ++i) { script.cache(actuals[i]); } script.free_form(fmt::runtime("set title '{} Runs'"), pr.name); script.free_form(fmt::runtime("set ylabel 'ETA (s)'")); script.free_part_form(fmt::runtime("plot ")); for (int i = algs.size(); auto &alg : algs | std::views::reverse) { --i; script.free_part_form(fmt::runtime("{} lw 2 lc rgb \"#{:x}\"" " with lines title '{}', "), actuals[i], Colors[i % std::size(Colors)], alg.name); } script.free_part_form(fmt::runtime("{} lw 3 lc 'black' with lines" " title '{}'\n"), ideal, "ideal"); ++p; } if (!montageFile.empty()) { auto profNames = algs[0].profiles | std::views::transform(&ProfileReport::name); std::vector<std::string> names(profNames.begin(), profNames.end()); std::ranges::sort(names); std::vector<std::string> command = { "montage" }; for (const std::string &name : names) { command.emplace_back(fmt::format("{}/{}/eta.png", plotDir, name)); } command.insert(command.cend(), { "-geometry", "+2+2", "-frame", "5", "-mattecolor", "black", montageFile }); queryProc(std::move(command)); } } void Report::print(bool verbose) { std::ranges::sort(algs, makeRPredicate(&AlgReport::similarity)); if (verbose) { for (auto &alg : algs) { std::string algTitle = fmt::format("{} {:.2f}%", alg.name, alg.similarity*100); fmt::print("{}\n", algTitle); fmt::print("{:=<{}}\n\n", "", algTitle.size()); for (auto &profile : alg.profiles) { std::string profileTitle = fmt::format("{}::{} {:.2f}%", alg.name, profile.name, profile.similarity*100); fmt::print("{}\n", profileTitle); fmt::print("{:-<{}}\n\n", "", profileTitle.size()); print(profile); fmt::print("\n"); } } fmt::print("\n"); } std::string caption = "Profile \\ Alg"; auto profNameLens = algs[0].profiles | std::views::transform(&ProfileReport::name) | std::views::transform(&std::string::size); int maxProfNameLen = std::max(caption.length(), std::ranges::max(profNameLens)); // XXX: assumption is that every algorithm is run for every profile. int rankWidth = std::floor(std::log10(algs.size())) + 1; fmt::print(fmt::emphasis::bold, "{:<{}}", caption, maxProfNameLen + 2); for (int i = 0; i < (int)algs.size(); ++i) { fmt::print(fmt::emphasis::bold, "{:>{}} ", fmt::format("#{:<{}}", i + 1, rankWidth), 7 + rankWidth); } fmt::print("\n"); for (int i = 0; auto &profile : algs[0].profiles) { fmt::print("{:<{}} ", profile.name, maxProfNameLen); auto sims = algs | std::views::transform([i](AlgReport &ar) { return ar.profiles[i].similarity; }); std::vector<float> simRanks(sims.begin(), sims.end()); std::ranges::sort(simRanks, std::greater<>()); auto dups = std::ranges::unique(simRanks, &floatEq<0.0001f>); simRanks.erase(dups.begin(), dups.end()); for (auto &alg : algs) { float similarity = alg.profiles[i].similarity; auto pos = std::ranges::find_if(simRanks, [similarity](float a) { return floatEq<0.0001f>(a, similarity); }); int rank = pos - simRanks.begin(); fmt::text_style style; if (rank == 0) { style |= fmt::fg(fmt::color::light_green); } else if (rank == (int)simRanks.size() - 1) { style |= fmt::fg(fmt::color::pale_violet_red); } else { style |= fmt::fg(fmt::color::light_golden_rod_yellow); } fmt::print(style, "{:6.2f}:{:<{}} ", similarity*100, rank + 1, rankWidth); } fmt::print("\n"); ++i; } fmt::print(fmt::emphasis::bold, "\nAlgorithm ranking\n"); for (int i = 0; i < (int)algs.size(); ++i) { fmt::print(" {:{}}. {:5.2f}% - {}\n", i + 1, rankWidth, algs[i].similarity*100, algs[i].name); } } void Report::print(ProfileReport &pr) { const float fullEta = pr.points.back().time + 1; // XXX: can be empty? for (unsigned int i = 0; i < pr.points.size(); i += 20) { int limit = std::min<int>(pr.points.size(), i + 20); fmt::print("t: "); for (int j = i; j < limit; ++j) { fmt::print("[ {:02} ]", pr.points[j].time); } fmt::print("\n"); fmt::print("s: "); for (int j = i; j < limit; ++j) { fmt::print("< {:02} >", pr.points[j].speed); } fmt::print("\n"); fmt::print("E: "); for (int j = i; j < limit; ++j) { fmt::print("{{ {:02} }}", pr.points[j].estimate); } fmt::print("\n"); fmt::print("e: "); for (int j = i; j < limit; ++j) { fmt::print("{:^+6}", pr.points[j].error); } fmt::print("\n"); fmt::print("r: "); for (int j = i; j < limit; ++j) { float correctEta = fullEta - pr.points[j].time; fmt::print("{:^+6.0f}", 100*pr.points[j].error/correctEta); } fmt::print("\n"); fmt::print("\n"); } auto errors = pr.points | std::views::transform(&EtaPointReport::error); std::ranges::sort(errors, std::less<float>()); int nerrors = pr.points.size(); int median; if (nerrors % 2 == 1) { median = errors[nerrors/2]; } else { median = (errors[nerrors/2 - 1] + errors[nerrors/2])/2; } fmt::print(" Min error: {:5} ({:.2f}%)\n", errors.front(), 100*errors.front()/fullEta); fmt::print("Median error: {:5} ({:.2f}%)\n", median, 100*median/fullEta); fmt::print(" Max error: {:5} ({:.2f}%)\n", errors.back(), 100*errors.back()/fullEta); }