diff --git a/examples/heatmap/heatmap.cpp b/examples/heatmap/heatmap.cpp
new file mode 100644
index 00000000..89a732b1
--- /dev/null
+++ b/examples/heatmap/heatmap.cpp
@@ -0,0 +1,58 @@
+// Compile with raptor-clang++ ./heatmap.cpp -O3 -g -o heatmap
+
+#include <exception>
+#include <iostream>
+#include <string>
+
+#include "raptor/raptor.h"
+
+template <typename fty> fty *__raptor_truncate_op_func(fty *, int, int, int, int);
+template <typename fty> fty *__raptor_truncate_op_func(fty *, int, int, int);
+template <typename fty> fty *__raptor_truncate_mem_func(fty *, int, int, int, int);
+
+#define frac 1e-4
+#define FROM 64
+#define TO 1, 5, 8
+
+// Kernel that main() runs through __raptor_truncate_mem_func: combines an
+// exact product with a small addend (in * frac).
+double foo (double in) {
+  double sum = in * 3.0; // Exact operation
+  double small = in * frac; // Ok for double, not for truncated
+  double cancel = in + small;
+
+  return sum + cancel;
+}
+
+int main (int argc, char * argv[]) {
+
+  if (argc != 2) {
+    std::cerr << "Expected exactly one floating-point argument." << std::endl;
+    return 1;
+  }
+
+  // std::stod throws on non-numeric or out-of-range input; report that
+  // instead of terminating on an uncaught exception.
+  double a = 0.0;
+  try {
+    a = std::stod(argv[1]);
+  } catch (const std::exception &e) {
+    std::cerr << "Could not parse '" << argv[1]
+              << "' as a floating-point value (" << e.what() << ")." << std::endl;
+    return 1;
+  }
+
+  double trunc = __raptor_expand_mem_value(
+    __raptor_truncate_mem_func(foo, FROM, TO)(__raptor_truncate_mem_value(a, FROM, TO)),
+    FROM, TO);
+
+  double exact = a * 3.0 + a + a * frac;
+
+  std::cout << "Exact: " << exact << "\n";
+  std::cout << "Truncated: " << trunc << std::endl;
+
+  __raptor_fprt_op_dump_status(10);
+  std::cout << __raptor_get_trunc_flop_count() << " ops were truncated." << std::endl;
+
+  return 0;
+}
diff --git a/runtime/include/public/raptor/raptor.h b/runtime/include/public/raptor/raptor.h
index 321e8614..8b011af8 100644
--- a/runtime/include/public/raptor/raptor.h
+++ b/runtime/include/public/raptor/raptor.h
@@ -23,6 +23,9 @@ void __raptor_fprt_delete_all();
 long long __raptor_get_trunc_flop_count();
 long long f_raptor_get_trunc_flop_count();
 
+void __raptor_fprt_op_dump_status(unsigned);
+void f_raptor_fprt_op_dump_status(unsigned);
+
 #define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \
   struct __raptor_logged_flops_##CPP_TY { \
     CPP_TY *vals; \
diff --git a/runtime/ir/Mpfr.cpp b/runtime/ir/Mpfr.cpp
index 3333faa6..02a1476e 100644
--- a/runtime/ir/Mpfr.cpp
+++ b/runtime/ir/Mpfr.cpp
@@ -190,6 +190,12 @@ void __raptor_fprt_ieee_16_count(int64_t exponent, int64_t significand,
 __RAPTOR_MPFR_ATTRIBUTES
 long long __raptor_get_trunc_flop_count();
 
+__RAPTOR_MPFR_ATTRIBUTES
+void __raptor_fprt_op_dump_status(unsigned);
+
+__RAPTOR_MPFR_ATTRIBUTES
+void f_raptor_fprt_op_dump_status(unsigned);
+
 __RAPTOR_MPFR_ATTRIBUTES
 long long __raptor_get_double_flop_count();
 
@@ -267,6 +273,10 @@ void raptor_fprt_op_dump_status(int num);
 __RAPTOR_MPFR_ATTRIBUTES
 void raptor_fprt_op_clear();
 
+#ifndef RAPTOR_FPRT_ENABLE_SHADOW_RESIDUALS
+#define RAPTOR_FPRT_ENABLE_SHADOW_RESIDUALS
+#endif
+
 #ifdef RAPTOR_FPRT_ENABLE_SHADOW_RESIDUALS
 // #define SHADOW_ERR_REL 6.25e-1 //
 // #define SHADOW_ERR_ABS 6.25e-1 // If reference is 0.
@@ -275,6 +285,8 @@ void raptor_fprt_op_clear();
 // #define SHADOW_ERR_REL 6.0e-8 //
 // #define SHADOW_ERR_ABS 6.0e-8 // If reference is 0.
 
+extern bool excl_trunc;
+
 // TODO this is a bit sketchy if the user cast their float to int before calling
 // this. We need to detect these patterns
 #define __RAPTOR_MPFR_LROUND(OP_TYPE, LLVM_OP_NAME, FROM_TYPE, RET, ARG1, \
@@ -427,11 +439,15 @@ void raptor_fprt_op_clear();
     double err = __raptor_fprt_##FROM_TYPE##_abs_err(trunc, mc->shadow); \
     if (!opdata[loc].count) \
       opdata[loc].op = #LLVM_OP_NAME; \
+    printf(#LLVM_OP_NAME": trunc = %e err = %e err/trunc = %e", trunc, err, err/trunc); \
     if (trunc != 0 && err / trunc > SHADOW_ERR_REL) { \
       ++opdata[loc].count_thresh; \
+      printf(" (flagged)"); \
     } else if (trunc == 0 && err > SHADOW_ERR_ABS) { \
       ++opdata[loc].count_thresh; \
+      printf(" (flagged)"); \
     } \
+    printf("\n"); \
     opdata[loc].l1_err += err; \
     ++opdata[loc].count; \
     return __raptor_fprt_ptr_to_##FROM_TYPE(mc); \
diff --git a/runtime/obj/Counting.cpp b/runtime/obj/Counting.cpp
index 1c78071c..5c4c23ba 100644
--- a/runtime/obj/Counting.cpp
+++ b/runtime/obj/Counting.cpp
@@ -102,82 +102,17 @@ bool __op_dump_cmp(std::pair &a,
 }
 
 __RAPTOR_MPFR_ATTRIBUTES
-void raptor_fprt_op_dump_status(unsigned num) {
-  // int size, rank;
-  // MPI_Comm_size(MPI_COMM_WORLD, &size);
-  // MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+void __raptor_fprt_op_dump_status(unsigned num) {
 
   if (opdata.size() < num)
     num = opdata.size();
 
-  // if (rank == 0) {
   std::cerr << "Information about top " << num << " operations." << std::endl;
-  // }
 
-  // std::vector key_recvcounts(size);
-  // std::vector key_displs(size);
-  // std::vector char_recvcounts(size);
-  // std::vector char_displs(size);
-  // std::vector key_chars;
-  // std::vector key_sizes;
   std::vector> od_vec;
   std::vector l1_vec;
   std::vector ct_vec, c_vec;
 
-  // // Synchronize keys between processes.
-  // // Build explicit char vector of keys.
-  // // Collect size of each individual key.
-  // for (auto& it : opdata) {
-  //   int sz = 0;
-  //   for (char* c = it.first, *c, ++c) {
-  //     key_chars.push_back(c);
-  //     ++sz;
-  //   }
-  //   key_sizes.push_back(sz);
-  // }
-  // assert(op_data.size() == key_sizes.size());
-
-  // key_recvcounts[rank] = op_data.size();
-  // char_counts[rank] = key_chars.size();
-
-  // MPI_Allgather(MPI_IN_PLACE, 0, NULL,
-  //   key_counts.data(), 1, MPI_UNSIGNED_LONG_LONG,
-  //   MPI_COMM_WORLD);
-  // MPI_Allgather(MPI_IN_PLACE, 0, NULL,
-  //   char_counts.data(), 1, MPI_UNSIGNED_LONG_LONG,
-  //   MPI_COMM_WORLD);
-
-  // unsigned long long key_recvcounts_sum = 0;
-  // unsigned long long char_recvcounts_sum = 0;
-  // for (unsigned long long i = 0; i < size; ++i) {
-  //   key_displs[i] = key_recvcounts_sum;
-  //   char_displs[i] = char_recvcounts_sum;
-  //   key_recvcounts_sum += key_recvcounts[i];
-  //   char_recvcounts_sum += char_recvcounts[i];
-  // }
-  // std::vector key_sizes_recv(key_recvcount_sum);
-  // std::vector key_chars_recv(char_recvcounts_sum);
-
-  // MPI_Allgatherv(key_sizes.data(), key_sizes.size(), MPI_UNSIGNED_LONG_LONG,
-  //   key_sizes_recv.data(), key_recvcounts.data(),
-  //   key_displs.data(), MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD);
-  // MPI_Allgatherv(key_chars.data(), key_chars.size(), MPI_CHAR,
-  //   key_chars_recv.data(), char_recvcounts.data(),
-  //   char_displs.data(), MPI_CHAR, MPI_COMM_WORLD);
-
-  // // Build strings
-  // std::vector keys;
-  // unsigned long long char_idx = 0;
-  // for (unsigned long long i = 0; i < key_recvcount_sum; ++i) {
-  //   keys.push_back(std::string(key_chars[char_idx], key_sizes[i]));
-  //   char_idx += key_sizes[i];
-  // }
-
-  // // Make sure every key is represented in local opdata map
-  // for (auto& key : keys) {
-  //   opdata.insert(key, struct __raptor_op{"FILL", 0, 0, 0});
-  // }
-
   // The order of iteration over keys will be the same on all processes.
   for (auto &it : opdata) {
     od_vec.push_back(it);
@@ -186,27 +121,6 @@ void raptor_fprt_op_dump_status(unsigned num) {
     c_vec.push_back(it.second.count);
   }
 
-  // Perform an allreduce over opdata elements stored in the vector.
-  // if (rank == 0) {
-  //   MPI_Reduce(MPI_IN_PLACE, l1_vec.data(), od_vec.size(), MPI_DOUBLE,
-  //   MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(MPI_IN_PLACE, ct_vec.data(),
-  //   od_vec.size(), MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-  //   MPI_Reduce(MPI_IN_PLACE, c_vec.data(), od_vec.size(), MPI_DOUBLE,
-  //   MPI_SUM, 0, MPI_COMM_WORLD);
-  // } else {
-  //   MPI_Reduce(l1_vec.data(), NULL, od_vec.size(), MPI_DOUBLE, MPI_SUM, 0,
-  //   MPI_COMM_WORLD); MPI_Reduce(ct_vec.data(), NULL, od_vec.size(),
-  //   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( c_vec.data(), NULL,
-  //   od_vec.size(), MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-  // }
-
-  // if (rank == 0) {
-  // for (int i = 0; i < od_vec.size(); ++i) {
-  //   od_vec[i].second.l1_err = l1_vec[i];
-  //   od_vec[i].second.count_thresh = ct_vec[i];
-  //   od_vec[i].second.count = c_vec[i];
-  // }
-
   std::sort(od_vec.begin(), od_vec.end(), __op_dump_cmp);
 
   auto end = od_vec.begin() + num;
@@ -217,7 +131,12 @@ void raptor_fprt_op_dump_status(unsigned num) {
               << " Ignored " << it->second.count_ignore << " times."
               << std::endl;
   }
-  // }
+}
+
+// Forwards to __raptor_fprt_op_dump_status under the f_raptor_ name.
+__RAPTOR_MPFR_ATTRIBUTES
+void f_raptor_fprt_op_dump_status(unsigned num) {
+  __raptor_fprt_op_dump_status(num);
 }
 
 long long __raptor_get_memory_access_trunc_store() {