#include #include #include #include #include using strint_it = std::unordered_map::iterator; std::string_view find_most_useful_string(std::string const & doc) { // measure all strings in doc in a naive way, finding the one with the most repetitions. // stop when repetitions * length^2 decreases std::unordered_map counts; int length = 1; std::string_view max_substr; for (int length = 1; length < doc.size() / 2; ++ length) { int max_count = 0; for (int offset = 0; offset < doc.size() - length; ++ offset) { std::string_view substr(doc.data() + offset, length); if (counts.count(substr)) continue; // already ran thru with this one int & count = counts[substr]; // reference to count for this substr for (int offset2 = offset; offset2 < doc.size() - length; ++ offset2) { std::string_view substr2(doc.data() + offset2, length); if (substr == substr2) { ++ count; if (count > max_count) { max_count = count; max_substr = substr; } } } } if (max_count > 1) std::cout << "len:" << length << " str:" << max_substr << " ct:" << max_count << std::endl; } return max_substr; } int main() { std::ifstream stream("useful-string.cpp"); std::ostringstream sstr; sstr << stream.rdbuf(); find_most_useful_string(sstr.str()); }