// Finds the substring of `doc` that scores highest under a naive
// "usefulness" heuristic: occurrences * length^2.
//
// For each candidate length (starting at 1 and staying strictly below half
// of the document size) every window of that length is counted, overlapping
// occurrences included, and the most frequent substring of that length is
// selected. Ties are resolved in favour of the substring whose first
// occurrence is earliest in `doc`. The search stops at the first length
// whose best score is lower than the best score seen so far.
//
// A one-line summary ("len:… str:… ct:…") is written to std::cout for every
// length that gets examined, including the one that triggers the stop.
//
// Parameters:
//   doc - text to analyse; it is only read.
//
// Returns:
//   A copy of the best-scoring substring, or an empty string when `doc` has
//   fewer than 4 characters (no candidate length satisfies the bound).
//
// Note: a substring that occurs only once still scores length^2, so for
// input without any repetition the longest examined length wins.
std::string find_most_useful_string(std::string const & doc)
{
    std::string_view const text(doc);

    std::size_t best_score = 0;
    std::string_view best_substr;

    for (std::size_t length = 1; length < text.size() / 2; ++length) {
        // Last valid window start; the bound is inclusive so the final
        // window of the document is counted as well.
        std::size_t const last_offset = text.size() - length;

        // Pass 1: count every window of this length. The keys are views
        // into `doc`, which outlives the map, so no substring is copied.
        std::unordered_map<std::string_view, std::size_t> counts;
        for (std::size_t offset = 0; offset <= last_offset; ++offset) {
            ++counts[text.substr(offset, length)];
        }

        // Pass 2: walk the windows in document order so that ties are broken
        // deterministically (earliest first occurrence wins) instead of
        // depending on hash-map iteration order.
        std::size_t max_count = 0;
        std::string_view max_substr;
        for (std::size_t offset = 0; offset <= last_offset; ++offset) {
            std::string_view const substr = text.substr(offset, length);
            std::size_t const count = counts.find(substr)->second;
            if (count > max_count) {
                max_count = count;
                max_substr = substr;
            }
        }

        std::cout << "len:" << length << " str:" << max_substr << " ct:" << max_count << '\n';

        // Stop as soon as the score drops; scores were non-decreasing up to
        // here, so `best_substr` already holds the best candidate.
        std::size_t const score = max_count * length * length;
        if (score < best_score) {
            break;
        }
        if (score > best_score) {
            best_score = score;
            best_substr = max_substr;
        }
    }

    return std::string(best_substr);
}