#include <iostream>
#include <unordered_map>
#include <string_view>
#include <fstream>
#include <sstream>
using strint_it = std::unordered_map<std::string,int>::iterator;
std::string_view find_most_useful_string(std::string const & doc)
{
// measure all strings in doc in a naive way, finding the one with the most repetitions.
// stop when repetitions * length^2 decreases
std::unordered_map<std::string_view, int> counts;
int length = 1;
std::string_view max_substr;
for (int length = 1; length < doc.size() / 2; ++ length) {
int max_count = 0;
for (int offset = 0; offset < doc.size() - length; ++ offset) {
std::string_view substr(doc.data() + offset, length);
if (counts.count(substr)) continue; // already ran thru with this one
int & count = counts[substr]; // reference to count for this substr
for (int offset2 = offset; offset2 < doc.size() - length; ++ offset2) {
std::string_view substr2(doc.data() + offset2, length);
if (substr == substr2) {
++ count;
if (count > max_count) {
max_count = count;
max_substr = substr;
}
}
}
}
if (max_count > 1)
std::cout << "len:" << length << " str:" << max_substr << " ct:" << max_count << std::endl;
}
return max_substr;
}
int main()
{
std::ifstream stream("useful-string.cpp");
std::ostringstream sstr;
sstr << stream.rdbuf();
find_most_useful_string(sstr.str());
}