[ot][spam][crazy][crazy][spam]
Karl Semich
0xloem at gmail.com
Wed May 25 13:21:56 PDT 2022
#include <cstddef>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <string_view>
#include <unordered_map>
// Iterator type of a std::string -> int hash map.
// NOTE(review): nothing in the code visible here refers to this alias; confirm
// it is still needed before keeping it.
using strint_it = std::unordered_map<std::string,int>::iterator;
// Finds the repeated substring of `doc` with the highest "usefulness" score,
// where score = (number of occurrences) * length^2.
//
// Every length from 1 up to (but not including) doc.size() / 2 is examined;
// the scan does not stop early. For each length, all windows of that length
// are counted (overlapping occurrences each count) and the most frequent one
// becomes that length's candidate; among equally frequent substrings the one
// whose first occurrence is earliest in `doc` wins. A candidate that occurs
// more than once is printed to std::cout as "len:<n> str:<s> ct:<c>" and
// competes for the best overall score. On equal scores the candidate found
// first (the shorter one) is kept.
//
// Returns a view into `doc`, or an empty view when no substring repeats.
// `doc` must outlive the returned view.
std::string_view find_most_useful_string(std::string const & doc)
{
  std::size_t const doc_size = doc.size();
  std::string_view best_substr;
  unsigned long long best_score = 0;

  for (std::size_t length = 1; length < doc_size / 2; ++ length) {
    // Number of windows of this length, including the one that ends exactly
    // at the end of the document. length < doc_size here, so no underflow.
    std::size_t const window_count = doc_size - length + 1;

    // Pass 1: tally every window of this length in a single sweep.
    std::unordered_map<std::string_view, std::size_t> counts;
    for (std::size_t offset = 0; offset < window_count; ++ offset) {
      ++ counts[std::string_view(doc.data() + offset, length)];
    }

    // Pass 2: walk the windows in document order so that ties are resolved
    // in favour of the substring that appears first (map iteration order is
    // unspecified and would make the result non-deterministic).
    std::size_t max_count = 0;
    std::string_view max_substr;
    for (std::size_t offset = 0; offset < window_count; ++ offset) {
      std::string_view const substr(doc.data() + offset, length);
      std::size_t const count = counts.find(substr)->second;
      if (count > max_count) {
        max_count = count;
        max_substr = substr;
      }
    }

    // Substrings that occur only once are not "repetitions"; ignore them.
    if (max_count > 1) {
      // std::endl flushes, so progress shows up while longer lengths run.
      std::cout << "len:" << length << " str:" << max_substr << " ct:" <<
        max_count << std::endl;

      unsigned long long const score =
        static_cast<unsigned long long>(max_count) * length * length;
      if (score > best_score) {
        best_score = score;
        best_substr = max_substr;
      }
    }
  }
  return best_substr;
}
// Entry point: reads the file "useful-string.cpp" (path relative to the
// current working directory) into memory and runs find_most_useful_string()
// over its contents. The returned view is not used here.
//
// Returns 1 with a message on std::cerr if the file cannot be opened,
// 0 otherwise.
int main()
{
  std::ifstream stream("useful-string.cpp");
  if (!stream) {
    // Without this check a missing file would be analysed as an empty
    // document and the program would exit silently.
    std::cerr << "cannot open useful-string.cpp" << std::endl;
    return 1;
  }

  std::ostringstream sstr;
  sstr << stream.rdbuf();

  // Hold the text in a named string: the function returns a view into its
  // argument, which would dangle if a temporary were passed and the result
  // were ever kept.
  std::string const doc = sstr.str();
  find_most_useful_string(doc);
  return 0;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: text/html
Size: 2095 bytes
Desc: not available
URL: <https://lists.cpunks.org/pipermail/cypherpunks/attachments/20220525/6903c5af/attachment.txt>
More information about the cypherpunks
mailing list