[ot][spam][crazy][crazy][spam]
Karl Semich
0xloem at gmail.com
Wed May 25 01:53:11 PDT 2022
this is a fun algorithm challenge for confused-beyond-words
// Iterator type of a std::unordered_map keyed by std::string with int values.
using strint_it = std::unordered_map<std::string,int>::iterator;
// Naively searches `doc` for its "most useful" substring.
//
// For every substring length starting at 1 (and strictly below half the
// document size), the most frequently occurring substring of that length is
// found; overlapping occurrences are counted. Ties are broken in favour of the
// substring that appears first in `doc`. Each length is scored as
// `repetitions * length^2`, and the search stops as soon as that score
// decreases relative to the previous length.
//
// A progress line ("len:<n> str:<s> ct:<c>") is written to std::cout for every
// length examined.
//
// @param doc  text to analyse.
// @return     the winning substring for the last length examined before the
//             score decreased (or for the longest length examined if it never
//             decreased); an empty string when `doc` has fewer than four
//             characters, since no length is below half its size.
std::string find_most_useful_string(std::string const & doc)
{
    // All substrings are views into `doc`, which outlives every view used here.
    std::string_view const text{doc};

    std::string_view best;
    std::size_t best_score = 0;

    for (std::size_t length = 1; length < text.size() / 2; ++length) {
        // length < size/2, so this cannot underflow; the substring that starts
        // at last_offset is the final one and must be included.
        std::size_t const last_offset = text.size() - length;

        // One pass to tally every substring of this length.
        std::unordered_map<std::string_view, std::size_t> counts;
        for (std::size_t offset = 0; offset <= last_offset; ++offset) {
            ++counts[text.substr(offset, length)];
        }

        // Second pass in document order so that, among equally frequent
        // substrings, the earliest one wins.
        std::size_t max_count = 0;
        std::string_view max_substr;
        for (std::size_t offset = 0; offset <= last_offset; ++offset) {
            std::string_view const candidate = text.substr(offset, length);
            std::size_t const count = counts.find(candidate)->second;
            if (count > max_count) {
                max_count = count;
                max_substr = candidate;
            }
        }

        std::cout << "len:" << length << " str:" << max_substr
                  << " ct:" << max_count << '\n';

        // Longer strings are weighted quadratically; stop once growing the
        // length no longer pays for the repetitions it loses.
        std::size_t const score = max_count * length * length;
        if (score < best_score) {
            break;
        }
        best = max_substr;
        best_score = score;
    }

    return std::string(best);
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: text/html
Size: 2545 bytes
Desc: not available
URL: <https://lists.cpunks.org/pipermail/cypherpunks/attachments/20220525/c1aae882/attachment.txt>
More information about the cypherpunks
mailing list