[ot][spam][crazy][crazy][spam]

Karl Semich 0xloem at gmail.com
Wed May 25 13:21:56 PDT 2022


 #include <iostream>
#include <unordered_map>
#include <string_view>
#include <fstream>
#include <sstream>

using strint_it = std::unordered_map<std::string,int>::iterator;

std::string_view find_most_useful_string(std::string const & doc)
{
  // measure all strings in doc in a naive way, finding the one with the
most repetitions.
  // stop when repetitions * length^2 decreases

  std::unordered_map<std::string_view, int> counts;

  int length = 1;
  std::string_view max_substr;

  for (int length = 1; length < doc.size() / 2; ++ length) {

    int max_count = 0;

    for (int offset = 0; offset < doc.size() - length; ++ offset) {
      std::string_view substr(doc.data() + offset, length);

      if (counts.count(substr)) continue; // already ran thru with this one

      int & count = counts[substr]; // reference to count for this substr

      for (int offset2 = offset; offset2 < doc.size() - length; ++ offset2)
{
        std::string_view substr2(doc.data() + offset2, length);
        if (substr == substr2) {
          ++ count;

          if (count > max_count) {
            max_count = count;
            max_substr = substr;
          }
        }
      }
    }
    if (max_count > 1)
        std::cout << "len:" << length << " str:" << max_substr << " ct:" <<
max_count << std::endl;
  }
  return max_substr;
}

int main()
{
    std::ifstream stream("useful-string.cpp");
    std::ostringstream sstr;
    sstr << stream.rdbuf();
    find_most_useful_string(sstr.str());
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: text/html
Size: 2095 bytes
Desc: not available
URL: <https://lists.cpunks.org/pipermail/cypherpunks/attachments/20220525/6903c5af/attachment.txt>


More information about the cypherpunks mailing list