
it looks like it could work :) i did a top_k of 16 and a very simple loop that fetches each index individually. it's miles faster, and there are many avenues for tuning. expecting to see garbage at the end tho :/ one of the interesting things is that it is actually downloading and storing these weights in sparse files ... it's really cool to see it get near something useful: it processes like 10 (maybe 20) layers and then gets rate-limited, which i haven't handled. this might be fixed with proper range headers. [it looks really interesting to implement sparse indices, which nettensor does not yet support, but that's a different task from this one; it will take looking at the tensor internals to see if the sparsity is exposed somewhere.] def F_linear(input, weight, bias=None): #import pdb; pdb.set_trace() cls = NetTensor assert type(weight) is cls and type(input) is torch.Tensor assert bias is None or type(bias) is cls name = weight.safeslice.name.rsplit('.',1)[0] top_k = 16 #number_passes = math.ceil(weight.mem_usage_frac()) number_passes = math.ceil(weight[...,:top_k].mem_usage_frac()) input_mask_data = input.abs() while len(input_mask_data.shape) > 1: input_mask_data = input_mask_data.max(dim=-2).values top_k_indices = input_mask_data.sort(descending=True).indices[:top_k] top_k_indices = top_k_indices.sort().values input = input[...,top_k_indices] if number_passes == 1: product = torch.matmul( input, #weight.fetch(progress=name, validate_usage=False).T torch.stack([ weight[..., index].fetch(validate_usage=False) for index in top_k_indices.tolist() ], dim=-1).T, ) else: rows_at_once = math.ceil(weight.shape[0] / number_passes) product = torch.cat([ torch.matmul( input, # weight[offset : offset+rows_at_once].fetch(progress=f'row{offset}-{offset+rows_at_once}/{weight.shape[0]}', validate_usage=False).T torch.stack([ weight[offset : offset+rows_at_once, index].fetch(validate_usage=False) for index in top_k_indices.tolist() ], dim=-1).T, ) for offset in tqdm.tqdm(range( 0, weight.shape[0], 
rows_at_once ), desc=name, unit='blk', leave=False) ], dim=-1) if bias is None: return product File "/home/karl3/projects/httptransformer/netsafetensors.py", line 57, in read assert readsize == length ^^^^^^^^^^^^^^^^^^ AssertionError (Pdb) p buf[:readsize].tobytes().decode() '<!DOCTYPE html>\n<html class="" lang="en">\n<head>\n <meta charset="utf-8" />\n <meta\n name="viewport"\n content="width=device-width, initial-scale=1.0, user-scalable=no"\n />\n <meta\n name="description"\n content="We\'re on a journey to advance and democratize artificial intelligence through open source and open science."\n />\n <meta property="fb:app_id" content="1321688464574422" />\n <meta name="twitter:card" content="summary_large_image" />\n <meta name="twitter:site" content="@huggingface" />\n <meta\n property="og:title"\n content="Hugging Face - The AI community building the future."\n />\n <meta property="og:type" content="website" />\n\n <title>Hugging Face - The AI community building the future.</title>\n <style>\n body {\n margin: 0;\n }\n\n main {\n background-color: white;\n min-height: 100vh;\n padding: 7rem 1rem 8rem 1rem;\n text-align: center;\n font-family: Source Sans Pro, ui-sans-serif, system-ui, -apple-system,\n BlinkMacSystemFont, Segoe UI, Roboto, Helvetica Neue, Arial, Noto Sans,\n sans-serif, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol,\n Noto Color Emoji;\n }\n\n img {\n width: 6rem;\n height: 6rem;\n margin: 0 auto 1rem;\n }\n\n h1 {\n font-size: 3.75rem;\n line-height: 1;\n color: rgba(31, 41, 55, 1);\n font-weight: 700;\n box-sizing: border-box;\n margin: 0 auto;\n }\n\n p, a {\n color: rgba(107, 114, 128, 1);\n font-size: 1.125rem;\n line-height: 1.75rem;\n max-width: 28rem;\n box-sizing: border-box;\n margin: 0 auto;\n }\n\n .dark main {\n background-color: rgb(11, 15, 25);\n }\n .dark h1 {\n color: rgb(209, 213, 219);\n }\n .dark p, .dark a {\n color: rgb(156, 163, 175);\n }\n </style>\n <script>\n // On page load or when changing themes, best to add 
inline in `head` to avoid FOUC\n const key = "_tb_global_settings";\n let theme = window.matchMedia("(prefers-color-scheme: dark)").matches\n ? "dark"\n : "light";\n try {\n const storageTheme = JSON.parse(window.localStorage.getItem(key)).theme;\n if (storageTheme) {\n theme = storageTheme === "dark" ? "dark" : "light";\n }\n } catch (e) {}\n if (theme === "dark") {\n document.documentElement.classList.add("dark");\n } else {\n document.documentElement.classList.remove("dark");\n }\n </script>\n</head>\n\n<body>\n<main>\n <img\n src="https://cdn-media.huggingface.co/assets/huggingface_logo.svg"\n alt=""\n />\n <div>\n <h1>429</h1>\n <p>We had to rate limit you. If you think it\'s an error, send us <a href="mailto:website@huggingface.co">an email</a></p>\n </div>\n</main>\n</body>\n</html>'