# This goes in colab notebook, referenced at [git notes url in a thread].
#
# Trains a SentencePiece model from a text corpus streamed over HTTP, keeps the
# serialized model in memory, loads it back into a processor, and prints the
# encoding of a sample sentence.

import io
import urllib.request

import sentencepiece as spm

# Training corpus; same address as entry 1 in the References list at the
# bottom of this file.
CORPUS_URL = (
    'https://raw.githubusercontent.com/google/sentencepiece/master/data/'
    'botchan.txt'
)

# Loads model from URL as iterator and stores the model to BytesIO.
model = io.BytesIO()
with urllib.request.urlopen(CORPUS_URL) as response:
    # The HTTP response object is passed directly as the sentence iterator,
    # and the trained model is written into the in-memory buffer.
    spm.SentencePieceTrainer.train(
        sentence_iterator=response,
        model_writer=model,
        vocab_size=1000,
    )

# Serialize the model as file.
# with open('out.model', 'wb') as f:
#     f.write(model.getvalue())

# Directly load the model from serialized model.
sp = spm.SentencePieceProcessor(model_proto=model.getvalue())
print(sp.encode('this is test'))

# References
#
# 1. https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt