Cross-posts happened to other thread. This looks useful:

[1]https://github.com/google/sentencepiece/blob/master/python/README.md#training-without-local-filesystem

import urllib.request
import io
import sentencepiece as spm

# Loads model from URL as iterator and stores the model to BytesIO.
model = io.BytesIO()
with urllib.request.urlopen(
    '[2]https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt'
) as response:
    spm.SentencePieceTrainer.train(
        sentence_iterator=response, model_writer=model, vocab_size=1000)

# Serialize the model as file.
# with open('out.model', 'wb') as f:
#     f.write(model.getvalue())

# Directly load the model from serialized model.
sp = spm.SentencePieceProcessor(model_proto=model.getvalue())
print(sp.encode('this is test'))

References

1. https://github.com/google/sentencepiece/blob/master/python/README.md#training-without-local-filesystem
2. https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt