# Tokenise words into individual tokens with spaCy.
import spacy
# Load the medium English pipeline once at module level (reused for every call).
# Requires the model to be installed: python -m spacy download en_core_web_md
nlp = spacy.load('en_core_web_md')
def tokenise(words):
    """Tokenise each string in *words* with spaCy and flatten the results.

    Args:
        words: An iterable of strings (each may contain several words).

    Returns:
        A single flat list of token texts, in input order.
    """
    # One nlp() call per input string; each Doc yields its tokens in order.
    return [token.text for word in words for doc in (nlp(word),) for token in doc]
# Demo entry point: only run when executed as a script, not on import.
if __name__ == "__main__":
    sample_words = ["hi and hello", "thank you and goodnight"]
    result = tokenise(sample_words)
    print(result)
# Expected output: ['hi', 'and', 'hello', 'thank', 'you', 'and', 'goodnight']