Tokenise text into word tokens with spaCy
"""Tokenise input strings into word-level tokens using spaCy."""

import spacy

# Load the medium English pipeline once at module level: model loading is
# expensive, so it must not be repeated per call to tokenise().
nlp = spacy.load('en_core_web_md')


def tokenise(words):
    """Tokenise each string in *words* with the spaCy pipeline.

    Args:
        words: iterable of strings; each string may contain several words.

    Returns:
        A flat list of token texts from all input strings, in input order.
    """
    # One flat pass: run each string through the pipeline and collect the
    # text of every resulting token.
    return [token.text for word in words for token in nlp(word)]


if __name__ == "__main__":
    sample_words = ["hi and hello", "thank you and goodnight"]
    result = tokenise(sample_words)
    print(result)
    # Expected output:
    # ['hi', 'and', 'hello', 'thank', 'you', 'and', 'goodnight']