From d0a8934b7054fc20a5bc78f52fe05f33e89c9336 Mon Sep 17 00:00:00 2001 From: Houjun Liu Date: Sun, 22 Oct 2023 23:09:09 -0700 Subject: [PATCH] skip sample if we hit a sample which has no content --- stanza/models/pos/data.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/stanza/models/pos/data.py b/stanza/models/pos/data.py index 2c94377b57..c26e8d87da 100644 --- a/stanza/models/pos/data.py +++ b/stanza/models/pos/data.py @@ -1,3 +1,4 @@ +from os import set_inheritable import random import logging import torch @@ -196,6 +197,13 @@ def __getitem__(self, key): # get each character from the input sentnece chars = [w for sent in char for w in sent] + # augmentation, etc. resulted in nothing; we return a different sample + if len(chars) == 0: + if key == 0: + return self[-1] + else: + return self[key-1] + return DataSample(words, chars, upos, xpos, ufeats, pretrained, sample[6]), key def __iter__(self):