From 132072fe5c611e110adff9e588273a9057068060 Mon Sep 17 00:00:00 2001 From: Ivan Vankov Date: Thu, 27 Apr 2023 12:19:54 +0300 Subject: [PATCH 1/2] limit max col rank to 255 --- .../com/johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala index d6aba2e78b1fb9..32fd750ff36dc2 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala @@ -187,6 +187,7 @@ class TapasEncoder( protected val MAX_YEAR = 2120 protected val MIN_NUMBER_OF_ROWS_WITH_VALUES_PROPORTION = 0.5f + protected val MAX_COLUMN_RANK = 255 protected val ORDINAL_SUFFIXES: Array[String] = Array("st", "nd", "rd", "th") protected val NUMBER_WORDS: Array[String] = Array( @@ -525,9 +526,9 @@ class TapasEncoder( columnIds = setMaxSentenceLimit(emptyTokenTypes ++ columnIds ++ padding), rowIds = setMaxSentenceLimit(emptyTokenTypes ++ rowIds ++ padding), prevLabels = setMaxSentenceLimit(emptyTokenTypes ++ prevLabels ++ padding), - columnRanks = setMaxSentenceLimit(emptyTokenTypes ++ columnRanks ++ padding), + columnRanks = setMaxSentenceLimit(emptyTokenTypes ++ columnRanks.map(x => scala.math.min(x, MAX_COLUMN_RANK)) ++ padding), invertedColumnRanks = - setMaxSentenceLimit(emptyTokenTypes ++ invertedColumnRanks ++ padding), + setMaxSentenceLimit(emptyTokenTypes ++ invertedColumnRanks.map(x => scala.math.min(x, MAX_COLUMN_RANK)) ++ padding), numericRelations = setMaxSentenceLimit(emptyTokenTypes ++ numericRelations ++ padding)) } From 61301855d8e1b5dea94b882c88c0368194a01d5a Mon Sep 17 00:00:00 2001 From: Ivan Vankov Date: Thu, 27 Apr 2023 13:19:19 +0300 Subject: [PATCH 2/2] prettify --- .../johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala index 32fd750ff36dc2..d178bf11f06313 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/tapas/TapasEncoder.scala @@ -526,9 +526,10 @@ class TapasEncoder( columnIds = setMaxSentenceLimit(emptyTokenTypes ++ columnIds ++ padding), rowIds = setMaxSentenceLimit(emptyTokenTypes ++ rowIds ++ padding), prevLabels = setMaxSentenceLimit(emptyTokenTypes ++ prevLabels ++ padding), - columnRanks = setMaxSentenceLimit(emptyTokenTypes ++ columnRanks.map(x => scala.math.min(x, MAX_COLUMN_RANK)) ++ padding), - invertedColumnRanks = - setMaxSentenceLimit(emptyTokenTypes ++ invertedColumnRanks.map(x => scala.math.min(x, MAX_COLUMN_RANK)) ++ padding), + columnRanks = setMaxSentenceLimit( + emptyTokenTypes ++ columnRanks.map(x => scala.math.min(x, MAX_COLUMN_RANK)) ++ padding), + invertedColumnRanks = setMaxSentenceLimit(emptyTokenTypes ++ invertedColumnRanks.map(x => + scala.math.min(x, MAX_COLUMN_RANK)) ++ padding), numericRelations = setMaxSentenceLimit(emptyTokenTypes ++ numericRelations ++ padding)) }