Skip to content

Commit

Permalink
fix(predictions): TABLE, CELL & KEY_VALUE_SET blocks are not properly…
Browse files Browse the repository at this point in the history
… processed (#660)

* in the middle of prediction bug fix

* fix processText

* fix processText

* stash

* added missing table, cell processing and keyValueSet processing

* Updated integration tests

* removed configuration file

* 1st: fix PR comments

* 2nd: fix PR comments

* removed one comment and changed the name of three functions

* some minor comments fix

* Added IdentifyTextResultTransformers+Tables.swift and IdentifyTextResultTransformers.swift

* removed Podfile.lock

* Added podfile.lock

* disable empty_enum_arguments

* fix one empty_enum_type
  • Loading branch information
ruiguoamz authored Aug 10, 2020
1 parent 62d8c0c commit 8b63b70
Show file tree
Hide file tree
Showing 17 changed files with 608 additions and 490 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ class IdentifyResultTransformers {
points.append(point)
}
return Polygon(points: points)

}

static func processPolygon(_ textractPolygonPoints: [AWSTextractPoint]?) -> Polygon? {
Expand All @@ -64,7 +63,6 @@ class IdentifyResultTransformers {
points.append(point)
}
return Polygon(points: points)

}

// swiftlint:disable cyclomatic_complexity
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
//
// Copyright 2018-2020 Amazon.com,
// Inc. or its affiliates. All Rights Reserved.
//
// SPDX-License-Identifier: Apache-2.0
//

import Foundation
import Amplify
import AWSTextract

extension IdentifyTextResultTransformers {

/// Converts a list of key/value-set blocks into `BoundedKeyValue` results.
///
/// - Parameters:
///   - keyValueBlocks: Blocks to convert; each one is handed to `processKeyValue`.
///   - blockMap: Blocks keyed by id, forwarded unchanged to `processKeyValue`.
/// - Returns: One entry per block that `processKeyValue` could convert, in input order.
///   Blocks for which it returns `nil` are left out.
static func processKeyValues(keyValueBlocks: [AWSTextractBlock],
                             blockMap: [String: AWSTextractBlock]) -> [BoundedKeyValue] {
    return keyValueBlocks.compactMap { block in
        processKeyValue(block, blockMap: blockMap)
    }
}

/// Builds a `BoundedKeyValue` from the key block of a key/value set.
///
/// - Parameters:
///   - keyBlock: Candidate block. It must have block type `.keyValueSet`, list "KEY"
///     among its entity types, and carry relationships.
///   - blockMap: Blocks keyed by id, used by the helpers to resolve related block ids.
/// - Returns: The key text, value text and selection flag together with the key block's
///   bounding box and polygon, or `nil` when the block is not a usable key block or its
///   bounding box or polygon cannot be converted.
static func processKeyValue(_ keyBlock: AWSTextractBlock,
                            blockMap: [String: AWSTextractBlock]) -> BoundedKeyValue? {
    guard keyBlock.blockType == .keyValueSet,
        keyBlock.entityTypes?.contains("KEY") == true,
        let relationships = keyBlock.relationships else {
            return nil
    }

    var key = ""
    var value = ""
    var isSelected = false

    for relationship in relationships {
        // A relationship without ids has nothing to resolve.
        guard let relatedIds = relationship.ids else {
            continue
        }

        let kind = relationship.types
        if kind == .child {
            // Child ids point at the blocks that spell out the key.
            key = processChildOfKeyValueSet(ids: relatedIds, blockMap: blockMap)
        } else if kind == .value {
            // Value ids point at the value blocks paired with this key.
            (value, isSelected) = processValueOfKeyValueSet(ids: relatedIds, blockMap: blockMap)
        }
    }

    // Geometry is converted last, and the bounding box is checked before the polygon.
    guard let boundingBox = processBoundingBox(keyBlock.geometry?.boundingBox),
        let polygon = processPolygon(keyBlock.geometry?.polygon) else {
            return nil
    }

    return BoundedKeyValue(key: key,
                           value: value,
                           isSelected: isSelected,
                           boundingBox: boundingBox,
                           polygon: polygon)
}

/// Assembles the key text from the child blocks of a key block.
///
/// - Parameters:
///   - ids: Ids of the child blocks to look up.
///   - blockMap: Blocks keyed by id.
/// - Returns: The text of every resolvable `.word` block that has text, joined with single
///   spaces and trimmed of surrounding whitespace and newlines. Empty when nothing matches.
static func processChildOfKeyValueSet(ids: [String],
                                      blockMap: [String: AWSTextractBlock]) -> String {
    let words: [String] = ids.compactMap { id in
        // Skip unknown ids, blocks without text, and anything that is not a word.
        guard let block = blockMap[id],
            let text = block.text,
            block.blockType == .word else {
                return nil
        }
        return text
    }
    return words
        .joined(separator: " ")
        .trimmingCharacters(in: .whitespacesAndNewlines)
}

/// Collects the text and selection state of the value blocks paired with a key block.
///
/// - Parameters:
///   - ids: Ids of the value blocks to inspect.
///   - blockMap: Blocks keyed by id, used to resolve both the value blocks and their children.
/// - Returns: A tuple of the text of all `.word` children (space separated, trimmed of
///   surrounding whitespace and newlines) and a flag that is `true` when the first
///   `.selectionElement` child found has selection status `.selected`.
static func processValueOfKeyValueSet(ids: [String],
                                      blockMap: [String: AWSTextractBlock]) -> (String, Bool) {
    var valueText = ""
    var isSelected = false
    var selectionItemFound = false

    for valueId in ids {
        guard let valueBlock = blockMap[valueId],
            let valueBlockRelations = valueBlock.relationships else {
                continue
        }

        for valueBlockRelation in valueBlockRelations {
            // Skip only this relationship when it has no ids. Leaving the loop here would
            // silently drop every remaining relationship of the value block.
            guard let wordBlockIds = valueBlockRelation.ids else {
                continue
            }

            for wordBlockId in wordBlockIds {
                guard let wordBlock = blockMap[wordBlockId] else {
                    continue
                }

                switch wordBlock.blockType {
                case .word:
                    if let text = wordBlock.text {
                        valueText += text + " "
                    }
                case .selectionElement:
                    if !selectionItemFound {
                        selectionItemFound = true
                        //TODO: https://github.com/aws-amplify/amplify-ios/issues/695
                        // Support multiple selection items found in a KeyValueSet
                        isSelected = wordBlock.selectionStatus == .selected
                    } else {
                        // Only the first selection element decides `isSelected`.
                        Amplify.log.error("Multiple selection items found in KeyValueSet")
                    }
                default:
                    break
                }
            }
        }
    }
    return (valueText.trimmingCharacters(in: .whitespacesAndNewlines), isSelected)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
//
// Copyright 2018-2020 Amazon.com,
// Inc. or its affiliates. All Rights Reserved.
//
// SPDX-License-Identifier: Apache-2.0
//

import Foundation
import Amplify
import AWSTextract

extension IdentifyTextResultTransformers {

/// Converts a list of table blocks into `Table` results.
///
/// - Parameters:
///   - tableBlocks: Blocks to convert; each one is handed to `processTable`.
///   - blockMap: Blocks keyed by id, forwarded unchanged to `processTable`.
/// - Returns: One `Table` per block that `processTable` could convert, in input order.
///   Blocks for which it returns `nil` are left out.
static func processTables(tableBlocks: [AWSTextractBlock],
                          blockMap: [String: AWSTextractBlock]) -> [Table] {
    return tableBlocks.compactMap { block in
        processTable(block, blockMap: blockMap)
    }
}

/// Builds a `Table` from a table block and the cell blocks it references.
///
/// Every referenced cell that `constructTableCell` can convert is appended once. A cell
/// whose (row, column) position was already added is skipped. `rows` and `columns` are
/// the numbers of distinct row and column indexes among the added cells.
///
/// - Parameters:
///   - tableBlock: Block expected to have block type `.table` and to carry relationships.
///   - blockMap: Blocks keyed by id, used to resolve the cell ids.
/// - Returns: The assembled table, or `nil` when `tableBlock` has no relationships or is
///   not a table block.
static func processTable(_ tableBlock: AWSTextractBlock,
                         blockMap: [String: AWSTextractBlock]) -> Table? {
    // Zero-based grid coordinate of a cell, used to detect duplicates.
    struct CellPosition: Hashable {
        let row: Int
        let column: Int
    }

    guard let relationships = tableBlock.relationships,
        case .table = tableBlock.blockType else {
            return nil
    }

    var table = Table()
    var rows = Set<Int>()
    var cols = Set<Int>()
    var seenPositions = Set<CellPosition>()

    for tableRelation in relationships {
        guard let cellIds = tableRelation.ids else {
            continue
        }

        for cellId in cellIds {
            guard let cellBlock = blockMap[cellId],
                let rowIndex = cellBlock.rowIndex,
                let colIndex = cellBlock.columnIndex else {
                    continue
            }

            // textract starts indexing at 1, so subtract it by 1.
            let row = Int(truncating: rowIndex) - 1
            let col = Int(truncating: colIndex) - 1
            let position = CellPosition(row: row, column: col)

            // De-duplicate on the full (row, column) pair. Checking rows and columns
            // separately would reject every cell sharing a row or column with an
            // earlier one, leaving only a fraction of the table.
            guard !seenPositions.contains(position),
                let cell = constructTableCell(cellBlock, blockMap) else {
                    continue
            }

            seenPositions.insert(position)
            table.cells.append(cell)
            rows.insert(row)
            cols.insert(col)
        }
    }

    table.rows = rows.count
    table.columns = cols.count
    return table
}

/// Builds a `Table.Cell` from a cell block and the word / selection blocks it references.
///
/// - Parameters:
///   - block: Block expected to have block type `.cell`, with row span, column span and
///     geometry (bounding box and polygon) present.
///   - blockMap: Blocks keyed by id, used to resolve the cell's child ids.
/// - Returns: The cell with the text of its `.word` children (space separated, trimmed of
///   surrounding whitespace and newlines) and a selection flag taken from the first
///   `.selectionElement` child, or `nil` when the block is not a cell, lacks span or
///   geometry data, or its geometry cannot be converted.
static func constructTableCell(_ block: AWSTextractBlock, _ blockMap: [String: AWSTextractBlock]) -> Table.Cell? {
    guard block.blockType == .cell,
        let rowSpan = block.rowSpan,
        let columnSpan = block.columnSpan,
        let geometry = block.geometry,
        let textractBoundingBox = geometry.boundingBox,
        let textractPolygon = geometry.polygon
        else {
            return nil
    }

    var words = ""
    var isSelected = false
    var selectionItemFound = false

    // A cell without relationships has no children; it is kept as a cell with empty text
    // instead of being dropped, so the table does not lose grid positions.
    for cellRelation in block.relationships ?? [] {
        guard let wordOrSelectionIds = cellRelation.ids else {
            continue
        }

        for wordOrSelectionId in wordOrSelectionIds {
            guard let childBlock = blockMap[wordOrSelectionId] else {
                continue
            }

            switch childBlock.blockType {
            case .word:
                // A word without text is skipped rather than invalidating the whole cell,
                // matching how key/value text is assembled.
                if let text = childBlock.text {
                    words += text + " "
                }
            case .selectionElement:
                if !selectionItemFound {
                    selectionItemFound = true
                    //TODO: https://github.com/aws-amplify/amplify-ios/issues/695
                    // Support multiple selection items found in a table cell
                    // The status is read from the selection element itself, not from
                    // the enclosing cell block.
                    isSelected = childBlock.selectionStatus == .selected
                } else {
                    Amplify.log.error("Multiple selection items found in table cell")
                }
            default:
                break
            }
        }
    }

    guard let boundingBox = processBoundingBox(textractBoundingBox),
        let polygon = processPolygon(textractPolygon) else {
            return nil
    }

    return Table.Cell(text: words.trimmingCharacters(in: .whitespacesAndNewlines),
                      boundingBox: boundingBox,
                      polygon: polygon,
                      isSelected: isSelected,
                      rowSpan: Int(truncating: rowSpan),
                      columnSpan: Int(truncating: columnSpan))
}
}
Loading

0 comments on commit 8b63b70

Please sign in to comment.