Skip to content

Commit

Permalink
Merge pull request #43 from mimiro-io/feature/simple-property-rename
Browse files Browse the repository at this point in the history
A number of features added
  • Loading branch information
gra-moore authored Mar 2, 2023
2 parents 3f96f5b + cfe9d70 commit 4923d9a
Show file tree
Hide file tree
Showing 5 changed files with 258 additions and 17 deletions.
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,25 @@ Depending on storage type and security requirements the configuration of each da
"idProperty": "field1",
"refs": [
"field2"
]
],
"columnMappings": {
"Lat." : "latitude",
"Long." : "longitude"
},
"columnTypes" : {
"coordinates" : "float",
"latitude" : "float",
"longitude" : "float"
},
"columnConcats" : {
"coordinates" : ["Lat.", "Long."]
},
"listValueColumns" : {
"coordinates" : ","
},
"defaults" : {
"type" : "feature"
}
},
"flatFile": {
"fields": {
Expand Down Expand Up @@ -257,6 +275,11 @@ property name | description
`decode.idProperty` | UDA entities require an `id` field. This field declares which object key to fetch the id value from. value prefixes from correlating `propertyPrefix` settings are also applied to the id value.
`decode.refs` | list of object keys that should be placed into refs instead of props. prefixes from propertiesPrefixes are still applied.
`decode.defaultNamespace` | One of the defined namespaces under `decode.namespaces`. Will be used for all properties not specified under `decode.propertyPrefixes`.
`decode.defaults` | Map of default values for properties. Each key/value pair in this object is added to each row before the row is converted to an entity, overwriting any existing value with the same key.
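`decode.listValueColumns` | Map of object keys that should be interpreted as list values. The value of each key is the separator used to split the raw value. Each split value is trimmed of surrounding whitespace, and the resulting values are added to the entity as an array. Keys refer to column names after `decode.columnMappings` has been applied.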
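`decode.columnTypes` | Map of object keys that should be converted to a specific type. The value of each key is the type to use. Supported types are `int`, `float`, and `bool`; any other type results in an error. For columns that are also listed in `decode.listValueColumns`, the conversion is applied to every element of the list. Keys refer to column names after `decode.columnMappings` has been applied.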
`decode.columnMappings` | Map of object keys that should be renamed. The value of each key is the new name to be used. The mapping occurs before any prefixes are applied, so `decode.propertyPrefixes` must refer to the new name.
`decode.columnConcats` | Map of new object keys that should be created by concatenation. The value of each key is the list of fields to be concatenated, in order; fields that are missing from a row are skipped. ',' is used as a separator. The concatenation occurs before any prefixes or mappings are applied.
`flatFile.fields` | Map of field configs. The key will be the property name in the output entity.
`flatFile.fields.substring` | A two-dimensional array to declare string indices to use in substring. i.e. [[0,5]]
`flatFile.fields.type` | Declare type of the parsed field. Available types are string,int,float,date. Default: string.
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -604,8 +604,6 @@ github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182aff
github.com/maxbrunsfeld/counterfeiter/v6 v6.5.0 h1:rBhB9Rls+yb8kA4x5a/cWxOufWfXt24E+kq4YlbGj3g=
github.com/maxbrunsfeld/counterfeiter/v6 v6.5.0/go.mod h1:fJ0UAZc1fx3xZhU4eSHQDJ1ApFmTVhp5VTpV9tm2ogg=
github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
github.com/mimiro-io/internal-go-util v0.0.0-20230103152345-7047a6298002 h1:ml8fEVYaGMHFXyODPVGqEv0yPgMWnhcZ1V8XlZfB/FE=
github.com/mimiro-io/internal-go-util v0.0.0-20230103152345-7047a6298002/go.mod h1:pkhOI7DpjtgAU3OcGi8LK4psGqVBGrxdt1o4z7GxvQg=
github.com/mimiro-io/internal-go-util v0.0.0-20230104075648-dc4d57772066 h1:fieRtrTLC4yaiFHlVsnielIotSXPpxt9SCRosGFWCFQ=
github.com/mimiro-io/internal-go-util v0.0.0-20230104075648-dc4d57772066/go.mod h1:pkhOI7DpjtgAU3OcGi8LK4psGqVBGrxdt1o4z7GxvQg=
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
Expand Down
17 changes: 11 additions & 6 deletions internal/conf/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,17 @@ type StorageBackend struct {
LocalFileConfig *LocalFileConfig `json:"localfileconfig"`
}
type DecodeConfig struct {
Namespaces map[string]string `json:"namespaces"`
PropertyPrefixes map[string]string `json:"propertyPrefixes"`
Refs []string `json:"refs"`
IdProperty string `json:"idProperty"`
DefaultNamespace string `json:"defaultNamespace"`
IgnoreColumns []string `json:"ignoreColumns"`
Namespaces map[string]string `json:"namespaces"`
PropertyPrefixes map[string]string `json:"propertyPrefixes"`
Refs []string `json:"refs"`
IdProperty string `json:"idProperty"`
DefaultNamespace string `json:"defaultNamespace"`
IgnoreColumns []string `json:"ignoreColumns"`
ColumnMappings map[string]string `json:"columnMappings"`
ColumnTypes map[string]string `json:"columnTypes"`
ListValueColumns map[string]string `json:"listValueColumns"`
Defaults map[string]string `json:"defaults"`
ConcatColumns map[string][]string `json:"columnConcats"`
}
type LocalFileConfig struct {
RootFolder string `json:"rootfolder"`
Expand Down
114 changes: 108 additions & 6 deletions internal/encoder/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"strconv"
"strings"

"github.com/mimiro-io/objectstorage-datalayer/internal/conf"
"go.uber.org/zap"
"golang.org/x/exp/slices"
"io"
"strings"
)

type EncodingEntityReader interface {
Expand Down Expand Up @@ -45,15 +47,46 @@ func toEntityBytes(line map[string]interface{}, backend conf.StorageBackend) ([]
newProps := map[string]interface{}{}
newRefs := map[string]interface{}{}

// add defaults if defined - this overwrites any existing values
if backend.DecodeConfig != nil && backend.DecodeConfig.Defaults != nil {
for k, v := range backend.DecodeConfig.Defaults {
line[k] = v
}
}

// iterate the concat columns and concat them into new fields
if backend.DecodeConfig != nil && backend.DecodeConfig.ConcatColumns != nil {
for k, v := range backend.DecodeConfig.ConcatColumns {
var sb strings.Builder
first := true
for _, col := range v {
if val, ok := line[col]; ok {
if !first {
sb.WriteString(",")
}
first = false
sb.WriteString(val.(string))
}
}
line[k] = sb.String()
}
}

ignoreColums := backend.DecodeConfig.IgnoreColumns
for k, v := range line {
if slices.Contains(ignoreColums, k) {
continue
}
if isRef(backend, k) {
withPrefix(newRefs, backend, k, v)
_, err = withPrefix(newRefs, backend, k, v)
if err != nil {
return nil, err
}
} else {
withPrefix(newProps, backend, k, v)
_, err = withPrefix(newProps, backend, k, v)
if err != nil {
return nil, err
}
}
}

Expand Down Expand Up @@ -81,8 +114,77 @@ func isRef(backend conf.StorageBackend, k string) bool {
return false
}

func withPrefix(m map[string]interface{}, backend conf.StorageBackend, k string, v interface{}) string {
func withPrefix(m map[string]interface{}, backend conf.StorageBackend, k string, v interface{}) (string, error) {

if backend.DecodeConfig != nil {
if backend.DecodeConfig.ColumnMappings != nil {
if mapped, ok := backend.DecodeConfig.ColumnMappings[k]; ok {
k = mapped
}
}

// if the value is multivalue process that first
isMultiValue := false
if backend.DecodeConfig.ListValueColumns != nil {
if mapped, ok := backend.DecodeConfig.ListValueColumns[k]; ok {
sv := v.(string)
if sv != "" {
tv := strings.Split(sv, mapped)
vs := make([]string, 0)
for _, s := range tv {
vs = append(vs, strings.TrimSpace(s))
}
v = vs
}
isMultiValue = true
}
}

// convert the value to the correct type - if list then apply to all values
if backend.DecodeConfig.ColumnTypes != nil {
if mapped, ok := backend.DecodeConfig.ColumnTypes[k]; ok {
if isMultiValue {
sv := v.([]string)
if mapped == "int" {
iv := make([]int, 0)
for _, s := range sv {
vv, _ := strconv.Atoi(s)
iv = append(iv, vv)
}
v = iv
} else if mapped == "float" {
iv := make([]float64, 0)
for _, s := range sv {
vv, _ := strconv.ParseFloat(s, 64)
iv = append(iv, vv)
}
v = iv
} else if mapped == "bool" {
iv := make([]bool, 0)
for _, s := range sv {
vv, _ := strconv.ParseBool(s)
iv = append(iv, vv)
}
v = iv
} else {
return "", errors.New(fmt.Sprintf("Unsupported type %v for column %v", mapped, k))
}
} else {
sv := v.(string)
switch mapped {
case "int":
v, _ = strconv.Atoi(sv)
case "float":
v, _ = strconv.ParseFloat(sv, 64)
case "bool":
v, _ = strconv.ParseBool(sv)
default:
return "", errors.New(fmt.Sprintf("Unsupported type %v for column %v", mapped, k))
}
}
}
}

prefixConfig, exist := backend.DecodeConfig.PropertyPrefixes[k]
if exist {
keyPrefix, valuePrefix := prefixValues(prefixConfig)
Expand All @@ -92,7 +194,7 @@ func withPrefix(m map[string]interface{}, backend conf.StorageBackend, k string,
}
}

return k
return k, nil
}

func wrap(value interface{}, prefix string) interface{} {
Expand Down
117 changes: 115 additions & 2 deletions internal/encoder/decoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,129 @@ package encoder
import (
"encoding/csv"
"encoding/json"
"github.com/franela/goblin"
"github.com/mimiro-io/objectstorage-datalayer/internal/conf"
"io"
"strings"
"testing"

"github.com/franela/goblin"
"github.com/mimiro-io/objectstorage-datalayer/internal/conf"
)

func TestDecodeLine(t *testing.T) {
g := goblin.Goblin(t)
g.Describe("The toEntityBytes function", func() {
// column types
g.It("Should return mapped columns", func() {
input := `{"id": "1", "name": "Hank", "age": "42", "distance": "1.5"}`
expected := `{"id":"a:1", "deleted": false, "refs":{}, "props":{"a:id": "a:1", "a:name": "Hank", "a:age": 42, "a:distance": 1.5}}`
backend := conf.StorageBackend{StripProps: true, DecodeConfig: &conf.DecodeConfig{
Namespaces: nil,
PropertyPrefixes: map[string]string{"id": "a:a", "name": "a", "age": "a", "distance": "a"},
IdProperty: "id",
ColumnTypes: map[string]string{"age": "int", "distance": "float"},
}}
var m map[string]interface{}
json.Unmarshal([]byte(input), &m)
result, err := toEntityBytes(m, backend)
var resultMap map[string]interface{}
json.Unmarshal(result, &resultMap)
var expectedMap map[string]interface{}
json.Unmarshal([]byte(expected), &expectedMap)
g.Assert(err).IsNil()
g.Assert(resultMap).Eql(expectedMap)
})

// column mappings
g.It("Should return coerced datatype values", func() {
input := `{"id": "1", "name": "Hank"}`
expected := `{"id":"a:1", "deleted": false, "refs":{}, "props":{"a:id": "a:1", "a:fullname": "Hank"}}`
backend := conf.StorageBackend{StripProps: true, DecodeConfig: &conf.DecodeConfig{
Namespaces: nil,
PropertyPrefixes: map[string]string{"id": "a:a", "fullname": "a"},
IdProperty: "id",
ColumnMappings: map[string]string{"name": "fullname"},
}}
var m map[string]interface{}
json.Unmarshal([]byte(input), &m)
result, err := toEntityBytes(m, backend)
var resultMap map[string]interface{}
json.Unmarshal(result, &resultMap)
var expectedMap map[string]interface{}
json.Unmarshal([]byte(expected), &expectedMap)
g.Assert(err).IsNil()
g.Assert(resultMap).Eql(expectedMap)
})

// list columns
g.It("Should return list value from single value", func() {
input := `{"id": "1", "name": "Hank", "hobbies": "reading, writing"}`
expected := `{"id":"a:1", "deleted": false, "refs":{}, "props":{"a:id": "a:1", "a:fullname": "Hank", "a:hobbies": ["reading", "writing"]}}`
backend := conf.StorageBackend{StripProps: true, DecodeConfig: &conf.DecodeConfig{
Namespaces: nil,
PropertyPrefixes: map[string]string{"id": "a:a", "fullname": "a", "hobbies": "a"},
IdProperty: "id",
ListValueColumns: map[string]string{"hobbies": ","},
ColumnMappings: map[string]string{"name": "fullname"},
}}
var m map[string]interface{}
json.Unmarshal([]byte(input), &m)
result, err := toEntityBytes(m, backend)
var resultMap map[string]interface{}
json.Unmarshal(result, &resultMap)
var expectedMap map[string]interface{}
json.Unmarshal([]byte(expected), &expectedMap)
g.Assert(err).IsNil()
g.Assert(resultMap).Eql(expectedMap)
})

// default values
g.It("Should return set default value", func() {
input := `{"id": "1", "name": "Hank", "hobbies": "reading, writing"}`
expected := `{"id":"a:1", "deleted": false, "refs":{"rdf:type" : "schema:Person"}, "props":{"a:id": "a:1", "a:fullname": "Hank", "a:hobbies": ["reading", "writing"]}}`
backend := conf.StorageBackend{StripProps: true, DecodeConfig: &conf.DecodeConfig{
Namespaces: nil,
PropertyPrefixes: map[string]string{"id": "a:a", "type": "rdf:schema", "fullname": "a", "hobbies": "a"},
IdProperty: "id",
ListValueColumns: map[string]string{"hobbies": ","},
ColumnMappings: map[string]string{"name": "fullname"},
Defaults: map[string]string{"type": "Person"},
Refs: []string{"type"},
}}
var m map[string]interface{}
json.Unmarshal([]byte(input), &m)
result, err := toEntityBytes(m, backend)
var resultMap map[string]interface{}
json.Unmarshal(result, &resultMap)
var expectedMap map[string]interface{}
json.Unmarshal([]byte(expected), &expectedMap)
g.Assert(err).IsNil()
g.Assert(resultMap).Eql(expectedMap)
})

// column concats
g.It("Should return new column with concated values", func() {
input := `{"id": "1", "name": "Hank", "hobby1": "reading", "hobby2": "writing"}`
expected := `{"id":"a:1", "deleted": false, "refs":{"rdf:type" : "schema:Person"}, "props":{"a:id": "a:1", "a:name": "Hank", "a:hobbies": "reading,writing"}}`
backend := conf.StorageBackend{StripProps: true, DecodeConfig: &conf.DecodeConfig{
Namespaces: nil,
PropertyPrefixes: map[string]string{"id": "a:a", "type": "rdf:schema", "name": "a", "hobbies": "a"},
IdProperty: "id",
IgnoreColumns: []string{"hobby1", "hobby2"},
ConcatColumns: map[string][]string{"hobbies": {"hobby1", "hobby2"}},
Defaults: map[string]string{"type": "Person"},
Refs: []string{"type"},
}}
var m map[string]interface{}
json.Unmarshal([]byte(input), &m)
result, err := toEntityBytes(m, backend)
var resultMap map[string]interface{}
json.Unmarshal(result, &resultMap)
var expectedMap map[string]interface{}
json.Unmarshal([]byte(expected), &expectedMap)
g.Assert(err).IsNil()
g.Assert(resultMap).Eql(expectedMap)
})

g.It("Should return stripped entities with configured mappings in place", func() {
input := `{"id": "1", "name": "Hank"}`
expected := `{"id":"a:1", "deleted": false, "refs":{}, "props":{"a:id": "a:1", "a:name": "Hank"}}`
Expand Down

0 comments on commit 4923d9a

Please sign in to comment.