diff --git a/core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/stream/EmptyRowStream.java b/core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/stream/EmptyRowStream.java index 2c9bc61da9..7b0808dcee 100644 --- a/core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/stream/EmptyRowStream.java +++ b/core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/stream/EmptyRowStream.java @@ -23,13 +23,18 @@ import cn.edu.tsinghua.iginx.engine.shared.data.read.Row; import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream; import java.util.Collections; +import java.util.Objects; public class EmptyRowStream implements RowStream { private final Header header; public EmptyRowStream() { - this.header = new Header(Field.KEY, Collections.emptyList()); + this(new Header(Field.KEY, Collections.emptyList())); + } + + public EmptyRowStream(Header header) { + this.header = Objects.requireNonNull(header); } @Override diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/FileStorage.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/FileStorage.java index da675cf918..209b2cc913 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/FileStorage.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/FileStorage.java @@ -38,6 +38,7 @@ import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Filter; import cn.edu.tsinghua.iginx.engine.shared.operator.tag.TagFilter; import cn.edu.tsinghua.iginx.filestore.common.AbstractConfig; +import cn.edu.tsinghua.iginx.filestore.common.Configs; import cn.edu.tsinghua.iginx.filestore.common.FileStoreException; import cn.edu.tsinghua.iginx.filestore.common.Filters; import cn.edu.tsinghua.iginx.filestore.service.FileStoreConfig; @@ -48,6 +49,7 @@ import cn.edu.tsinghua.iginx.filestore.struct.FileStructureManager; import 
cn.edu.tsinghua.iginx.filestore.struct.legacy.filesystem.LegacyFilesystem; import cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet.LegacyParquet; +import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTreeConfig; import cn.edu.tsinghua.iginx.filestore.thrift.DataBoundary; import cn.edu.tsinghua.iginx.filestore.thrift.DataUnit; import cn.edu.tsinghua.iginx.metadata.entity.ColumnsInterval; @@ -56,8 +58,8 @@ import cn.edu.tsinghua.iginx.thrift.AggregateType; import cn.edu.tsinghua.iginx.thrift.StorageEngineType; import cn.edu.tsinghua.iginx.utils.Pair; +import com.google.common.base.Strings; import com.typesafe.config.Config; -import com.typesafe.config.ConfigBeanFactory; import com.typesafe.config.ConfigFactory; import java.net.InetSocketAddress; import java.util.*; @@ -102,7 +104,7 @@ static FileStoreConfig toFileStoreConfig(StorageEngineMeta meta) throws StorageInitializationException { Config rawConfig = toConfig(meta); LOGGER.debug("storage of {} config: {}", meta, rawConfig); - FileStoreConfig fileStoreConfig = ConfigBeanFactory.create(rawConfig, FileStoreConfig.class); + FileStoreConfig fileStoreConfig = FileStoreConfig.of(rawConfig); LOGGER.debug("storage of {} will be initialized with {}", meta, fileStoreConfig); List problems = fileStoreConfig.validate(); if (!problems.isEmpty()) { @@ -112,26 +114,41 @@ static FileStoreConfig toFileStoreConfig(StorageEngineMeta meta) } static Config toConfig(StorageEngineMeta meta) throws StorageInitializationException { - HashMap reshapedParams = new HashMap<>(); + HashMap reshaped = new HashMap<>(); for (Map.Entry param : meta.getExtraParams().entrySet()) { String key = param.getKey(); String value = param.getValue(); if (key.contains(".")) { - reshapedParams.put(key, value); + reshaped.put(key, value); } } - reshapedParams.put("data.root", meta.getExtraParams().get("dir")); - reshapedParams.put("dummy.root", meta.getExtraParams().get("dummy_dir")); - reshapedParams.putIfAbsent("data.struct", LegacyParquet.NAME); - 
reshapedParams.putIfAbsent("dummy.struct", LegacyFilesystem.NAME); + Configs.put( + reshaped, + meta.getExtraParams().get("dir"), + FileStoreConfig.Fields.data, + StorageConfig.Fields.root); + Configs.put( + reshaped, + meta.getExtraParams().get("dummy_dir"), + FileStoreConfig.Fields.dummy, + StorageConfig.Fields.root); + Configs.put( + reshaped, + meta.getExtraParams().get("embedded_prefix"), + FileStoreConfig.Fields.dummy, + StorageConfig.Fields.config, + FileTreeConfig.Fields.prefix); + Configs.putIfAbsent( + reshaped, LegacyParquet.NAME, FileStoreConfig.Fields.data, StorageConfig.Fields.struct); + Configs.putIfAbsent( + reshaped, LegacyFilesystem.NAME, FileStoreConfig.Fields.dummy, StorageConfig.Fields.struct); boolean local = isLocal(meta); - reshapedParams.put("server", String.valueOf(local)); + reshaped.put(FileStoreConfig.Fields.serve, String.valueOf(local)); - Config config = - ConfigFactory.parseMap(reshapedParams, "storage engine initialization parameters"); + Config config = ConfigFactory.parseMap(reshaped, "storage engine initialization parameters"); if (local) { LOGGER.debug("storage of {} is local, ignore config for remote", meta); @@ -308,7 +325,7 @@ public List getColumns(Set patterns, TagFilter tagFilter) @Override public Pair getBoundaryOfStorage(String prefix) throws PhysicalException { - Map units = service.getUnits(prefix); + Map units = service.getUnits(Strings.emptyToNull(prefix)); DataBoundary boundary = units.get(unitOfDummy()); if (Objects.equals(boundary, new DataBoundary())) { throw new PhysicalTaskExecuteFailureException("no data"); diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/AbstractConfig.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/AbstractConfig.java index 94fbc6d834..3193fa913a 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/AbstractConfig.java +++ 
b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/AbstractConfig.java @@ -18,6 +18,8 @@ package cn.edu.tsinghua.iginx.filestore.common; import com.google.common.collect.Range; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigBeanFactory; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -29,6 +31,10 @@ public abstract class AbstractConfig { public abstract List validate(); + public static C of(Config raw, Class clazz) { + return ConfigBeanFactory.create(raw, clazz); + } + public static class ValidationProblem { private final List reversedPath; private final String problem; diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Closeables.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Closeables.java new file mode 100644 index 0000000000..4887c02314 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Closeables.java @@ -0,0 +1,65 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +package cn.edu.tsinghua.iginx.filestore.common; + +import java.io.Closeable; +import java.io.IOException; + +public class Closeables { + + private Closeables() {} + + public static void close(Iterable ac) throws IOException { + if (ac == null) { + return; + } else if (ac instanceof Closeable) { + ((Closeable) ac).close(); + return; + } + + IOException exception = null; + for (Closeable closeable : ac) { + try { + if (closeable != null) { + closeable.close(); + } + } catch (IOException e) { + if (exception == null) { + exception = e; + } else if (e != exception) { + exception.addSuppressed(e); + } + } + } + if (exception != null) { + throw exception; + } + } + + public static Closeable closeAsIOException(AutoCloseable ac) { + return () -> { + try { + ac.close(); + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + throw new IOException(e); + } + }; + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Configs.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Configs.java new file mode 100644 index 0000000000..3fb7421a4f --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Configs.java @@ -0,0 +1,40 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +package cn.edu.tsinghua.iginx.filestore.common; + +import java.util.Map; + +public class Configs { + + private Configs() {} + + public static Object put(Map map, Object value, String... path) { + String joinedPath = String.join(".", path); + return map.put(joinedPath, value); + } + + public static String put(Map map, String value, String... path) { + String joinedPath = String.join(".", path); + return map.put(joinedPath, value); + } + + public static String putIfAbsent(Map map, String value, String... path) { + String joinedPath = String.join(".", path); + return map.putIfAbsent(joinedPath, value); + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Fields.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Fields.java index ab553ebc88..c736efadad 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Fields.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Fields.java @@ -21,6 +21,7 @@ import cn.edu.tsinghua.iginx.engine.shared.data.read.Field; import cn.edu.tsinghua.iginx.thrift.DataType; import java.util.Map; +import javax.annotation.Nullable; public class Fields { @@ -32,4 +33,8 @@ public static Field of(Column column) { DataType dataType = column.getDataType(); return new Field(name, dataType, tags); } + + public static Field addPrefix(Field field, @Nullable String prefix) { + return new Field(IginxPaths.join(prefix, field.getName()), field.getType(), field.getTags()); + } } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/FileStoreRowStream.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/FileStoreRowStream.java new file mode 100644 index 0000000000..24f7405f91 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/FileStoreRowStream.java @@ -0,0 +1,37 @@ +/* + * IGinX - the polystore system with high 
performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.common; + +import cn.edu.tsinghua.iginx.engine.shared.data.read.Header; +import cn.edu.tsinghua.iginx.engine.shared.data.read.Row; +import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream; + +public abstract class FileStoreRowStream implements RowStream { + + @Override + public abstract Header getHeader() throws FileStoreException; + + @Override + public abstract void close() throws FileStoreException; + + @Override + public abstract boolean hasNext() throws FileStoreException; + + @Override + public abstract Row next() throws FileStoreException; +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Filters.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Filters.java index 61cd7cc359..7b6c7c666e 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Filters.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Filters.java @@ -17,6 +17,7 @@ */ package cn.edu.tsinghua.iginx.filestore.common; +import cn.edu.tsinghua.iginx.engine.logical.utils.LogicalFilterUtils; import cn.edu.tsinghua.iginx.engine.shared.KeyRange; import cn.edu.tsinghua.iginx.engine.shared.operator.filter.*; import 
cn.edu.tsinghua.iginx.metadata.entity.KeyInterval; @@ -24,10 +25,9 @@ import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; +import java.util.*; +import java.util.function.Function; +import java.util.function.Predicate; import javax.annotation.Nullable; public class Filters { @@ -153,4 +153,202 @@ public static RangeSet toRangeSet(@Nullable Filter filter) { throw new IllegalArgumentException("Unsupported filter type: " + filter.getType()); } } + + public static boolean match(@Nullable Filter filter, Predicate remain) { + if (isTrue(filter)) { + return true; + } + + boolean[] result = new boolean[] {true}; + + filter.accept( + new FilterVisitor() { + + private void test(Filter filter) { + if (!remain.test(filter)) { + result[0] = false; + } + } + + @Override + public void visit(KeyFilter filter) { + test(filter); + } + + @Override + public void visit(ValueFilter filter) { + test(filter); + } + + @Override + public void visit(PathFilter filter) { + test(filter); + } + + @Override + public void visit(AndFilter filter) {} + + @Override + public void visit(OrFilter filter) {} + + @Override + public void visit(NotFilter filter) {} + + @Override + public void visit(BoolFilter filter) { + test(filter); + } + + @Override + public void visit(ExprFilter filter) { + test(filter); + } + }); + + return result[0]; + } + + @Nullable + public static Filter superSet(@Nullable Filter filter, Predicate remain) { + return superSet( + filter, + (Filter f) -> { + if (f == null) { + return null; + } + if (remain.test(f)) { + return f; + } else { + return null; + } + }); + } + + public static Filter superSet(@Nullable Filter filter, Function transform) { + if (isTrue(filter) || isFalse(filter)) { + return transform.apply(filter); + } + + switch (filter.getType()) { + case Not: + case And: + case Or: + filter = 
LogicalFilterUtils.toCNF(filter); + } + + switch (filter.getType()) { + case Not: + throw new IllegalStateException("Not filter should be removed before calling superSet"); + case And: + { + AndFilter andFilter = (AndFilter) filter; + List children = new ArrayList<>(); + for (Filter child : andFilter.getChildren()) { + Filter superSet = superSet(child, transform); + if (!isTrue(superSet)) { + children.add(superSet); + } + } + if (children.isEmpty()) { + return null; + } else if (children.size() == 1) { + return children.get(0); + } else { + return new AndFilter(children); + } + } + case Or: + { + OrFilter orFilter = (OrFilter) filter; + List oldChildren = orFilter.getChildren(); + if (oldChildren.isEmpty()) { + throw new IllegalStateException("Or filter should not have empty children"); + } + List children = new ArrayList<>(); + for (Filter child : orFilter.getChildren()) { + Filter superSet = superSet(child, transform); + if (!isTrue(superSet)) { + children.add(superSet); + } + } + if (children.isEmpty()) { + return null; + } else if (children.size() == 1) { + return children.get(0); + } else { + return new OrFilter(children); + } + } + default: + return transform.apply(filter); + } + } + + public static Predicate nonKeyFilter() { + return filter -> filter.getType() == FilterType.Key; + } + + public static Set getPaths(Filter filter) { + return LogicalFilterUtils.getPathsFromFilter(filter); // Recursive + } + + public static Filter matchWildcard(@Nullable Filter filter, Set fields) { + return superSet( + filter, + (Filter f) -> { + if (f == null) { + return null; + } + switch (f.getType()) { + case Key: + case Bool: + return f; + case Path: + // TODO: Implement this + case Value: + // TODO: Implement this + default: + return null; + } + }); + } + + public static Predicate startWith(@Nullable String prefix) { + return (Filter f) -> { + if (f == null) { + return true; + } + switch (f.getType()) { + case Key: + case Bool: + return true; + case Path: + { + PathFilter 
pathFilter = (PathFilter) f; + String pathA = pathFilter.getPathA(); + String pathB = pathFilter.getPathB(); + if (Patterns.isWildcard(pathA) || Patterns.isWildcard(pathB)) { + return false; + } + return Patterns.startsWith(pathA, prefix) || Patterns.startsWith(pathB, prefix); + } + case Value: + { + ValueFilter valueFilter = (ValueFilter) f; + String path = valueFilter.getPath(); + if (Patterns.isWildcard(path)) { + return false; + } + return Patterns.startsWith(path, prefix); + } + default: + return false; + } + }; + } + + public static boolean equals(Filter filter, Filter superSetFilter) { + // TODO: Optimize this + return Objects.equals(filter, superSetFilter); + } } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/IginxPaths.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/IginxPaths.java new file mode 100644 index 0000000000..7d87e4cf5c --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/IginxPaths.java @@ -0,0 +1,97 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +package cn.edu.tsinghua.iginx.filestore.common; + +import com.google.common.collect.Iterables; +import java.nio.file.FileSystem; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.regex.Pattern; +import javax.annotation.Nullable; + +public class IginxPaths { + + public static final String DOT = "."; + + private IginxPaths() {} + + @Nullable + public static String join(String... paths) { + return join(Arrays.asList(paths)); + } + + @Nullable + public static String join(Iterable paths) { + Iterable nonNullPaths = Iterables.filter(paths, Objects::nonNull); + if (!nonNullPaths.iterator().hasNext()) { + return null; + } + return String.join(DOT, nonNullPaths); + } + + public static String get(Path path, String dot) { + List nodes = new ArrayList<>(); + for (Path fsNode : path) { + nodes.add(fsNode.toString().replace(DOT, dot)); + } + return join(nodes); + } + + public static Path toFilePath(@Nullable String path, String dot, FileSystem fs) { + if (path == null) { + return fs.getPath(""); + } + Pattern splitter = Pattern.compile(Pattern.quote(DOT)); + String[] nodes = splitter.split(path); + String[] fsNodes = new String[nodes.length]; + for (int i = 0; i < nodes.length; i++) { + fsNodes[i] = nodes[i].replace(dot, DOT); + } + return fs.getPath(fsNodes[0], Arrays.copyOfRange(fsNodes, 1, fsNodes.length)); + } + + public static String toStringPrefix(@Nullable String path) { + if (path == null) { + return ""; + } else { + return path + DOT; + } + } + + @Nullable + public static String fromStringPrefix(String path) { + if (path.isEmpty()) { + return null; + } else { + if (!path.endsWith(DOT)) { + throw new IllegalArgumentException("not empty string prefix must not end with a dot"); + } + return path.substring(0, path.length() - DOT.length()); + } + } + + public static String[] split(@Nullable String path) { + if (path == null) { + return new String[0]; + } + return 
path.split(Pattern.quote(DOT)); + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Patterns.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Patterns.java index 25bfabd4d3..d893adf27c 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Patterns.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Patterns.java @@ -17,14 +17,21 @@ */ package cn.edu.tsinghua.iginx.filestore.common; +import cn.edu.tsinghua.iginx.utils.StringUtils; +import com.google.common.base.Strings; import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; import javax.annotation.Nullable; public class Patterns { private Patterns() {} + private static final String STAR = "*"; + public static boolean isAll(String pattern) { - return pattern.equals("*"); + return pattern.equals(STAR); } public static boolean isAll(@Nullable Collection patterns) { @@ -33,4 +40,97 @@ public static boolean isAll(@Nullable Collection patterns) { } return patterns.stream().anyMatch(Patterns::isAll); } + + public static List filterByPrefix( + @Nullable List patterns, @Nullable String subPrefix) { + if (patterns == null || subPrefix == null) { + return patterns; + } + return patterns.stream() + .filter(pattern -> startsWith(pattern, subPrefix)) + .collect(Collectors.toList()); + } + + public static boolean startsWith(String pattern, @Nullable String prefix) { + String patternStringPrefix = IginxPaths.toStringPrefix(pattern); + String prefixStringPrefix = IginxPaths.toStringPrefix(prefix); + + if (patternStringPrefix.startsWith(prefixStringPrefix)) { + return true; + } + + String commonPrefix = Strings.commonPrefix(patternStringPrefix, prefixStringPrefix); + String patternWithoutCommonPrefix = pattern.substring(commonPrefix.length()); + return patternWithoutCommonPrefix.startsWith(STAR); + } + + public static boolean 
startsWith(@Nullable List patterns, @Nullable String subPrefix) { + if (patterns == null || subPrefix == null) { + return true; + } + return patterns.stream().anyMatch(pattern -> startsWith(pattern, subPrefix)); + } + + private static final List ALL = Collections.singletonList(STAR); + + public static List all() { + return ALL; + } + + public static List nonNull(@Nullable List patterns) { + return patterns == null ? ALL : patterns; + } + + public static boolean match(Collection patterns, String name) { + if (Patterns.isAll(patterns)) { + return true; + } + if (patterns.contains(name)) { + return true; + } + return patterns.stream().anyMatch(pattern -> match(pattern, name)); + } + + public static boolean match(String patterns, String name) { + return StringUtils.match(name, patterns); + } + + public static boolean isWildcard(String path) { + return path.contains(STAR); + } + + public static boolean isEmpty(@Nullable List subPatterns) { + if (subPatterns == null) { + return false; + } + return subPatterns.isEmpty(); + } + + public static List nullToAll(@Nullable List patterns) { + return patterns == null ? all() : patterns; + } + + public static String suffix(String pattern, @Nullable String prefix) { + if (prefix == null) { + return pattern; + } + String stringPrefix = IginxPaths.toStringPrefix(prefix); + String patternStringPrefix = IginxPaths.toStringPrefix(pattern); + + int starIndex = patternStringPrefix.indexOf(STAR); + starIndex = starIndex == -1 ? 
patternStringPrefix.length() : starIndex; + String beforeWildcard = patternStringPrefix.substring(0, starIndex); + String wildcardSuffix = patternStringPrefix.substring(starIndex); + + if (beforeWildcard.startsWith(stringPrefix)) { + return IginxPaths.fromStringPrefix(patternStringPrefix.substring(stringPrefix.length())); + } else { + if (stringPrefix.startsWith(beforeWildcard)) { + if (!wildcardSuffix.isEmpty()) { + return IginxPaths.fromStringPrefix(wildcardSuffix); + } + } + throw new IllegalArgumentException(pattern + " does not start with " + prefix); + } + } } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Ranges.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Ranges.java index 05c005ea0a..974901a76f 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Ranges.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Ranges.java @@ -55,4 +55,8 @@ public static List toKeyRanges(RangeSet rangeset) { } return keyRanges; } + + public static boolean notEmpty(Range closedRange) { + return false; + } } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/RowStreams.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/RowStreams.java new file mode 100644 index 0000000000..ec8b6b9cdd --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/RowStreams.java @@ -0,0 +1,57 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.common; + +import cn.edu.tsinghua.iginx.engine.physical.exception.PhysicalException; +import cn.edu.tsinghua.iginx.engine.physical.memory.execute.stream.EmptyRowStream; +import cn.edu.tsinghua.iginx.engine.shared.data.read.*; +import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Filter; +import java.util.List; +import javax.annotation.Nullable; + +public class RowStreams { + + private RowStreams() {} + + private static final EmptyRowStream EMPTY = new EmptyRowStream(); + + public static RowStream empty() { + return EMPTY; + } + + public static RowStream empty(Header header) { + return new EmptyRowStream(header); + } + + public static RowStream union(List rowStreams) throws PhysicalException { + if (rowStreams.isEmpty()) { + return empty(); + } else if (rowStreams.size() == 1) { + return rowStreams.get(0); + } else { + return new MergeFieldRowStreamWrapper(rowStreams); + } + } + + public static RowStream filtered(RowStream rowStream, @Nullable Filter filter) { + if (Filters.isTrue(filter)) { + return rowStream; + } + return new FilterRowStreamWrapper(rowStream, filter); + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Strings.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Strings.java new file mode 100644 index 0000000000..e65eb7ff57 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/common/Strings.java @@ -0,0 +1,30 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is 
free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.common; + +import java.util.regex.Pattern; + +public class Strings { + private Strings() {} + + private static final Pattern PATTERN = Pattern.compile("\n"); + + public static String shiftWithNewline(String string) { + return PATTERN.matcher("\n" + string).replaceAll("\n "); + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/AbstractFileFormat.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/AbstractFileFormat.java index 975ea2508a..419fef68b5 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/AbstractFileFormat.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/AbstractFileFormat.java @@ -34,11 +34,6 @@ public AbstractFileFormat(String formatName, String... 
extension) { this.extensions = Collections.unmodifiableCollection(extensions); } - @Override - public String getFormatName() { - return formatName; - } - @Override public List getExtensions() { return Lists.newArrayList(extensions); diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/FileFormat.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/FileFormat.java index 7704959cdf..531631e0e9 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/FileFormat.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/FileFormat.java @@ -17,16 +17,29 @@ */ package cn.edu.tsinghua.iginx.filestore.format; +import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream; +import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Filter; +import cn.edu.tsinghua.iginx.thrift.DataType; import com.typesafe.config.Config; +import java.io.Closeable; import java.io.IOException; import java.nio.file.Path; +import java.util.Collection; import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; public interface FileFormat { - String getFormatName(); + String getName(); List getExtensions(); - FileReader newRead(Path path, Config config) throws IOException; + Reader newReader(@Nullable String prefix, Path path, Config config) throws IOException; + + interface Reader extends Closeable { + Map find(Collection patterns) throws IOException; + + RowStream read(List fields, Filter filter) throws IOException; + } } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/FileFormatManager.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/FileFormatManager.java new file mode 100644 index 0000000000..94a4ad8fb0 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/FileFormatManager.java @@ -0,0 +1,113 @@ +/* + * IGinX - the polystore system with high 
performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.format; + +import java.util.Collection; +import java.util.Collections; +import java.util.ServiceLoader; +import java.util.concurrent.ConcurrentHashMap; +import javax.annotation.Nullable; +import javax.annotation.concurrent.ThreadSafe; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@ThreadSafe +public class FileFormatManager { + private static final Logger LOGGER = LoggerFactory.getLogger(FileFormatManager.class); + + private static final FileFormatManager INSTANCE = new FileFormatManager(); + + public static FileFormatManager getInstance() { + return INSTANCE; + } + + private final ConcurrentHashMap formats; + + private final ConcurrentHashMap extensionToFormat; + + private FileFormatManager() { + this.formats = new ConcurrentHashMap<>(); + this.extensionToFormat = new ConcurrentHashMap<>(); + loadSpi(FileFormatManager.class.getClassLoader()); + rebuildIndex(); + } + + public void loadSpi(ClassLoader loader) { + ServiceLoader serviceLoader = ServiceLoader.load(FileFormat.class, loader); + for (FileFormat spi : serviceLoader) { + LOGGER.debug("Discovered FileFormat {}", spi); + FileFormat replaced = register(spi); + if (replaced != null) { + LOGGER.warn( + "FileFormat {} is replaced by {} due to conflict name {}", + replaced, + spi, + 
spi.getName()); + } + } + } + + public void rebuildIndex() { + extensionToFormat.clear(); + for (FileFormat format : formats.values()) { + for (String extension : format.getExtensions()) { + String old = extensionToFormat.put(extension, format.getName()); + if (old != null) { + LOGGER.warn( + "Index of {} is replaced by {} due to conflict extension {}", + old, + format.getName(), + extension); + } + } + } + } + + public FileFormat register(FileFormat format) { + if (format.getName().contains(".")) { + LOGGER.warn("FileFormat name {} contains dot, ignored", format.getName()); + return null; + } + return formats.put(format.getName(), format); + } + + public Collection getAll() { + return Collections.unmodifiableCollection(formats.values()); + } + + @Nullable + public FileFormat getByName(@Nullable String name) { + if (name == null) { + return null; + } + return formats.get(name); + } + + @Nullable + public FileFormat getByExtension(@Nullable String extension) { + if (extension == null) { + return null; + } + return getByName(extensionToFormat.get(extension)); + } + + public FileFormat getByExtension(@Nullable String extension, FileFormat defaultFormat) { + FileFormat format = getByExtension(extension); + return format == null ? 
defaultFormat : format; + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IParquetReader.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IParquetReader.java index bf140f41ba..ca3d5b5349 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IParquetReader.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IParquetReader.java @@ -23,6 +23,7 @@ import cn.edu.tsinghua.iginx.thrift.DataType; import cn.edu.tsinghua.iginx.utils.Pair; import com.google.common.collect.Range; +import java.io.Closeable; import java.io.IOException; import java.nio.file.Path; import java.util.*; @@ -46,7 +47,7 @@ import shaded.iginx.org.apache.parquet.schema.PrimitiveType; import shaded.iginx.org.apache.parquet.schema.Type; -public class IParquetReader implements AutoCloseable { +public class IParquetReader implements Closeable { private static final Logger LOGGER = LoggerFactory.getLogger(IParquetReader.class); private final ParquetRecordReader internalReader; @@ -105,7 +106,7 @@ public IRecord read() throws IOException { } @Override - public void close() throws Exception { + public void close() throws IOException { if (internalReader != null) { internalReader.close(); } @@ -186,12 +187,17 @@ public static DataType toIginxType(PrimitiveType primitiveType) { } } + public long getCurrentRowIndex() { + return internalReader.getCurrentRowIndex(); + } + public static class Builder { private final ParquetReadOptions.Builder optionsBuilder = ParquetReadOptions.builder(); private final InputFile localInputfile; private boolean skip = false; private Set fields; + private boolean hasKey; public Builder(LocalInputFile localInputFile) { this.localInputfile = localInputFile; @@ -218,7 +224,7 @@ private IParquetReader build(ParquetMetadata footer, ParquetReadOptions options) if (fields == null) { requestedSchema = schema; } else { - 
requestedSchema = ProjectUtils.projectMessageType(schema, fields); + requestedSchema = ProjectUtils.projectMessageType(schema, fields, hasKey); LOGGER.debug("project schema with {} as {}", fields, requestedSchema); } @@ -233,8 +239,23 @@ private IParquetReader build(ParquetMetadata footer, ParquetReadOptions options) return new IParquetReader(internalReader, requestedSchema, footer); } + @Override + public String toString() { + return "Builder{" + + "optionsBuilder=" + + optionsBuilder + + ", localInputfile=" + + localInputfile + + '}'; + } + public Builder project(Set fields) { + return project(fields, true); + } + + public Builder project(Set fields, boolean hasKey) { this.fields = Objects.requireNonNull(fields); + this.hasKey = hasKey; return this; } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IParquetWriter.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IParquetWriter.java index 5fea69c8d4..d4d5237bf1 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IParquetWriter.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IParquetWriter.java @@ -18,9 +18,11 @@ package cn.edu.tsinghua.iginx.filestore.format.parquet; +import cn.edu.tsinghua.iginx.engine.shared.data.read.Field; import cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet.db.util.iterator.Scanner; import cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet.util.Constants; import cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet.util.exception.StorageException; +import java.io.Closeable; import java.io.IOException; import java.nio.file.Path; import shaded.iginx.org.apache.parquet.ParquetWriteOptions; @@ -35,7 +37,9 @@ import shaded.iginx.org.apache.parquet.schema.MessageType; import shaded.iginx.org.apache.parquet.schema.TypeUtil; -public class IParquetWriter implements AutoCloseable { +public class IParquetWriter implements 
Closeable { + + public static final String KEY_FIELD_NAME = Field.KEY.getName(); private final ParquetRecordWriter internalWriter; @@ -60,7 +64,7 @@ public void write(IRecord record) throws IOException { } @Override - public void close() throws Exception { + public void close() throws IOException { internalWriter.close(); } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IRecordDematerializer.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IRecordDematerializer.java index fb2ce21bca..eeff41b5a8 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IRecordDematerializer.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/IRecordDematerializer.java @@ -30,6 +30,9 @@ class IRecordDematerializer extends RecordDematerializer { + public static final String OBJECT_MODEL_NAME_PROP = "writer.model.name"; + public static final String OBJECT_MODEL_NAME_VALUE = "iginx"; + private final MessageType schema; IRecordDematerializer(MessageType schema) { @@ -59,7 +62,7 @@ public MessageType getSchema() { @Override public Map getExtraMetaData() { - return Collections.emptyMap(); + return Collections.singletonMap(OBJECT_MODEL_NAME_PROP, OBJECT_MODEL_NAME_VALUE); } private void addGroup(GroupType groupType, IRecord record) { diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetFormat.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetFormat.java new file mode 100644 index 0000000000..c5a026524d --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetFormat.java @@ -0,0 +1,60 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of 
the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.format.parquet; + +import cn.edu.tsinghua.iginx.filestore.format.FileFormat; +import com.google.auto.service.AutoService; +import com.typesafe.config.Config; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; +import javax.annotation.Nullable; +import shaded.iginx.org.apache.parquet.hadoop.metadata.ParquetMetadata; + +@AutoService(FileFormat.class) +public class ParquetFormat implements FileFormat { + + public static final String NAME = "Parquet"; + + @Override + public String getName() { + return NAME; + } + + @Override + public String toString() { + return NAME; + } + + @Override + public List getExtensions() { + return Collections.singletonList("parquet"); + } + + @Override + public Reader newReader(@Nullable String prefix, Path path, Config config) throws IOException { + IParquetReader.Builder builder = IParquetReader.builder(path); + ParquetMetadata footer; + try (IParquetReader reader = builder.build()) { + footer = reader.getMeta(); + } + + return new ParquetFormatReader(prefix, builder, footer); + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetFormatReader.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetFormatReader.java new file mode 100644 index 0000000000..109e0700a6 --- /dev/null +++ 
b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetFormatReader.java @@ -0,0 +1,93 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.format.parquet; + +import cn.edu.tsinghua.iginx.engine.shared.data.read.Field; +import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream; +import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Filter; +import cn.edu.tsinghua.iginx.filestore.common.IginxPaths; +import cn.edu.tsinghua.iginx.filestore.common.Patterns; +import cn.edu.tsinghua.iginx.filestore.common.RowStreams; +import cn.edu.tsinghua.iginx.filestore.format.FileFormat; +import cn.edu.tsinghua.iginx.thrift.DataType; +import java.io.IOException; +import java.util.*; +import javax.annotation.Nullable; +import shaded.iginx.org.apache.parquet.hadoop.metadata.ParquetMetadata; + +public class ParquetFormatReader implements FileFormat.Reader { + + private final String prefix; + private final IParquetReader.Builder builder; + private final ParquetMetadata footer; + private final Map fields = new HashMap<>(); + private final Map fieldToRawName = new HashMap<>(); + private final Map rawNameToField = new HashMap<>(); + + public ParquetFormatReader( + @Nullable String prefix, IParquetReader.Builder builder, ParquetMetadata 
footer) + throws IOException { + this.prefix = prefix; + this.builder = Objects.requireNonNull(builder); + this.footer = Objects.requireNonNull(footer); + initSchema(); + } + + private void initSchema() throws IOException { + List fields = ProjectUtils.toFields(footer.getFileMetaData().getSchema()); + for (Field field : fields) { + String rawName = field.getName(); + String fullName = IginxPaths.join(prefix, rawName); + this.fields.put(fullName, field.getType()); + this.fieldToRawName.put(fullName, rawName); + this.rawNameToField.put(rawName, fullName); + } + } + + @Override + public String toString() { + return "ParquetFormatReader{}"; + } + + @Override + public void close() throws IOException {} + + @Override + public Map find(Collection patterns) throws IOException { + Map result = new HashMap<>(); + for (String field : fields.keySet()) { + if (Patterns.match(patterns, field)) { + result.put(field, fields.get(field)); + } + } + return result; + } + + @Override + public RowStream read(List fields, Filter filter) throws IOException { + Set rawFields = new HashSet<>(); + for (String field : fields) { + rawFields.add(fieldToRawName.get(field)); + } + + IParquetReader reader = builder.project(rawFields, false).build(footer); + + RowStream rowStream = new ParquetFormatRowStream(reader, rawNameToField::get); + return RowStreams.filtered(rowStream, filter); + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetFormatRowStream.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetFormatRowStream.java new file mode 100644 index 0000000000..da48352952 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetFormatRowStream.java @@ -0,0 +1,98 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it 
under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.format.parquet; + +import cn.edu.tsinghua.iginx.engine.shared.data.read.Field; +import cn.edu.tsinghua.iginx.engine.shared.data.read.Header; +import cn.edu.tsinghua.iginx.engine.shared.data.read.Row; +import cn.edu.tsinghua.iginx.filestore.common.FileStoreException; +import cn.edu.tsinghua.iginx.filestore.common.FileStoreRowStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; +import javax.annotation.WillCloseWhenClosed; +import shaded.iginx.org.apache.parquet.schema.MessageType; + +public class ParquetFormatRowStream extends FileStoreRowStream { + + private final IParquetReader reader; + private final Header header; + private Row nextRow; + + public ParquetFormatRowStream( + @WillCloseWhenClosed IParquetReader reader, Function nameMapper) + throws IOException { + this.reader = reader; + this.header = new Header(Field.KEY, toFields(reader.getSchema(), nameMapper)); + this.nextRow = fetchNext(); + } + + private static List toFields(MessageType schema, Function nameMapper) { + List fields = ProjectUtils.toFields(schema); + List result = new ArrayList<>(); + for (Field field : fields) { + String rawName = field.getName(); + String fullName = nameMapper.apply(rawName); + result.add(new Field(fullName, field.getType())); + } + return result; + } + + private Row fetchNext() throws IOException { + IRecord record = 
reader.read(); + if (record == null) { + return null; + } + long key = reader.getCurrentRowIndex(); + return ProjectUtils.toRow(header, key, record); + } + + @Override + public Header getHeader() throws FileStoreException { + return header; + } + + @Override + public void close() throws FileStoreException { + try { + reader.close(); + } catch (IOException e) { + throw new FileStoreException(e); + } + } + + @Override + public boolean hasNext() throws FileStoreException { + return nextRow != null; + } + + @Override + public Row next() throws FileStoreException { + if (nextRow == null) { + throw new FileStoreException("No more rows"); + } + Row row = nextRow; + try { + nextRow = fetchNext(); + } catch (IOException e) { + throw new FileStoreException(e); + } + return row; + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ProjectUtils.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ProjectUtils.java index 60b569873d..7a1e992c50 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ProjectUtils.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ProjectUtils.java @@ -18,10 +18,15 @@ package cn.edu.tsinghua.iginx.filestore.format.parquet; +import static cn.edu.tsinghua.iginx.filestore.format.parquet.IRecordDematerializer.OBJECT_MODEL_NAME_VALUE; +import static cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet.manager.dummy.Storer.getParquetType; + +import cn.edu.tsinghua.iginx.engine.shared.data.read.Field; +import cn.edu.tsinghua.iginx.engine.shared.data.read.Header; +import cn.edu.tsinghua.iginx.engine.shared.data.read.Row; import cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet.util.Constants; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; +import cn.edu.tsinghua.iginx.thrift.DataType; +import java.util.*; import javax.annotation.Nullable; import 
shaded.iginx.org.apache.parquet.schema.MessageType; import shaded.iginx.org.apache.parquet.schema.Type; @@ -30,9 +35,12 @@ public class ProjectUtils { private ProjectUtils() {} - static MessageType projectMessageType(MessageType schema, @Nullable Set fields) { + static MessageType projectMessageType( + MessageType schema, @Nullable Set fields, boolean hasKey) { Set schemaFields = new HashSet<>(Objects.requireNonNull(fields)); - schemaFields.add(Constants.KEY_FIELD_NAME); + if (hasKey) { + schemaFields.add(Constants.KEY_FIELD_NAME); + } Types.MessageTypeBuilder builder = Types.buildMessage(); for (String field : schemaFields) { @@ -47,4 +55,55 @@ static MessageType projectMessageType(MessageType schema, @Nullable Set return builder.named(schema.getName()); } + + public static List toFields(MessageType schema) { + List fields = new ArrayList<>(); + for (Type type : schema.getFields()) { + if (!type.isPrimitive()) { + throw new IllegalArgumentException("unsupported parquet type: " + type); + } + String rawName = type.getName(); + DataType iType = IParquetReader.toIginxType(type.asPrimitiveType()); + fields.add(new Field(rawName, iType)); + } + return fields; + } + + public static MessageType toMessageType(Header header) { + List parquetFields = new ArrayList<>(); + if (header.hasKey()) { + parquetFields.add( + getParquetType(Constants.KEY_FIELD_NAME, DataType.LONG, Type.Repetition.REQUIRED)); + } + for (Field field : header.getFields()) { + parquetFields.add(getParquetType(field.getName(), field.getType(), Type.Repetition.OPTIONAL)); + } + return new MessageType(OBJECT_MODEL_NAME_VALUE, parquetFields); + } + + public static IRecord toRecord(Row row) { + IRecord record = new IRecord(); + int offset = 0; + if (row.getHeader().hasKey()) { + record.add(0, row.getKey()); + offset++; + } + for (int i = 0; i < row.getValues().length; i++) { + if (row.getValues()[i] != null) { + record.add(i + offset, row.getValues()[i]); + } + } + return record; + } + + public static Row 
toRow(Header header, long key, IRecord record) { + if (!header.hasKey()) { + throw new IllegalArgumentException("header does not have key field"); + } + Object[] values = new Object[header.getFields().size()]; + for (Map.Entry entry : record) { + values[entry.getKey()] = entry.getValue(); + } + return new Row(header, key, values); + } } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawFormat.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawFormat.java new file mode 100644 index 0000000000..292258ae22 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawFormat.java @@ -0,0 +1,56 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +package cn.edu.tsinghua.iginx.filestore.format.raw; + +import cn.edu.tsinghua.iginx.filestore.format.FileFormat; +import com.google.auto.service.AutoService; +import com.typesafe.config.Config; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; +import javax.annotation.Nullable; + +@AutoService(FileFormat.class) +public class RawFormat implements FileFormat { + + public static final String NAME = "RawChunk"; + + public static final RawFormat INSTANCE = new RawFormat(); + + @Override + public String getName() { + return NAME; + } + + @Override + public String toString() { + return NAME; + } + + @Override + public List getExtensions() { + return Collections.emptyList(); + } + + @Override + public Reader newReader(@Nullable String prefix, Path path, Config config) throws IOException { + RawReaderConfig rawReaderConfig = RawReaderConfig.of(config); + return new RawReader(String.valueOf(prefix), path, rawReaderConfig); + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawFormatRowStream.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawFormatRowStream.java new file mode 100644 index 0000000000..8f2643dd7b --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawFormatRowStream.java @@ -0,0 +1,135 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.format.raw; + +import cn.edu.tsinghua.iginx.engine.shared.data.read.Header; +import cn.edu.tsinghua.iginx.engine.shared.data.read.Row; +import cn.edu.tsinghua.iginx.filestore.common.FileStoreException; +import cn.edu.tsinghua.iginx.filestore.common.FileStoreRowStream; +import cn.edu.tsinghua.iginx.filestore.common.Ranges; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.Queue; +import javax.annotation.Nullable; + +public class RawFormatRowStream extends FileStoreRowStream { + + private final Header header; + private final FileChannel channel; + private final long pageSize; + private final Queue> keyRanges; + private long nextFetchKey = 0; + private boolean eof = false; + private Row nextRow; + + public RawFormatRowStream(Header header, Path path, long pageSize, RangeSet keyRanges) + throws IOException { + this.header = header; + this.channel = FileChannel.open(path, StandardOpenOption.READ); + this.pageSize = pageSize; + + this.keyRanges = new ArrayDeque<>(keyRanges.asRanges().size()); + for (Range range : keyRanges.asRanges()) { + Range closedRange = Ranges.toClosedLongRange(range); + if (!closedRange.isEmpty()) { + this.keyRanges.add(closedRange); + } + } + + this.nextRow = fetchNext(); + } + + @Override + public Header getHeader() throws FileStoreException { + return header; + } + + @Override + public void close() throws FileStoreException { + try { + channel.close(); + } catch (IOException e) { + throw new FileStoreException(e); + } 
+ } + + @Override + public boolean hasNext() throws FileStoreException { + return nextRow != null; + } + + @Override + public Row next() throws FileStoreException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + Row row = nextRow; + try { + nextRow = fetchNext(); + } catch (IOException e) { + throw new FileStoreException(e); + } + return row; + } + + public boolean needFetch() { + if (eof) { + return false; + } + while (!keyRanges.isEmpty()) { + Range range = keyRanges.peek(); + if (nextFetchKey < range.lowerEndpoint()) { + nextFetchKey = range.lowerEndpoint(); + } + if (nextFetchKey <= range.upperEndpoint()) { + return true; + } + keyRanges.poll(); + } + return false; + } + + @Nullable + public Row fetchNext() throws IOException { + if (!needFetch()) { + return null; + } + long currentKey = nextFetchKey++; + byte[] data = new byte[Math.toIntExact(pageSize)]; + ByteBuffer buffer = ByteBuffer.wrap(data); + channel.position(currentKey * pageSize); + channel.read(buffer); + if (buffer.remaining() > 0) { + eof = true; + if (buffer.position() == 0) { + return null; + } + data = Arrays.copyOf(data, buffer.position()); + } + Object[] values = new Object[] {data}; + return new Row(header, currentKey, values); + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawReader.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawReader.java new file mode 100644 index 0000000000..c30f876742 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawReader.java @@ -0,0 +1,92 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.format.raw; + +import cn.edu.tsinghua.iginx.engine.physical.memory.execute.stream.EmptyRowStream; +import cn.edu.tsinghua.iginx.engine.shared.data.read.Field; +import cn.edu.tsinghua.iginx.engine.shared.data.read.Header; +import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream; +import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Filter; +import cn.edu.tsinghua.iginx.filestore.common.Filters; +import cn.edu.tsinghua.iginx.filestore.common.Patterns; +import cn.edu.tsinghua.iginx.filestore.common.RowStreams; +import cn.edu.tsinghua.iginx.filestore.format.FileFormat; +import cn.edu.tsinghua.iginx.thrift.DataType; +import com.google.common.collect.RangeSet; +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; +import java.util.function.Predicate; + +public class RawReader implements FileFormat.Reader { + + private final RawReaderConfig config; + private final Path path; + private final String fieldName; + private final Map schema; + private final Header header; + + public RawReader(String prefix, Path path, RawReaderConfig config) throws IOException { + this.config = config; + this.path = path; + this.fieldName = Objects.requireNonNull(prefix); + this.schema = Collections.singletonMap(fieldName, DataType.BINARY); + Field field = new Field(fieldName, DataType.BINARY); + this.header = new Header(Field.KEY, Collections.singletonList(field)); + } + + @Override + public String toString() { + return "RawReader{" + "config=" + config + '}'; + } + + @Override + public Map find(Collection patterns) throws IOException { + if 
(!Patterns.match(patterns, fieldName)) { + return Collections.emptyMap(); + } + return schema; + } + + @Override + public RowStream read(List fields, Filter filter) throws IOException { + if (fields.isEmpty()) { + return new EmptyRowStream(); + } + + if (!Objects.equals(fields, Collections.singletonList(fieldName))) { + throw new IllegalArgumentException("Unknown fields: " + fields); + } + + Predicate removeNonKeyFilter = Filters.nonKeyFilter(); + + Filter keyRangeFilter = Filters.superSet(filter, removeNonKeyFilter); + RangeSet keyRanges = Filters.toRangeSet(keyRangeFilter); + RowStream rowStream = + new RawFormatRowStream(header, path, config.getPageSize().toBytes(), keyRanges); + + if (!Filters.match(filter, removeNonKeyFilter)) { + rowStream = RowStreams.filtered(rowStream, filter); + } + + return rowStream; + } + + @Override + public void close() throws IOException {} +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawReaderConfig.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawReaderConfig.java new file mode 100644 index 0000000000..1b156d82dd --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/format/raw/RawReaderConfig.java @@ -0,0 +1,45 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */
+package cn.edu.tsinghua.iginx.filestore.format.raw;
+
+import cn.edu.tsinghua.iginx.filestore.common.AbstractConfig;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigMemorySize;
+import com.typesafe.config.Optional;
+import java.util.Collections;
+import java.util.List;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.experimental.FieldNameConstants;
+
+/** Settings of {@link RawReader}; the only option is the page size. */
+@Data
+@EqualsAndHashCode(callSuper = true)
+@FieldNameConstants
+public class RawReaderConfig extends AbstractConfig {
+
+  /** Page size, in bytes, used when the configuration does not set one. */
+  private static final long DEFAULT_PAGE_SIZE_BYTES = 4096L;
+
+  @Optional ConfigMemorySize pageSize = ConfigMemorySize.ofBytes(DEFAULT_PAGE_SIZE_BYTES);
+
+  /** Always reports no problems: this class performs no checks on {@code pageSize}. */
+  @Override
+  public List<ValidationProblem> validate() {
+    return Collections.emptyList();
+  }
+
+  /** Builds a {@code RawReaderConfig} from {@code config} via the inherited two-argument {@code of}. */
+  public static RawReaderConfig of(Config config) {
+    return of(config, RawReaderConfig.class);
+  }
+}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/FileStoreConfig.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/FileStoreConfig.java index fe8108ce2f..1c711c58ca 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/FileStoreConfig.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/FileStoreConfig.java @@ -20,6 +20,7 @@ import cn.edu.tsinghua.iginx.filestore.common.AbstractConfig; import cn.edu.tsinghua.iginx.filestore.service.rpc.client.ClientConfig; import cn.edu.tsinghua.iginx.filestore.service.storage.StorageConfig; +import com.typesafe.config.Config; import com.typesafe.config.Optional; import java.util.ArrayList; import java.util.List; @@ -32,7 +33,7 @@ @FieldNameConstants public class FileStoreConfig extends AbstractConfig { - boolean server; + boolean serve; @Optional ClientConfig client = new ClientConfig(); @@ -43,7 +44,7 @@ public class FileStoreConfig extends AbstractConfig { @Override public List validate() { List problems = new
ArrayList<>(); - if (server) { + if (serve) { if (data == null && dummy == null) { problems.add( new ValidationProblem( @@ -60,4 +61,8 @@ public List validate() { } return problems; } + + public static FileStoreConfig of(Config config) { + return of(config, FileStoreConfig.class); + } } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/FileStoreService.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/FileStoreService.java index 6e5d571b43..dea8fe8bd9 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/FileStoreService.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/FileStoreService.java @@ -43,7 +43,7 @@ public class FileStoreService implements Service { public FileStoreService(InetSocketAddress address, FileStoreConfig config) throws FileStoreException { - if (config.isServer()) { + if (config.isServe()) { this.service = new StorageService(config.getData(), config.getDummy()); Server temp = null; try { diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/storage/StorageService.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/storage/StorageService.java index 800929b8a2..cba903e671 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/storage/StorageService.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/service/storage/StorageService.java @@ -64,9 +64,15 @@ public StorageService(@Nullable StorageConfig dataConfig, @Nullable StorageConfi this.dataConfig = dataConfig; this.dummyConfig = dummyConfig; + LOGGER.debug("dataConfig: {}", dataConfig); + LOGGER.debug("dummyConfig: {}", dummyConfig); + this.dataStructure = getFileStructure(dataConfig); this.dummyStructure = getFileStructure(dummyConfig); + LOGGER.debug("dataStructure: {}", dataStructure); + LOGGER.debug("dummyStructure: {}", 
dummyStructure); + this.dataShared = getShared(dataConfig, dataStructure); this.dummyShared = getShared(dummyConfig, dummyStructure); diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/DataTarget.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/DataTarget.java index 1a9b75c4be..5f4c5dc6f8 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/DataTarget.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/DataTarget.java @@ -21,8 +21,10 @@ import cn.edu.tsinghua.iginx.engine.shared.operator.tag.TagFilter; import java.util.List; import lombok.Value; +import lombok.With; @Value +@With public class DataTarget { /** filter rows, null only if return all rows */ Filter filter; diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/FileStructure.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/FileStructure.java index f182de4769..e312c2ae53 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/FileStructure.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/FileStructure.java @@ -17,8 +17,6 @@ */ package cn.edu.tsinghua.iginx.filestore.struct; -import cn.edu.tsinghua.iginx.engine.shared.operator.filter.FilterType; -import cn.edu.tsinghua.iginx.thrift.AggregateType; import com.typesafe.config.Config; import java.io.Closeable; import java.io.IOException; @@ -31,10 +29,6 @@ public interface FileStructure { Closeable newShared(Config config) throws IOException; - boolean supportFilter(FilterType type); - - boolean supportAggregate(AggregateType type); - FileManager newReader(Path path, Closeable shared) throws IOException; boolean supportWrite(); diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/legacy/filesystem/LegacyFilesystem.java 
b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/legacy/filesystem/LegacyFilesystem.java index b1973e3429..b17d8c3183 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/legacy/filesystem/LegacyFilesystem.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/legacy/filesystem/LegacyFilesystem.java @@ -20,12 +20,11 @@ import cn.edu.tsinghua.iginx.auth.FilePermissionManager; import cn.edu.tsinghua.iginx.auth.entity.FileAccessType; import cn.edu.tsinghua.iginx.auth.utils.FilePermissionRuleNameFilters; -import cn.edu.tsinghua.iginx.engine.shared.operator.filter.FilterType; import cn.edu.tsinghua.iginx.filestore.struct.FileManager; import cn.edu.tsinghua.iginx.filestore.struct.FileStructure; import cn.edu.tsinghua.iginx.filestore.struct.legacy.filesystem.exec.LocalExecutor; import cn.edu.tsinghua.iginx.filestore.struct.legacy.filesystem.shared.Constant; -import cn.edu.tsinghua.iginx.thrift.AggregateType; +import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTreeConfig; import com.google.auto.service.AutoService; import com.typesafe.config.Config; import java.io.Closeable; @@ -57,16 +56,6 @@ public Closeable newShared(Config config) throws IOException { return new Shared(config); } - @Override - public boolean supportFilter(FilterType type) { - return true; - } - - @Override - public boolean supportAggregate(AggregateType type) { - return false; - } - @Override public FileManager newReader(Path path, Closeable shared) throws IOException { Shared s = (Shared) shared; @@ -90,8 +79,8 @@ private static class Shared implements Closeable { private final Map params = new HashMap<>(); public Shared(Config config) { - if (config.hasPath(Constant.INIT_ROOT_PREFIX)) { - params.put(Constant.INIT_ROOT_PREFIX, config.getString(Constant.INIT_ROOT_PREFIX)); + if (config.hasPath(FileTreeConfig.Fields.prefix)) { + params.put(Constant.INIT_ROOT_PREFIX, 
config.getString(FileTreeConfig.Fields.prefix)); } if (config.hasPath(Constant.INIT_INFO_MEMORY_POOL_SIZE)) { params.put( @@ -108,9 +97,6 @@ public Map getParams(Path path) { Map finalParams = new HashMap<>(params); Path absolutePath = checked.toAbsolutePath(); finalParams.put(Constant.INIT_INFO_DUMMY_DIR, absolutePath.toString()); - if (!finalParams.containsKey(Constant.INIT_ROOT_PREFIX)) { - finalParams.put(Constant.INIT_ROOT_PREFIX, absolutePath.getFileName().toString()); - } return Collections.unmodifiableMap(finalParams); } diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/legacy/parquet/LegacyParquet.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/legacy/parquet/LegacyParquet.java index b86c237f73..3f86c8a71c 100644 --- a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/legacy/parquet/LegacyParquet.java +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/legacy/parquet/LegacyParquet.java @@ -17,13 +17,11 @@ */ package cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet; -import cn.edu.tsinghua.iginx.engine.shared.operator.filter.FilterType; import cn.edu.tsinghua.iginx.filestore.struct.FileManager; import cn.edu.tsinghua.iginx.filestore.struct.FileStructure; import cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet.manager.data.DataManager; import cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet.util.Shared; import cn.edu.tsinghua.iginx.filestore.struct.legacy.parquet.util.StorageProperties; -import cn.edu.tsinghua.iginx.thrift.AggregateType; import com.google.auto.service.AutoService; import com.typesafe.config.Config; import java.io.Closeable; @@ -120,16 +118,6 @@ public Closeable newShared(Config config) throws IOException { return Shared.of(storageProperties); } - @Override - public boolean supportFilter(FilterType type) { - return true; - } - - @Override - public boolean supportAggregate(AggregateType type) { - return type 
== AggregateType.COUNT; - } - @Override public FileManager newReader(Path path, Closeable shared) throws IOException { return new LegacyParquetWrapper(new DataManager((Shared) shared, path), true); diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTree.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTree.java new file mode 100644 index 0000000000..9e4a84b002 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTree.java @@ -0,0 +1,79 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+package cn.edu.tsinghua.iginx.filestore.struct.tree;
+
+import cn.edu.tsinghua.iginx.filestore.struct.FileManager;
+import cn.edu.tsinghua.iginx.filestore.struct.FileStructure;
+import com.google.auto.service.AutoService;
+import com.typesafe.config.Config;
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.file.Path;
+import lombok.Value;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Read-only {@link FileStructure} registered under the name {@value #NAME}.
+ *
+ * <p>Readers are {@link FileTreeManager} instances; writing is not supported.
+ */
+@AutoService(FileStructure.class)
+public class FileTree implements FileStructure {
+
+  public static final String NAME = "FileTree";
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(FileTree.class);
+
+  @Override
+  public String getName() {
+    return NAME;
+  }
+
+  @Override
+  public String toString() {
+    return NAME;
+  }
+
+  /** Parses {@code config} into a {@link FileTreeConfig} and wraps it in the shared holder. */
+  @Override
+  public Closeable newShared(Config config) throws IOException {
+    LOGGER.debug("Create shared instance with config: {}", config);
+    return new Shared(FileTreeConfig.of(config));
+  }
+
+  /**
+   * Creates a manager rooted at {@code path}.
+   *
+   * @param shared must be the object returned by {@link #newShared}; it is cast without a check
+   */
+  @Override
+  public FileManager newReader(Path path, Closeable shared) throws IOException {
+    LOGGER.debug("Create reader with path: {}", path);
+    Shared holder = (Shared) shared;
+    return new FileTreeManager(path, holder.getConfig());
+  }
+
+  @Override
+  public boolean supportWrite() {
+    return false;
+  }
+
+  /** Always throws: this structure is read-only (see {@link #supportWrite()}). */
+  @Override
+  public FileManager newWriter(Path path, Closeable shared) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Holder for the parsed configuration; closing it does nothing. */
+  @Value
+  private static class Shared implements Closeable {
+
+    FileTreeConfig config;
+
+    @Override
+    public void close() throws IOException {}
+  }
+}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTreeConfig.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTreeConfig.java new file mode 100644 index 0000000000..dfd415457c --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTreeConfig.java @@ -0,0
+1,74 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */
+package cn.edu.tsinghua.iginx.filestore.struct.tree;
+
+import cn.edu.tsinghua.iginx.filestore.common.AbstractConfig;
+import com.typesafe.config.*;
+import com.typesafe.config.Optional;
+import java.util.*;
+import lombok.*;
+import lombok.experimental.FieldNameConstants;
+
+/** Configuration of the {@code FileTree} structure: dot replacement, prefix and format configs. */
+@Data
+@With
+@EqualsAndHashCode(callSuper = true)
+@FieldNameConstants
+@AllArgsConstructor
+@NoArgsConstructor
+public class FileTreeConfig extends AbstractConfig {
+
+  /** Must not contain {@code '.'}; see {@link #validate()}. Defaults to a single backslash. */
+  @Optional String dot = "\\";
+
+  @Optional String prefix = null;
+
+  /** Per-format raw configuration, keyed by the names found under {@code formats}. */
+  @Optional Map<String, Config> formats = Collections.emptyMap();
+
+  /**
+   * Checks that {@code dot} is set and free of {@code '.'}.
+   *
+   * @return the problems found; empty when the configuration is acceptable
+   */
+  @Override
+  public List<ValidationProblem> validate() {
+    List<ValidationProblem> problems = new ArrayList<>();
+    if (validateNotNull(problems, Fields.dot, dot) && dot.contains(".")) {
+      problems.add(new InvalidFieldValidationProblem(Fields.dot, "dot cannot contain '.'"));
+    }
+    return problems;
+  }
+
+  /**
+   * Builds a {@code FileTreeConfig} from {@code config}.
+   *
+   * <p>The {@code formats} path is removed before the bean is created and is filled in by hand
+   * afterwards: every entry under {@code formats} whose value is itself an object becomes one
+   * {@link Config}. Entries with any other value type are skipped without a report.
+   */
+  @SuppressWarnings("unchecked") // unwrapped() of an OBJECT value is a Map keyed by String
+  public static FileTreeConfig of(Config config) {
+    Config withoutFormats = config.withoutPath(Fields.formats);
+    FileTreeConfig fileTreeConfig = of(withoutFormats, FileTreeConfig.class);
+
+    if (!config.hasPath(Fields.formats)) {
+      return fileTreeConfig;
+    }
+    ConfigValue value = config.getValue(Fields.formats);
+    if (value.valueType() != ConfigValueType.OBJECT) {
+      return fileTreeConfig;
+    }
+
+    Map<String, Object> formatsRawConfig = (Map<String, Object>) value.unwrapped();
+    Map<String, Config> formats = new HashMap<>();
+    for (Map.Entry<String, Object> entry : formatsRawConfig.entrySet()) {
+      if (entry.getValue() instanceof Map) {
+        formats.put(
+            entry.getKey(), ConfigFactory.parseMap((Map<String, Object>) entry.getValue()));
+      }
+    }
+    fileTreeConfig.setFormats(formats);
+
+    return fileTreeConfig;
+  }
+}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTreeManager.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTreeManager.java new file mode 100644 index 0000000000..4b896518ab --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTreeManager.java @@ -0,0 +1,179 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see .
+ */
+package cn.edu.tsinghua.iginx.filestore.struct.tree;
+
+import cn.edu.tsinghua.iginx.engine.physical.exception.PhysicalException;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream;
+import cn.edu.tsinghua.iginx.engine.shared.data.write.DataView;
+import cn.edu.tsinghua.iginx.filestore.common.IginxPaths;
+import cn.edu.tsinghua.iginx.filestore.common.RowStreams;
+import cn.edu.tsinghua.iginx.filestore.struct.DataTarget;
+import cn.edu.tsinghua.iginx.filestore.struct.FileManager;
+import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier;
+import cn.edu.tsinghua.iginx.filestore.struct.tree.query.ftj.UnionFormatTree;
+import cn.edu.tsinghua.iginx.filestore.thrift.DataBoundary;
+import cn.edu.tsinghua.iginx.metadata.entity.KeyInterval;
+import cn.edu.tsinghua.iginx.thrift.AggregateType;
+import cn.edu.tsinghua.iginx.utils.StringUtils;
+import com.google.common.base.Strings;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.stream.Stream;
+import javax.annotation.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Read-only {@link FileManager} over a directory tree.
+ *
+ * <p>Queries are delegated to a {@link Querier.Builder} created once in the constructor;
+ * {@link #insert} and {@link #delete} always throw {@link UnsupportedOperationException}.
+ */
+public class FileTreeManager implements FileManager {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(FileTreeManager.class);
+
+  private final Path path;
+  private final FileTreeConfig config;
+  private final Querier.Builder builder;
+
+  /**
+   * @param path root of the tree; must not be null. The normalized form is kept in this manager,
+   *     while the builder receives the path exactly as given.
+   * @param config supplies the prefix, the dot replacement and the format settings
+   * @throws NullPointerException if {@code path} is null
+   */
+  public FileTreeManager(Path path, FileTreeConfig config) throws IOException {
+    LOGGER.debug("Create Manager in {} with {}", path, config);
+    this.path = Objects.requireNonNull(path).normalize();
+    this.config = config;
+    this.builder = new UnionFormatTree().create(config.getPrefix(), path, config);
+  }
+
+  /**
+   * Computes the column and key boundary of the data below {@code requirePrefix}.
+   *
+   * @return a boundary whose fields are left unset when the prefix does not match the configured
+   *     one, or when the target directory is missing or empty
+   */
+  @Override
+  public DataBoundary getBoundary(@Nullable String requirePrefix) throws IOException {
+    LOGGER.debug("Getting boundary for {} with prefix {}", path, requirePrefix);
+
+    DataBoundary boundary = new DataBoundary();
+
+    Map.Entry<String, Path> targetPrefixAndPath = getTargetPrefixAndPath(requirePrefix);
+    if (targetPrefixAndPath == null) {
+      return boundary;
+    }
+    String prefix = targetPrefixAndPath.getKey();
+    Path afterPrefix = targetPrefixAndPath.getValue();
+
+    Map.Entry<String, String> columnsInterval = getColumnsInterval(afterPrefix);
+    if (columnsInterval == null) {
+      return boundary;
+    }
+
+    boundary.setStartColumn(IginxPaths.join(prefix, columnsInterval.getKey()));
+    String endColumn = IginxPaths.join(prefix, columnsInterval.getValue());
+    // The joined end column is advanced once more here, on top of the advance already applied in
+    // getColumnsInterval; a null end column is passed through unchanged.
+    boundary.setEndColumn(endColumn == null ? null : StringUtils.nextString(endColumn));
+    boundary.setStartKey(KeyInterval.getDefaultKeyInterval().getStartKey());
+    boundary.setEndKey(KeyInterval.getDefaultKeyInterval().getEndKey());
+
+    return boundary;
+  }
+
+  /**
+   * Reconciles the configured prefix with the required one.
+   *
+   * @return the combined prefix paired with the path that the part of {@code requiredPrefix}
+   *     beyond the common prefix resolves to, or {@code null} when neither prefix is a prefix of
+   *     the other
+   */
+  @Nullable
+  private Map.Entry<String, Path> getTargetPrefixAndPath(@Nullable String requiredPrefix) {
+    String embeddedStringPrefix = IginxPaths.toStringPrefix(config.getPrefix());
+    String requiredStringPrefix = IginxPaths.toStringPrefix(requiredPrefix);
+    String commonStringPrefix = Strings.commonPrefix(embeddedStringPrefix, requiredStringPrefix);
+
+    if (commonStringPrefix.length()
+        < Math.min(embeddedStringPrefix.length(), requiredStringPrefix.length())) {
+      LOGGER.warn("Prefix mismatch: {} vs {}", embeddedStringPrefix, requiredStringPrefix);
+      return null;
+    }
+
+    String requiredStringPrefixWithoutCommon =
+        requiredStringPrefix.substring(commonStringPrefix.length());
+    String requiredPrefixWithoutCommon =
+        IginxPaths.fromStringPrefix(requiredStringPrefixWithoutCommon);
+
+    String targetPrefix =
+        IginxPaths.fromStringPrefix(embeddedStringPrefix + requiredStringPrefixWithoutCommon);
+    Path afterPrefix =
+        path.resolve(
+            IginxPaths.toFilePath(
+                requiredPrefixWithoutCommon, config.getDot(), path.getFileSystem()));
+
+    return new AbstractMap.SimpleImmutableEntry<>(targetPrefix, afterPrefix);
+  }
+
+  /**
+   * Finds the smallest and largest child name of {@code path}.
+   *
+   * @return {@code (null, null)} for a regular file; {@code (min, nextString(max))} for a
+   *     directory with children; {@code null} when the directory is empty or does not exist
+   */
+  @Nullable
+  private Map.Entry<String, String> getColumnsInterval(Path path) throws IOException {
+    if (Files.isRegularFile(path)) {
+      LOGGER.info("Path is a file: {}", path);
+      return new AbstractMap.SimpleImmutableEntry<>(null, null);
+    }
+
+    String minChild = null;
+    String maxChild = null;
+    // One directory listing yields both extremes; listing twice costs an extra scan and can see
+    // two different snapshots of the directory.
+    try (Stream<Path> children = Files.list(path)) {
+      Iterator<String> names =
+          children.map(Path::getFileName).map(p -> IginxPaths.get(p, config.getDot())).iterator();
+      while (names.hasNext()) {
+        String name = names.next();
+        if (minChild == null || name.compareTo(minChild) < 0) {
+          minChild = name;
+        }
+        if (maxChild == null || name.compareTo(maxChild) > 0) {
+          maxChild = name;
+        }
+      }
+    } catch (NoSuchFileException e) {
+      LOGGER.warn("Directory does not exist: {}", path, e);
+      return null;
+    }
+
+    if (minChild == null || maxChild == null) {
+      return null;
+    }
+
+    LOGGER.debug("Start column: {}", minChild);
+    LOGGER.debug("End column: {}", maxChild);
+    return new AbstractMap.SimpleImmutableEntry<>(minChild, StringUtils.nextString(maxChild));
+  }
+
+  /**
+   * Runs {@code target} against the tree and unions the resulting streams.
+   *
+   * @throws UnsupportedOperationException if {@code aggregate} is not null
+   * @throws IOException if building, querying or the union fails; on a failed union the streams
+   *     already produced are closed and their close failures attached as suppressed
+   */
+  @Override
+  public RowStream query(DataTarget target, @Nullable AggregateType aggregate) throws IOException {
+    if (aggregate != null) {
+      throw new UnsupportedOperationException("Aggregate not supported");
+    }
+    LOGGER.debug("Querying {} ", target);
+    try (Querier querier = builder.build(target)) {
+      LOGGER.debug("Querier is built as: {}", querier);
+      List<RowStream> streams = querier.query();
+      try {
+        return RowStreams.union(streams);
+      } catch (PhysicalException e) {
+        IOException failure = new IOException(e);
+        closeAfterFailure(streams, failure);
+        throw failure;
+      }
+    }
+  }
+
+  /** Closes every stream, recording close errors on {@code failure} instead of masking it. */
+  private static void closeAfterFailure(List<RowStream> streams, Throwable failure) {
+    for (RowStream stream : streams) {
+      try {
+        stream.close();
+      } catch (Exception closeError) {
+        // Deliberately broad: cleanup must never replace the original failure.
+        failure.addSuppressed(closeError);
+      }
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    builder.close();
+  }
+
+  @Override
+  public void delete(DataTarget target) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void insert(DataView data) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/AbstractQuerier.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/AbstractQuerier.java new file mode 100644 index
0000000000..4c5beb3979 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/AbstractQuerier.java @@ -0,0 +1,45 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */
+package cn.edu.tsinghua.iginx.filestore.struct.tree.query;
+
+import cn.edu.tsinghua.iginx.filestore.struct.DataTarget;
+import java.nio.file.Path;
+import lombok.Getter;
+
+/**
+ * Base class for queriers that carries the path, prefix and target a querier was built for.
+ *
+ * <p>All three values may be null; the no-argument constructor sets them all to null.
+ */
+@Getter
+public abstract class AbstractQuerier implements Querier {
+
+  private final Path path;
+  private final String prefix;
+  private final DataTarget target;
+
+  /** Creates a querier that is not bound to any path, prefix or target. */
+  protected AbstractQuerier() {
+    this(null, null, null);
+  }
+
+  protected AbstractQuerier(Path path, String prefix, DataTarget target) {
+    this.target = target;
+    this.prefix = prefix;
+    this.path = path;
+  }
+
+  /** Renders as {@code SimpleClassName://path?prefix=...&target=...}. */
+  @Override
+  public String toString() {
+    return String.format(
+        "%s://%s?prefix=%s&target=%s", getClass().getSimpleName(), path, prefix, target);
+  }
+}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/Querier.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/Querier.java new file mode 100644 index 0000000000..52f130e8c7 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/Querier.java @@ -0,0 +1,40 @@ +/* + *
IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */
+package cn.edu.tsinghua.iginx.filestore.struct.tree.query;
+
+import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream;
+import cn.edu.tsinghua.iginx.filestore.struct.DataTarget;
+import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTreeConfig;
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.List;
+import javax.annotation.Nullable;
+
+/** A closeable query that yields row streams. */
+public interface Querier extends Closeable {
+
+  /**
+   * Executes the query.
+   *
+   * @return the resulting row streams; may be empty
+   * @throws IOException if the query fails
+   */
+  List<RowStream> query() throws IOException;
+
+  /** Builds queriers for individual targets; closeable like the queriers it creates. */
+  interface Builder extends Closeable {
+
+    /**
+     * Creates a querier for {@code parentTarget}.
+     *
+     * @throws IOException if the querier cannot be built
+     */
+    Querier build(DataTarget parentTarget) throws IOException;
+
+    /** Creates builders for a path, an optional prefix and a tree configuration. */
+    interface Factory {
+
+      /**
+       * @param prefix column prefix for {@code path}; may be null
+       * @param path location the builder is created for
+       * @param config tree configuration passed to the builder
+       * @throws IOException if the builder cannot be created
+       */
+      Builder create(@Nullable String prefix, Path path, FileTreeConfig config) throws IOException;
+    }
+  }
+}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/Queriers.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/Queriers.java new file mode 100644 index 0000000000..3e5be1d46a --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/Queriers.java @@ -0,0 +1,127 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it
and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */
+package cn.edu.tsinghua.iginx.filestore.struct.tree.query;
+
+import cn.edu.tsinghua.iginx.engine.physical.exception.PhysicalException;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Filter;
+import cn.edu.tsinghua.iginx.filestore.common.Closeables;
+import cn.edu.tsinghua.iginx.filestore.common.Filters;
+import cn.edu.tsinghua.iginx.filestore.common.RowStreams;
+import cn.edu.tsinghua.iginx.filestore.common.Strings;
+import com.google.common.collect.Iterables;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/** Static factories for the basic {@link Querier} decorators: empty, filtered and union. */
+public class Queriers {
+  private Queriers() {}
+
+  /** Querier that yields no streams and holds no resources. */
+  static class EmptyQuerier extends AbstractQuerier {
+    @Override
+    public void close() {}
+
+    @Override
+    public String toString() {
+      return "EmptyQuerier{}";
+    }
+
+    @Override
+    public List<RowStream> query() throws IOException {
+      return Collections.emptyList();
+    }
+  }
+
+  private static final Querier EMPTY_QUERIER = new EmptyQuerier();
+
+  /** Returns the shared empty querier. */
+  public static Querier empty() {
+    return EMPTY_QUERIER;
+  }
+
+  /** Applies a filter to the single stream produced by the union of the wrapped querier. */
+  static class FilteredQuerier extends AbstractQuerier {
+    private final Querier querier;
+    private final Filter filter;
+
+    FilteredQuerier(Querier querier, Filter filter) {
+      // Wrapping in a union guarantees query() below sees exactly one stream.
+      this.querier = union(querier);
+      this.filter = Objects.requireNonNull(filter);
+    }
+
+    @Override
+    public void close() throws IOException {
+      querier.close();
+    }
+
+    @Override
+    public String toString() {
+      return super.toString()
+          + "&filter="
+          + filter
+          + "&querier="
+          + Strings.shiftWithNewline(querier.toString());
+    }
+
+    @Override
+    public List<RowStream> query() throws IOException {
+      List<RowStream> rowStreams = querier.query();
+      assert rowStreams.size() == 1;
+      return Collections.singletonList(RowStreams.filtered(rowStreams.get(0), filter));
+    }
+  }
+
+  /**
+   * Wraps {@code querier} so its result is filtered by {@code filter}.
+   *
+   * @return {@code querier} itself when {@link Filters#isTrue} accepts the filter, otherwise a
+   *     filtering decorator
+   */
+  public static Querier filtered(Querier querier, Filter filter) {
+    if (Filters.isTrue(filter)) {
+      return querier;
+    }
+    return new FilteredQuerier(querier, filter);
+  }
+
+  /**
+   * Merges all streams of the wrapped querier into one.
+   *
+   * <p>Extends {@link AbstractQuerier} like its siblings so that {@code super.toString()} in
+   * {@link #toString()} produces the same descriptor layout instead of the identity string of
+   * {@link Object#toString()}.
+   */
+  static class UnionQuerier extends AbstractQuerier {
+    private final Querier querier;
+
+    UnionQuerier(Querier querier) {
+      this.querier = Objects.requireNonNull(querier);
+    }
+
+    @Override
+    public void close() throws IOException {
+      querier.close();
+    }
+
+    @Override
+    public String toString() {
+      return super.toString() + "&querier=" + Strings.shiftWithNewline(querier.toString());
+    }
+
+    @Override
+    public List<RowStream> query() throws IOException {
+      List<RowStream> rowStreams = querier.query();
+      try {
+        return Collections.singletonList(RowStreams.union(rowStreams));
+      } catch (PhysicalException e) {
+        // The union was not created, so the source streams are released here.
+        Closeables.close(Iterables.transform(rowStreams, Closeables::closeAsIOException));
+        throw new IOException(e);
+      }
+    }
+  }
+
+  /** Wraps {@code querier} so that its streams are merged into a single stream. */
+  public static Querier union(Querier querier) {
+    return new UnionQuerier(querier);
+  }
+}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/FormatQuerier.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/FormatQuerier.java new file mode 100644 index 0000000000..fd01fb6b80 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/FormatQuerier.java @@ -0,0 +1,95 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it
under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */
+package cn.edu.tsinghua.iginx.filestore.struct.tree.query.ftj;
+
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Field;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Header;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.BoolFilter;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Filter;
+import cn.edu.tsinghua.iginx.filestore.common.Filters;
+import cn.edu.tsinghua.iginx.filestore.common.Patterns;
+import cn.edu.tsinghua.iginx.filestore.common.RowStreams;
+import cn.edu.tsinghua.iginx.filestore.common.Strings;
+import cn.edu.tsinghua.iginx.filestore.format.FileFormat;
+import cn.edu.tsinghua.iginx.filestore.struct.DataTarget;
+import cn.edu.tsinghua.iginx.filestore.struct.tree.query.AbstractQuerier;
+import cn.edu.tsinghua.iginx.thrift.DataType;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.*;
+import javax.annotation.Nullable;
+
+/** Querier that answers one {@link DataTarget} from a single {@link FileFormat.Reader}. */
+class FormatQuerier extends AbstractQuerier {
+
+  private final FileFormat.Reader reader;
+  private final List<String> patterns;
+  private final Filter filter;
+
+  /**
+   * @param reader reader to query; must not be null. It is closed by {@link #close()}.
+   * @throws NullPointerException if {@code reader} is null
+   */
+  FormatQuerier(Path path, String prefix, DataTarget target, FileFormat.Reader reader) {
+    super(path, prefix, target);
+    this.reader = Objects.requireNonNull(reader);
+    this.patterns = Patterns.nonNull(target.getPatterns());
+    // A target filter that Filters.isTrue accepts is replaced by an explicit always-true filter.
+    this.filter = Filters.isTrue(target.getFilter()) ? new BoolFilter(true) : target.getFilter();
+  }
+
+  @Override
+  public void close() throws IOException {
+    reader.close();
+  }
+
+  @Override
+  public String toString() {
+    return super.toString() + "&reader=" + Strings.shiftWithNewline(reader.toString());
+  }
+
+  /** Returns the stream produced for the target, or an empty list when no column matches. */
+  @Override
+  public List<RowStream> query() throws IOException {
+    RowStream rowStream = doQuery();
+    if (rowStream == null) {
+      return Collections.emptyList();
+    } else {
+      return Collections.singletonList(rowStream);
+    }
+  }
+
+  /**
+   * Reads the columns matching the target patterns.
+   *
+   * @return {@code null} when the reader finds no matching column; an empty stream that still
+   *     carries the matching header when the filter is constantly false; otherwise the data
+   */
+  @Nullable
+  private RowStream doQuery() throws IOException {
+    Map<String, DataType> schema = reader.find(patterns);
+    if (schema.isEmpty()) {
+      return null;
+    }
+    if (Filters.isFalse(filter)) {
+      List<Field> fields = new ArrayList<>();
+      schema.forEach((name, type) -> fields.add(new Field(name, type)));
+      Header header = new Header(Field.KEY, fields);
+      return RowStreams.empty(header);
+    }
+    Set<String> fields = Filters.getPaths(filter);
+    if (fields.isEmpty()) {
+      // The filter references no column paths, so it is handed to the reader unchanged.
+      return reader.read(new ArrayList<>(schema.keySet()), filter);
+    }
+    // NOTE(review): presumably expands wildcard paths in the filter to the columns the reader
+    // actually has - confirm against Filters.matchWildcard.
+    Map<String, DataType> allSchema = reader.find(fields);
+    Filter superSetFilter = Filters.matchWildcard(filter, allSchema.keySet());
+    RowStream rowStream = reader.read(new ArrayList<>(schema.keySet()), superSetFilter);
+    if (Filters.equals(filter, superSetFilter)) {
+      return rowStream;
+    }
+    // The reader saw a different filter than requested, so the original is applied on top.
+    return RowStreams.filtered(rowStream, filter);
+  }
+}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/FormatQuerierBuilder.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/FormatQuerierBuilder.java new file mode 100644 index 0000000000..9613752701 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/FormatQuerierBuilder.java @@ -0,0 +1,51 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as
published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */
package cn.edu.tsinghua.iginx.filestore.struct.tree.query.ftj;

import cn.edu.tsinghua.iginx.filestore.format.FileFormat;
import cn.edu.tsinghua.iginx.filestore.struct.DataTarget;
import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier;
import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier.Builder;
import com.typesafe.config.Config;
import java.io.IOException;
import java.nio.file.Path;
import javax.annotation.Nullable;

/**
 * Builder that creates a {@link FormatQuerier} for one file, opening a fresh reader of the
 * configured {@link FileFormat} on every {@link #build(DataTarget)} call.
 */
class FormatQuerierBuilder implements Builder {

  private final String prefix;
  private final Path path;
  private final FileFormat format;
  private final Config config;

  /**
   * @param prefix column prefix of the file, may be null
   * @param path file the built queriers read
   * @param format format used to open readers
   * @param config format-specific configuration passed to the reader
   */
  FormatQuerierBuilder(@Nullable String prefix, Path path, FileFormat format, Config config) {
    this.prefix = prefix;
    this.path = path;
    this.format = format;
    this.config = config;
  }

  /** Does nothing: the builder itself holds no resource. */
  @Override
  public void close() throws IOException {}

  /**
   * Opens a reader on the file and wraps it in a querier for {@code subTarget}.
   *
   * @throws IOException if the format fails to open the reader
   */
  @Override
  public Querier build(DataTarget subTarget) throws IOException {
    return new FormatQuerier(path, prefix, subTarget, format.newReader(prefix, path, config));
  }
}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/FormatQuerierBuilderFactory.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/FormatQuerierBuilderFactory.java new file mode 100644 index 0000000000..a43c31ccf8 --- /dev/null +++
b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/FormatQuerierBuilderFactory.java @@ -0,0 +1,58 @@
/*
 * IGinX - the polystore system with high performance
 * Copyright (C) Tsinghua University
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */
package cn.edu.tsinghua.iginx.filestore.struct.tree.query.ftj;

import cn.edu.tsinghua.iginx.filestore.format.FileFormat;
import cn.edu.tsinghua.iginx.filestore.format.FileFormatManager;
import cn.edu.tsinghua.iginx.filestore.format.raw.RawFormat;
import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTreeConfig;
import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier.Builder;
import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier.Builder.Factory;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import java.nio.file.Path;
import javax.annotation.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Factory of {@link FormatQuerierBuilder}s that selects the {@link FileFormat} from the extension
 * of the file name.
 */
public class FormatQuerierBuilderFactory implements Factory {

  private static final Logger LOGGER = LoggerFactory.getLogger(FormatQuerierBuilderFactory.class);

  /**
   * Creates a builder for the file at {@code path}.
   *
   * <p>The format is looked up by file extension with {@link RawFormat#INSTANCE} passed as the
   * second argument of the lookup. The format's configuration is taken from {@code
   * config.getFormats()} under the format name, defaulting to an empty configuration.
   *
   * @param prefix column prefix of the file, may be null
   * @param path file to build queriers for
   * @param config file-tree configuration holding the per-format settings
   */
  @Override
  public Builder create(@Nullable String prefix, Path path, FileTreeConfig config) {
    String extension = getExtension(path);
    FileFormat format =
        FileFormatManager.getInstance().getByExtension(extension, RawFormat.INSTANCE);
    Config configForFormat =
        config.getFormats().getOrDefault(format.getName(), ConfigFactory.empty());
    LOGGER.debug("create {} querier for {} at '{}' with {}", format, path, prefix, configForFormat);
    return new FormatQuerierBuilder(prefix, path, format, configForFormat);
  }

  /**
   * Returns the text after the last {@code '.'} of the file name, or {@code null} when the name
   * contains no dot or the path has no file-name element (e.g. a root path).
   */
  @Nullable
  private static String getExtension(Path path) {
    // Path.getFileName() is null for paths without name elements; treat that as "no extension".
    Path fileNamePath = path.getFileName();
    if (fileNamePath == null) {
      return null;
    }
    String fileName = fileNamePath.toString();
    int dotIndex = fileName.lastIndexOf('.');
    if (dotIndex == -1) {
      return null;
    }
    return fileName.substring(dotIndex + 1);
  }
}
diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionDirectoryQuerier.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionDirectoryQuerier.java new file mode 100644 index 0000000000..6af7e0d884 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionDirectoryQuerier.java @@ -0,0 +1,65 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see .
+ */ +package cn.edu.tsinghua.iginx.filestore.struct.tree.query.ftj; + +import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream; +import cn.edu.tsinghua.iginx.filestore.common.Closeables; +import cn.edu.tsinghua.iginx.filestore.common.Strings; +import cn.edu.tsinghua.iginx.filestore.struct.DataTarget; +import cn.edu.tsinghua.iginx.filestore.struct.tree.query.AbstractQuerier; +import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +class UnionDirectoryQuerier extends AbstractQuerier { + + private final List queriers; + + UnionDirectoryQuerier(Path path, String prefix, DataTarget target, List subQueriers) { + super(path, prefix, target); + queriers = Objects.requireNonNull(subQueriers); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(super.toString()).append("&queriers="); + for (Querier querier : queriers) { + sb.append(Strings.shiftWithNewline(querier.toString())); + } + return sb.toString(); + } + + @Override + public void close() throws IOException { + Closeables.close(queriers); + queriers.clear(); + } + + @Override + public List query() throws IOException { + List rowStreams = new ArrayList<>(); + for (Querier querier : queriers) { + rowStreams.addAll(querier.query()); + } + return rowStreams; + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionDirectoryQuerierBuilder.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionDirectoryQuerierBuilder.java new file mode 100644 index 0000000000..4820e96d4c --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionDirectoryQuerierBuilder.java @@ -0,0 +1,160 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua 
University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.struct.tree.query.ftj; + +import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Filter; +import cn.edu.tsinghua.iginx.filestore.common.Closeables; +import cn.edu.tsinghua.iginx.filestore.common.Filters; +import cn.edu.tsinghua.iginx.filestore.common.IginxPaths; +import cn.edu.tsinghua.iginx.filestore.common.Patterns; +import cn.edu.tsinghua.iginx.filestore.struct.DataTarget; +import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTreeConfig; +import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier; +import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier.Builder; +import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Queriers; +import java.io.IOException; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class UnionDirectoryQuerierBuilder implements Builder { + + private static final Logger LOGGER = LoggerFactory.getLogger(UnionDirectoryQuerierBuilder.class); + + private final String prefix; + private final Path path; + private final Factory factory; + private final FileTreeConfig config; + + UnionDirectoryQuerierBuilder( + @Nullable String prefix, Path path, Factory factory, FileTreeConfig config) { + 
this.prefix = prefix; + this.path = path; + this.factory = factory; + this.config = config; + } + + @Override + public void close() throws IOException {} + + @Override + public String toString() { + return "TreeJoinQuerierBuilder{" + + "prefix='" + + prefix + + '\'' + + ", path=" + + path + + ", factory=" + + factory + + ", config=" + + config + + '}'; + } + + @Override + public Querier build(DataTarget target) throws IOException { + LOGGER.debug("{} enter {} at '{}'", target, path, prefix); + + Map matchedChildren = matchedChildren(target); + + boolean needPostFilter = false; + List subQueriers = new ArrayList<>(); + try { + for (Map.Entry entry : matchedChildren.entrySet()) { + String subPrefix = entry.getKey(); + Path subPath = entry.getValue(); + + DataTarget subTarget = extractTarget(target, subPrefix); + try (Builder subBuilder = factory.create(subPrefix, subPath, config)) { + Querier subQuerier = subBuilder.build(subTarget); + subQueriers.add(subQuerier); + } + if (!Filters.match(target.getFilter(), Filters.startWith(subPrefix))) { + needPostFilter = true; + } + } + } catch (IOException e) { + Closeables.close(subQueriers); + throw e; + } + + UnionDirectoryQuerier unionDirectoryQuerier = + new UnionDirectoryQuerier(path, prefix, target, subQueriers); + if (!needPostFilter) { + return unionDirectoryQuerier; + } + LOGGER.debug("set post filter for {}", target); + return Queriers.filtered(unionDirectoryQuerier, target.getFilter()); + } + + private DataTarget extractTarget(DataTarget target, String subPrefix) { + List subPatterns = Patterns.filterByPrefix(target.getPatterns(), subPrefix); + Filter subFilter = Filters.superSet(target.getFilter(), Filters.startWith(subPrefix)); + return target.withPatterns(subPatterns).withFilter(subFilter); + } + + private String subPrefix(String prefix, Path subpath) { + return IginxPaths.join(prefix, IginxPaths.get(subpath.getFileName(), config.getDot())); + } + + private Map matchedChildren(DataTarget target) throws 
IOException { + HashMap matchedChildren = new LinkedHashMap<>(); + for (String pattern : Patterns.nullToAll(target.getPatterns())) { + if (!Patterns.startsWith(pattern, prefix)) { + continue; + } + String patternSuffix = Patterns.suffix(pattern, prefix); + String[] subPatterns = IginxPaths.split(patternSuffix); + if (subPatterns.length == 0) { + continue; + } + + String nextPatternNode = subPatterns[0]; + if (Patterns.isWildcard(nextPatternNode)) { + return allChildren(); + } + + Path relativePath = + IginxPaths.toFilePath(nextPatternNode, config.getDot(), path.getFileSystem()); + Path subpath = path.resolve(relativePath); + if (!Files.exists(subpath)) { + continue; + } + + String subPrefix = subPrefix(prefix, subpath); + matchedChildren.put(subPrefix, subpath); + } + return matchedChildren; + } + + private Map allChildren() throws IOException { + HashMap matchedChildren = new LinkedHashMap<>(); + try (DirectoryStream children = Files.newDirectoryStream(path)) { + for (Path child : children) { + String subPrefix = subPrefix(prefix, child); + matchedChildren.put(subPrefix, child); + } + } + return matchedChildren; + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionDirectoryQuerierBuilderFactory.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionDirectoryQuerierBuilderFactory.java new file mode 100644 index 0000000000..98ec03c88d --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionDirectoryQuerierBuilderFactory.java @@ -0,0 +1,48 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package cn.edu.tsinghua.iginx.filestore.struct.tree.query.ftj; + +import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTreeConfig; +import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier.Builder; +import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier.Builder.Factory; +import java.nio.file.Path; +import java.util.Objects; +import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class UnionDirectoryQuerierBuilderFactory implements Factory { + + private static final Logger LOGGER = + LoggerFactory.getLogger(UnionDirectoryQuerierBuilderFactory.class); + + private final Factory factory; + + public UnionDirectoryQuerierBuilderFactory(Factory factory) { + this.factory = Objects.requireNonNull(factory); + if (factory == this) { + throw new IllegalArgumentException("Factory cannot be itself"); + } + } + + @Override + public Builder create(@Nullable String prefix, Path path, FileTreeConfig config) { + LOGGER.debug("create tree join querier for {} at '{}' with {}", path, prefix, config); + return new UnionDirectoryQuerierBuilder(prefix, path, factory, config); + } +} diff --git a/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionFormatTree.java b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionFormatTree.java new file mode 100644 index 0000000000..5f76bf38c1 --- /dev/null +++ b/dataSource/filestore/src/main/java/cn/edu/tsinghua/iginx/filestore/struct/tree/query/ftj/UnionFormatTree.java @@ -0,0 +1,46 @@ +/* + * IGinX - the polystore system with high 
performance
 * Copyright (C) Tsinghua University
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */
package cn.edu.tsinghua.iginx.filestore.struct.tree.query.ftj;

import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTreeConfig;
import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier.Builder;
import cn.edu.tsinghua.iginx.filestore.struct.tree.query.Querier.Builder.Factory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import javax.annotation.Nullable;

/**
 * Root factory of the querier tree: directories are handled by a {@link
 * UnionDirectoryQuerierBuilderFactory} that recurses back into this factory, regular files by a
 * {@link FormatQuerierBuilderFactory}.
 */
public class UnionFormatTree implements Factory {

  private final Factory fileFactory = new FormatQuerierBuilderFactory();
  private final Factory directoryFactory = new UnionDirectoryQuerierBuilderFactory(this);

  /**
   * Dispatches on the kind of file found at {@code path}.
   *
   * @throws IOException if nothing exists at {@code path}
   * @throws IllegalArgumentException if the path exists but is neither a directory nor a regular
   *     file
   */
  @Override
  public Builder create(@Nullable String prefix, Path path, FileTreeConfig config)
      throws IOException {
    if (Files.isDirectory(path)) {
      return directoryFactory.create(prefix, path, config);
    }
    if (Files.isRegularFile(path)) {
      return fileFactory.create(prefix, path, config);
    }
    if (Files.exists(path)) {
      throw new IllegalArgumentException("Unsupported file type: " + path);
    }
    throw new IOException("file does not exist: " + path);
  }
}
diff --git a/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/common/FiltersTest.java b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/common/FiltersTest.java
new file mode 100644 index 0000000000..434ca38096 --- /dev/null +++ b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/common/FiltersTest.java @@ -0,0 +1,43 @@
/*
 * IGinX - the polystore system with high performance
 * Copyright (C) Tsinghua University
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */
package cn.edu.tsinghua.iginx.filestore.common;

import cn.edu.tsinghua.iginx.engine.shared.data.Value;
import cn.edu.tsinghua.iginx.engine.shared.operator.filter.*;
import java.util.Arrays;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/** Unit tests for {@link Filters}. */
class FiltersTest {

  /**
   * For {@code (key >= 1 OR key <= 3) AND (name == "Alice" OR age > 18)}, the super set computed
   * with {@code Filters.nonKeyFilter()} is expected to be the key-only disjunction.
   */
  @Test
  void testSuperSet() {
    Filter valueCondition =
        new OrFilter(
            Arrays.asList(
                new ValueFilter("name", Op.E, new Value("Alice")),
                new ValueFilter("age", Op.G, new Value(18))));
    AndFilter conjunction = new AndFilter(Arrays.asList(keyCondition(), valueCondition));

    Filter actual = Filters.superSet(conjunction, Filters.nonKeyFilter());

    // A separately built instance, so the comparison relies on equals() rather than identity.
    Assertions.assertEquals(keyCondition(), actual);
  }

  /** Builds a fresh {@code key >= 1 OR key <= 3} filter. */
  private static Filter keyCondition() {
    return new OrFilter(Arrays.asList(new KeyFilter(Op.GE, 1), new KeyFilter(Op.LE, 3)));
  }
}
diff --git a/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetTestUtils.java b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetTestUtils.java new file mode 100644 index
0000000000..a7bfd6f848 --- /dev/null +++ b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/format/parquet/ParquetTestUtils.java @@ -0,0 +1,41 @@
/*
 * IGinX - the polystore system with high performance
 * Copyright (C) Tsinghua University
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */
package cn.edu.tsinghua.iginx.filestore.format.parquet;

import cn.edu.tsinghua.iginx.engine.physical.memory.execute.Table;
import cn.edu.tsinghua.iginx.engine.shared.data.read.Row;
import cn.edu.tsinghua.iginx.filestore.test.DataValidator;
import com.google.common.io.MoreFiles;
import java.io.IOException;
import java.nio.file.Path;
import shaded.iginx.org.apache.parquet.schema.MessageType;

/** Test helpers for producing Parquet fixture files. */
public class ParquetTestUtils {
  /**
   * Writes {@code table} as a Parquet file at {@code path}, creating missing parent directories.
   *
   * <p>The Parquet schema is derived from the table header; every row goes through {@code
   * DataValidator.withStringAsBinary} before being converted to a record and written.
   *
   * @throws IOException if the directories or the file cannot be written
   */
  public static void createFile(Path path, Table table) throws IOException {
    MoreFiles.createParentDirectories(path);
    MessageType schema = ProjectUtils.toMessageType(table.getHeader());
    try (IParquetWriter writer = IParquetWriter.builder(path, schema).build()) {
      for (Row row : table.getRows()) {
        writer.write(ProjectUtils.toRecord(DataValidator.withStringAsBinary(row)));
      }
    }
  }
}
diff --git a/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/service/storage/AbstractDummyTest.java
b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/service/storage/AbstractDummyTest.java new file mode 100644 index 0000000000..647241e593 --- /dev/null +++ b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/service/storage/AbstractDummyTest.java @@ -0,0 +1,134 @@
/*
 * IGinX - the polystore system with high performance
 * Copyright (C) Tsinghua University
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */
package cn.edu.tsinghua.iginx.filestore.service.storage;

import static org.junit.jupiter.api.Assertions.assertEquals;

import cn.edu.tsinghua.iginx.engine.physical.exception.PhysicalException;
import cn.edu.tsinghua.iginx.engine.shared.data.read.Header;
import cn.edu.tsinghua.iginx.engine.shared.data.read.Row;
import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream;
import cn.edu.tsinghua.iginx.engine.shared.operator.filter.BoolFilter;
import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Filter;
import cn.edu.tsinghua.iginx.filestore.common.FileStoreException;
import cn.edu.tsinghua.iginx.filestore.service.Service;
import cn.edu.tsinghua.iginx.filestore.struct.DataTarget;
import cn.edu.tsinghua.iginx.filestore.test.DataValidator;
import cn.edu.tsinghua.iginx.filestore.thrift.DataBoundary;
import cn.edu.tsinghua.iginx.filestore.thrift.DataUnit;
import cn.edu.tsinghua.iginx.metadata.entity.ColumnsInterval;
import com.google.common.io.MoreFiles;
import com.google.common.io.RecursiveDeleteOption;
import com.typesafe.config.Config;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import javax.annotation.Nullable;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for tests that run a {@link StorageService} over a throw-away root located under
 * {@code target/test/<random uuid>/}.
 *
 * <p>Subclasses populate {@link #root}, call {@link #reset()} to start the service, and then use
 * the helpers here to inspect boundaries, schemas and rows. The root is deleted after each test.
 */
public abstract class AbstractDummyTest {

  private static final Logger LOGGER = LoggerFactory.getLogger(AbstractDummyTest.class);

  protected final Path root;
  private final StorageConfig config;
  protected final DataUnit unit = new DataUnit(true);

  /**
   * @param type storage structure type passed to {@link StorageConfig}
   * @param config structure configuration passed to {@link StorageConfig}
   * @param rootFileName name of the root file or directory inside the per-test directory
   */
  protected AbstractDummyTest(String type, Config config, String rootFileName) {
    this.root = Paths.get("target", "test", UUID.randomUUID().toString(), rootFileName);
    this.config = new StorageConfig(root.toString(), type, config);
  }

  // Created by reset(); stays null until a test calls it.
  protected Service service;

  /**
   * Fetches the units for {@code prefix}, asserts that {@link #unit} is the only one, and returns
   * its boundary.
   */
  protected DataBoundary getBoundary(@Nullable String prefix) throws FileStoreException {
    Map<DataUnit, DataBoundary> units = service.getUnits(prefix);
    LOGGER.info("units: {}", units);
    // JUnit's contract is (expected, actual); the reverse order yields a misleading message.
    assertEquals(Collections.singleton(unit), units.keySet());
    DataBoundary boundary = units.get(unit);
    LOGGER.info("boundary of dummy data: {}", boundary);
    return boundary;
  }

  /** Varargs convenience for {@link #getSchema(List)}. */
  protected Header getSchema(String... pattern) throws PhysicalException {
    return getSchema(Arrays.asList(pattern));
  }

  /**
   * Returns the sorted header of a query for {@code pattern}. The query uses a constant-false
   * filter and only its header is read.
   */
  protected Header getSchema(List<String> pattern) throws PhysicalException {
    DataTarget target = new DataTarget(new BoolFilter(false), pattern, null);
    LOGGER.info("get schema with {}", pattern);
    try (RowStream stream = service.query(unit, target, null)) {
      Header header = stream.getHeader();
      Header sorted = DataValidator.sort(header);
      LOGGER.info("header with pattern {}: {}", pattern, sorted);
      return sorted;
    }
  }

  /** Queries {@code pattern} with a null filter. */
  protected List<Row> query(List<String> pattern) throws PhysicalException {
    return query(pattern, null);
  }

  /** Queries {@code pattern} with {@code filter} and returns the normalized rows. */
  protected List<Row> query(List<String> pattern, Filter filter) throws PhysicalException {
    DataTarget target = new DataTarget(filter, pattern, null);
    LOGGER.info("query with {} and {}", pattern, filter);
    try (RowStream stream = service.query(unit, target, null)) {
      List<Row> rows = DataValidator.toList(stream);
      List<Row> normalized = DataValidator.normalize(rows);
      LOGGER.info("rows with pattern {} and filter {}: {}", pattern, filter, normalized);
      return normalized;
    }
  }

  /** Tells whether {@code boundary} equals a default-constructed {@link DataBoundary}. */
  protected static boolean isEmpty(DataBoundary boundary) {
    return Objects.equals(new DataBoundary(), boundary);
  }

  /**
   * Tells whether {@code prefix} lies inside the column interval of {@code boundary}; an empty
   * boundary contains nothing.
   */
  protected static boolean inBounds(DataBoundary boundary, String prefix) {
    if (isEmpty(boundary)) {
      return false;
    }
    ColumnsInterval columnsInterval =
        new ColumnsInterval(boundary.getStartColumn(), boundary.getEndColumn());
    return columnsInterval.isContain(prefix);
  }

  /** Creates the parent directory of {@link #root}; the root itself is left to the test. */
  @BeforeEach
  public void setUp() throws IOException {
    MoreFiles.createParentDirectories(root);
  }

  /** (Re)creates the service over the current content of {@link #root}. */
  protected void reset() throws IOException, FileStoreException {
    service = new StorageService(null, config);
  }

  /** Closes the service, if any, and deletes {@link #root} recursively. */
  @AfterEach
  public void tearDown() throws IOException, FileStoreException {
    if (service != null) {
      service.close();
    }
    if (Files.exists(root)) {
      MoreFiles.deleteRecursively(root, RecursiveDeleteOption.ALLOW_INSECURE);
    }
  }
}
diff --git a/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/service/storage/FileTreeDummyTest.java b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/service/storage/FileTreeDummyTest.java new file mode 100644 index 0000000000..2f04130cd2 --- /dev/null +++ b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/service/storage/FileTreeDummyTest.java @@ -0,0 +1,439 @@ +/* + * IGinX - the polystore system with high performance + * Copyright (C) Tsinghua University + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see .
+ */
+package cn.edu.tsinghua.iginx.filestore.service.storage;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import cn.edu.tsinghua.iginx.engine.physical.exception.PhysicalException;
+import cn.edu.tsinghua.iginx.engine.shared.data.Value;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Field;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Header;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Row;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.AndFilter;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.KeyFilter;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Op;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.ValueFilter;
+import cn.edu.tsinghua.iginx.filestore.common.Configs;
+import cn.edu.tsinghua.iginx.filestore.format.raw.RawFormat;
+import cn.edu.tsinghua.iginx.filestore.format.raw.RawReaderConfig;
+import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTree;
+import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTreeConfig;
+import cn.edu.tsinghua.iginx.filestore.test.RowsBuilder;
+import cn.edu.tsinghua.iginx.filestore.thrift.DataBoundary;
+import cn.edu.tsinghua.iginx.thrift.DataType;
+import com.google.common.io.MoreFiles;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.*;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+/**
+ * Tests boundary detection, schema discovery and queries over raw files exposed through the {@link
+ * FileTree} structure under the column prefix {@link #DIR_NAME}.
+ *
+ * <p>The raw reader page size is set to 8, which is why every expected row carries at most eight
+ * bytes of a file.
+ */
+public class FileTreeDummyTest extends AbstractDummyTest {
+
+  protected static final String DIR_NAME = "home";
+
+  public FileTreeDummyTest() {
+    super(FileTree.NAME, getConfig(), DIR_NAME);
+  }
+
+  /** Builds the structure config: raw page size 8 and {@link #DIR_NAME} as the prefix. */
+  private static Config getConfig() {
+    Map<String, Object> map = new HashMap<>();
+    Configs.put(
+        map, 8, FileTreeConfig.Fields.formats, RawFormat.NAME, RawReaderConfig.Fields.pageSize);
+    Configs.put(map, DIR_NAME, FileTreeConfig.Fields.prefix);
+    return ConfigFactory.parseMap(map);
+  }
+
+  /** An empty root directory yields an empty boundary, an empty schema and no rows. */
+  @Test
+  public void testEmptyDir() throws PhysicalException, IOException {
+    Files.createDirectories(root);
+    reset();
+
+    {
+      DataBoundary boundary = getBoundary(null);
+      assertEquals(new DataBoundary(), boundary);
+    }
+    {
+      DataBoundary boundary = getBoundary(DIR_NAME);
+      assertEquals(new DataBoundary(), boundary);
+    }
+    {
+      DataBoundary boundary = getBoundary("aaa");
+      assertEquals(new DataBoundary(), boundary);
+    }
+
+    {
+      List<String> patterns = null;
+      Header schema = getSchema(patterns);
+      assertEquals(new Header(Field.KEY, Collections.emptyList()), schema);
+      List<Row> rows = query(patterns);
+      assertEquals(Collections.emptyList(), rows);
+    }
+  }
+
+  /**
+   * Writes {@code content} to {@code path}, creating missing parent directories first.
+   *
+   * <p>The text is encoded as UTF-8 explicitly so that the bytes on disk, and therefore the
+   * page-sized chunks asserted by the tests, do not depend on the platform default charset.
+   */
+  private static void createFile(Path path, String content) throws IOException {
+    MoreFiles.createParentDirectories(path);
+    try (OutputStream out = Files.newOutputStream(path, StandardOpenOption.CREATE)) {
+      out.write(content.getBytes(StandardCharsets.UTF_8));
+    }
+  }
+
+  /** The root itself is a single raw file, exposed as the one column {@link #DIR_NAME}. */
+  @Test
+  public void testSingleFile() throws PhysicalException, IOException {
+    createFile(root, "first line: hello world");
+    reset();
+
+    {
+      DataBoundary boundary = getBoundary(null);
+      assertTrue(inBounds(boundary, DIR_NAME));
+    }
+
+    {
+      DataBoundary boundary = getBoundary(DIR_NAME);
+      assertTrue(inBounds(boundary, DIR_NAME));
+    }
+
+    {
+      DataBoundary boundary = getBoundary("error");
+      assertEquals(new DataBoundary(), boundary);
+    }
+
+    // Ask again: the same three answers are expected on a second round of calls.
+    {
+      DataBoundary boundary = getBoundary(null);
+      assertTrue(inBounds(boundary, DIR_NAME));
+    }
+
+    {
+      DataBoundary boundary = getBoundary(DIR_NAME);
+      assertTrue(inBounds(boundary, DIR_NAME));
+    }
+
+    {
+      DataBoundary boundary = getBoundary("error");
+      assertEquals(new DataBoundary(), boundary);
+    }
+
+    List<Row> expectedRows =
+        new RowsBuilder(DIR_NAME).add(0, "first li").add(1, "ne: hell").add(2, "o world").build();
+
+    {
+      List<String> patterns = null;
+      Header schema = getSchema(patterns);
+      Field field = new Field(DIR_NAME, DataType.BINARY);
+      assertEquals(new Header(Field.KEY, Collections.singletonList(field)), schema);
+      List<Row> rows = query(patterns);
+      assertEquals(expectedRows, rows);
+    }
+
+    {
+      List<String> patterns = Collections.singletonList(DIR_NAME);
+      Header schema = getSchema(patterns);
+      Field field = new Field(DIR_NAME, DataType.BINARY);
+      assertEquals(new Header(Field.KEY, Collections.singletonList(field)), schema);
+      List<Row> rows = query(patterns);
+      assertEquals(expectedRows, rows);
+    }
+
+    {
+      List<String> patterns = Collections.singletonList("error");
+      Header schema = getSchema(patterns);
+      assertEquals(new Header(Field.KEY, Collections.emptyList()), schema);
+      List<Row> rows = query(patterns);
+      assertEquals(Collections.emptyList(), rows);
+    }
+  }
+
+  /** Builds a keyed header whose columns are all {@link DataType#BINARY}, in the given order. */
+  private static Header headerOf(String... fields) {
+    List<Field> list = new ArrayList<>();
+    for (String field : fields) {
+      list.add(new Field(field, DataType.BINARY));
+    }
+    return new Header(Field.KEY, list);
+  }
+
+  /** Creates six small text files in a nested layout under the root, plus one empty directory. */
+  private void createNestedFiles() throws IOException {
+    createFile(root.resolve("LICENSE"), "Apache License");
+    createFile(root.resolve("README.md"), "this directory is for test");
+    createFile(root.resolve("src/main/java/Main.java"), "public class Main {\n}");
+    createFile(root.resolve("src/main/java/Tool.java"), "public class Tool {\n}");
+    createFile(root.resolve("src/main/resources/config.properties"), "ip=127.0.0.1\nport=6667");
+    createFile(root.resolve("src/test/java/Test.java"), "public class Test {\n}");
+    Files.createDirectories(root.resolve("src/main/thrift"));
+  }
+
+  /** Without a prefix the boundary covers every file of the nested layout. */
+  @Test
+  public void testNullPrefix() throws PhysicalException, IOException {
+    createNestedFiles();
+    reset();
+
+    DataBoundary boundary = getBoundary(null);
+    assertTrue(inBounds(boundary, "home.LICENSE"));
+    assertTrue(inBounds(boundary, "home.README\\md"));
+    assertTrue(inBounds(boundary, "home.src.main.java.Main\\java"));
+    assertTrue(inBounds(boundary, "home.src.main.java.Tool\\java"));
+    assertTrue(inBounds(boundary, "home.src.main.resources.config\\properties"));
+    assertTrue(inBounds(boundary, "home.src.test.java.Test\\java"));
+  }
+
+  /** With the root prefix the boundary covers every file of the nested layout. */
+  @Test
+  public void testSinglePrefix() throws PhysicalException, IOException {
+    createNestedFiles();
+    reset();
+
+    DataBoundary boundary = getBoundary(DIR_NAME);
+    assertTrue(inBounds(boundary, "home.LICENSE"));
+    assertTrue(inBounds(boundary, "home.README\\md"));
+    assertTrue(inBounds(boundary, "home.src.main.java.Main\\java"));
+    assertTrue(inBounds(boundary, "home.src.main.java.Tool\\java"));
+    assertTrue(inBounds(boundary, "home.src.main.resources.config\\properties"));
+    assertTrue(inBounds(boundary, "home.src.test.java.Test\\java"));
+  }
+
+  /** With a nested prefix the boundary covers only the files below that directory. */
+  @Test
+  public void testNestedPrefix() throws PhysicalException, IOException {
+    createNestedFiles();
+    reset();
+
+    DataBoundary boundary = getBoundary(DIR_NAME + ".src.main.java");
+    assertFalse(inBounds(boundary, "home.LICENSE"));
+    assertFalse(inBounds(boundary, "home.README\\md"));
+    assertTrue(inBounds(boundary, "home.src.main.java.Main\\java"));
+    assertTrue(inBounds(boundary, "home.src.main.java.Tool\\java"));
+    assertFalse(inBounds(boundary, "home.src.main.resources.config\\properties"));
+    assertFalse(inBounds(boundary, "home.src.test.java.Test\\java"));
+  }
+
+  /** A prefix that names an empty directory yields an empty boundary. */
+  @Test
+  public void testEmptyDirPrefix() throws PhysicalException, IOException {
+    createNestedFiles();
+    reset();
+
+    DataBoundary boundary = getBoundary("home.src.main.thrift");
+    assertEquals(new DataBoundary(), boundary);
+  }
+
+  /** A prefix that matches nothing yields an empty boundary. */
+  @Test
+  public void testNotExistedPrefix() throws PhysicalException, IOException {
+    createNestedFiles();
+    reset();
+
+    DataBoundary boundary = getBoundary("error");
+    assertEquals(new DataBoundary(), boundary);
+  }
+
+  /** File names with lower-case and upper-case initials are all inside the boundary. */
+  @Test
+  public void testUpperCasePrefix() throws PhysicalException, IOException {
+    createFile(root.resolve("async.hpp"), "#include ");
+    createFile(root.resolve("LICENSE"), "Apache License");
+    createFile(root.resolve("readme.md"), "this directory is for test");
+    reset();
+    {
+      DataBoundary boundary = getBoundary(null);
+      assertTrue(inBounds(boundary, "home.async\\hpp"));
+      assertTrue(inBounds(boundary, "home.LICENSE"));
+      assertTrue(inBounds(boundary, "home.readme\\md"));
+    }
+  }
+
+  /** Patterns that match no file, including malformed ones, give an empty schema and no rows. */
+  @ParameterizedTest
+  @ValueSource(
+      strings = {
+        "home.target.flattened",
+        "home.target.*",
+        "home.target.*.flattened",
+        "homee.*",
+        "temp.flattened",
+        "temp.flattened.*",
+        "temp.*.flattened",
+        "temp.*.flattened.*.flattened",
+        "",
+        "home.src..*",
+        "..home.src.*",
+        "home.src.*.."
+      })
+  public void testQueryNotExisted(String pattern) throws PhysicalException, IOException {
+    createNestedFiles();
+    reset();
+
+    List<String> patterns = Collections.singletonList(pattern);
+    Header schema = getSchema(patterns);
+    assertEquals(new Header(Field.KEY, Collections.emptyList()), schema);
+    List<Row> rows = query(patterns);
+    assertEquals(Collections.emptyList(), rows);
+  }
+
+  /** Exact and wildcard patterns that select only {@code Main.java} return the same column. */
+  @Test
+  public void testQueryOneColumn() throws PhysicalException, IOException {
+    createNestedFiles();
+    reset();
+
+    Header justMainHeader = getSchema("home.src.main.java.Main\\java");
+    List<Row> justMainData =
+        new RowsBuilder("home.src.main.java.Main\\java")
+            .add(0, "public c")
+            .add(1, "lass Mai")
+            .add(2, "n {\n}")
+            .build();
+
+    {
+      List<String> patterns = Collections.singletonList("home.src.main.java.Main\\java");
+      Header schema = getSchema(patterns);
+      assertEquals(justMainHeader, schema);
+      List<Row> rows = query(patterns);
+      assertEquals(justMainData, rows);
+    }
+
+    {
+      List<String> patterns = Collections.singletonList("*.Main\\java");
+      Header schema = getSchema(patterns);
+      assertEquals(justMainHeader, schema);
+      List<Row> rows = query(patterns);
+      assertEquals(justMainData, rows);
+    }
+
+    {
+      List<String> patterns = Collections.singletonList("*.main.*.Main\\java");
+      Header schema = getSchema(patterns);
+      assertEquals(justMainHeader, schema);
+      List<Row> rows = query(patterns);
+      assertEquals(justMainData, rows);
+    }
+
+    {
+      List<String> patterns = Collections.singletonList("home.*.main.*.Main\\java");
+      Header schema = getSchema(patterns);
+      assertEquals(justMainHeader, schema);
+      List<Row> rows = query(patterns);
+      assertEquals(justMainData, rows);
+    }
+  }
+
+  /** Queries every column, unfiltered, with a key range filter and with value filters. */
+  @Test
+  public void testQueryAllColumn() throws PhysicalException, IOException {
+    createNestedFiles();
+    reset();
+    // query all column
+
+    Header allHeader =
+        headerOf(
+            "home.LICENSE",
+            "home.README\\md",
+            "home.src.main.java.Main\\java",
+            "home.src.main.java.Tool\\java",
+            "home.src.main.resources.config\\properties",
+            "home.src.test.java.Test\\java");
+
+    List<Row> allData =
+        new RowsBuilder(
+                "home.LICENSE",
+                "home.README\\md",
+                "home.src.main.java.Main\\java",
+                "home.src.main.java.Tool\\java",
+                "home.src.main.resources.config\\properties",
+                "home.src.test.java.Test\\java")
+            .add(0, "Apache L", "this dir", "public c", "public c", "ip=127.0", "public c")
+            .add(1, "icense", "ectory i", "lass Mai", "lass Too", ".0.1\npor", "lass Tes")
+            .add(2, null, "s for te", "n {\n}", "l {\n}", "t=6667", "t {\n}")
+            .add(3, null, "st", null, null, null, null)
+            .build();
+
+    {
+      List<String> patterns = null;
+      Header schema = getSchema(patterns);
+      assertEquals(allHeader, schema);
+      List<Row> rows = query(patterns);
+      assertEquals(allData, rows);
+    }
+
+    {
+      List<String> patterns = Collections.singletonList("*");
+      Header schema = getSchema(patterns);
+      assertEquals(allHeader, schema);
+      List<Row> rows = query(patterns);
+      assertEquals(allData, rows);
+    }
+
+    {
+      List<String> patterns = Collections.singletonList("*.*");
+      Header schema = getSchema(patterns);
+      assertEquals(allHeader, schema);
+      List<Row> rows = query(patterns);
+      assertEquals(allData, rows);
+    }
+
+    {
+      List<String> patterns = Collections.singletonList("home.*");
+      Header schema = getSchema(patterns);
+      assertEquals(allHeader, schema);
+      List<Row> rows = query(patterns);
+      assertEquals(allData, rows);
+    }
+
+    {
+      List<Row> allDataKey1to2 = Arrays.asList(allData.get(1), allData.get(2));
+      List<String> patterns = null;
+      Header schema = getSchema(patterns);
+      assertEquals(allHeader, schema);
+      List<Row> rows =
+          query(
+              patterns,
+              new AndFilter(Arrays.asList(new KeyFilter(Op.GE, 1), new KeyFilter(Op.L, 3))));
+      assertEquals(allDataKey1to2, rows);
+    }
+
+    // test filter
+    List<Row> allDataKey0 = Collections.singletonList(allData.get(0));
+    List<String> patterns = null;
+    Header schema = getSchema(patterns);
+    assertEquals(allHeader, schema);
+    {
+      List<Row> rows =
+          query(
+              patterns,
+              new ValueFilter(
+                  "home.src.main.resources.config\\properties", Op.LIKE, new Value(".*[.].*")));
+      assertEquals(allDataKey0, rows);
+    }
+    {
+      List<Row> rows = query(patterns, new ValueFilter("*", Op.LIKE, new Value(".*[.].*")));
+      assertEquals(allDataKey0, rows);
+    }
+    {
+      List<Row> rows = query(patterns, new ValueFilter("home.*", Op.LIKE, new Value(".*[.].*")));
+      assertEquals(allDataKey0, rows);
+    }
+    {
+      List<Row> rows =
+          query(patterns, new ValueFilter("*.config\\properties", Op.LIKE, new Value(".*[.].*")));
+      assertEquals(allDataKey0, rows);
+    }
+    {
+      List<Row> rows =
+          query(
+              patterns,
+              new ValueFilter("home.*.main.*.config\\properties", Op.LIKE, new Value(".*[.].*")));
+      assertEquals(allDataKey0, rows);
+    }
+  }
+}
diff --git a/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/service/storage/FileTreeParquetDummyTest.java b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/service/storage/FileTreeParquetDummyTest.java
new file mode 100644
index 0000000000..a289ceadd2
--- /dev/null
+++ b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/service/storage/FileTreeParquetDummyTest.java
@@ -0,0 +1,162 @@
+/*
+ * IGinX - the polystore system with high performance
+ * Copyright (C) Tsinghua University
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package cn.edu.tsinghua.iginx.filestore.service.storage;
+
+import static cn.edu.tsinghua.iginx.thrift.DataType.*;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import cn.edu.tsinghua.iginx.engine.physical.exception.PhysicalException;
+import cn.edu.tsinghua.iginx.engine.physical.memory.execute.Table;
+import cn.edu.tsinghua.iginx.engine.shared.data.Value;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Field;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Header;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Row;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.KeyFilter;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.Op;
+import cn.edu.tsinghua.iginx.engine.shared.operator.filter.ValueFilter;
+import cn.edu.tsinghua.iginx.filestore.common.Configs;
+import cn.edu.tsinghua.iginx.filestore.format.parquet.ParquetTestUtils;
+import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTree;
+import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTreeConfig;
+import cn.edu.tsinghua.iginx.filestore.test.TableBuilder;
+import cn.edu.tsinghua.iginx.filestore.thrift.DataBoundary;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.*;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests reading a Parquet file through the {@link FileTree} structure, both when the file is the
+ * root itself and when it is nested one level below the root.
+ *
+ * <p>All assertions come from JUnit Jupiter, matching the Jupiter {@code @Test} annotation used by
+ * this class.
+ */
+public class FileTreeParquetDummyTest extends AbstractDummyTest {
+  protected static final String DIR_NAME = "home.parquet";
+
+  public FileTreeParquetDummyTest() {
+    super(FileTree.NAME, getConfig(), DIR_NAME);
+  }
+
+  /** Builds the structure config; the prefix is the directory name with its dot escaped. */
+  private static Config getConfig() {
+    Map<String, Object> map = new HashMap<>();
+    Configs.put(map, "home\\parquet", FileTreeConfig.Fields.prefix);
+    return ConfigFactory.parseMap(map);
+  }
+
+  /**
+   * Writes a five-row, six-column table to {@code path} and checks boundary, schema and query
+   * results.
+   *
+   * @param path where the Parquet file is created
+   * @param prefix column-name prefix under which the file's columns are expected to appear
+   */
+  private void testSingleFile(Path path, String prefix) throws PhysicalException, IOException {
+    Table table =
+        new TableBuilder(false, null)
+            .names("deleted", "dist", "id", "income", "name", "year")
+            .types(BOOLEAN, FLOAT, LONG, DOUBLE, BINARY, INTEGER)
+            .row(false, 1.1f, 9001L, 2000.02, "Alice", 1993)
+            .row(true, 7.2f, 9002L, 1000.30, "Bob", 1991)
+            .row(false, 2.8f, 9003L, 5000.4, "Charlie", 1980)
+            .row(true, 1.4f, 9004L, 4000.5, "David", 1992)
+            .row(false, 6.5f, 9005L, 3000.6, "Eve", 2001)
+            .build();
+
+    ParquetTestUtils.createFile(path, table);
+    reset();
+
+    {
+      DataBoundary boundary = getBoundary(null);
+      assertTrue(inBounds(boundary, prefix + ".id"));
+      assertTrue(inBounds(boundary, prefix + ".name"));
+      assertTrue(inBounds(boundary, prefix + ".year"));
+      assertTrue(inBounds(boundary, prefix + ".dist"));
+      assertTrue(inBounds(boundary, prefix + ".income"));
+      assertTrue(inBounds(boundary, prefix + ".deleted"));
+    }
+
+    {
+      // The same data as written above, now keyed by row index and with prefixed column names.
+      Table expected =
+          new TableBuilder(true, prefix)
+              .names("deleted", "dist", "id", "income", "name", "year")
+              .types(BOOLEAN, FLOAT, LONG, DOUBLE, BINARY, INTEGER)
+              .key(0L, false, 1.1f, 9001L, 2000.02, "Alice", 1993)
+              .key(1L, true, 7.2f, 9002L, 1000.30, "Bob", 1991)
+              .key(2L, false, 2.8f, 9003L, 5000.4, "Charlie", 1980)
+              .key(3L, true, 1.4f, 9004L, 4000.5, "David", 1992)
+              .key(4L, false, 6.5f, 9005L, 3000.6, "Eve", 2001)
+              .build();
+
+      {
+        List<String> patterns = null;
+        Header schema = getSchema(patterns);
+        assertEquals(expected.getHeader(), schema);
+        List<Row> rows = query(patterns);
+        assertEquals(expected.getRows(), rows);
+      }
+
+      List<Row> firstRowOfExpected = Collections.singletonList(expected.getRows().get(0));
+
+      {
+        List<String> patterns = null;
+        Header schema = getSchema(patterns);
+        assertEquals(expected.getHeader(), schema);
+        List<Row> rows = query(patterns, new ValueFilter(prefix + ".id", Op.E, new Value(9001L)));
+        assertEquals(firstRowOfExpected, rows);
+      }
+
+      {
+        List<String> patterns = null;
+        Header schema = getSchema(patterns);
+        assertEquals(expected.getHeader(), schema);
+        List<Row> rows = query(patterns, new KeyFilter(Op.L, 1L));
+        assertEquals(firstRowOfExpected, rows);
+      }
+    }
+
+    {
+      Table expected =
+          new TableBuilder(true, prefix)
+              .names("id", "name")
+              .types(LONG, BINARY)
+              .key(0L, 9001L, "Alice")
+              .key(1L, 9002L, "Bob")
+              .key(2L, 9003L, "Charlie")
+              .key(3L, 9004L, "David")
+              .key(4L, 9005L, "Eve")
+              .build();
+
+      {
+        List<String> patterns = Arrays.asList(prefix + ".id", prefix + ".name");
+        Header schema = getSchema(patterns);
+        assertEquals(expected.getHeader(), schema);
+        List<Row> rows = query(patterns);
+        assertEquals(expected.getRows(), rows);
+      }
+    }
+
+    {
+      List<String> patterns = Collections.singletonList(prefix + ".error");
+      Header schema = getSchema(patterns);
+      assertEquals(new Header(Field.KEY, Collections.emptyList()), schema);
+      List<Row> rows = query(patterns);
+      assertEquals(Collections.emptyList(), rows);
+    }
+  }
+
+  /** The root path itself is the Parquet file. */
+  @Test
+  public void testSingleFileAsRoot() throws PhysicalException, IOException {
+    testSingleFile(root, "home\\parquet");
+  }
+
+  /** The Parquet file lives one level below the root. */
+  @Test
+  public void testNestedSingleFile() throws PhysicalException, IOException {
+    testSingleFile(root.resolve("user.parquet"), "home\\parquet.user\\parquet");
+  }
+}
diff --git a/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTreeConfigTest.java b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTreeConfigTest.java
new file mode 100644
index 0000000000..d5eeb1203b
--- /dev/null
+++ b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/struct/tree/FileTreeConfigTest.java
@@ -0,0 +1,106 @@
+/*
+ * IGinX - the polystore system with high performance
+ * Copyright (C) Tsinghua University
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the 
GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package cn.edu.tsinghua.iginx.filestore.struct.tree;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import cn.edu.tsinghua.iginx.filestore.common.AbstractConfig;
+import cn.edu.tsinghua.iginx.filestore.format.raw.RawFormat;
+import cn.edu.tsinghua.iginx.filestore.format.raw.RawReaderConfig;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.junit.jupiter.api.Test;
+
+/** Tests how {@link FileTreeConfig} is built from a raw {@link Config} and how it validates. */
+public class FileTreeConfigTest {
+
+  /** An empty raw config produces an empty formats map. */
+  @Test
+  public void testEmptyFormats() {
+    FileTreeConfig fileTreeConfig = FileTreeConfig.of(ConfigFactory.empty());
+
+    assertEquals(Collections.emptyMap(), fileTreeConfig.getFormats());
+  }
+
+  /** A {@code formats.<format>.<option>} entry ends up as a per-format sub-config. */
+  @Test
+  public void testFormats() {
+    Map<String, Object> rawConfigMap = new HashMap<>();
+    rawConfigMap.put(
+        String.join(
+            ".", FileTreeConfig.Fields.formats, RawFormat.NAME, RawReaderConfig.Fields.pageSize),
+        4096);
+
+    Config rawConfig = ConfigFactory.parseMap(rawConfigMap);
+
+    FileTreeConfig fileTreeConfig = FileTreeConfig.of(rawConfig);
+
+    Map<String, ?> formats = fileTreeConfig.getFormats();
+    Map<String, Config> expectedFormats =
+        Collections.singletonMap(
+            RawFormat.NAME,
+            ConfigFactory.parseMap(
+                Collections.singletonMap(RawReaderConfig.Fields.pageSize, 4096)));
+    assertEquals(expectedFormats, formats);
+  }
+
+  /** A scalar stored directly under the formats key is ignored. */
+  @Test
+  public void testIgnoreInvalidFormats() {
+    Map<String, Object> rawConfigMap = new HashMap<>();
+    rawConfigMap.put(String.join(".", FileTreeConfig.Fields.formats), 4096);
+
+    Config rawConfig = ConfigFactory.parseMap(rawConfigMap);
+    FileTreeConfig fileTreeConfig = FileTreeConfig.of(rawConfig);
+
+    assertEquals(Collections.emptyMap(), fileTreeConfig.getFormats());
+  }
+
+  /** A scalar stored under {@code formats.<format>} is ignored as well. */
+  @Test
+  public void testIgnoreInvalidFormatsField() {
+    Map<String, Object> rawConfigMap = new HashMap<>();
+    rawConfigMap.put(String.join(".", FileTreeConfig.Fields.formats, RawFormat.NAME), 4096);
+
+    Config rawConfig = ConfigFactory.parseMap(rawConfigMap);
+    FileTreeConfig fileTreeConfig = FileTreeConfig.of(rawConfig);
+
+    assertEquals(Collections.emptyMap(), fileTreeConfig.getFormats());
+  }
+
+  /** Validation reports exactly one problem when dot is {@code "."} and one when it is null. */
+  @Test
+  public void testIgnoreInvalidDot() {
+    Map<String, Object> rawConfigMap = new HashMap<>();
+    rawConfigMap.put(String.join(".", FileTreeConfig.Fields.dot), ".");
+
+    Config rawConfig = ConfigFactory.parseMap(rawConfigMap);
+    FileTreeConfig fileTreeConfig = FileTreeConfig.of(rawConfig);
+
+    {
+      // NOTE(review): the element type of validate() is not visible in this view; presumably it
+      // is the problem type declared in AbstractConfig - confirm and parameterize this List.
+      List problemList = fileTreeConfig.validate();
+      assertEquals(1, problemList.size());
+      assertEquals("dot:'dot cannot be '.''", problemList.get(0).toString());
+    }
+
+    {
+      fileTreeConfig.setDot(null);
+      List problemList = fileTreeConfig.validate();
+      assertEquals(1, problemList.size());
+      assertEquals("dot:'missing field'", problemList.get(0).toString());
+    }
+  }
+}
diff --git a/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/test/DataValidator.java b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/test/DataValidator.java
new file mode 100644
index 0000000000..d754912cdd
--- /dev/null
+++ b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/test/DataValidator.java
@@ -0,0 +1,120 @@
+/*
+ * IGinX - the polystore system with high performance
+ * Copyright (C) Tsinghua University
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the 
License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package cn.edu.tsinghua.iginx.filestore.test;
+
+import cn.edu.tsinghua.iginx.engine.physical.exception.PhysicalException;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Field;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Header;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Row;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Static helpers that reshape query results so that tests can compare them.
+ *
+ * <p>Every conversion between {@code byte[]} and {@link String} uses UTF-8 explicitly, so results
+ * do not depend on the platform default charset.
+ */
+public class DataValidator {
+
+  /**
+   * Drains {@code stream} into a list.
+   *
+   * @param stream the stream to read until {@code hasNext()} returns false
+   * @return the rows in the order the stream produced them
+   * @throws PhysicalException if the stream fails while being read
+   */
+  public static List<Row> toList(RowStream stream) throws PhysicalException {
+    List<Row> rows = new ArrayList<>();
+    while (stream.hasNext()) {
+      rows.add(stream.next());
+    }
+    return rows;
+  }
+
+  /** Applies {@link #withBinaryAsString(Row)} to every row, keeping the order. */
+  public static List<Row> withBinaryAsString(List<Row> row) {
+    List<Row> rows = new ArrayList<>();
+    for (Row r : row) {
+      rows.add(withBinaryAsString(r));
+    }
+    return rows;
+  }
+
+  /**
+   * Flattens a row into a map from full column name to value.
+   *
+   * <p>The key, when the header has one, is stored under the key field's full name. {@code byte[]}
+   * values are decoded to strings.
+   */
+  public static Map<String, Object> toMap(Row row) {
+    Map<String, Object> map = new HashMap<>();
+    Header header = row.getHeader();
+    Field keyField = header.getKey();
+    if (keyField != null) {
+      map.put(keyField.getFullName(), row.getKey());
+    }
+    List<Field> fields = header.getFields();
+    for (int i = 0; i < fields.size(); i++) {
+      Object value = row.getValue(i);
+      if (value instanceof byte[]) {
+        value = new String((byte[]) value, StandardCharsets.UTF_8);
+      }
+      map.put(fields.get(i).getFullName(), value);
+    }
+    return map;
+  }
+
+  /** Returns a copy of {@code row} in which every {@code byte[]} value is decoded to a string. */
+  public static Row withBinaryAsString(Row row) {
+    List<Object> values = new ArrayList<>();
+    for (Object value : row.getValues()) {
+      if (value instanceof byte[]) {
+        values.add(new String((byte[]) value, StandardCharsets.UTF_8));
+      } else {
+        values.add(value);
+      }
+    }
+    return rebuild(row, values.toArray());
+  }
+
+  /** Returns a header with the same key field and the fields ordered by full name. */
+  public static Header sort(Header header) {
+    List<Field> fields = new ArrayList<>(header.getFields());
+    fields.sort(Comparator.comparing(Field::getFullName));
+    return new Header(header.getKey(), fields);
+  }
+
+  /** Returns a copy of {@code row} whose columns follow the order of {@link #sort(Header)}. */
+  public static Row sort(Row row) {
+    Header header = sort(row.getHeader());
+
+    List<Object> values = new ArrayList<>();
+    for (Field field : header.getFields()) {
+      values.add(row.getValue(field));
+    }
+
+    return new Row(header, row.getKey(), values.toArray());
+  }
+
+  /** Sorts the columns of every row by name and decodes binary values to strings. */
+  public static List<Row> normalize(List<Row> rows) {
+    return rows.stream()
+        .map(DataValidator::sort)
+        .map(DataValidator::withBinaryAsString)
+        .collect(Collectors.toList());
+  }
+
+  /** Returns a copy of {@code row} in which every {@link String} value is encoded to bytes. */
+  public static Row withStringAsBinary(Row row) {
+    List<Object> values = new ArrayList<>();
+    for (Object value : row.getValues()) {
+      if (value instanceof String) {
+        values.add(((String) value).getBytes(StandardCharsets.UTF_8));
+      } else {
+        values.add(value);
+      }
+    }
+    return rebuild(row, values.toArray());
+  }
+
+  /** Creates a row with the header of {@code row}, keeping its key only if the header has one. */
+  private static Row rebuild(Row row, Object[] values) {
+    Header header = row.getHeader();
+    if (header.getKey() != null) {
+      return new Row(header, row.getKey(), values);
+    }
+    return new Row(header, values);
+  }
+}
diff --git a/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/test/RowsBuilder.java b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/test/RowsBuilder.java
new file mode 100644
index 0000000000..c5a86e3db2
--- /dev/null
+++ b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/test/RowsBuilder.java
@@ -0,0 +1,81 @@
+/*
+ * IGinX - the polystore system with high performance
+ * Copyright (C) Tsinghua University
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package cn.edu.tsinghua.iginx.filestore.test;
+
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Field;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Header;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Row;
+import cn.edu.tsinghua.iginx.thrift.DataType;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Fluent builder for a list of keyed {@link Row}s that share one header.
+ *
+ * <p>Column names are given up front. Column types are inferred from the values of the first row
+ * passed to {@link #add(long, Object...)}, so that row must not contain {@code null}.
+ */
+public class RowsBuilder {
+  private final List<String> fields;
+  private final List<Row> rows = new ArrayList<>();
+  // Created lazily by the first add(...) call, because the types come from that row's values.
+  private Header header = null;
+
+  /**
+   * @param header the column names, in column order; must not be null
+   */
+  public RowsBuilder(List<String> header) {
+    this.fields = Objects.requireNonNull(header);
+  }
+
+  /**
+   * @param header the column names, in column order
+   */
+  public RowsBuilder(String... header) {
+    this(Arrays.asList(header));
+  }
+
+  /** Returns the rows added so far; this is the builder's own list, not a copy. */
+  public List<Row> build() {
+    return rows;
+  }
+
+  /** Creates the shared header from the first row's values; does nothing on later calls. */
+  private void initHeader(Object... values) {
+    if (header != null) {
+      return;
+    }
+    if (values.length > this.fields.size()) {
+      throw new IllegalArgumentException(
+          "Got " + values.length + " values but only " + this.fields.size() + " column names");
+    }
+    List<Field> fields = new ArrayList<>();
+    for (int i = 0; i < values.length; i++) {
+      fields.add(new Field(this.fields.get(i), parseType(values[i])));
+    }
+    header = new Header(Field.KEY, fields);
+  }
+
+  /**
+   * Maps a sample value to its column type.
+   *
+   * @throws IllegalArgumentException if the value is null or of an unsupported class
+   */
+  private static DataType parseType(Object object) {
+    if (object == null) {
+      // Fail with a clear message instead of an NPE from object.getClass() below.
+      throw new IllegalArgumentException(
+          "Cannot infer a column type from null; the first row must not contain null");
+    } else if (object instanceof Boolean) {
+      return DataType.BOOLEAN;
+    } else if (object instanceof Integer) {
+      return DataType.INTEGER;
+    } else if (object instanceof Long) {
+      return DataType.LONG;
+    } else if (object instanceof Float) {
+      return DataType.FLOAT;
+    } else if (object instanceof Double) {
+      return DataType.DOUBLE;
+    } else if (object instanceof String || object instanceof byte[]) {
+      return DataType.BINARY;
+    } else {
+      throw new IllegalArgumentException("Unsupported type: " + object.getClass());
+    }
+  }
+
+  /**
+   * Appends one row.
+   *
+   * @param key the row key
+   * @param values the column values in column order; later rows may contain null
+   * @return this builder
+   */
+  public RowsBuilder add(long key, Object... values) {
+    initHeader(values);
+    Row row = new Row(header, key, values);
+    rows.add(row);
+    return this;
+  }
+}
diff --git a/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/test/TableBuilder.java b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/test/TableBuilder.java
new file mode 100644
index 0000000000..79a087a30a
--- /dev/null
+++ b/dataSource/filestore/src/test/java/cn/edu/tsinghua/iginx/filestore/test/TableBuilder.java
@@ -0,0 +1,83 @@
+/*
+ * IGinX - the polystore system with high performance
+ * Copyright (C) Tsinghua University
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package cn.edu.tsinghua.iginx.filestore.test;
+
+import cn.edu.tsinghua.iginx.engine.physical.memory.execute.Table;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Field;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Header;
+import cn.edu.tsinghua.iginx.engine.shared.data.read.Row;
+import cn.edu.tsinghua.iginx.thrift.DataType;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Fluent builder for a {@link Table} used as test input or as an expected result.
+ *
+ * <p>Call {@link #names(String...)} first, then {@link #types(DataType...)}, then add rows with
+ * {@link #key(long, Object...)} or {@link #row(Object...)}.
+ */
+public class TableBuilder {
+
+  private final Field keyField;
+  private final String prefix;
+  private List<String> names = null;
+  private Header header = null;
+  private final List<Row> rows = new ArrayList<>();
+
+  /**
+   * @param hasKey whether the header gets the {@link Field#KEY} key field
+   * @param prefix prepended to every column name with a {@code "."}; null for no prefix
+   */
+  public TableBuilder(boolean hasKey, String prefix) {
+    if (hasKey) {
+      keyField = Field.KEY;
+    } else {
+      keyField = null;
+    }
+    this.prefix = prefix;
+  }
+
+  /**
+   * Sets the column names, qualifying each with the prefix when one was given.
+   *
+   * <p>The names are copied into a new list, so the caller's array is never modified.
+   */
+  public TableBuilder names(String... names) {
+    List<String> qualified = new ArrayList<>(names.length);
+    for (String name : names) {
+      qualified.add(prefix != null ? prefix + "." + name : name);
+    }
+    this.names = qualified;
+    return this;
+  }
+
+  /**
+   * Sets the column types and builds the header from the names given earlier.
+   *
+   * @throws IllegalStateException if {@link #names(String...)} has not been called yet
+   * @throws IllegalArgumentException if the number of types differs from the number of names
+   */
+  public TableBuilder types(DataType... types) {
+    if (names == null) {
+      throw new IllegalStateException("names(...) must be called before types(...)");
+    }
+    if (types.length != names.size()) {
+      throw new IllegalArgumentException(
+          "Got " + types.length + " types for " + names.size() + " column names");
+    }
+    List<Field> fields = new ArrayList<>();
+    for (int i = 0; i < names.size(); i++) {
+      fields.add(new Field(names.get(i), types[i]));
+    }
+    this.header = new Header(keyField, fields);
+    return this;
+  }
+
+  /** Appends a keyed row; the header must already have been built by {@code types(...)}. */
+  public TableBuilder key(long key, Object... values) {
+    Row row = new Row(Objects.requireNonNull(header), key, values);
+    rows.add(row);
+    return this;
+  }
+
+  /** Appends a row without a key; the header must already have been built by {@code types(...)}. */
+  public TableBuilder row(Object... values) {
+    Row row = new Row(Objects.requireNonNull(header), values);
+    rows.add(row);
+    return this;
+  }
+
+  /** Creates the table from the header and the rows added so far. */
+  public Table build() {
+    return new Table(Objects.requireNonNull(header), rows);
+  }
+}
diff --git a/dataSource/filestore/src/test/resources/log4j2-test.properties b/dataSource/filestore/src/test/resources/log4j2-test.properties
new file mode 100644
index 0000000000..2809e69c73
--- /dev/null
+++ b/dataSource/filestore/src/test/resources/log4j2-test.properties
@@ -0,0 +1,13 @@
+appender.console.name=ConsoleAppender
+appender.console.type=Console
+appender.console.target=SYSTEM_ERR
+appender.console.layout.type=PatternLayout
+appender.console.layout.pattern=%d{ISO8601} %-5p - %t [%C{1}:%L] - %m%n
+
+# Define Loggers
+#root
+rootLogger.level=info
+rootLogger.appenderRef.console.ref=ConsoleAppender
+#iginx
+logger.iginx.name=cn.edu.tsinghua.iginx
+logger.iginx.level=debug
\ No newline at end of file
diff --git a/test/src/test/java/cn/edu/tsinghua/iginx/integration/expansion/filestore/FileStoreCapacityExpansionIT.java b/test/src/test/java/cn/edu/tsinghua/iginx/integration/expansion/filestore/FileStoreCapacityExpansionIT.java index 068acca84b..6477a36cf3 100644 --- a/test/src/test/java/cn/edu/tsinghua/iginx/integration/expansion/filestore/FileStoreCapacityExpansionIT.java +++ b/test/src/test/java/cn/edu/tsinghua/iginx/integration/expansion/filestore/FileStoreCapacityExpansionIT.java @@ -22,8 +22,14 @@ import static org.junit.Assert.fail; import cn.edu.tsinghua.iginx.exception.SessionException; +import cn.edu.tsinghua.iginx.filestore.format.raw.RawFormat; +import cn.edu.tsinghua.iginx.filestore.struct.tree.FileTree; import cn.edu.tsinghua.iginx.integration.expansion.BaseCapacityExpansionIT; import cn.edu.tsinghua.iginx.integration.expansion.utils.SQLTestTools; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.StringJoiner; +import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -185,22 +191,55 @@ public void 
testShowColumns() { statement = "SHOW COLUMNS;"; expected = "Columns:\n" - + "+------------------------+--------+\n" - + "| Path|DataType|\n" - + "+------------------------+--------+\n" - + "| a.b.c.d.1\\txt| BINARY|\n" - + "| a.e.2\\txt| BINARY|\n" - + "| a.f.g.3\\txt| BINARY|\n" - + "| ln.wf02.status| BOOLEAN|\n" - + "| ln.wf02.version| BINARY|\n" - + "| mn.wf01.wt01.status| BINARY|\n" - + "|mn.wf01.wt01.temperature| BINARY|\n" - + "| nt.wf03.wt01.status2| BINARY|\n" - + "|nt.wf04.wt01.temperature| BINARY|\n" - + "| tm.wf05.wt01.status| BINARY|\n" - + "|tm.wf05.wt01.temperature| BINARY|\n" - + "+------------------------+--------+\n" - + "Total line number = 11\n"; + + "+--------------------------------------+--------+\n" + + "| Path|DataType|\n" + + "+--------------------------------------+--------+\n" + + "| a.Iris\\parquet| BINARY|\n" + + "| a.Iris\\parquet.petal.length| DOUBLE|\n" + + "| a.Iris\\parquet.petal.width| DOUBLE|\n" + + "| a.Iris\\parquet.sepal.length| DOUBLE|\n" + + "| a.Iris\\parquet.sepal.width| DOUBLE|\n" + + "| a.Iris\\parquet.variety| BINARY|\n" + + "| a.b.c.d.1\\txt| BINARY|\n" + + "| a.e.2\\txt| BINARY|\n" + + "| a.f.g.3\\txt| BINARY|\n" + + "| a.other.MT cars\\parquet| BINARY|\n" + + "| a.other.MT cars\\parquet.am| INTEGER|\n" + + "| a.other.MT cars\\parquet.carb| INTEGER|\n" + + "| a.other.MT cars\\parquet.cyl| INTEGER|\n" + + "| a.other.MT cars\\parquet.disp| DOUBLE|\n" + + "| a.other.MT cars\\parquet.drat| DOUBLE|\n" + + "| a.other.MT cars\\parquet.gear| INTEGER|\n" + + "| a.other.MT cars\\parquet.hp| INTEGER|\n" + + "| a.other.MT cars\\parquet.model| BINARY|\n" + + "| a.other.MT cars\\parquet.mpg| DOUBLE|\n" + + "| a.other.MT cars\\parquet.qsec| DOUBLE|\n" + + "| a.other.MT cars\\parquet.vs| INTEGER|\n" + + "| a.other.MT cars\\parquet.wt| DOUBLE|\n" + + "| a.other.price\\parquet| BINARY|\n" + + "| a.other.price\\parquet.airconditioning| BINARY|\n" + + "| a.other.price\\parquet.area| LONG|\n" + + "| a.other.price\\parquet.basement| 
BINARY|\n" + + "| a.other.price\\parquet.bathrooms| LONG|\n" + + "| a.other.price\\parquet.bedrooms| LONG|\n" + + "|a.other.price\\parquet.furnishingstatus| BINARY|\n" + + "| a.other.price\\parquet.guestroom| BINARY|\n" + + "| a.other.price\\parquet.hotwaterheating| BINARY|\n" + + "| a.other.price\\parquet.mainroad| BINARY|\n" + + "| a.other.price\\parquet.parking| LONG|\n" + + "| a.other.price\\parquet.prefarea| BINARY|\n" + + "| a.other.price\\parquet.price| LONG|\n" + + "| a.other.price\\parquet.stories| LONG|\n" + + "| ln.wf02.status| BOOLEAN|\n" + + "| ln.wf02.version| BINARY|\n" + + "| mn.wf01.wt01.status| BINARY|\n" + + "| mn.wf01.wt01.temperature| BINARY|\n" + + "| nt.wf03.wt01.status2| BINARY|\n" + + "| nt.wf04.wt01.temperature| BINARY|\n" + + "| tm.wf05.wt01.status| BINARY|\n" + + "| tm.wf05.wt01.temperature| BINARY|\n" + + "+--------------------------------------+--------+\n" + + "Total line number = 44\n"; SQLTestTools.executeAndCompare(session, statement, expected); statement = "SHOW COLUMNS nt.*;"; @@ -230,57 +269,225 @@ public void testShowColumns() { statement = "SHOW COLUMNS a.*;"; expected = "Columns:\n" - + "+-------------+--------+\n" - + "| Path|DataType|\n" - + "+-------------+--------+\n" - + "|a.b.c.d.1\\txt| BINARY|\n" - + "| a.e.2\\txt| BINARY|\n" - + "| a.f.g.3\\txt| BINARY|\n" - + "+-------------+--------+\n" - + "Total line number = 3\n"; + + "+--------------------------------------+--------+\n" + + "| Path|DataType|\n" + + "+--------------------------------------+--------+\n" + + "| a.Iris\\parquet| BINARY|\n" + + "| a.Iris\\parquet.petal.length| DOUBLE|\n" + + "| a.Iris\\parquet.petal.width| DOUBLE|\n" + + "| a.Iris\\parquet.sepal.length| DOUBLE|\n" + + "| a.Iris\\parquet.sepal.width| DOUBLE|\n" + + "| a.Iris\\parquet.variety| BINARY|\n" + + "| a.b.c.d.1\\txt| BINARY|\n" + + "| a.e.2\\txt| BINARY|\n" + + "| a.f.g.3\\txt| BINARY|\n" + + "| a.other.MT cars\\parquet| BINARY|\n" + + "| a.other.MT cars\\parquet.am| INTEGER|\n" + + "| 
a.other.MT cars\\parquet.carb| INTEGER|\n" + + "| a.other.MT cars\\parquet.cyl| INTEGER|\n" + + "| a.other.MT cars\\parquet.disp| DOUBLE|\n" + + "| a.other.MT cars\\parquet.drat| DOUBLE|\n" + + "| a.other.MT cars\\parquet.gear| INTEGER|\n" + + "| a.other.MT cars\\parquet.hp| INTEGER|\n" + + "| a.other.MT cars\\parquet.model| BINARY|\n" + + "| a.other.MT cars\\parquet.mpg| DOUBLE|\n" + + "| a.other.MT cars\\parquet.qsec| DOUBLE|\n" + + "| a.other.MT cars\\parquet.vs| INTEGER|\n" + + "| a.other.MT cars\\parquet.wt| DOUBLE|\n" + + "| a.other.price\\parquet| BINARY|\n" + + "| a.other.price\\parquet.airconditioning| BINARY|\n" + + "| a.other.price\\parquet.area| LONG|\n" + + "| a.other.price\\parquet.basement| BINARY|\n" + + "| a.other.price\\parquet.bathrooms| LONG|\n" + + "| a.other.price\\parquet.bedrooms| LONG|\n" + + "|a.other.price\\parquet.furnishingstatus| BINARY|\n" + + "| a.other.price\\parquet.guestroom| BINARY|\n" + + "| a.other.price\\parquet.hotwaterheating| BINARY|\n" + + "| a.other.price\\parquet.mainroad| BINARY|\n" + + "| a.other.price\\parquet.parking| LONG|\n" + + "| a.other.price\\parquet.prefarea| BINARY|\n" + + "| a.other.price\\parquet.price| LONG|\n" + + "| a.other.price\\parquet.stories| LONG|\n" + + "+--------------------------------------+--------+\n" + + "Total line number = 36\n"; SQLTestTools.executeAndCompare(session, statement, expected); } - @Override - protected void testQuerySpecialHistoryData() { + public static String getAddStorageParams(Map<String, String> params) { + StringJoiner joiner = new StringJoiner(","); + for (Map.Entry<String, String> entry : params.entrySet()) { + joiner.add(entry.getKey() + ":" + entry.getValue()); + } + return joiner.toString(); + } + + @Test + public void testDummy() { + testQuerySpecialHistoryData(); + } + + private void testQueryRawChunks() { + String statement = "select 1\\txt from a.*;"; + String expect = + "ResultSets:\n" + + "+---+---------------------------------------------------------------------------+\n" + + "|key| 
a.b.c.d.1\\txt|\n" + + "+---+---------------------------------------------------------------------------+\n" + + "| 0|979899100101102103104105106107108109110111112113114115116117118119120121122|\n" + + "+---+---------------------------------------------------------------------------+\n" + + "Total line number = 1\n"; + SQLTestTools.executeAndCompare(session, statement, expect); + + statement = "select 2\\txt from a.*;"; + expect = + "ResultSets:\n" + + "+---+----------------------------------------------------+\n" + + "|key| a.e.2\\txt|\n" + + "+---+----------------------------------------------------+\n" + + "| 0|6566676869707172737475767778798081828384858687888990|\n" + + "+---+----------------------------------------------------+\n" + + "Total line number = 1\n"; + SQLTestTools.executeAndCompare(session, statement, expect); + + statement = "select 3\\txt from a.*;"; + expect = + "ResultSets:\n" + + "+---+------------------------------------------+\n" + + "|key| a.f.g.3\\txt|\n" + + "+---+------------------------------------------+\n" + + "| 0|012345678910111213141516171819202122232425|\n" + + "+---+------------------------------------------+\n" + + "Total line number = 1\n"; + SQLTestTools.executeAndCompare(session, statement, expect); + } + + private void testQueryParquets() { + String statement; + String expect; + + statement = "select petal.length from `a.Iris\\parquet` where key >= 10 and key <20;"; + expect = + "ResultSets:\n" + + "+---+---------------------------+\n" + + "|key|a.Iris\\parquet.petal.length|\n" + + "+---+---------------------------+\n" + + "| 10| 1.5|\n" + + "| 11| 1.6|\n" + + "| 12| 1.4|\n" + + "| 13| 1.1|\n" + + "| 14| 1.2|\n" + + "| 15| 1.5|\n" + + "| 16| 1.3|\n" + + "| 17| 1.4|\n" + + "| 18| 1.7|\n" + + "| 19| 1.5|\n" + + "+---+---------------------------+\n" + + "Total line number = 10\n"; + SQLTestTools.executeAndCompare(session, statement, expect); + + statement = + "select `Iris\\parquet`.petal.length, other.`MT cars\\parquet`.mpg 
from a where key >= 10 and key <20;"; + expect = + "ResultSets:\n" + + "+---+---------------------------+---------------------------+\n" + + "|key|a.Iris\\parquet.petal.length|a.other.MT cars\\parquet.mpg|\n" + + "+---+---------------------------+---------------------------+\n" + + "| 10| 1.5| 17.8|\n" + + "| 11| 1.6| 16.4|\n" + + "| 12| 1.4| 17.3|\n" + + "| 13| 1.1| 15.2|\n" + + "| 14| 1.2| 10.4|\n" + + "| 15| 1.5| 10.4|\n" + + "| 16| 1.3| 14.7|\n" + + "| 17| 1.4| 32.4|\n" + + "| 18| 1.7| 30.4|\n" + + "| 19| 1.5| 33.9|\n" + + "+---+---------------------------+---------------------------+\n" + + "Total line number = 10\n"; + SQLTestTools.executeAndCompare(session, statement, expect); + + statement = "select disp, furnishingstatus from a.* where key >= 10 and key <20;"; + expect = + "ResultSets:\n" + + "+---+----------------------------+--------------------------------------+\n" + + "|key|a.other.MT cars\\parquet.disp|a.other.price\\parquet.furnishingstatus|\n" + + "+---+----------------------------+--------------------------------------+\n" + + "| 10| 167.6| furnished|\n" + + "| 11| 275.8| semi-furnished|\n" + + "| 12| 275.8| semi-furnished|\n" + + "| 13| 275.8| furnished|\n" + + "| 14| 472.0| semi-furnished|\n" + + "| 15| 460.0| semi-furnished|\n" + + "| 16| 440.0| unfurnished|\n" + + "| 17| 78.7| furnished|\n" + + "| 18| 75.7| furnished|\n" + + "| 19| 71.1| semi-furnished|\n" + + "+---+----------------------------+--------------------------------------+\n" + + "Total line number = 10\n"; + SQLTestTools.executeAndCompare(session, statement, expect); + + statement = + "select Iris\\parquet.petal.length from a where key < 50 and other.price\\parquet.furnishingstatus ==\"unfurnished\";"; + expect = + "ResultSets:\n" + + "+---+---------------------------+\n" + + "|key|a.Iris\\parquet.petal.length|\n" + + "+---+---------------------------+\n" + + "| 7| 1.5|\n" + + "| 9| 1.5|\n" + + "| 16| 1.3|\n" + + "| 21| 1.5|\n" + + "| 28| 1.4|\n" + + "| 30| 1.6|\n" + + "| 33| 
1.4|\n" + + "| 38| 1.3|\n" + + "| 42| 1.3|\n" + + "| 48| 1.5|\n" + + "+---+---------------------------+\n" + + "Total line number = 10\n"; + SQLTestTools.executeAndCompare(session, statement, expect); + } + + private void testQueryLegacyFileSystem() { try { session.executeSql( "ADD STORAGEENGINE (\"127.0.0.1\", 6670, \"filestore\", \"dummy_dir:test/test/a, has_data:true, is_read_only:true, iginx_port:6888, chunk_size_in_bytes:1048576\");"); - String statement = "select 1\\txt from a.*;"; - String expect = - "ResultSets:\n" - + "+---+---------------------------------------------------------------------------+\n" - + "|key| a.b.c.d.1\\txt|\n" - + "+---+---------------------------------------------------------------------------+\n" - + "| 0|979899100101102103104105106107108109110111112113114115116117118119120121122|\n" - + "+---+---------------------------------------------------------------------------+\n" - + "Total line number = 1\n"; - SQLTestTools.executeAndCompare(session, statement, expect); - - statement = "select 2\\txt from a.*;"; - expect = - "ResultSets:\n" - + "+---+----------------------------------------------------+\n" - + "|key| a.e.2\\txt|\n" - + "+---+----------------------------------------------------+\n" - + "| 0|6566676869707172737475767778798081828384858687888990|\n" - + "+---+----------------------------------------------------+\n" - + "Total line number = 1\n"; - SQLTestTools.executeAndCompare(session, statement, expect); - - statement = "select 3\\txt from a.*;"; - expect = - "ResultSets:\n" - + "+---+------------------------------------------+\n" - + "|key| a.f.g.3\\txt|\n" - + "+---+------------------------------------------+\n" - + "| 0|012345678910111213141516171819202122232425|\n" - + "+---+------------------------------------------+\n" - + "Total line number = 1\n"; - SQLTestTools.executeAndCompare(session, statement, expect); + } catch (SessionException e) { LOGGER.error("test query for file system failed ", e); fail(); } + 
testQueryRawChunks(); + } + + @Override + protected void testQuerySpecialHistoryData() { + testQueryLegacyFileSystem(); + testQueryFileTree(); + } + + private void testQueryFileTree() { + Map<String, String> params = new LinkedHashMap<>(); + params.put("dummy_dir", "test/test/a"); + params.put("has_data", "true"); + params.put("is_read_only", "true"); + params.put("iginx_port", "6888"); + // dummy.struct=LegacyFilesystem#dummy.config. + params.put("dummy.struct", FileTree.NAME); + params.put("dummy.config.formats." + RawFormat.NAME + ".pageSize", "1048576"); + String addStorageParams = getAddStorageParams(params); + + try { + session.executeSql( + "ADD STORAGEENGINE (\"127.0.0.1\", 6671, \"filestore\", \"" + addStorageParams + "\");"); + } catch (SessionException e) { + LOGGER.error("add storage engine failed ", e); + if (e.getMessage() == null || !e.getMessage().contains("repeatedly")) { + fail(); + } + } + + testQueryRawChunks(); + testQueryParquets(); } } diff --git a/test/src/test/java/cn/edu/tsinghua/iginx/integration/expansion/filestore/FileStoreHistoryDataGenerator.java b/test/src/test/java/cn/edu/tsinghua/iginx/integration/expansion/filestore/FileStoreHistoryDataGenerator.java index 68061fd49d..c364fdac23 100644 --- a/test/src/test/java/cn/edu/tsinghua/iginx/integration/expansion/filestore/FileStoreHistoryDataGenerator.java +++ b/test/src/test/java/cn/edu/tsinghua/iginx/integration/expansion/filestore/FileStoreHistoryDataGenerator.java @@ -22,10 +22,9 @@ import cn.edu.tsinghua.iginx.integration.expansion.BaseHistoryDataGenerator; import cn.edu.tsinghua.iginx.thrift.DataType; +import com.google.common.io.MoreFiles; import java.io.*; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import java.nio.file.*; import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -122,9 +121,13 @@ private void writeSpecificDirectoriesAndFiles() { // │ └── 1.txt // ├── e // │ └── 2.txt - // └── f - // └── g - // └── 3.txt + // ├── f + // │ └── g + // │ └── 
3.txt + // ├── Iris.parquet + // └── other + // ├── MT cars.parquet + // └── price.parquet StringBuilder content1 = new StringBuilder(); StringBuilder content2 = new StringBuilder(); StringBuilder content3 = new StringBuilder(); @@ -141,23 +144,39 @@ private void writeSpecificDirectoriesAndFiles() { createAndWriteFile(content1.toString().getBytes(), "test", "a", "b", "c", "d", "1.txt"); createAndWriteFile(content2.toString().getBytes(), "test", "a", "e", "2.txt"); createAndWriteFile(content3.toString().getBytes(), "test", "a", "f", "g", "3.txt"); + + String parquetResourceDir = "dummy/parquet/"; + copyFileFromResource( + parquetResourceDir + "Iris.parquet", Paths.get("test", "a", "Iris.parquet")); + copyFileFromResource( + parquetResourceDir + "MT cars.parquet", Paths.get("test", "a", "other", "MT cars.parquet")); + copyFileFromResource( + parquetResourceDir + "price.parquet", Paths.get("test", "a", "other", "price.parquet")); } - private void createAndWriteFile(byte[] content, String first, String... 
more) { - File file = new File(Paths.get(first, more).toString()); + private static void copyFileFromResource(String resourcePath, Path targetPath) { try { - if (file.exists()) { - LOGGER.info("file {} has existed", file.getAbsolutePath()); - return; - } - if (!file.getParentFile().mkdirs()) { - LOGGER.error("create directory {} failed", file.getParentFile().getAbsolutePath()); - return; - } - if (!file.exists() && !file.createNewFile()) { - LOGGER.error("create file {} failed", file.getAbsolutePath()); + MoreFiles.createParentDirectories(targetPath); + } catch (IOException e) { + LOGGER.error("create parent directories for {} failed", targetPath, e); + return; + } + try (InputStream is = + FileStoreHistoryDataGenerator.class.getClassLoader().getResourceAsStream(resourcePath)) { + if (is == null) { + LOGGER.error("resource {} not found", resourcePath); return; } + Files.copy(is, targetPath, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + LOGGER.error("copy file from resource {} to {} failed", resourcePath, targetPath, e); + } + } + + private void createAndWriteFile(byte[] content, String first, String... 
more) { + File file = new File(Paths.get(first, more).toString()); + try { + MoreFiles.createParentDirectories(file.toPath()); try (FileOutputStream fos = new FileOutputStream(file)) { fos.write(content); } diff --git a/test/src/test/resources/dummy/parquet/Iris.parquet b/test/src/test/resources/dummy/parquet/Iris.parquet new file mode 100644 index 0000000000..9224dead94 Binary files /dev/null and b/test/src/test/resources/dummy/parquet/Iris.parquet differ diff --git a/test/src/test/resources/dummy/parquet/MT cars.parquet b/test/src/test/resources/dummy/parquet/MT cars.parquet new file mode 100644 index 0000000000..13085cda45 Binary files /dev/null and b/test/src/test/resources/dummy/parquet/MT cars.parquet differ diff --git a/test/src/test/resources/dummy/parquet/price.parquet b/test/src/test/resources/dummy/parquet/price.parquet new file mode 100644 index 0000000000..18ee1aee33 Binary files /dev/null and b/test/src/test/resources/dummy/parquet/price.parquet differ