diff --git a/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/executor/CompactExecutor.java b/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/executor/CompactExecutor.java index b8d59775f8..ec8c0501c0 100644 --- a/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/executor/CompactExecutor.java +++ b/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/executor/CompactExecutor.java @@ -32,7 +32,8 @@ import io.pixelsdb.pixels.common.physical.StorageFactory; import io.pixelsdb.pixels.common.retina.RetinaService; import io.pixelsdb.pixels.common.utils.ConfigFactory; -import io.pixelsdb.pixels.common.utils.DateUtil; +import io.pixelsdb.pixels.common.utils.NetUtils; +import io.pixelsdb.pixels.common.utils.PixelsFileNameUtils; import io.pixelsdb.pixels.core.compactor.CompactLayout; import io.pixelsdb.pixels.core.compactor.PixelsCompactor; import net.sourceforge.argparse4j.inf.Namespace; @@ -40,7 +41,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -114,16 +117,52 @@ public void execute(Namespace ns, String command) throws Exception } } + // Issue #1305: obtain local hostname for the unified compact file naming. + String hostName = NetUtils.getLocalHostName(); + + /** + * Issue #1305: + * Group files by virtualNodeId before compaction. + * GC-eligible files (retina/ordered/compact) carry a real virtualNodeId and must + * not be mixed across groups to preserve create_ts/delete_ts monotonicity. + * single-type files fall into the VNODE_ID_NONE (-1) bucket and are compacted freely. + * copy-type files and unrecognised-format files are skipped. + */ + Map> groupedStatuses = new LinkedHashMap<>(); + for (Status status : statuses) + { + String path = status.getPath(); + PixelsFileNameUtils.PxlFileType fileType = PixelsFileNameUtils.extractFileType(path); + if (fileType == null) + { + System.err.println("Skipping file with unrecognized naming format: " + path); + continue; + } + if (fileType == PixelsFileNameUtils.PxlFileType.COPY) + { + System.err.println("Skipping copy file (test/benchmark data, not for compaction): " + path); + continue; + } + int vNodeId = PixelsFileNameUtils.extractVirtualNodeId(path); + groupedStatuses.computeIfAbsent(vNodeId, k -> new ArrayList<>()).add(status); + } + List targetPaths = layout.getCompactPaths(); ConcurrentLinkedQueue compactFiles = new ConcurrentLinkedQueue<>(); ConcurrentLinkedQueue compactPaths = new ConcurrentLinkedQueue<>(); int targetPathId = 0; - // compact + // Issue #1305: iterate over each virtualNodeId group independently to preserve + // the monotonicity invariants required by Storage GC. long startTime = System.currentTimeMillis(); - for (int i = 0, thdId = 0; i < statuses.size(); i += numRowGroupInBlock, ++thdId) + int thdId = 0; + for (Map.Entry> entry : groupedStatuses.entrySet()) { - if (i + numRowGroupInBlock > statuses.size()) + int vNodeId = entry.getKey(); + List groupStatuses = entry.getValue(); + int groupSize = groupStatuses.size(); + + for (int i = 0; i < groupSize; ++thdId) { /** * Issue #160: @@ -136,34 +175,38 @@ public void execute(Namespace ns, String command) throws Exception * and rebuild a pure compactLayout for the tail files as the * compactLayout in metadata does not work for the tail files. */ - numRowGroupInBlock = statuses.size() - i; - compactLayout = CompactLayout.buildPure(numRowGroupInBlock, numColumn); - } + // Issue #1305: use local batchSize/batchLayout instead of mutating + // numRowGroupInBlock/compactLayout, so they stay unchanged across batches and groups. + int batchSize = Math.min(numRowGroupInBlock, groupSize - i); + CompactLayout batchLayout = (batchSize < numRowGroupInBlock) + ? CompactLayout.buildPure(batchSize, numColumn) + : compactLayout; - List sourcePaths = new ArrayList<>(); - for (int j = 0; j < numRowGroupInBlock; ++j) - { - if (!statuses.get(i+j).getPath().endsWith("/")) + List sourcePaths = new ArrayList<>(); + for (int j = 0; j < batchSize; ++j) { - sourcePaths.add(statuses.get(i + j).getPath()); + if (!groupStatuses.get(i + j).getPath().endsWith("/")) + { + sourcePaths.add(groupStatuses.get(i + j).getPath()); + } } - } - Path targetPath = targetPaths.get(targetPathId++); - String targetDirPath = targetPath.getUri(); - targetPathId %= targetPaths.size(); - if (!targetDirPath.endsWith("/")) - { - targetDirPath += "/"; - } - String targetFileName = DateUtil.getCurTime() + "_compact.pxl"; - String targetFilePath = targetDirPath + targetFileName; + Path targetPath = targetPaths.get(targetPathId++); + String targetDirPath = targetPath.getUri(); + targetPathId %= targetPaths.size(); + if (!targetDirPath.endsWith("/")) + { + targetDirPath += "/"; + } + // Issue #1305: use unified naming format (hostName + vNodeId) instead of DateUtil timestamp only. + String targetFileName = PixelsFileNameUtils.buildCompactFileName(hostName, vNodeId); + String targetFilePath = targetDirPath + targetFileName; - System.out.println("(" + thdId + ") " + sourcePaths.size() + - " ordered files to be compacted into '" + targetFilePath + "'."); + System.out.println("(" + thdId + ") vNodeId=" + vNodeId + ", " + sourcePaths.size() + + " ordered files to be compacted into '" + targetFilePath + "'."); - PixelsCompactor.Builder compactorBuilder = PixelsCompactor.newBuilder() - .setSourcePaths(sourcePaths) + PixelsCompactor.Builder compactorBuilder = PixelsCompactor.newBuilder() + .setSourcePaths(sourcePaths) /** * Issue #192: * No need to deep copy compactLayout as it is never modified in-place @@ -173,40 +216,46 @@ public void execute(Namespace ns, String command) throws Exception * * Deep copy it if it is in-place modified in the future. */ - .setCompactLayout(compactLayout) - .setInputStorage(orderStorage) - .setOutputStorage(compactStorage) - .setPath(targetFilePath) - .setBlockSize(blockSize) - .setReplication(replication) - .setBlockPadding(false) - .setHasHiddenColumn(true); - - long threadStart = System.currentTimeMillis(); - compactExecutor.execute(() -> { - // Issue #192: run compaction in threads. - try - { - // build() spends some time to read file footers and should be called inside sub-thread. - PixelsCompactor pixelsCompactor = compactorBuilder.build(); - pixelsCompactor.compact(); - pixelsCompactor.close(); - File compactFile = new File(); - compactFile.setName(targetFileName); - compactFile.setType(File.Type.REGULAR); - compactFile.setNumRowGroup(pixelsCompactor.getNumRowGroup()); - compactFile.setPathId(targetPath.getId()); - compactFiles.offer(compactFile); - compactPaths.offer(targetPath); - } catch (IOException e) - { - System.err.println("write compact file '" + targetFilePath + "' failed"); - e.printStackTrace(); - return; - } - System.out.println("Compact file '" + targetFilePath + "' is built in " + - ((System.currentTimeMillis() - threadStart) / 1000.0) + "s"); - }); + .setCompactLayout(batchLayout) + .setInputStorage(orderStorage) + .setOutputStorage(compactStorage) + .setPath(targetFilePath) + .setBlockSize(blockSize) + .setReplication(replication) + .setBlockPadding(false) + .setHasHiddenColumn(true); + + final String finalTargetFileName = targetFileName; + final String finalTargetFilePath = targetFilePath; + final Path finalTargetPath = targetPath; + long threadStart = System.currentTimeMillis(); + + compactExecutor.execute(() -> { + // Issue #192: run compaction in threads. + try + { + PixelsCompactor pixelsCompactor = compactorBuilder.build(); + pixelsCompactor.compact(); + pixelsCompactor.close(); + File compactFile = new File(); + compactFile.setName(finalTargetFileName); + compactFile.setType(File.Type.REGULAR); + compactFile.setNumRowGroup(pixelsCompactor.getNumRowGroup()); + compactFile.setPathId(finalTargetPath.getId()); + compactFiles.offer(compactFile); + compactPaths.offer(finalTargetPath); + } catch (IOException e) + { + System.err.println("write compact file '" + finalTargetFilePath + "' failed"); + e.printStackTrace(); + return; + } + System.out.println("Compact file '" + finalTargetFilePath + "' is built in " + + ((System.currentTimeMillis() - threadStart) / 1000.0) + "s"); + }); + + i += batchSize; + } } // Issue #192: wait for the compaction to complete. diff --git a/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/executor/CopyExecutor.java b/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/executor/CopyExecutor.java index af1254dcba..b89a86e64a 100644 --- a/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/executor/CopyExecutor.java +++ b/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/executor/CopyExecutor.java @@ -24,7 +24,7 @@ import io.pixelsdb.pixels.common.physical.StorageFactory; import io.pixelsdb.pixels.common.utils.ConfigFactory; import io.pixelsdb.pixels.common.utils.Constants; -import io.pixelsdb.pixels.common.utils.DateUtil; +import io.pixelsdb.pixels.common.utils.PixelsFileNameUtils; import net.sourceforge.argparse4j.inf.Namespace; import org.apache.hadoop.io.IOUtils; @@ -82,8 +82,7 @@ public void execute(Namespace ns, String command) throws Exception continue; } String destPath = destination_ + - sourceName.substring(0, sourceName.indexOf(postfix)) + - "_copy_" + DateUtil.getCurTime() + postfix; + PixelsFileNameUtils.buildCopyFileName(sourceName); copyExecutor.execute(() -> { try { diff --git a/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/load/IndexedPixelsConsumer.java b/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/load/IndexedPixelsConsumer.java index df72df6627..5c7c2b3ad7 100644 --- a/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/load/IndexedPixelsConsumer.java +++ b/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/load/IndexedPixelsConsumer.java @@ -34,8 +34,8 @@ import io.pixelsdb.pixels.common.physical.Storage; import io.pixelsdb.pixels.common.physical.StorageFactory; import io.pixelsdb.pixels.common.utils.ConfigFactory; - import io.pixelsdb.pixels.common.utils.DateUtil; import io.pixelsdb.pixels.common.utils.IndexUtils; + import io.pixelsdb.pixels.common.utils.PixelsFileNameUtils; import io.pixelsdb.pixels.common.utils.RetinaUtils; import io.pixelsdb.pixels.core.PixelsWriter; import io.pixelsdb.pixels.core.TypeDescription; @@ -183,7 +183,7 @@ private PerVirtualNodeWriter initializeRetinaWriter(int bucketId) throws IOExcep { targetDirPath += "/"; } - String targetFileName = targetNode.getAddress() + "_" + DateUtil.getCurTime() + "_" + bucketId + ".pxl"; + String targetFileName = PixelsFileNameUtils.buildOrderedFileName(targetNode.getAddress(), targetNode.getVirtualNodeId()); String targetFilePath = targetDirPath + targetFileName; PixelsWriter pixelsWriter = getPixelsWriter(targetStorage, targetFilePath); diff --git a/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/load/SimplePixelsConsumer.java b/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/load/SimplePixelsConsumer.java index 16c3a8fe1f..c2e360ed45 100644 --- a/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/load/SimplePixelsConsumer.java +++ b/pixels-cli/src/main/java/io/pixelsdb/pixels/cli/load/SimplePixelsConsumer.java @@ -26,9 +26,9 @@ import io.pixelsdb.pixels.common.metadata.domain.Path; import io.pixelsdb.pixels.common.physical.Storage; import io.pixelsdb.pixels.common.physical.StorageFactory; - import io.pixelsdb.pixels.common.utils.Constants; - import io.pixelsdb.pixels.common.utils.DateUtil; - import io.pixelsdb.pixels.core.PixelsWriter; +import io.pixelsdb.pixels.common.utils.NetUtils; +import io.pixelsdb.pixels.common.utils.PixelsFileNameUtils; +import io.pixelsdb.pixels.core.PixelsWriter; import io.pixelsdb.pixels.core.PixelsWriterImpl; import io.pixelsdb.pixels.core.TypeDescription; import io.pixelsdb.pixels.core.vector.VectorizedRowBatch; @@ -126,7 +126,7 @@ private void initializePixelsWriter() throws MetadataException, IOException { targetDirPath += "/"; } - String targetFileName = Constants.LOAD_DEFAULT_RETINA_PREFIX + "_" + DateUtil.getCurTime() + ".pxl"; + String targetFileName = PixelsFileNameUtils.buildSingleFileName(NetUtils.getLocalHostName()); String targetFilePath = targetDirPath + targetFileName; pixelsWriter = PixelsWriterImpl.newBuilder() diff --git a/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/NetUtils.java b/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/NetUtils.java new file mode 100644 index 0000000000..a7d3514239 --- /dev/null +++ b/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/NetUtils.java @@ -0,0 +1,69 @@ +/* + * Copyright 2026 PixelsDB. + * + * This file is part of Pixels. + * + * Pixels is free software: you can redistribute it and/or modify + * it under the terms of the Affero GNU General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * Pixels is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Affero GNU General Public License for more details. + * + * You should have received a copy of the Affero GNU General Public + * License along with Pixels. If not, see + * . + */ +package io.pixelsdb.pixels.common.utils; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.net.InetAddress; +import java.net.UnknownHostException; + +/** + * Common network utilities. + * + * @author hank + * @create 2026-03-16 + */ +public final class NetUtils +{ + private static final Logger logger = LogManager.getLogger(NetUtils.class); + + private NetUtils() {} + + /** + * Returns the hostname of the local machine. + * + *

Resolution order: + *

    + *
  1. The {@code HOSTNAME} environment variable — reliable in container / k8s environments + * where {@link InetAddress#getLocalHost()} may return an unpredictable pod address.
  2. + *
  3. {@link InetAddress#getLocalHost()#getHostName()} — standard JVM resolution.
  4. + *
  5. {@code "localhost"} — last-resort fallback; a warning is logged.
  6. + *
+ * + * @return the local hostname, never {@code null} + */ + public static String getLocalHostName() + { + String hostName = System.getenv("HOSTNAME"); + if (hostName != null && !hostName.isEmpty()) + { + return hostName; + } + try + { + return InetAddress.getLocalHost().getHostName(); + } catch (UnknownHostException e) + { + logger.warn("Failed to resolve local hostname via InetAddress, falling back to 'localhost'", e); + return "localhost"; + } + } +} diff --git a/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/PixelsFileNameUtils.java b/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/PixelsFileNameUtils.java new file mode 100644 index 0000000000..621b54c206 --- /dev/null +++ b/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/PixelsFileNameUtils.java @@ -0,0 +1,309 @@ +/* + * Copyright 2026 PixelsDB. + * + * This file is part of Pixels. + * + * Pixels is free software: you can redistribute it and/or modify + * it under the terms of the Affero GNU General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * Pixels is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Affero GNU General Public License for more details. + * + * You should have received a copy of the Affero GNU General Public + * License along with Pixels. If not, see + * . + */ +package io.pixelsdb.pixels.common.utils; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Single source of truth for all {@code .pxl} file name construction and parsing. + * + *

Unified format

+ *
+ *   <hostName>_<yyyyMMddHHmmss>_<count>_<virtualNodeId>_<type>.pxl
+ * 
+ *
    + *
  • {@code hostName} — writer node hostname; guarantees cross-node uniqueness.
  • + *
  • {@code yyyyMMddHHmmss_count} — from {@link DateUtil#getCurTime()}; guarantees + * sub-second uniqueness within the same JVM.
  • + *
  • {@code virtualNodeId} — consistent-hash virtual node ID used by Storage GC to + * group files for rewrite; {@link #VNODE_ID_NONE} ({@code -1}) for {@code single} + * files which carry no vnode affinity.
  • + *
  • {@code type} — {@link PxlFileType} label; distinguishes the write path + * and determines GC eligibility.
  • + *
+ * + *

File types

+ * + * + * + * + * + * + * + *
TypeWriterGC eligible
retina{@code FileWriterManager} (CDC real-time path)yes
ordered{@code IndexedPixelsConsumer} (indexed batch load)yes
compact{@code CompactExecutor}yes
single{@code SimplePixelsConsumer} (non-indexed batch load)no
copy{@code CopyExecutor} (test/benchmark data amplification)no
+ * + *

The {@code \d{14}} timestamp constraint is the parsing anchor: it is long enough that + * no reasonable hostname component will accidentally match it, so group 1 (hostName, which + * may itself contain underscores) resolves unambiguously via regex backtracking. + * + *

Copy-rename operations use {@link #buildCopyFileName}: it parses the source file's + * {@code hostName} and {@code virtualNodeId}, generates a fresh timestamp via + * {@link DateUtil#getCurTime()}, and sets the type to {@link PxlFileType#COPY}. This + * keeps the resulting name fully compliant with the unified format while guaranteeing + * uniqueness across multiple copy iterations ({@code n > 1}). + */ +public final class PixelsFileNameUtils +{ + public static final String PXL_EXTENSION = ".pxl"; + + /** + * Sentinel virtualNodeId for {@link PxlFileType#SINGLE} files that have no + * vnode affinity. Using {@code -1} avoids collision with any real virtualNodeId + * (which is always a non-negative integer produced by consistent hashing). + */ + public static final int VNODE_ID_NONE = -1; + + // ------------------------------------------------------------------------- + // File type + // ------------------------------------------------------------------------- + + /** + * Enumerates the five recognised {@code .pxl} file types. + */ + public enum PxlFileType + { + RETINA("retina"), + ORDERED("ordered"), + COMPACT("compact"), + SINGLE("single"), + /** Test/benchmark copies produced by {@code CopyExecutor}; not GC-eligible. */ + COPY("copy"); + + private final String label; + + PxlFileType(String label) + { + this.label = label; + } + + public String getLabel() + { + return label; + } + + /** + * Returns the {@link PxlFileType} whose label equals {@code label}, + * or {@code null} if no match is found. + */ + public static PxlFileType fromLabel(String label) + { + for (PxlFileType t : values()) + { + if (t.label.equals(label)) + { + return t; + } + } + return null; + } + } + + // ------------------------------------------------------------------------- + // Pattern + // ------------------------------------------------------------------------- + + /** + * Compiled pattern matching the unified {@code .pxl} file name format. + *

+     *   <hostName>_<yyyyMMddHHmmss>_<count>_<virtualNodeId>_<type>.pxl
+     * 
+ * Capture groups: + *
    + *
  1. hostName (may contain underscores)
  2. + *
  3. timestamp — exactly 14 digits (yyyyMMddHHmmss)
  4. + *
  5. atomicCount
  6. + *
  7. virtualNodeId — non-negative integer, or {@code -1} for single files
  8. + *
  9. type label — one of {@code retina|ordered|compact|single|copy}
  10. + *
+ */ + private static final Pattern PXL_PATTERN = Pattern.compile( + "(?:.*/)?(.+)_(\\d{14})_(\\d+)_(-?\\d+)_(retina|ordered|compact|single|copy)\\.pxl$"); + + private PixelsFileNameUtils() {} + + // ------------------------------------------------------------------------- + // Builder — unified entry point + // ------------------------------------------------------------------------- + + /** + * Builds a {@code .pxl} file name in the unified format. + * + * @param hostName writer node hostname (may contain underscores) + * @param virtualNodeId vnode ID, or {@link #VNODE_ID_NONE} for {@link PxlFileType#SINGLE} + * @param type file type + * @return the constructed file name + */ + public static String buildPxlFileName(String hostName, int virtualNodeId, PxlFileType type) + { + return hostName + "_" + DateUtil.getCurTime() + "_" + virtualNodeId + "_" + type.getLabel() + PXL_EXTENSION; + } + + // ------------------------------------------------------------------------- + // Builder — semantic wrappers + // ------------------------------------------------------------------------- + + /** + * Builds a Retina file name (CDC real-time write path). + *

Format: {@code ____retina.pxl} + */ + public static String buildRetinaFileName(String hostName, int virtualNodeId) + { + return buildPxlFileName(hostName, virtualNodeId, PxlFileType.RETINA); + } + + /** + * Builds an Ordered file name (indexed batch load). + *

Format: {@code ____ordered.pxl} + */ + public static String buildOrderedFileName(String hostName, int virtualNodeId) + { + return buildPxlFileName(hostName, virtualNodeId, PxlFileType.ORDERED); + } + + /** + * Builds a Compact file name. + *

Format: {@code ____compact.pxl} + *

The {@code virtualNodeId} must match the virtualNodeId of all source ordered files + * in the compaction batch. + */ + public static String buildCompactFileName(String hostName, int virtualNodeId) + { + return buildPxlFileName(hostName, virtualNodeId, PxlFileType.COMPACT); + } + + /** + * Builds a Single file name (non-indexed batch load, excluded from Storage GC). + *

Format: {@code ___-1_single.pxl} + *

virtualNodeId is always {@link #VNODE_ID_NONE} ({@code -1}) because single files + * are not routed through consistent hashing. + */ + public static String buildSingleFileName(String hostName) + { + return buildPxlFileName(hostName, VNODE_ID_NONE, PxlFileType.SINGLE); + } + + /** + * Builds a copy file name derived from an existing {@code .pxl} file. + * + *

The resulting name conforms to the unified five-segment format: + *

+     *   <hostName>_<newTimestamp>_<newCount>_<virtualNodeId>_copy.pxl
+     * 
+ * {@code hostName} and {@code virtualNodeId} are inherited from {@code originalPath}; + * a fresh {@link DateUtil#getCurTime()} call guarantees uniqueness across multiple + * copy iterations ({@code n > 1}) of the same source file. + * + * @param originalPath original file name or absolute path (must match the unified format) + * @return the constructed copy file name + * @throws IllegalArgumentException if {@code originalPath} does not match the unified format + */ + public static String buildCopyFileName(String originalPath) + { + String hostName = extractHostName(originalPath); + if (hostName == null) + { + throw new IllegalArgumentException( + "Cannot build copy name: source file does not match the unified .pxl format: " + + originalPath); + } + int virtualNodeId = extractVirtualNodeId(originalPath); + return buildPxlFileName(hostName, virtualNodeId, PxlFileType.COPY); + } + + // ------------------------------------------------------------------------- + // Parsing + // ------------------------------------------------------------------------- + + /** + * Extracts the {@code virtualNodeId} from a {@code .pxl} file path. + * + * @param path absolute or relative file path + * @return the parsed virtualNodeId ({@link #VNODE_ID_NONE} for single-type files), + * or {@link #VNODE_ID_NONE} if the path does not match the unified format + */ + public static int extractVirtualNodeId(String path) + { + Matcher m = match(path); + if (m == null) + { + return VNODE_ID_NONE; + } + try + { + return Integer.parseInt(m.group(4)); + } catch (NumberFormatException e) + { + return VNODE_ID_NONE; + } + } + + /** + * Extracts the {@code hostName} from a {@code .pxl} file path. + * + * @param path absolute or relative file path + * @return the hostName, or {@code null} if the path does not match the unified format + */ + public static String extractHostName(String path) + { + Matcher m = match(path); + return (m != null) ? m.group(1) : null; + } + + /** + * Extracts the {@link PxlFileType} from a {@code .pxl} file path. + * + * @param path absolute or relative file path + * @return the file type, or {@code null} if the path does not match the unified format + */ + public static PxlFileType extractFileType(String path) + { + Matcher m = match(path); + return (m != null) ? PxlFileType.fromLabel(m.group(5)) : null; + } + + /** + * Returns {@code true} if the file at {@code path} is eligible for Storage GC, + * i.e. its type is one of {@link PxlFileType#RETINA}, {@link PxlFileType#ORDERED}, + * or {@link PxlFileType#COMPACT}. + * + *

{@link PxlFileType#SINGLE} and {@link PxlFileType#COPY} files, as well as + * unrecognised paths, return {@code false}. + */ + public static boolean isGcEligible(String path) + { + PxlFileType type = extractFileType(path); + return type == PxlFileType.RETINA || type == PxlFileType.ORDERED || type == PxlFileType.COMPACT; + } + + // ------------------------------------------------------------------------- + // Internal helpers + // ------------------------------------------------------------------------- + + private static Matcher match(String path) + { + if (path == null || path.isEmpty()) + { + return null; + } + Matcher m = PXL_PATTERN.matcher(path); + return m.matches() ? m : null; + } +} diff --git a/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/RetinaUtils.java b/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/RetinaUtils.java index 143a43d846..3545aa59ce 100644 --- a/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/RetinaUtils.java +++ b/pixels-common/src/main/java/io/pixelsdb/pixels/common/utils/RetinaUtils.java @@ -27,7 +27,7 @@ import io.pixelsdb.pixels.common.retina.RetinaService; import io.pixelsdb.pixels.daemon.NodeProto; - public class RetinaUtils +public class RetinaUtils { public static final String CHECKPOINT_PREFIX_GC = "vis_gc_"; public static final String CHECKPOINT_PREFIX_OFFLOAD = "vis_offload_"; @@ -107,10 +107,14 @@ public static RetinaService getRetinaServiceFromBucketId(int bucketId) return RetinaService.CreateInstance(retinaHost, getInstance().defaultRetinaPort); } - public static RetinaService getRetinaServiceFromPath(String path) + public static RetinaService getRetinaServiceFromPath(String path) { - String retinaHost = extractRetinaHostNameFromPath(path); - if(retinaHost == null || retinaHost.equals(Constants.LOAD_DEFAULT_RETINA_PREFIX)) + if (!PixelsFileNameUtils.isGcEligible(path)) + { + return RetinaService.Instance(); + } + String retinaHost = PixelsFileNameUtils.extractHostName(path); + if (retinaHost == null) { return RetinaService.Instance(); } @@ -126,19 +130,4 @@ public static String getCheckpointPrefix(String typePrefix, String hostname) { return typePrefix + hostname + "_"; } - - private static String extractRetinaHostNameFromPath(String path) - { - if (path == null || path.isEmpty()) { - return null; - } - int lastSlashIndex = path.lastIndexOf('/'); - String baseName = (lastSlashIndex == -1) ? path : path.substring(lastSlashIndex + 1); - int firstUnderscoreIndex = baseName.indexOf('_'); - if (firstUnderscoreIndex > 0) { - // The substring from the start of baseName up to (but not including) the first underscore is the hostname. - return baseName.substring(0, firstUnderscoreIndex); - } - return null; - } } diff --git a/pixels-core/src/test/java/io/pixelsdb/pixels/core/compactor/TestPixelsCompactor.java b/pixels-core/src/test/java/io/pixelsdb/pixels/core/compactor/TestPixelsCompactor.java index e02f6af155..cd0fa02783 100644 --- a/pixels-core/src/test/java/io/pixelsdb/pixels/core/compactor/TestPixelsCompactor.java +++ b/pixels-core/src/test/java/io/pixelsdb/pixels/core/compactor/TestPixelsCompactor.java @@ -26,7 +26,8 @@ import io.pixelsdb.pixels.common.physical.Status; import io.pixelsdb.pixels.common.physical.Storage; import io.pixelsdb.pixels.common.physical.StorageFactory; -import io.pixelsdb.pixels.common.utils.DateUtil; +import io.pixelsdb.pixels.common.utils.NetUtils; +import io.pixelsdb.pixels.common.utils.PixelsFileNameUtils; import io.pixelsdb.pixels.core.PixelsReader; import io.pixelsdb.pixels.core.PixelsReaderImpl; import io.pixelsdb.pixels.core.reader.PixelsReaderOption; @@ -69,7 +70,6 @@ public void testBasicCompact() List statuses = storage.listStatus("hdfs://node01:9000/pixels/pixels/test_105/v_0_order"); // compact - int NO = 0; for (int i = 0; i < statuses.size(); i += 16) { List sourcePaths = new ArrayList<>(); @@ -80,9 +80,7 @@ public void testBasicCompact() long start = System.currentTimeMillis(); String filePath = "hdfs://node01:9000/pixels/pixels/test_105/v_0_compact/" + - NO + "_" + - DateUtil.getCurTime() + - ".compact.pxl"; + PixelsFileNameUtils.buildCompactFileName(NetUtils.getLocalHostName(), 0); PixelsCompactor pixelsCompactor = PixelsCompactor.newBuilder() .setSourcePaths(sourcePaths) @@ -97,8 +95,6 @@ public void testBasicCompact() pixelsCompactor.compact(); pixelsCompactor.close(); - NO++; - System.out.println(((System.currentTimeMillis() - start) / 1000.0) + " s for [" + filePath + "]"); } } @@ -131,22 +127,18 @@ public void testRealCompact() layout.getVersion() + "_order"); // compact - int NO = 0; for (int i = 0; i + compact.getNumRowGroupInFile() < statuses.size(); i += compact.getNumRowGroupInFile()) { List sourcePaths = new ArrayList<>(); for (int j = 0; j < compact.getNumRowGroupInFile(); ++j) { - //System.out.println(statuses[i+j].getPath().toString()); sourcePaths.add(statuses.get(i + j).getPath()); } long start = System.currentTimeMillis(); String filePath = "hdfs://node01:9000/pixels/pixels/test_105/v_" + layout.getVersion() + "_compact/" + - NO + "_" + - DateUtil.getCurTime() + - ".compact.pxl"; + PixelsFileNameUtils.buildCompactFileName(NetUtils.getLocalHostName(), 0); PixelsCompactor pixelsCompactor = PixelsCompactor.newBuilder() .setSourcePaths(sourcePaths) @@ -161,8 +153,6 @@ public void testRealCompact() pixelsCompactor.compact(); pixelsCompactor.close(); - NO++; - System.out.println(((System.currentTimeMillis() - start) / 1000.0) + " s for [" + filePath + "]"); } } diff --git a/pixels-daemon/src/main/java/io/pixelsdb/pixels/daemon/heartbeat/HeartbeatWorker.java b/pixels-daemon/src/main/java/io/pixelsdb/pixels/daemon/heartbeat/HeartbeatWorker.java index f5629d4579..78201b6260 100644 --- a/pixels-daemon/src/main/java/io/pixelsdb/pixels/daemon/heartbeat/HeartbeatWorker.java +++ b/pixels-daemon/src/main/java/io/pixelsdb/pixels/daemon/heartbeat/HeartbeatWorker.java @@ -24,12 +24,11 @@ import io.pixelsdb.pixels.common.server.Server; import io.pixelsdb.pixels.common.utils.Constants; import io.pixelsdb.pixels.common.utils.EtcdUtil; +import io.pixelsdb.pixels.common.utils.NetUtils; import io.pixelsdb.pixels.daemon.NodeProto; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ -55,20 +54,7 @@ public class HeartbeatWorker implements Server public HeartbeatWorker(NodeProto.NodeRole role) { this.role = role; - this.hostName = System.getenv("HOSTNAME"); - logger.debug("HostName from system env: {}", hostName); - if (hostName == null) - { - try - { - this.hostName = InetAddress.getLocalHost().getHostName(); - logger.debug("HostName from InetAddress: {}", hostName); - } catch (UnknownHostException e) - { - logger.debug("Hostname is null. Exit"); - return; - } - } + this.hostName = NetUtils.getLocalHostName(); logger.debug("HostName: {}", hostName); initialize(); } diff --git a/pixels-retina/src/main/java/io/pixelsdb/pixels/retina/FileWriterManager.java b/pixels-retina/src/main/java/io/pixelsdb/pixels/retina/FileWriterManager.java index dc63279c77..8a4960e1e7 100644 --- a/pixels-retina/src/main/java/io/pixelsdb/pixels/retina/FileWriterManager.java +++ b/pixels-retina/src/main/java/io/pixelsdb/pixels/retina/FileWriterManager.java @@ -25,7 +25,7 @@ import io.pixelsdb.pixels.common.metadata.domain.File; import io.pixelsdb.pixels.common.metadata.domain.Path; import io.pixelsdb.pixels.common.physical.Storage; -import io.pixelsdb.pixels.common.utils.DateUtil; +import io.pixelsdb.pixels.common.utils.PixelsFileNameUtils; import io.pixelsdb.pixels.core.PixelsWriter; import io.pixelsdb.pixels.core.PixelsWriterImpl; import io.pixelsdb.pixels.core.TypeDescription; @@ -76,7 +76,7 @@ public FileWriterManager(long tableId, TypeDescription schema, this.virtualNodeId = virtualNodeId; // Create pixels writer. - String targetFileName = hostName + "_" + DateUtil.getCurTime() + ".pxl"; + String targetFileName = PixelsFileNameUtils.buildRetinaFileName(hostName, virtualNodeId); String targetFilePath = targetOrderedDirPath.getUri() + "/" + targetFileName; try { diff --git a/pixels-retina/src/main/java/io/pixelsdb/pixels/retina/RetinaResourceManager.java b/pixels-retina/src/main/java/io/pixelsdb/pixels/retina/RetinaResourceManager.java index 040374e2b6..56b78883e8 100644 --- a/pixels-retina/src/main/java/io/pixelsdb/pixels/retina/RetinaResourceManager.java +++ b/pixels-retina/src/main/java/io/pixelsdb/pixels/retina/RetinaResourceManager.java @@ -32,6 +32,7 @@ import io.pixelsdb.pixels.common.transaction.TransService; import io.pixelsdb.pixels.common.utils.CheckpointFileIO; import io.pixelsdb.pixels.common.utils.ConfigFactory; +import io.pixelsdb.pixels.common.utils.NetUtils; import io.pixelsdb.pixels.common.utils.RetinaUtils; import io.pixelsdb.pixels.core.PixelsProto; import io.pixelsdb.pixels.core.TypeDescription; @@ -40,8 +41,7 @@ import org.apache.logging.log4j.Logger; import java.io.*; -import java.net.InetAddress; -import java.net.UnknownHostException; + import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.file.Paths; @@ -123,18 +123,7 @@ private RetinaResourceManager() } this.gcExecutor = executor; totalVirtualNodeNum = Integer.parseInt(ConfigFactory.Instance().getProperty("node.virtual.num")); - this.retinaHostName = System.getenv("HOSTNAME"); - if (retinaHostName == null) - { - try - { - this.retinaHostName = InetAddress.getLocalHost().getHostName(); - logger.debug("HostName from InetAddress: {}", retinaHostName); - } catch (UnknownHostException e) - { - logger.error("Failed to get retina hostname", e); - } - } + this.retinaHostName = NetUtils.getLocalHostName(); } private static final class InstanceHolder